This guide shows how to create cloud-free composites from Sentinel-2 exports using the Scene Classification Layer (SCL) band for cloud masking.
Prerequisites: First, make sure to export your Sentinel-2 data from the Export tab in Earthscale. Your export must include the SCL band. When exporting Sentinel-2 data from Earthscale, make sure to select SCL alongside your spectral bands (e.g., B02, B03, B04).
The output files maintain the same CRS, resolution, and extent as your input export.
You can directly drag and drop to upload these files back into Earthscale for sharing or analysis. Or, you can pull them into software like QGIS and ArcGIS for further analysis.
#!/usr/bin/env python3
"""
Create cloud-free composites from Sentinel-2 exports using SCL band.
Usage: python cloud_free_composite.py /path/to/export/directory
"""
import sys
import re
from pathlib import Path
from collections import defaultdict
import numpy as np
import rasterio
from rasterio.enums import Resampling
def find_files(export_dir: Path) -> dict[str, dict[str, Path]]:
"""
Discover all timesteps and bands in the export directory.
Returns: {timestamp: {band: filepath}}
"""
pattern = re.compile(r"time-([0-9T]+)_(.+)\.tif$")
files = defaultdict(dict)
for tif in export_dir.glob("*.tif"):
match = pattern.match(tif.name)
if match:
timestamp, band = match.groups()
files[timestamp][band] = tif
return dict(files)
def create_cloud_mask(scl_data: np.ndarray) -> np.ndarray:
"""
Create a boolean mask where True = clear pixel.
Clear pixels are SCL values 4 (vegetation), 5 (bare soil), 6 (water).
"""
return np.isin(scl_data, [4, 5, 6])
def create_composite(
export_dir: Path,
output_dir: Path,
method: str = "median"
) -> list[Path]:
"""
Create cloud-free composites for all bands found in the export.
Args:
export_dir: Path to the export directory containing TIF files
output_dir: Path to save composite TIFs
method: Compositing method - "median" or "first" (first clear pixel)
Returns:
List of created composite file paths
"""
output_dir.mkdir(parents=True, exist_ok=True)
# Discover files
files_by_time = find_files(export_dir)
if not files_by_time:
raise ValueError(f"No TIF files found in {export_dir}")
timestamps = sorted(files_by_time.keys())
print(f"Found {len(timestamps)} timesteps")
# Get all bands (excluding SCL)
all_bands = set()
for bands in files_by_time.values():
all_bands.update(b for b in bands.keys() if b != "SCL")
all_bands = sorted(all_bands)
print(f"Found bands: {all_bands}")
# Check that SCL exists for all timesteps
for ts in timestamps:
if "SCL" not in files_by_time[ts]:
raise ValueError(f"Missing SCL band for timestep {ts}")
# Read reference file for metadata
ref_file = next(iter(next(iter(files_by_time.values())).values()))
with rasterio.open(ref_file) as src:
profile = src.profile.copy()
height, width = src.height, src.width
created_files = []
# Process each band
for band in all_bands:
print(f"Processing {band}...")
# Stack all timesteps for this band
stack = []
masks = []
for ts in timestamps:
if band not in files_by_time[ts]:
print(f" Warning: {band} missing for {ts}, skipping")
continue
# Read band data
with rasterio.open(files_by_time[ts][band]) as src:
data = src.read(1)
# Read corresponding SCL
with rasterio.open(files_by_time[ts]["SCL"]) as src:
scl = src.read(1)
clear_mask = create_cloud_mask(scl)
stack.append(data)
masks.append(clear_mask)
if not stack:
print(f" No data found for {band}, skipping")
continue
# Stack arrays
stack = np.array(stack)
masks = np.array(masks)
# Apply cloud mask - set cloudy pixels to NaN for median calculation
masked_stack = stack.astype(np.float32)
masked_stack[~masks] = np.nan
# Compute composite
if method == "median":
with np.errstate(all='ignore'):
composite = np.nanmedian(masked_stack, axis=0)
elif method == "first":
# First valid (clear) pixel
composite = np.full((height, width), np.nan, dtype=np.float32)
for i in range(len(stack)):
valid = masks[i] & np.isnan(composite)
composite[valid] = stack[i][valid]
else:
raise ValueError(f"Unknown method: {method}")
# Handle remaining NaN (no clear observations)
composite = np.nan_to_num(composite, nan=0).astype(profile['dtype'])
# Write output
output_path = output_dir / f"composite_{band}.tif"
profile.update(count=1, compress='deflate')
with rasterio.open(output_path, 'w', **profile) as dst:
dst.write(composite, 1)
created_files.append(output_path)
print(f" Saved {output_path}")
return created_files
def create_rgb_composite(
output_dir: Path,
red_band: str = "B04",
green_band: str = "B03",
blue_band: str = "B02"
) -> Path | None:
"""
Create an RGB GeoTIFF from individual band composites.
"""
red_path = output_dir / f"composite_{red_band}.tif"
green_path = output_dir / f"composite_{green_band}.tif"
blue_path = output_dir / f"composite_{blue_band}.tif"
if not all(p.exists() for p in [red_path, green_path, blue_path]):
print("RGB bands not all present, skipping RGB composite")
return None
with rasterio.open(red_path) as src:
red = src.read(1)
profile = src.profile.copy()
with rasterio.open(green_path) as src:
green = src.read(1)
with rasterio.open(blue_path) as src:
blue = src.read(1)
rgb_path = output_dir / "composite_RGB.tif"
profile.update(count=3, compress='deflate')
with rasterio.open(rgb_path, 'w', **profile) as dst:
dst.write(red, 1)
dst.write(green, 2)
dst.write(blue, 3)
print(f"Saved RGB composite: {rgb_path}")
return rgb_path
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python cloud_free_composite.py <export_directory> [output_directory]")
print("Example: python cloud_free_composite.py ./Sentinel-2_L2A_Dec_18_2025_04_47_PM")
sys.exit(1)
export_dir = Path(sys.argv[1])
output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else export_dir / "composites"
if not export_dir.exists():
print(f"Error: Directory not found: {export_dir}")
sys.exit(1)
print(f"Creating cloud-free composites from: {export_dir}")
print(f"Output directory: {output_dir}")
print()
created = create_composite(export_dir, output_dir, method="median")
print()
# Create RGB if we have the standard bands
create_rgb_composite(output_dir)
print()
print(f"Done! Created {len(created)} composite files in {output_dir}")
# Install dependencies
pip install rasterio numpy
# Run on your export
python cloud_free_composite.py ./Sentinel-2_L2A_Dec_18_2025_04_47_PM
# Or specify a custom output directory
python cloud_free_composite.py ./Sentinel-2_L2A_Dec_18_2025_04_47_PM ./my_composites
#!/bin/bash
# Cloud-free composite using GDAL
# Usage: ./gdal_composite.sh /path/to/export/directory
set -e
EXPORT_DIR="${1:-.}"
OUTPUT_DIR="${2:-${EXPORT_DIR}/composites}"
mkdir -p "$OUTPUT_DIR"
echo "Processing files in: $EXPORT_DIR"
echo "Output directory: $OUTPUT_DIR"
# Find all unique bands (excluding SCL)
BANDS=$(ls "$EXPORT_DIR"/time-*_*.tif 2>/dev/null | \
sed 's/.*_\([^_]*\)\.tif/\1/' | \
sort -u | \
grep -v SCL)
echo "Found bands: $BANDS"
# Find all timestamps
TIMESTAMPS=$(ls "$EXPORT_DIR"/time-*_*.tif 2>/dev/null | \
sed 's/.*time-\([0-9T]*\)_.*/\1/' | \
sort -u)
echo "Found $(echo "$TIMESTAMPS" | wc -w | tr -d ' ') timesteps"
echo
for BAND in $BANDS; do
echo "Processing $BAND..."
MASKED_FILES=""
for TS in $TIMESTAMPS; do
BAND_FILE="$EXPORT_DIR/time-${TS}_${BAND}.tif"
SCL_FILE="$EXPORT_DIR/time-${TS}_SCL.tif"
MASKED_FILE="$OUTPUT_DIR/masked_${TS}_${BAND}.tif"
if [[ -f "$BAND_FILE" && -f "$SCL_FILE" ]]; then
# Apply cloud mask: keep pixels where SCL is 4, 5, or 6
gdal_calc.py \
-A "$BAND_FILE" \
-B "$SCL_FILE" \
--outfile="$MASKED_FILE" \
--calc="numpy.where((B==4)|(B==5)|(B==6), A, 0)" \
--type=UInt16 \
--NoDataValue=0 \
--quiet
MASKED_FILES="$MASKED_FILES $MASKED_FILE"
fi
done
# Create VRT stack of all masked files
VRT_FILE="$OUTPUT_DIR/stack_${BAND}.vrt"
gdalbuildvrt -separate "$VRT_FILE" $MASKED_FILES
# Compute median composite using gdal_calc with the stack
# Note: For true median, we use a Python approach.
# This creates a maximum composite (last clear pixel wins)
COMPOSITE_FILE="$OUTPUT_DIR/composite_${BAND}.tif"
# Simple approach: use gdal_merge to combine (uses last valid value)
gdal_merge.py \
-o "$COMPOSITE_FILE" \
-n 0 \
-a_nodata 0 \
-co COMPRESS=DEFLATE \
$MASKED_FILES
# Cleanup temp files
rm -f $MASKED_FILES "$VRT_FILE"
echo " Created: $COMPOSITE_FILE"
done
# Create RGB composite if we have B04, B03, B02
if [[ -f "$OUTPUT_DIR/composite_B04.tif" && \
-f "$OUTPUT_DIR/composite_B03.tif" && \
-f "$OUTPUT_DIR/composite_B02.tif" ]]; then
echo
echo "Creating RGB composite..."
gdal_merge.py \
-o "$OUTPUT_DIR/composite_RGB.tif" \
-separate \
-co COMPRESS=DEFLATE \
"$OUTPUT_DIR/composite_B04.tif" \
"$OUTPUT_DIR/composite_B03.tif" \
"$OUTPUT_DIR/composite_B02.tif"
echo " Created: $OUTPUT_DIR/composite_RGB.tif"
fi
echo
echo "Done! Composites saved to: $OUTPUT_DIR"
# Make it executable
chmod +x gdal_composite.sh
# Run on your export
./gdal_composite.sh ./Sentinel-2_L2A_Dec_18_2025_04_47_PM