Memory Querying
Introduction to Memory Querying
The Memory Codex provides powerful capabilities for querying and retrieving Earth Memories. This guide covers the query API, spatial and temporal filtering, and advanced query techniques for accessing Earth observation data.
Basic Query Structure
Memory queries in the Earth Memory framework follow a consistent structure:
from memories.earth import MemoryCodex
# Initialize the Memory Codex
codex = MemoryCodex()
# Basic memory query
result = codex.query(
memory_types=["VegetationMemory"], # Type of memory to query
region=(40.7, -74.0, 41.0, -73.7), # Spatial extent (S, W, N, E)
time_range=("2023-01-01", "2023-12-31"), # Temporal extent
variables=["ndvi", "evi"], # Data variables to retrieve
resolution="30m", # Desired spatial resolution
aggregation=None # No aggregation (raw data)
)
# Print basic information about the result
print(f"Retrieved {result.size} memories")
print(f"Spatial coverage: {result.spatial_coverage.area} km²")
print(f"Temporal range: {result.temporal_range}")
print(f"Variables: {result.variables}")
Spatial Queries
Query memories based on spatial criteria:
# Query by bounding box
by_bbox = codex.query(
region=(40.7, -74.0, 41.0, -73.7), # NYC area bounding box
time="latest"
)
# Query by geometry
from shapely.geometry import Polygon
# Define a polygon for the query region
polygon = Polygon([(-74.0, 40.7), (-73.7, 40.7),
(-73.7, 41.0), (-74.0, 41.0)])
by_polygon = codex.query(
region=polygon,
time="latest"
)
# Query by named geographic area
by_name = codex.query(
region="amazon-basin", # Named region
time="latest"
)
# Query by distance from point
near_point = codex.query(
near_point=(40.7, -74.0), # Latitude, longitude
distance=50, # km
time="latest"
)
Temporal Queries
Query memories based on temporal criteria:
# Query at a specific point in time
at_time = codex.query(
region="global",
time="2023-06-15T12:00:00Z" # Specific timestamp
)
# Query for the latest available data
latest = codex.query(
region="global",
time="latest"
)
# Query for a time range
date_range = codex.query(
region="global",
time_range=("2023-01-01", "2023-12-31")
)
# Query with temporal resolution
monthly = codex.query(
region="global",
time_range=("2023-01-01", "2023-12-31"),
temporal_resolution="monthly"
)
# Query relative to current time
recent = codex.query(
region="global",
time_range="last-30-days"
)
# Query for a specific season across years
summer_pattern = codex.query(
region="global",
time_pattern={
"years": [2020, 2021, 2022, 2023],
"months": [6, 7, 8] # June, July, August
}
)
Filtering and Selection
Filter memories based on specific criteria:
# Filter by variable values
high_ndvi = codex.query(
memory_types=["VegetationMemory"],
region="amazon-basin",
time_range=("2023-01-01", "2023-12-31"),
filters={
"ndvi": {"min": 0.6} # Only areas with NDVI of at least 0.6
}
)
# Filter by metadata attributes
sentinel_data = codex.query(
region="europe",
time_range=("2023-01-01", "2023-12-31"),
metadata_filters={
"sensor": "sentinel-2",
"cloud_cover": {"max": 20}
}
)
# Filter by quality indicators
quality_data = codex.query(
region="africa",
time_range=("2023-01-01", "2023-12-31"),
quality_filters={
"minimum_valid_pixels": 70, # At least 70% valid pixels
"qa_flags": ["clear", "water"] # Only pixels with these QA flags
}
)
# Complex filtering with logical operators
from memories.earth.query import And, Or, Not
complex_filter = codex.query(
region="global",
time="latest",
complex_filter=And(
Or(
{"ndvi": {"min": 0.7}},
{"evi": {"min": 0.6}}
),
Not({"qa_flags": "cloud"})
)
)
Aggregation and Statistics
Retrieve aggregated statistics from memories:
# Spatial aggregation
mean_by_region = codex.query(
region="europe",
time_range=("2023-01-01", "2023-12-31"),
aggregation="spatial_mean"
)
# Temporal aggregation
annual_means = codex.query(
region="europe",
time_range=("2020-01-01", "2023-12-31"),
aggregation="temporal_mean",
aggregation_period="yearly"
)
# Zonal statistics
from memories.earth.query import ZonalAggregation
import geopandas as gpd
# Load administrative boundaries
countries = gpd.read_file("path/to/countries.geojson")
# Calculate zonal statistics by country
zonal_stats = codex.query(
region="europe",
time="2023-06-15",
aggregation=ZonalAggregation(
zones=countries,
statistics=["mean", "min", "max", "std"],
zone_identity_field="ISO_A3"
)
)
# Print results
for country_code, stats in zonal_stats.items():
print(f"Country: {country_code}")
print(f" Mean temperature: {stats['temperature']['mean']:.1f}°C")
print(f" Temperature range: {stats['temperature']['min']:.1f} - {stats['temperature']['max']:.1f}°C")
Query Across Memory Types
Query multiple memory types in a single operation:
# Query across different memory types
combined = codex.query(
memory_types=["TemperatureMemory", "PrecipitationMemory", "VegetationMemory"],
region="amazon-basin",
time_range=("2023-01-01", "2023-12-31"),
temporal_resolution="monthly"
)
# Calculate relationship between temperature and vegetation
correlation = combined.calculate_correlation(
variable_pairs=[("temperature", "ndvi")],
method="pearson"
)
print(f"Temperature-NDVI correlation: {correlation['temperature']['ndvi']:.3f}")
Working with Query Results
Results from memory queries can be processed in various ways:
# Query monthly temperature memories for Europe
result = codex.query(
memory_types=["TemperatureMemory"],
region="europe",
time_range=("2023-01-01", "2023-12-31"),
temporal_resolution="monthly"
)
# Convert to xarray for analysis
ds = result.to_xarray()
# Calculate monthly anomalies
climatology = ds.groupby("time.month").mean()
anomalies = ds.groupby("time.month") - climatology
# Export to other formats
result.to_netcdf("temperature_2023.nc")
result.to_geotiff("temperature_2023.tif")
result.to_zarr("temperature_2023.zarr")
# Plot the data
result.plot(
variable="temperature",
time="2023-07-15",
cmap="RdBu_r",
vmin=10, vmax=35,
title="July 2023 Temperature"
)
Advanced Query Capabilities
The Memory Codex supports advanced query capabilities for complex scenarios:
Spatiotemporal Patterns
Search for specific spatiotemporal patterns:
from memories.earth.query import SpatiotemporalPattern
# Define a pattern to search for
drought_pattern = SpatiotemporalPattern(
variables=["soil_moisture", "precipitation", "temperature"],
pattern_definition={
"soil_moisture": {"trend": "decreasing", "duration": "60 days", "magnitude": "severe"},
"precipitation": {"anomaly": "negative", "duration": "60 days", "percentile": 10},
"temperature": {"anomaly": "positive", "duration": "30 days", "percentile": 90}
}
)
# Search for the pattern
drought_events = codex.query_pattern(
pattern=drought_pattern,
region="western-us",
time_range=("2000-01-01", "2023-12-31")
)
# Print detected events
for event in drought_events:
print(f"Drought event detected:")
print(f" Region: {event.region}")
print(f" Start date: {event.start_date}")
print(f" End date: {event.end_date}")
print(f" Severity: {event.severity}")
Memory Similarity Search
Find memories similar to a reference memory:
from memories.earth.query import SimilarityQuery
# Get a reference memory
reference = codex.get_memory("amazon-drought-2015")
# Find similar events
similar_events = codex.query_similarity(
reference=reference,
search_space={
"region": "south-america",
"time_range": ("2000-01-01", "2023-12-31"),
"memory_types": ["DroughtMemory"]
},
similarity_metrics=["pattern", "intensity", "spatial_extent"],
top_k=5
)
# Print similar events
for idx, event in enumerate(similar_events):
print(f"#{idx+1} Similar event: {event.name}")
print(f" Similarity score: {event.similarity:.2f}")
print(f" Time period: {event.start_date} to {event.end_date}")
print(f" Pattern similarity: {event.similarity_components['pattern']:.2f}")
print(f" Intensity similarity: {event.similarity_components['intensity']:.2f}")
Anomaly Detection
Detect anomalies in Earth memory data:
from memories.earth.query import AnomalyDetection
# Configure anomaly detection
anomaly_detector = AnomalyDetection(
method="isolation_forest",
baseline_period=("2000-01-01", "2020-12-31"),
variables=["temperature", "precipitation"],
contamination=0.05, # Expected proportion of anomalies
seasonality=True
)
# Detect anomalies
anomalies = codex.query_anomalies(
detector=anomaly_detector,
region="global",
time_range=("2021-01-01", "2023-12-31"),
temporal_resolution="monthly"
)
# Print anomalies
for anomaly in anomalies:
print(f"Anomaly detected:")
print(f" Region: {anomaly.region}")
print(f" Time: {anomaly.time}")
print(f" Type: {anomaly.type}")
print(f" Severity: {anomaly.severity}")
print(f" Contributing variables: {anomaly.contributing_variables}")
Cross-modal Queries
Query across different data modalities:
# Query relating satellite imagery and ground measurements
cross_modal = codex.cross_modal_query(
primary_modal={
"memory_types": ["SatelliteImagery"],
"variables": ["rgb"]
},
secondary_modal={
"memory_types": ["GroundSensorNetwork"],
"variables": ["air_quality"]
},
region="urban-areas",
time_range=("2023-01-01", "2023-12-31"),
relationship="co-located",
max_distance=1000, # meters
max_time_difference="1 day"
)
# Analyze the relationship between modalities
for pair in cross_modal:
print(f"Matching pair:")
print(f" Satellite image: {pair.primary.id}")
print(f" Ground measurement: {pair.secondary.id}")
print(f" Spatial distance: {pair.spatial_distance} meters")
print(f" Temporal distance: {pair.temporal_distance} hours")
Scheduled and Persistent Queries
Set up scheduled queries that run automatically:
from memories.earth.query import ScheduledQuery
# Define a query to run daily
daily_monitoring = ScheduledQuery(
name="global-temperature-monitoring",
query={
"memory_types": ["TemperatureMemory"],
"region": "global",
"time": "latest",
"variables": ["temperature"],
"aggregation": "spatial_mean"
},
schedule="daily at 00:00 UTC",
store_results=True,
result_retention="90 days",
notifications={
"on_completion": True,
"email": "alerts@example.org"
}
)
# Register the scheduled query
query_id = codex.register_scheduled_query(daily_monitoring)
# Update an existing scheduled query
codex.update_scheduled_query(
query_id=query_id,
updates={
"schedule": "daily at 06:00 UTC",
"notifications": {
"on_completion": True,
"on_error": True,
"email": ["alerts@example.org", "admin@example.org"]
}
}
)
# List all scheduled queries
scheduled_queries = codex.list_scheduled_queries()
for query in scheduled_queries:
print(f"Query: {query.name} (ID: {query.id})")
print(f" Schedule: {query.schedule}")
print(f" Last run: {query.last_run}")
print(f" Status: {query.status}")
Query Optimization
Optimize query performance for different scenarios:
# Standard query without optimization
standard_query = codex.query(
memory_types=["SatelliteImagery"],
region="europe",
time_range=("2023-01-01", "2023-12-31")
)
# Query with performance optimization
optimized_query = codex.query(
memory_types=["SatelliteImagery"],
region="europe",
time_range=("2023-01-01", "2023-12-31"),
optimization={
"strategy": "performance",
"cache": True,
"parallel": True,
"chunk_size": (1024, 1024),
"max_memory": "16GB"
}
)
# Query with storage tier optimizations
tier_optimized = codex.query(
memory_types=["TemperatureMemory"],
region="global",
time_range=("2000-01-01", "2023-12-31"),
optimization={
"strategy": "storage_aware",
"prefer_tiers": ["warm", "cold"],
"allow_degraded_resolution": True,
"max_retrieval_time": "5 minutes"
}
)
Building Search Indexes
Create and use search indexes to accelerate common queries:
from memories.earth.index import MemorySearchIndex
# Create a spatial search index
spatial_index = MemorySearchIndex(
name="vegetation-spatial-index",
memory_types=["VegetationMemory"],
index_type="spatial",
resolution="1km",
update_frequency="weekly"
)
# Register the index with the codex
codex.register_index(spatial_index)
# Create a temporal search index
temporal_index = MemorySearchIndex(
name="temperature-temporal-index",
memory_types=["TemperatureMemory"],
index_type="temporal",
granularity="daily",
update_frequency="daily"
)
# Register the index with the codex
codex.register_index(temporal_index)
# Use indexes in queries
indexed_query = codex.query(
memory_types=["VegetationMemory"],
region="amazon-basin",
time="latest",
use_index=True # Let system choose appropriate index
)
# Explicitly specify index
specific_index_query = codex.query(
memory_types=["VegetationMemory"],
region="amazon-basin",
time="latest",
index="vegetation-spatial-index"
)
Custom Query Extensions
Extend the query system with custom functions:
from memories.earth.query import QueryExtension
# Define a custom query extension
class VegetationStressDetector(QueryExtension):
"""Custom extension to detect vegetation stress conditions."""
def __init__(self, drought_threshold=-1.5, heat_threshold=35.0):
self.drought_threshold = drought_threshold
self.heat_threshold = heat_threshold
def process(self, query_result):
"""Process query results to detect vegetation stress."""
# Implementation details...
return stress_areas
# Register the extension
codex.register_query_extension(VegetationStressDetector)
# Use the extension in a query
stress_query = codex.query(
memory_types=["VegetationMemory", "TemperatureMemory", "PrecipitationMemory"],
region="western-us",
time="latest",
extensions=[
VegetationStressDetector(drought_threshold=-2.0, heat_threshold=37.0)
]
)
# Get the extension results
stress_areas = stress_query.get_extension_result("VegetationStressDetector")
print(f"Detected {len(stress_areas)} areas under vegetation stress")
Multi-source Data Fusion
Fuse data from multiple sources in a single query:
from memories.earth.query import DataFusion
# Define a data fusion operation
drought_index_fusion = DataFusion(
name="combined-drought-index",
sources=[
{"memory_type": "PrecipitationMemory", "variable": "spi", "weight": 0.4},
{"memory_type": "SoilMoistureMemory", "variable": "percentile", "weight": 0.4},
{"memory_type": "VegetationMemory", "variable": "vhi", "weight": 0.2}
],
fusion_method="weighted_average",
normalization="min_max",
output_range=(0, 1)
)
# Execute a query with the fusion
drought_conditions = codex.query(
region="western-us",
time="latest",
fusion=drought_index_fusion
)
# Access the fused data
fused_index = drought_conditions.get_fused_data()
# Plot the results
drought_conditions.plot(
variable="combined-drought-index",
cmap="YlOrBr_r",
vmin=0, vmax=1,
title="Combined Drought Index (Higher = More Severe)"
)
Next Steps
After learning about memory querying:
Explore data visualization options in the visualization guide (visualization)
Learn about creating custom analyses in the custom analyses guide (../analysis/custom_analyses)
Set up automated processing workflows in the Workflows guide