Introduce monitoring module: add EndpointMonitor for API endpoint checks and SchemaRegistry for dynamic schema tracking. Centralize endpoint definitions and implement schema change detection.
This commit is contained in:
parent
4f74343efc
commit
a988aaa04f
3 changed files with 857 additions and 0 deletions
21
src/geoguessr_mcp/monitoring/__init__.py
Normal file
21
src/geoguessr_mcp/monitoring/__init__.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"""Monitoring module for API endpoint tracking and schema detection."""
|
||||
|
||||
from .endpoint_monitor import EndpointMonitor, endpoint_monitor, MONITORED_ENDPOINTS
|
||||
from .schema_manager import (
|
||||
SchemaDetector,
|
||||
SchemaRegistry,
|
||||
EndpointSchema,
|
||||
SchemaField,
|
||||
schema_registry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"EndpointMonitor",
|
||||
"endpoint_monitor",
|
||||
"MONITORED_ENDPOINTS",
|
||||
"SchemaDetector",
|
||||
"SchemaRegistry",
|
||||
"EndpointSchema",
|
||||
"SchemaField",
|
||||
"schema_registry",
|
||||
]
|
||||
397
src/geoguessr_mcp/monitoring/endpoint_monitor.py
Normal file
397
src/geoguessr_mcp/monitoring/endpoint_monitor.py
Normal file
|
|
@ -0,0 +1,397 @@
|
|||
"""
|
||||
API Endpoint Monitor.
|
||||
|
||||
This module provides automated monitoring of GeoGuessr API endpoints,
|
||||
checking their availability and detecting response format changes.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, UTC
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from ..config import settings
|
||||
from .schema_manager import SchemaRegistry, schema_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class EndpointDefinition:
    """Definition of an API endpoint to monitor.

    Consumed by ``EndpointMonitor.check_endpoint`` to build the request
    URL and query parameters.
    """
    # Path relative to the selected base URL.
    path: str
    # HTTP method used for the check.
    method: str = "GET"
    # Whether the endpoint needs the auth cookie.
    # NOTE(review): not currently read by EndpointMonitor — confirm intent.
    requires_auth: bool = True
    # When True, use settings.GAME_SERVER_URL instead of
    # settings.GEOGUESSR_API_URL as the base URL.
    use_game_server: bool = False
    # Optional query parameters sent with the request.
    params: dict = field(default_factory=dict)
    # Human-readable description used in reports.
    description: str = ""
|
||||
|
||||
|
||||
# Known GeoGuessr API endpoints to monitor, grouped by feature area.
# Every entry here is checked by EndpointMonitor.run_full_check().
MONITORED_ENDPOINTS = [
    # Profile endpoints
    EndpointDefinition(
        path="/v3/profiles",
        description="Current user profile",
    ),
    EndpointDefinition(
        path="/v3/profiles/stats",
        description="User statistics",
    ),
    EndpointDefinition(
        path="/v4/stats/me",
        description="Extended user statistics",
    ),
    EndpointDefinition(
        path="/v3/profiles/achievements",
        description="User achievements",
    ),
    EndpointDefinition(
        path="/v3/profiles/maps",
        description="User's custom maps",
    ),

    # Game endpoints
    EndpointDefinition(
        path="/v3/social/events/unfinishedgames",
        description="Unfinished games",
    ),

    # Social endpoints
    EndpointDefinition(
        path="/v4/feed/private",
        params={"count": 10, "page": 0},
        description="Private activity feed",
    ),
    EndpointDefinition(
        path="/v3/social/friends/summary",
        description="Friends summary",
    ),
    EndpointDefinition(
        path="/v3/social/badges/unclaimed",
        description="Unclaimed badges",
    ),
    EndpointDefinition(
        path="/v3/social/maps/browse/personalized",
        description="Personalized map recommendations",
    ),

    # Competitive endpoints
    EndpointDefinition(
        path="/v4/seasons/active/stats",
        description="Active season statistics",
    ),

    # Explorer endpoints
    EndpointDefinition(
        path="/v3/explorer",
        description="Explorer mode progress",
    ),

    # Objectives endpoints
    EndpointDefinition(
        path="/v4/objectives",
        description="Current objectives",
    ),
    EndpointDefinition(
        path="/v4/objectives/unclaimed",
        description="Unclaimed objective rewards",
    ),

    # Subscription endpoints
    EndpointDefinition(
        path="/v3/subscriptions",
        description="Subscription information",
    ),

    # Challenge endpoints
    EndpointDefinition(
        path="/v3/challenges/daily-challenges/today",
        description="Today's daily challenge",
    ),

    # Game server endpoints (served from GAME_SERVER_URL)
    EndpointDefinition(
        path="/tournaments",
        use_game_server=True,
        description="Tournament information",
    ),
]
|
||||
|
||||
|
||||
@dataclass
class MonitoringResult:
    """Result of monitoring a single endpoint check."""
    # Endpoint path that was checked.
    endpoint: str
    # True when the endpoint responded with HTTP 200 (even if the body
    # failed to parse — see EndpointMonitor.check_endpoint).
    is_available: bool
    # HTTP status code; 0 when the request itself failed (timeout, etc.).
    response_code: int
    # Round-trip time in milliseconds.
    response_time_ms: float
    # True when the response schema hash differed from the cached schema.
    schema_changed: bool
    # Error description when the check or response parsing failed.
    error_message: Optional[str] = None
    # UTC time at which this result object was created.
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
||||
|
||||
|
||||
class EndpointMonitor:
    """
    Monitors API endpoints for availability and schema changes.

    Runs checks on all ``MONITORED_ENDPOINTS`` — either once via
    ``run_full_check`` or on a schedule via ``start_periodic_monitoring`` —
    updating the schema registry with any changes detected.
    """

    def __init__(
        self,
        registry: Optional[SchemaRegistry] = None,
        ncfa_cookie: Optional[str] = None,
    ):
        """
        Args:
            registry: Schema registry to update; defaults to the global
                ``schema_registry``.
            ncfa_cookie: GeoGuessr ``_ncfa`` authentication cookie;
                defaults to ``settings.DEFAULT_NCFA_COOKIE``.
        """
        self.registry = registry or schema_registry
        self.ncfa_cookie = ncfa_cookie or settings.DEFAULT_NCFA_COOKIE
        # Results of the most recent full check (oldest first).
        self.results: list[MonitoringResult] = []
        self._running = False
        self._task: Optional[asyncio.Task] = None

    async def check_endpoint(
        self,
        endpoint: EndpointDefinition,
        client: httpx.AsyncClient,
    ) -> MonitoringResult:
        """
        Check a single endpoint and update its schema.

        Args:
            endpoint: The endpoint definition to check
            client: HTTP client to use (must already carry auth cookies)

        Returns:
            MonitoringResult with check details
        """
        base_url = (
            settings.GAME_SERVER_URL
            if endpoint.use_game_server
            else settings.GEOGUESSR_API_URL
        )
        url = f"{base_url}{endpoint.path}"

        start_time = datetime.now(UTC)

        try:
            response = await client.request(
                endpoint.method,
                url,
                params=endpoint.params if endpoint.params else None,
                timeout=settings.REQUEST_TIMEOUT,
            )

            response_time = (datetime.now(UTC) - start_time).total_seconds() * 1000

            if response.status_code == 200:
                try:
                    data = response.json()
                    # Only the change flag is needed here; the schema
                    # object itself is discarded.
                    _, changed = self.registry.update_schema(
                        endpoint.path,
                        data,
                        response.status_code,
                        endpoint.method,
                    )
                    return MonitoringResult(
                        endpoint=endpoint.path,
                        is_available=True,
                        response_code=response.status_code,
                        response_time_ms=response_time,
                        schema_changed=changed,
                    )
                except Exception as e:
                    # 200 response whose body could not be parsed/processed
                    # still counts as "available".
                    logger.warning(f"Failed to parse response from {endpoint.path}: {e}")
                    return MonitoringResult(
                        endpoint=endpoint.path,
                        is_available=True,
                        response_code=response.status_code,
                        response_time_ms=response_time,
                        schema_changed=False,
                        error_message=f"Parse error: {str(e)}",
                    )
            else:
                # Non-200: record unavailability in the registry as well.
                self.registry.mark_unavailable(
                    endpoint.path,
                    f"HTTP {response.status_code}",
                    response.status_code,
                )
                return MonitoringResult(
                    endpoint=endpoint.path,
                    is_available=False,
                    response_code=response.status_code,
                    response_time_ms=response_time,
                    schema_changed=False,
                    error_message=f"HTTP {response.status_code}",
                )

        except httpx.TimeoutException:
            self.registry.mark_unavailable(endpoint.path, "Timeout")
            return MonitoringResult(
                endpoint=endpoint.path,
                is_available=False,
                response_code=0,
                response_time_ms=settings.REQUEST_TIMEOUT * 1000,
                schema_changed=False,
                error_message="Request timeout",
            )
        except Exception as e:
            # Connection errors, DNS failures, protocol errors, etc.
            self.registry.mark_unavailable(endpoint.path, str(e))
            return MonitoringResult(
                endpoint=endpoint.path,
                is_available=False,
                response_code=0,
                response_time_ms=0,
                schema_changed=False,
                error_message=str(e),
            )

    async def run_full_check(self) -> list[MonitoringResult]:
        """
        Run a full check of all monitored endpoints.

        Returns:
            List of monitoring results for all endpoints; empty when no
            authentication cookie is configured.
        """
        if not self.ncfa_cookie:
            logger.warning("No authentication cookie available for monitoring")
            return []

        results = []

        async with httpx.AsyncClient() as client:
            client.cookies.set("_ncfa", self.ncfa_cookie, domain="www.geoguessr.com")

            for endpoint in MONITORED_ENDPOINTS:
                try:
                    result = await self.check_endpoint(endpoint, client)
                    results.append(result)

                    status = "✓" if result.is_available else "✗"
                    changed = " [SCHEMA CHANGED]" if result.schema_changed else ""
                    logger.info(
                        f"{status} {endpoint.path}: "
                        f"{result.response_code} ({result.response_time_ms:.0f}ms){changed}"
                    )

                    # Small delay between requests to avoid rate limiting
                    await asyncio.sleep(0.5)

                except Exception as e:
                    # check_endpoint already handles request errors; this
                    # catches anything unexpected so one endpoint cannot
                    # abort the whole run.
                    logger.error(f"Error checking {endpoint.path}: {e}")
                    results.append(MonitoringResult(
                        endpoint=endpoint.path,
                        is_available=False,
                        response_code=0,
                        response_time_ms=0,
                        schema_changed=False,
                        error_message=str(e),
                    ))

        self.results = results
        return results

    async def start_periodic_monitoring(self) -> None:
        """Start the periodic monitoring background task (no-op if running)."""
        if self._running:
            logger.warning("Monitoring already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._monitoring_loop())
        logger.info(
            f"Started periodic monitoring (interval: {settings.MONITORING_INTERVAL_HOURS}h)"
        )

    async def stop_monitoring(self) -> None:
        """Stop the periodic monitoring background task."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
        logger.info("Stopped periodic monitoring")

    async def _monitoring_loop(self) -> None:
        """Background loop: check all endpoints, sleep, repeat."""
        while self._running:
            try:
                logger.info("Running scheduled endpoint check...")
                await self.run_full_check()

                # Wait for next check interval
                await asyncio.sleep(settings.MONITORING_INTERVAL_HOURS * 3600)

            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in monitoring loop: {e}")
                # Wait a bit before retrying on error
                await asyncio.sleep(60)

    def get_monitoring_report(self) -> dict:
        """
        Generate a monitoring report for all endpoints.

        Returns:
            Dictionary with monitoring summary and details; a
            ``status: no_data`` dict when no check has run yet.
        """
        if not self.results:
            return {
                "status": "no_data",
                "message": "No monitoring data available. Run a check first.",
            }

        available = [r for r in self.results if r.is_available]
        unavailable = [r for r in self.results if not r.is_available]
        changed = [r for r in self.results if r.schema_changed]

        avg_response_time = (
            sum(r.response_time_ms for r in available) / len(available)
            if available else 0
        )

        return {
            "status": "ok" if len(unavailable) == 0 else "degraded",
            "summary": {
                "total_endpoints": len(self.results),
                "available": len(available),
                "unavailable": len(unavailable),
                "schema_changes": len(changed),
                "average_response_time_ms": round(avg_response_time, 2),
                # Fix: results are appended oldest-first, so results[0]
                # was the *oldest* timestamp; report the most recent one.
                "last_check": max(r.timestamp for r in self.results).isoformat(),
            },
            "available_endpoints": [
                {
                    "endpoint": r.endpoint,
                    "response_code": r.response_code,
                    "response_time_ms": round(r.response_time_ms, 2),
                    "schema_changed": r.schema_changed,
                }
                for r in available
            ],
            "unavailable_endpoints": [
                {
                    "endpoint": r.endpoint,
                    "error": r.error_message,
                    "response_code": r.response_code,
                }
                for r in unavailable
            ],
            "schema_changes": [
                {
                    "endpoint": r.endpoint,
                    "timestamp": r.timestamp.isoformat(),
                }
                for r in changed
            ],
        }
|
||||
|
||||
|
||||
# Global monitor instance shared across the application.
# NOTE: constructed at import time; reads settings.DEFAULT_NCFA_COOKIE and
# binds to the global schema_registry.
endpoint_monitor = EndpointMonitor()
|
||||
439
src/geoguessr_mcp/monitoring/schema_manager.py
Normal file
439
src/geoguessr_mcp/monitoring/schema_manager.py
Normal file
|
|
@ -0,0 +1,439 @@
|
|||
"""
|
||||
Dynamic Schema Detection and Management.
|
||||
|
||||
This module automatically detects, tracks, and adapts to changes in API response formats.
|
||||
It maintains a versioned history of schemas and provides tools for the LLM to understand
|
||||
the current data structure.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, UTC
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class SchemaField:
    """Represents a single field in a detected response schema."""
    # Field name (SchemaDetector may prepend a dotted prefix, though its
    # current callers always pass an empty prefix).
    name: str
    # Detected type label: "string", "integer", "object", "datetime", etc.
    field_type: str
    # True when the observed sample value was None.
    nullable: bool = False
    # Nested field map for object / array-of-object values, if any.
    nested_schema: Optional[dict] = None
    # Sample value captured from the analyzed response.
    example_value: Any = None
    # Optional human-readable description.
    description: str = ""
|
||||
|
||||
|
||||
@dataclass
class EndpointSchema:
    """Schema definition for an API endpoint.

    Holds the detected field map plus availability metadata, and supports
    round-tripping through plain dictionaries for JSON persistence.
    """
    endpoint: str
    method: str
    fields: dict[str, SchemaField] = field(default_factory=dict)
    last_updated: datetime = field(default_factory=lambda: datetime.now(UTC))
    schema_hash: str = ""
    response_code: int = 200
    is_available: bool = True
    error_message: Optional[str] = None
    sample_response: Optional[dict] = None

    def to_dict(self) -> dict:
        """Serialize this schema to a plain, JSON-friendly dictionary."""
        serialized_fields = {}
        for field_name, schema_field in self.fields.items():
            serialized_fields[field_name] = {
                "name": schema_field.name,
                "field_type": schema_field.field_type,
                "nullable": schema_field.nullable,
                "nested_schema": schema_field.nested_schema,
                "example_value": self._serialize_example(schema_field.example_value),
                "description": schema_field.description,
            }
        return {
            "endpoint": self.endpoint,
            "method": self.method,
            "fields": serialized_fields,
            "last_updated": self.last_updated.isoformat(),
            "schema_hash": self.schema_hash,
            "response_code": self.response_code,
            "is_available": self.is_available,
            "error_message": self.error_message,
            "sample_response": self.sample_response,
        }

    @staticmethod
    def _serialize_example(value: Any) -> Any:
        """Safely serialize example values, truncating long collections."""
        if isinstance(value, (str, int, float, bool, type(None))):
            return value
        if isinstance(value, (list, dict)):
            rendered = str(value)
            if len(rendered) > 100:
                return rendered[:100] + "..."
            return value
        return str(value)

    @classmethod
    def from_dict(cls, data: dict) -> "EndpointSchema":
        """Reconstruct an EndpointSchema from its dictionary form."""
        parsed_fields = {
            name: SchemaField(
                name=raw["name"],
                field_type=raw["field_type"],
                nullable=raw.get("nullable", False),
                nested_schema=raw.get("nested_schema"),
                example_value=raw.get("example_value"),
                description=raw.get("description", ""),
            )
            for name, raw in data.get("fields", {}).items()
        }

        # Cached files store timestamps as ISO strings; anything else
        # falls back to "now".
        raw_updated = data.get("last_updated")
        parsed_updated = (
            datetime.fromisoformat(raw_updated)
            if isinstance(raw_updated, str)
            else datetime.now(UTC)
        )

        return cls(
            endpoint=data["endpoint"],
            method=data.get("method", "GET"),
            fields=parsed_fields,
            last_updated=parsed_updated,
            schema_hash=data.get("schema_hash", ""),
            response_code=data.get("response_code", 200),
            is_available=data.get("is_available", True),
            error_message=data.get("error_message"),
            sample_response=data.get("sample_response"),
        )
|
||||
|
||||
|
||||
class SchemaDetector:
    """Detects and analyzes JSON response schemas dynamically."""

    @staticmethod
    def detect_type(value: Any) -> str:
        """Return the schema type label for a single value."""
        if value is None:
            return "null"
        # bool must be tested before int: bool is an int subclass.
        for python_type, label in ((bool, "boolean"), (int, "integer"), (float, "number")):
            if isinstance(value, python_type):
                return label
        if isinstance(value, str):
            # Recognize special string formats before falling back.
            for matches, label in (
                (SchemaDetector._is_iso_datetime, "datetime"),
                (SchemaDetector._is_uuid, "uuid"),
                (SchemaDetector._is_url, "url"),
            ):
                if matches(value):
                    return label
            return "string"
        if isinstance(value, list):
            return "array"
        if isinstance(value, dict):
            return "object"
        return "unknown"

    @staticmethod
    def _is_iso_datetime(value: str) -> bool:
        """True when the string parses as an ISO-8601 datetime."""
        try:
            datetime.fromisoformat(value.replace("Z", "+00:00"))
        except (ValueError, AttributeError):
            return False
        return True

    @staticmethod
    def _is_uuid(value: str) -> bool:
        """True when the string is a canonical hex UUID."""
        import re
        uuid_pattern = r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
        return re.match(uuid_pattern, value.lower()) is not None

    @staticmethod
    def _is_url(value: str) -> bool:
        """True when the string starts with an http(s) scheme."""
        return value.startswith(("http://", "https://"))

    def analyze_response(self, data: Any, max_depth: int = 5) -> dict[str, SchemaField]:
        """
        Analyze a JSON response and extract its schema.

        Args:
            data: The JSON response data
            max_depth: Maximum depth for nested object analysis

        Returns:
            Dictionary mapping field names to SchemaField objects
            (empty when the top-level payload is not an object)
        """
        if not isinstance(data, dict):
            return {}

        extracted: dict[str, SchemaField] = {}
        self._analyze_object(data, extracted, "", max_depth)
        return extracted

    def _analyze_object(
        self,
        node: dict,
        out: dict,
        path_prefix: str,
        depth_left: int
    ) -> None:
        """Recursively walk *node*, writing SchemaField entries into *out*."""
        if depth_left <= 0:
            return

        for key, value in node.items():
            qualified = f"{path_prefix}.{key}" if path_prefix else key
            detected = self.detect_type(value)

            # Descend into objects, and into the first element of an
            # array of objects, to capture one level of nested schema.
            child_schema = None
            if detected == "object" and isinstance(value, dict):
                child_schema = {}
                self._analyze_object(value, child_schema, "", depth_left - 1)
            elif detected == "array" and value and isinstance(value[0], dict):
                child_schema = {}
                self._analyze_object(value[0], child_schema, "", depth_left - 1)

            out[qualified] = SchemaField(
                name=qualified,
                field_type=detected,
                nullable=value is None,
                nested_schema=child_schema or None,
                example_value=value,
            )

    @staticmethod
    def compute_schema_hash(fields: dict[str, SchemaField]) -> str:
        """Compute a short, stable hash of the schema for change detection."""
        signature = {
            name: (f.field_type, f.nullable)
            for name, f in sorted(fields.items())
        }
        serialized = json.dumps(signature, sort_keys=True)
        return hashlib.sha256(serialized.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
class SchemaRegistry:
    """
    Manages schema storage, versioning, and change detection.

    Schemas are persisted to disk and loaded on startup, allowing the system
    to track changes over time and adapt automatically.
    """

    def __init__(self, cache_dir: Optional[str] = None):
        """
        Args:
            cache_dir: Directory for persisted schema JSON files;
                defaults to ``settings.SCHEMA_CACHE_DIR``.
        """
        self.cache_dir = Path(cache_dir or settings.SCHEMA_CACHE_DIR)
        # Created eagerly so saves never fail on a missing directory.
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Current schema per endpoint path.
        self.schemas: dict[str, EndpointSchema] = {}
        # Previous schema versions per endpoint (last 10 persisted).
        self.schema_history: dict[str, list[EndpointSchema]] = {}
        self.detector = SchemaDetector()
        self._load_cached_schemas()

    def _get_schema_file(self) -> Path:
        """Get the path to the schema cache file."""
        return self.cache_dir / "schemas.json"

    def _get_history_file(self) -> Path:
        """Get the path to the schema history file."""
        return self.cache_dir / "schema_history.json"

    def _load_cached_schemas(self) -> None:
        """Load schemas and history from the disk cache.

        Best-effort: a corrupt or unreadable cache file is logged and
        skipped rather than raising.
        """
        schema_file = self._get_schema_file()
        if schema_file.exists():
            try:
                with open(schema_file) as f:
                    data = json.load(f)
                for endpoint, schema_data in data.items():
                    self.schemas[endpoint] = EndpointSchema.from_dict(schema_data)
                logger.info(f"Loaded {len(self.schemas)} cached schemas")
            except Exception as e:
                logger.warning(f"Failed to load cached schemas: {e}")

        history_file = self._get_history_file()
        if history_file.exists():
            try:
                with open(history_file) as f:
                    data = json.load(f)
                for endpoint, history in data.items():
                    self.schema_history[endpoint] = [
                        EndpointSchema.from_dict(h) for h in history
                    ]
            except Exception as e:
                logger.warning(f"Failed to load schema history: {e}")

    def _save_schemas(self) -> None:
        """Save schemas and (trimmed) history to the disk cache.

        Best-effort: failures are logged, not raised, so a full disk or
        permission error cannot break monitoring.
        """
        try:
            with open(self._get_schema_file(), "w") as f:
                json.dump(
                    {ep: schema.to_dict() for ep, schema in self.schemas.items()},
                    f,
                    indent=2
                )

            with open(self._get_history_file(), "w") as f:
                json.dump(
                    {
                        ep: [h.to_dict() for h in history[-10:]]  # Keep last 10 versions
                        for ep, history in self.schema_history.items()
                    },
                    f,
                    indent=2
                )
        except Exception as e:
            logger.error(f"Failed to save schemas: {e}")

    def update_schema(
        self,
        endpoint: str,
        response_data: Any,
        response_code: int = 200,
        method: str = "GET"
    ) -> tuple[EndpointSchema, bool]:
        """
        Update schema for an endpoint based on response data.

        The previous schema (if any) is pushed onto the history when the
        hash changes, and the cache is persisted on every call.

        Args:
            endpoint: The API endpoint
            response_data: The JSON response data
            response_code: HTTP response code
            method: HTTP method

        Returns:
            Tuple of (updated schema, whether schema changed)
        """
        fields = self.detector.analyze_response(response_data)
        new_hash = self.detector.compute_schema_hash(fields)

        # A missing prior schema also counts as "changed".
        existing_schema = self.schemas.get(endpoint)
        schema_changed = existing_schema is None or existing_schema.schema_hash != new_hash

        new_schema = EndpointSchema(
            endpoint=endpoint,
            method=method,
            fields=fields,
            last_updated=datetime.now(UTC),
            schema_hash=new_hash,
            response_code=response_code,
            is_available=True,
            sample_response=self._truncate_sample(response_data),
        )

        if schema_changed:
            if endpoint not in self.schema_history:
                self.schema_history[endpoint] = []
            if existing_schema:
                self.schema_history[endpoint].append(existing_schema)
            logger.info(f"Schema changed for {endpoint}: {new_hash}")

        self.schemas[endpoint] = new_schema
        self._save_schemas()

        return new_schema, schema_changed

    def mark_unavailable(
        self,
        endpoint: str,
        error_message: str,
        response_code: int = 0
    ) -> None:
        """Mark an endpoint as unavailable and persist the change.

        Keeps any previously detected fields; creates a stub schema when
        the endpoint was never seen before.
        """
        if endpoint in self.schemas:
            self.schemas[endpoint].is_available = False
            self.schemas[endpoint].error_message = error_message
            self.schemas[endpoint].response_code = response_code
            self.schemas[endpoint].last_updated = datetime.now(UTC)
        else:
            self.schemas[endpoint] = EndpointSchema(
                endpoint=endpoint,
                method="GET",
                is_available=False,
                error_message=error_message,
                response_code=response_code,
            )
        self._save_schemas()

    def get_schema(self, endpoint: str) -> Optional[EndpointSchema]:
        """Get the current schema for an endpoint."""
        return self.schemas.get(endpoint)

    def get_all_schemas(self) -> dict[str, EndpointSchema]:
        """Get a shallow copy of all registered schemas."""
        return self.schemas.copy()

    def get_available_endpoints(self) -> list[str]:
        """Get list of currently available endpoints."""
        return [ep for ep, schema in self.schemas.items() if schema.is_available]

    def get_schema_summary(self) -> dict:
        """Get a compact summary of all schemas for LLM context."""
        return {
            "total_endpoints": len(self.schemas),
            "available_endpoints": len(self.get_available_endpoints()),
            "endpoints": {
                endpoint: {
                    "available": schema.is_available,
                    "last_updated": schema.last_updated.isoformat(),
                    "field_count": len(schema.fields),
                    "fields": list(schema.fields.keys())[:20],  # Limit for context
                    "response_code": schema.response_code,
                }
                for endpoint, schema in self.schemas.items()
            }
        }

    def generate_dynamic_description(self, endpoint: str) -> str:
        """
        Generate a dynamic description of an endpoint's response format.
        This is used to provide context to the LLM about what data is available.

        Returns a short explanatory string when no schema exists or the
        endpoint is unavailable.
        """
        schema = self.get_schema(endpoint)
        if not schema:
            return f"No schema information available for {endpoint}"

        if not schema.is_available:
            return f"Endpoint {endpoint} is currently unavailable: {schema.error_message}"

        lines = [
            f"Endpoint: {endpoint}",
            f"Method: {schema.method}",
            f"Last Updated: {schema.last_updated.isoformat()}",
            # Always 'Available' here — the unavailable case returned above.
            f"Status: {'Available' if schema.is_available else 'Unavailable'}",
            "",
            "Response Fields:",
        ]

        for name, item in sorted(schema.fields.items()):
            nullable_str = " (nullable)" if item.nullable else ""
            lines.append(f"  - {name}: {item.field_type}{nullable_str}")
            if item.nested_schema:
                lines.append(f"    Nested fields: {list(item.nested_schema.keys())}")

        return "\n".join(lines)

    @staticmethod
    def _truncate_sample(data: Any, max_items: int = 3) -> Any:
        """Truncate sample response for storage.

        Recursively keeps at most 20 dict keys, *max_items* list elements,
        and 200 characters per string.
        """
        if isinstance(data, dict):
            return {
                k: SchemaRegistry._truncate_sample(v, max_items)
                for k, v in list(data.items())[:20]
            }
        if isinstance(data, list):
            return [
                SchemaRegistry._truncate_sample(item, max_items)
                for item in data[:max_items]
            ]
        if isinstance(data, str) and len(data) > 200:
            return data[:200] + "..."
        return data
|
||||
|
||||
|
||||
# Global registry instance shared across the application.
# NOTE: constructed at import time; creates settings.SCHEMA_CACHE_DIR on
# disk and loads any cached schemas as a side effect.
schema_registry = SchemaRegistry()
|
||||
Loading…
Add table
Add a link
Reference in a new issue