Source code for root_mcp.core.tools.data_access

"""Data access tools for reading TTree or RNTuple data."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from root_mcp.config import Config
    from root_mcp.core.io.file_manager import FileManager
    from root_mcp.core.io.validators import PathValidator
    from root_mcp.core.io.readers import TreeReader

logger = logging.getLogger(__name__)



[docs]
class DataAccessTools:
    """Tools for accessing TTree or RNTuple data.

    Every public method returns a plain dictionary. On success it is the
    payload produced by the tree reader (or built here) with a
    ``suggestions`` list attached; on failure it is an error payload with
    ``error`` and ``message`` keys and, where helpful, a ``suggestion``.
    """

    # Upper bound on the number of entries sample_tree() will hand back.
    MAX_SAMPLE_SIZE = 10_000

    def __init__(
        self,
        config: Config,
        file_manager: FileManager,
        path_validator: PathValidator,
        tree_reader: TreeReader,
    ):
        """
        Initialize data access tools.

        Args:
            config: Server configuration
            file_manager: File manager instance
            path_validator: Path validator instance
            tree_reader: Tree reader instance
        """
        self.config = config
        self.file_manager = file_manager
        self.path_validator = path_validator
        self.tree_reader = tree_reader

    @staticmethod
    def _error(
        code: str,
        message: str,
        suggestion: str | None = None,
    ) -> dict[str, Any]:
        """Build an error payload; ``suggestion`` is included only when given."""
        payload: dict[str, Any] = {"error": code, "message": message}
        if suggestion is not None:
            payload["suggestion"] = suggestion
        return payload

    def _check_path(self, path: str) -> tuple[Any, dict[str, Any] | None]:
        """Run the path validator and return ``(validated_path, error_payload)``.

        Exactly one element of the pair is meaningful: ``error_payload`` is
        ``None`` when validation succeeded.
        """
        try:
            return self.path_validator.validate_path(path), None
        except Exception as e:
            # Any validator failure is reported to the caller, not raised.
            return None, self._error("invalid_path", str(e))

    def read_branches(
        self,
        path: str,
        tree_name: str,
        branches: list[str],
        selection: str | None = None,
        limit: int | None = None,
        offset: int = 0,
        entry_start: int | None = None,
        entry_stop: int | None = None,
        flatten: bool = False,
        defines: dict[str, str] | str | None = None,
    ) -> dict[str, Any]:
        """
        Read branch data from a TTree or RNTuple.

        Args:
            path: File path
            tree_name: Tree name
            branches: List of branch names (can include derived branches from defines)
            selection: Optional cut expression
            limit: Maximum entries to return (alternative to entry_stop)
            offset: Number of entries to skip (alternative to entry_start)
            entry_start: Start entry index (alternative to offset); takes
                precedence over offset when both are given
            entry_stop: Stop entry index (alternative to limit); takes
                precedence over limit when both are given
            flatten: Flatten jagged arrays
            defines: Optional derived variable definitions {name: expression},
                either as a dict or as a JSON string encoding an object

        Returns:
            Branch data and metadata with a ``suggestions`` list, or an error
            payload (``invalid_parameter``, ``invalid_path``,
            ``limit_exceeded``, ``branch_not_found``, ``invalid_selection``
            or ``read_error``)
        """
        # Handle defines parameter if passed as JSON string
        if isinstance(defines, str):
            import json

            try:
                defines = json.loads(defines)
            except json.JSONDecodeError as e:
                return self._error(
                    "invalid_parameter",
                    f"Invalid JSON in defines parameter: {e}",
                )
            # Valid JSON that is not an object (e.g. a list or a number)
            # cannot map names to expressions; JSON null means "no defines".
            if defines is not None and not isinstance(defines, dict):
                return self._error(
                    "invalid_parameter",
                    "defines must be a JSON object mapping names to expressions",
                )

        # Handle entry_start/entry_stop vs offset/limit
        if entry_start is not None:
            offset = entry_start
        if entry_stop is not None:
            limit = entry_stop - offset

        # Validate path
        validated_path, path_error = self._check_path(path)
        if path_error is not None:
            return path_error

        # Validate the requested window. Negative values would slip under the
        # max_rows_per_call check below, so they are rejected explicitly.
        if offset < 0:
            return self._error(
                "invalid_parameter",
                f"offset/entry_start must be non-negative (got {offset})",
            )
        if limit is None:
            limit = self.config.analysis.default_read_limit
        elif limit < 0:
            if entry_stop is not None:
                message = (
                    f"entry_stop ({entry_stop}) must not be smaller than "
                    f"the start entry ({offset})"
                )
            else:
                message = f"limit must be non-negative (got {limit})"
            return self._error("invalid_parameter", message)

        max_rows = self.config.limits.max_rows_per_call
        if limit > max_rows:
            return self._error(
                "limit_exceeded",
                f"Requested limit ({limit}) exceeds maximum ({max_rows})",
                f"Use limit <= {max_rows} or apply selection",
            )

        # Read data
        try:
            result = self.tree_reader.read_branches(
                path=str(validated_path),
                tree_name=tree_name,
                branches=branches,
                selection=selection,
                limit=limit,
                offset=offset,
                flatten=flatten,
                defines=defines,
            )
        except KeyError as e:
            return self._error(
                "branch_not_found",
                str(e),
                "Use list_branches() to see available branches",
            )
        except ValueError as e:
            return self._error(
                "invalid_selection",
                str(e),
                "Check ROOT expression syntax (e.g., 'pt > 20 && abs(eta) < 2.4')",
            )
        except Exception as e:
            logger.exception("Failed to read branches: %s", e)
            return self._error("read_error", f"Failed to read data: {e}")

        # Add suggestions
        suggestions = []
        if result["metadata"]["truncated"]:
            next_offset = offset + result["data"]["entries"]
            suggestions.append(f"Use offset={next_offset} to get next page")

        if result["data"]["is_jagged"]:
            suggestions.append("Data has variable-length arrays - use flatten=true for flat output")

        entries_selected = result["metadata"]["entries_selected"]
        entries_scanned = result["metadata"]["entries_scanned"]
        # Fewer than 10% of the scanned entries survived.
        if entries_selected < entries_scanned * 0.1:
            suggestions.append(
                f"Only {entries_selected}/{entries_scanned} entries pass selection - "
                "consider compute_histogram() for full dataset analysis"
            )

        result["suggestions"] = suggestions

        return result

    def sample_tree(
        self,
        path: str,
        tree: str,
        size: int = 100,
        method: str = "first",
        branches: list[str] | None = None,
        seed: int | None = None,
    ) -> dict[str, Any]:
        """
        Get a sample from a tree.

        Args:
            path: File path
            tree: Tree name
            size: Sample size (0 to MAX_SAMPLE_SIZE inclusive)
            method: "first" or "random"
            branches: Branches to include (None = all)
            seed: Random seed

        Returns:
            Sample data and metadata with a ``suggestions`` list, or an error
            payload (``invalid_path``, ``invalid_parameter``,
            ``limit_exceeded`` or ``read_error``)
        """
        # Validate path
        validated_path, path_error = self._check_path(path)
        if path_error is not None:
            return path_error

        # Validate size; a negative size would slip under the upper bound.
        if size < 0:
            return self._error(
                "invalid_parameter",
                f"Sample size must be non-negative (got {size})",
            )
        if size > self.MAX_SAMPLE_SIZE:
            return self._error(
                "limit_exceeded",
                f"Sample size ({size}) exceeds maximum ({self.MAX_SAMPLE_SIZE:,})",
                f"Use size <= {self.MAX_SAMPLE_SIZE}",
            )

        # Get sample
        try:
            result = self.tree_reader.sample_tree(
                path=str(validated_path),
                tree_name=tree,
                size=size,
                method=method,
                branches=branches,
                seed=seed,
            )
        except ValueError as e:
            return self._error("invalid_parameter", str(e))
        except Exception as e:
            logger.exception("Failed to sample tree: %s", e)
            return self._error("read_error", f"Failed to sample tree: {e}")

        # Add suggestions
        result["suggestions"] = [
            "Use this sample to understand data structure before full reads",
            "Use read_branches() with selection to get filtered data",
        ]

        return result

    def get_branch_stats(
        self,
        path: str,
        tree: str,
        branches: list[str],
        selection: str | None = None,
    ) -> dict[str, Any]:
        """
        Compute statistics for branches.

        Args:
            path: File path
            tree: Tree name
            branches: Branches to analyze
            selection: Optional cut expression

        Returns:
            Branch statistics under ``data.statistics`` together with
            ``metadata`` and ``suggestions``, or an error payload
            (``invalid_path`` or ``computation_error``)
        """
        # Validate path
        validated_path, path_error = self._check_path(path)
        if path_error is not None:
            return path_error

        # Compute stats
        try:
            stats = self.tree_reader.compute_branch_stats(
                path=str(validated_path),
                tree_name=tree,
                branches=branches,
                selection=selection,
            )
        except Exception as e:
            logger.exception("Failed to compute statistics: %s", e)
            return self._error(
                "computation_error",
                f"Failed to compute statistics: {e}",
            )

        return {
            "data": {
                "statistics": stats,
            },
            "metadata": {
                "operation": "get_branch_stats",
                "branches": branches,
                "selection": selection,
            },
            "suggestions": [
                "Use these statistics to choose histogram ranges",
                "Min/max values help identify outliers",
            ],
        }