Source code for root_mcp.core.tools.data_access
"""Data access tools for reading TTree or RNTuple data."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from root_mcp.config import Config
from root_mcp.core.io.file_manager import FileManager
from root_mcp.core.io.validators import PathValidator
from root_mcp.core.io.readers import TreeReader
logger = logging.getLogger(__name__)
[docs]
class DataAccessTools:
"""Tools for accessing TTree or RNTuple data."""
[docs]
def __init__(
self,
config: Config,
file_manager: FileManager,
path_validator: PathValidator,
tree_reader: TreeReader,
):
"""
Initialize data access tools.
Args:
config: Server configuration
file_manager: File manager instance
path_validator: Path validator instance
tree_reader: Tree reader instance
"""
self.config = config
self.file_manager = file_manager
self.path_validator = path_validator
self.tree_reader = tree_reader
[docs]
def read_branches(
self,
path: str,
tree_name: str,
branches: list[str],
selection: str | None = None,
limit: int | None = None,
offset: int = 0,
entry_start: int | None = None,
entry_stop: int | None = None,
flatten: bool = False,
defines: dict[str, str] | None = None,
) -> dict[str, Any]:
"""
Read branch data from a TTree or RNTuple.
Args:
path: File path
tree_name: Tree name
branches: List of branch names (can include derived branches from defines)
selection: Optional cut expression
limit: Maximum entries to return (alternative to entry_stop)
offset: Number of entries to skip (alternative to entry_start)
entry_start: Start entry index (alternative to offset)
entry_stop: Stop entry index (alternative to limit)
flatten: Flatten jagged arrays
defines: Optional derived variable definitions {name: expression}
Returns:
Branch data and metadata
"""
# Handle defines parameter if passed as JSON string
if defines is not None and isinstance(defines, str):
import json
try:
defines = json.loads(defines)
except json.JSONDecodeError as e:
return {
"error": "invalid_parameter",
"message": f"Invalid JSON in defines parameter: {e}",
}
# Handle entry_start/entry_stop vs offset/limit
if entry_start is not None:
offset = entry_start
if entry_stop is not None:
limit = entry_stop - offset
# Validate path
try:
validated_path = self.path_validator.validate_path(path)
except Exception as e:
return {
"error": "invalid_path",
"message": str(e),
}
# Validate limit
if limit is None:
limit = self.config.analysis.default_read_limit
if limit > self.config.limits.max_rows_per_call:
return {
"error": "limit_exceeded",
"message": (
f"Requested limit ({limit}) exceeds maximum "
f"({self.config.limits.max_rows_per_call})"
),
"suggestion": f"Use limit <= {self.config.limits.max_rows_per_call} or apply selection",
}
# Read data
try:
result = self.tree_reader.read_branches(
path=str(validated_path),
tree_name=tree_name,
branches=branches,
selection=selection,
limit=limit,
offset=offset,
flatten=flatten,
defines=defines,
)
except KeyError as e:
return {
"error": "branch_not_found",
"message": str(e),
"suggestion": "Use list_branches() to see available branches",
}
except ValueError as e:
return {
"error": "invalid_selection",
"message": str(e),
"suggestion": "Check ROOT expression syntax (e.g., 'pt > 20 && abs(eta) < 2.4')",
}
except Exception as e:
logger.error(f"Failed to read branches: {e}")
return {
"error": "read_error",
"message": f"Failed to read data: {e}",
}
# Add suggestions
suggestions = []
if result["metadata"]["truncated"]:
next_offset = offset + result["data"]["entries"]
suggestions.append(f"Use offset={next_offset} to get next page")
if result["data"]["is_jagged"]:
suggestions.append("Data has variable-length arrays - use flatten=true for flat output")
entries_selected = result["metadata"]["entries_selected"]
entries_scanned = result["metadata"]["entries_scanned"]
if entries_selected < entries_scanned * 0.1:
suggestions.append(
f"Only {entries_selected}/{entries_scanned} entries pass selection - "
"consider compute_histogram() for full dataset analysis"
)
result["suggestions"] = suggestions
return result
[docs]
def sample_tree(
self,
path: str,
tree: str,
size: int = 100,
method: str = "first",
branches: list[str] | None = None,
seed: int | None = None,
) -> dict[str, Any]:
"""
Get a sample from a tree.
Args:
path: File path
tree: Tree name
size: Sample size
method: "first" or "random"
branches: Branches to include (None = all)
seed: Random seed
Returns:
Sample data and metadata
"""
# Validate path
try:
validated_path = self.path_validator.validate_path(path)
except Exception as e:
return {
"error": "invalid_path",
"message": str(e),
}
# Validate size
if size > 10_000:
return {
"error": "limit_exceeded",
"message": f"Sample size ({size}) exceeds maximum (10,000)",
"suggestion": "Use size <= 10000",
}
# Get sample
try:
result = self.tree_reader.sample_tree(
path=str(validated_path),
tree_name=tree,
size=size,
method=method,
branches=branches,
seed=seed,
)
except ValueError as e:
return {
"error": "invalid_parameter",
"message": str(e),
}
except Exception as e:
return {
"error": "read_error",
"message": f"Failed to sample tree: {e}",
}
# Add suggestions
suggestions = [
"Use this sample to understand data structure before full reads",
"Use read_branches() with selection to get filtered data",
]
result["suggestions"] = suggestions
return result
[docs]
def get_branch_stats(
self,
path: str,
tree: str,
branches: list[str],
selection: str | None = None,
) -> dict[str, Any]:
"""
Compute statistics for branches.
Args:
path: File path
tree: Tree name
branches: Branches to analyze
selection: Optional cut expression
Returns:
Branch statistics
"""
# Validate path
try:
validated_path = self.path_validator.validate_path(path)
except Exception as e:
return {
"error": "invalid_path",
"message": str(e),
}
# Compute stats
try:
stats = self.tree_reader.compute_branch_stats(
path=str(validated_path),
tree_name=tree,
branches=branches,
selection=selection,
)
except Exception as e:
return {
"error": "computation_error",
"message": f"Failed to compute statistics: {e}",
}
return {
"data": {
"statistics": stats,
},
"metadata": {
"operation": "get_branch_stats",
"branches": branches,
"selection": selection,
},
"suggestions": [
"Use these statistics to choose histogram ranges",
"Min/max values help identify outliers",
],
}