# Source code for root_mcp.extended.tools.analysis
"""Analysis tools for histograms, selections, and exports."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
from root_mcp.extended.analysis.fitting import fit_histogram
from root_mcp.extended.analysis.plotting import generate_plot
if TYPE_CHECKING:
from root_mcp.config import Config
from root_mcp.core.io.file_manager import FileManager
from root_mcp.core.io.validators import PathValidator
from root_mcp.extended.analysis.operations import AnalysisOperations
from root_mcp.core.io.readers import TreeReader
logger = logging.getLogger(__name__)
[docs]
class AnalysisTools:
    """
    Tools for physics analysis operations.

    Each public method validates its inputs, delegates the actual work to
    the injected collaborators (path validator, analysis operations, file
    manager) and reports failures as a dictionary with ``error`` and
    ``message`` keys instead of raising.
    """
[docs]
def __init__(
    self,
    config: Config,
    file_manager: FileManager,
    path_validator: PathValidator,
    analysis_ops: AnalysisOperations,
    tree_reader: TreeReader,
):
    """
    Store the collaborators the analysis tools delegate to.

    Nothing is opened, validated or computed here; the arguments are kept
    on the instance unchanged.

    Args:
        config: Server configuration
        file_manager: File manager instance
        path_validator: Path validator instance
        analysis_ops: Analysis operations instance
        tree_reader: Tree reader instance
    """
    # Configuration first, then the I/O helpers, then the analysis backend.
    self.config = config
    self.file_manager, self.path_validator = file_manager, path_validator
    self.tree_reader = tree_reader
    self.analysis_ops = analysis_ops
[docs]
def compute_histogram(
    self,
    path: str,
    tree_name: str,
    branch: str,
    bins: int,
    range: tuple[float, float] | None = None,
    selection: str | None = None,
    weights: str | None = None,
    defines: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Compute a 1D histogram.

    Args:
        path: File path
        tree_name: Tree name
        branch: Branch to histogram
        bins: Number of bins
        range: (min, max) for histogram
        selection: Optional cut expression
        weights: Optional weight branch
        defines: Optional variable definitions, either as a dict or as a
            JSON string that decodes to an object

    Returns:
        Histogram data and metadata with a ``suggestions`` list added, or
        a dictionary with ``error`` and ``message`` keys on failure
    """
    # Handle defines parameter if passed as JSON string
    if isinstance(defines, str):
        import json

        try:
            defines = json.loads(defines)
        except json.JSONDecodeError as e:
            return {
                "error": "invalid_parameter",
                "message": f"Invalid JSON in defines parameter: {e}",
            }
        # Well-formed JSON is not enough: "[1, 2]" or "42" would otherwise
        # be forwarded as if it were a mapping of variable definitions.
        if defines is not None and not isinstance(defines, dict):
            return {
                "error": "invalid_parameter",
                "message": "defines must decode to a JSON object (name -> expression)",
            }

    # Validate path
    try:
        validated_path = self.path_validator.validate_path(path)
    except Exception as e:
        return {
            "error": "invalid_path",
            "message": str(e),
        }

    # Compute histogram
    try:
        result = self.analysis_ops.compute_histogram(
            path=str(validated_path),
            tree_name=tree_name,
            branch=branch,
            bins=bins,
            range=range,
            selection=selection,
            weights=weights,
            defines=defines,
        )
    except ValueError as e:
        return {
            "error": "invalid_parameter",
            "message": str(e),
        }
    except KeyError as e:
        return {
            "error": "branch_not_found",
            "message": str(e),
            "suggestion": "Use list_branches() to see available branches",
        }
    except Exception as e:
        # Unexpected failure: keep the traceback in the log.
        logger.exception("Failed to compute histogram: %s", e)
        return {
            "error": "computation_error",
            "message": f"Failed to compute histogram: {e}",
        }

    # Flag a range that leaves more than 5% of the entries outside it.
    hist = result["data"]
    tolerated = hist["entries"] * 0.05
    suggestions = []
    if hist["overflow"] > tolerated:
        suggestions.append(
            f"{hist['overflow']} entries overflow - consider extending range"
        )
    if hist["underflow"] > tolerated:
        suggestions.append(
            f"{hist['underflow']} entries underflow - consider extending range"
        )
    result["suggestions"] = suggestions
    return result
[docs]
def compute_histogram_2d(
    self,
    path: str,
    tree_name: str,
    x_branch: str,
    y_branch: str,
    x_bins: int,
    y_bins: int,
    x_range: tuple[float, float] | None = None,
    y_range: tuple[float, float] | None = None,
    selection: str | None = None,
    defines: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Compute a 2D histogram.

    Args:
        path: File path
        tree_name: Tree name
        x_branch: X-axis branch
        y_branch: Y-axis branch
        x_bins: Number of bins in x
        y_bins: Number of bins in y
        x_range: (min, max) for x-axis
        y_range: (min, max) for y-axis
        selection: Optional cut expression
        defines: Optional variable definitions, either as a dict or as a
            JSON string that decodes to an object

    Returns:
        2D histogram data and metadata with a ``suggestions`` list added,
        or a dictionary with ``error`` and ``message`` keys on failure
    """
    # Handle defines parameter if passed as JSON string
    if isinstance(defines, str):
        import json

        try:
            defines = json.loads(defines)
        except json.JSONDecodeError as e:
            return {
                "error": "invalid_parameter",
                "message": f"Invalid JSON in defines parameter: {e}",
            }
        # Reject JSON that parses but is not an object (list, number, ...).
        if defines is not None and not isinstance(defines, dict):
            return {
                "error": "invalid_parameter",
                "message": "defines must decode to a JSON object (name -> expression)",
            }

    # Validate path
    try:
        validated_path = self.path_validator.validate_path(path)
    except Exception as e:
        return {
            "error": "invalid_path",
            "message": str(e),
        }

    # Compute 2D histogram; failures are classified the same way as in the
    # 1D histogram and kinematics methods of this class.
    try:
        result = self.analysis_ops.compute_histogram_2d(
            path=str(validated_path),
            tree_name=tree_name,
            x_branch=x_branch,
            y_branch=y_branch,
            x_bins=x_bins,
            y_bins=y_bins,
            x_range=x_range,
            y_range=y_range,
            selection=selection,
            defines=defines,
        )
    except ValueError as e:
        return {
            "error": "invalid_parameter",
            "message": str(e),
        }
    except KeyError as e:
        return {
            "error": "branch_not_found",
            "message": str(e),
            "suggestion": "Use list_branches() to see available branches",
        }
    except Exception as e:
        # Unexpected failure: keep the traceback in the log.
        logger.exception("Failed to compute 2D histogram: %s", e)
        return {
            "error": "computation_error",
            "message": f"Failed to compute 2D histogram: {e}",
        }

    result["suggestions"] = [
        "Use for correlation studies or 2D distributions",
        "Visualize as a heatmap or scatter plot",
    ]
    return result
[docs]
def fit_histogram(
    self,
    model: str | list[str | dict[str, Any]] | dict[str, Any],
    data: dict[str, Any] | None = None,
    path: str | None = None,
    tree_name: str | None = None,
    branch: str | None = None,
    bins: int | None = None,
    range: tuple[float, float] | None = None,
    selection: str | None = None,
    weights: str | None = None,
    defines: dict[str, str] | None = None,
    initial_guess: list[float] | None = None,
    bounds: list[list[float]] | None = None,
    fixed_parameters: dict[str | int, float] | None = None,
) -> dict[str, Any]:
    """
    Fit a model to a histogram.

    The histogram is taken from ``data`` when given; otherwise it is first
    computed through ``self.compute_histogram`` from the file arguments.

    Args:
        model: Model configuration
        data: Optional histogram data (from compute_histogram)
        path: File path (if data not provided)
        tree_name: Tree name (if data not provided)
        branch: Branch to histogram (if data not provided)
        bins: Number of bins (if data not provided)
        range: Histogram range (optional)
        selection: Cut expression (optional)
        weights: Weight branch (optional)
        defines: Variable definitions (optional)
        initial_guess: Initial parameters for fit
        bounds: Parameter bounds
        fixed_parameters: Fixed parameters

    Returns:
        Fit results, or a dictionary with ``error`` and ``message`` keys
        when parameters are missing, the histogram step fails, or the fit
        itself raises
    """
    histogram = data
    if histogram is None:
        # Without ready-made data every file argument must be truthy.
        required = (path, tree_name, branch, bins)
        if any(not value for value in required):
            return {
                "error": "missing_parameters",
                "message": "Either 'data' or (path, tree_name, branch, bins) must be provided",
            }
        histogram = self.compute_histogram(
            path=path,  # type: ignore
            tree_name=tree_name,  # type: ignore
            branch=branch,  # type: ignore
            bins=bins,  # type: ignore
            range=range,
            selection=selection,
            weights=weights,
            defines=defines,
        )
        # An error dictionary from the histogram step is handed back as-is.
        if "error" in histogram:
            return histogram

    # The bare name below resolves to the module-level fitting function
    # imported at the top of the file, not to this method.
    try:
        fit_result = fit_histogram(histogram, model, initial_guess, bounds, fixed_parameters)
    except Exception as e:
        return {
            "error": "fit_error",
            "message": f"Fitting failed: {e}",
        }
    return fit_result
[docs]
def compute_histogram_arithmetic(
    self,
    operation: str,
    data1: dict[str, Any],
    data2: dict[str, Any],
) -> dict[str, Any]:
    """
    Combine two histograms with an arithmetic operation.

    The work is delegated to ``self.analysis_ops``.

    Args:
        operation: Operation name
        data1: First histogram
        data2: Second histogram

    Returns:
        Whatever the analysis operations return, or a dictionary with
        ``error`` set to ``arithmetic_error`` if they raise
    """
    try:
        combined = self.analysis_ops.compute_histogram_arithmetic(operation, data1, data2)
    except Exception as e:
        logger.error(f"Arithmetic failed: {e}")
        return {"error": "arithmetic_error", "message": str(e)}
    return combined
[docs]
def generate_plot(
    self,
    data: dict[str, Any],
    plot_type: str = "histogram",
    fit_data: dict[str, Any] | None = None,
    options: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """
    Render analysis data as a plot.

    Args:
        data: Analysis data
        plot_type: Plot type
        fit_data: Optional fit to overlay
        options: Plot settings

    Returns:
        Plot image data, or a dictionary with ``error`` set to
        ``plot_error`` if plotting raises
    """
    # The bare name below resolves to the module-level plotting function
    # imported at the top of the file, not to this method.
    try:
        plot = generate_plot(data, plot_type, fit_data, options, self.config)
    except Exception as e:
        return {
            "error": "plot_error",
            "message": f"Plotting failed: {e}",
        }
    return plot
[docs]
def apply_selection(
    self,
    path: str,
    tree: str,
    selection: str,
    defines: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Count entries passing a selection.

    Args:
        path: File path
        tree: Tree name
        selection: Cut expression
        defines: Optional variable definitions, either as a dict or as a
            JSON string that decodes to an object (same convention as the
            histogram methods of this class)

    Returns:
        Selection statistics with a ``suggestions`` list added, or a
        dictionary with ``error`` and ``message`` keys on failure
    """
    # Accept defines as a JSON string, as the histogram methods do, rather
    # than forwarding the raw string to the analysis operations.
    if isinstance(defines, str):
        import json

        try:
            defines = json.loads(defines)
        except json.JSONDecodeError as e:
            return {
                "error": "invalid_parameter",
                "message": f"Invalid JSON in defines parameter: {e}",
            }
        if defines is not None and not isinstance(defines, dict):
            return {
                "error": "invalid_parameter",
                "message": "defines must decode to a JSON object (name -> expression)",
            }

    # Validate path
    try:
        validated_path = self.path_validator.validate_path(path)
    except Exception as e:
        return {
            "error": "invalid_path",
            "message": str(e),
        }

    # Apply selection
    try:
        result = self.analysis_ops.apply_selection(
            path=str(validated_path),
            tree_name=tree,
            selection=selection,
            defines=defines,
        )
    except Exception as e:
        return {
            "error": "computation_error",
            "message": f"Failed to apply selection: {e}",
        }

    # Exactly one hint, chosen by how much of the sample survives the cut.
    efficiency = result["data"]["efficiency"]
    if efficiency < 0.01:
        hint = (
            f"Very tight selection ({efficiency * 100:.3f}%) - "
            "consider loosening cuts or checking syntax"
        )
    elif efficiency > 0.95:
        hint = (
            f"Selection passes most events ({efficiency * 100:.1f}%) - consider tightening cuts"
        )
    else:
        hint = (
            f"{efficiency * 100:.1f}% of events pass selection - "
            "proceed with compute_histogram() or read_branches()"
        )
    result["suggestions"] = [hint]
    return result
[docs]
def export_branches(
    self,
    path: str,
    tree: str,
    branches: list[str],
    output_path: str,
    output_format: str,
    selection: str | None = None,
    limit: int | None = None,
) -> dict[str, Any]:
    """
    Export branch data to a file.

    Args:
        path: File path
        tree: Tree name
        branches: Branches to export (must not be empty)
        output_path: Destination file path
        output_format: Output format (json, csv, parquet)
        selection: Optional cut expression
        limit: Maximum entries to export; defaults to the configured
            maximum and must be non-negative and not above it

    Returns:
        Export metadata under ``data`` plus ``metadata`` and
        ``suggestions``, or a dictionary with ``error`` and ``message``
        keys on failure
    """
    # Check if export is enabled
    if not self.config.features.enable_export:
        return {
            "error": "feature_disabled",
            "message": "Export feature is disabled",
        }

    # Validate paths
    try:
        validated_input = self.path_validator.validate_path(path)
        validated_output = self.path_validator.validate_output_path(output_path)
    except Exception as e:
        return {
            "error": "invalid_path",
            "message": str(e),
        }

    # Check format
    allowed_formats = self.config.output.allowed_formats
    if output_format not in allowed_formats:
        return {
            "error": "invalid_format",
            "message": f"Format '{output_format}' not allowed",
            "details": {"allowed_formats": allowed_formats},
        }

    # An empty branch list would read nothing useful; reject it up front.
    if not branches:
        return {
            "error": "invalid_parameter",
            "message": "branches must be a non-empty list",
        }

    # Validate limit
    max_export = self.config.limits.max_export_rows
    if limit is None:
        limit = max_export  # Use configured max for export
    # NOTE(review): tree_obj.arrays looks like uproot's API, where a
    # negative entry_stop counts from the end of the tree; a negative
    # limit would then slip past the cap below, so it is rejected here.
    if limit < 0:
        return {
            "error": "invalid_parameter",
            "message": "limit must be a non-negative integer",
        }
    if limit > max_export:
        return {
            "error": "limit_exceeded",
            "message": f"Export limit cannot exceed {max_export:,} entries",
        }

    # Read data
    try:
        tree_obj = self.file_manager.get_tree(validated_input, tree)
        arrays = tree_obj.arrays(
            filter_name=branches,
            cut=selection,
            entry_stop=limit,
            library="ak",
        )
    except Exception as e:
        return {
            "error": "read_error",
            "message": f"Failed to read data for export: {e}",
        }

    # Export
    try:
        export_result = self.analysis_ops.export_to_formats(
            data=arrays,
            output_path=str(validated_output),
            format=output_format,
        )
    except Exception as e:
        return {
            "error": "export_error",
            "message": f"Failed to export data: {e}",
        }

    size_mb = export_result["size_bytes"] / 1024 / 1024
    return {
        "data": export_result,
        "metadata": {
            "operation": "export_branches",
        },
        "suggestions": [
            f"Exported {export_result['entries_written']:,} entries to {output_format}",
            f"File size: {size_mb:.2f} MB",
        ],
    }
[docs]
def compute_kinematics(
    self,
    path: str,
    tree: str,
    computations: list[dict[str, Any]],
    selection: str | None = None,
    limit: int | None = None,
) -> dict[str, Any]:
    """
    Compute kinematic quantities from four-momenta.

    Args:
        path: File path
        tree: Tree name
        computations: List of kinematic calculations. Each entry is a dict
            with keys ``name`` (output variable name), ``type`` (one of
            ``invariant_mass``, ``invariant_mass_squared``, ``transverse_mass``,
            ``delta_r``, ``delta_phi``), ``particles`` (list of branch prefixes,
            e.g. ``['K', 'pi1']``), and optionally ``components`` (component
            suffixes; defaults vary by type).
        selection: Optional cut expression
        limit: Maximum entries to process; when given it must be
            non-negative and not above the configured per-call maximum

    Returns:
        Dictionary with computed kinematic quantities and a ``suggestions``
        list, or a dictionary with ``error`` and ``message`` keys on failure
    """
    # Validate path
    try:
        validated_path = self.path_validator.validate_path(path)
    except Exception as e:
        return {
            "error": "invalid_path",
            "message": str(e),
        }

    # Validate computations
    if not computations or not isinstance(computations, list):
        return {
            "error": "invalid_parameter",
            "message": "computations must be a non-empty list",
        }

    # Validate each computation: a dict carrying name, type and particles.
    for comp in computations:
        if not isinstance(comp, dict):
            return {
                "error": "invalid_parameter",
                "message": "Each computation must be a dictionary",
            }
        if "name" not in comp:
            return {
                "error": "invalid_parameter",
                "message": "Each computation must have a 'name' field",
            }
        for field in ("type", "particles"):
            if field not in comp:
                return {
                    "error": "invalid_parameter",
                    "message": f"Computation '{comp.get('name')}' must have a '{field}' field",
                }

    # Validate limit against the configured per-call maximum. A negative
    # value would pass the upper-bound check, so it is rejected explicitly
    # instead of being forwarded to the analysis operations.
    if limit is not None:
        if limit < 0:
            return {
                "error": "invalid_parameter",
                "message": "limit must be a non-negative integer",
            }
        max_rows = self.config.limits.max_rows_per_call
        if limit > max_rows:
            return {
                "error": "limit_exceeded",
                "message": f"Limit cannot exceed {max_rows:,} entries",
            }

    # Compute kinematics
    try:
        result = self.analysis_ops.compute_kinematics(
            path=str(validated_path),
            tree_name=tree,
            computations=computations,
            selection=selection,
            limit=limit,
        )
    except ValueError as e:
        return {
            "error": "invalid_parameter",
            "message": str(e),
        }
    except KeyError as e:
        return {
            "error": "branch_not_found",
            "message": f"Required branch not found: {e}",
            "suggestion": "Use list_branches() to see available branches",
        }
    except Exception as e:
        # Unexpected failure: keep the traceback in the log.
        logger.exception("Failed to compute kinematics: %s", e)
        return {
            "error": "computation_error",
            "message": f"Failed to compute kinematics: {e}",
        }

    # Add suggestions. Names and types are coerced to str so a non-string
    # value cannot crash this step after the computation already succeeded.
    comp_names = [str(c["name"]) for c in computations]
    suggestions = [
        f"Computed {len(comp_names)} kinematic quantities: {', '.join(comp_names)}",
        f"Processed {result['metadata']['entries_processed']:,} entries",
    ]
    if selection:
        suggestions.append("Selection was applied during computation")

    # Suggest next steps based on computation type
    if any("mass" in str(c["type"]) for c in computations):
        suggestions.append(
            "Use compute_histogram() to visualize mass distributions or "
            "compute_histogram_2d() for Dalitz plots"
        )
    result["suggestions"] = suggestions
    return result