from __future__ import annotations from typing import Iterable import pandas as pd LABEL_COLUMNS = {"label_win", "label_stop_loss"} EXCLUDED_COLUMNS = { "id", "trade_id", "date", "ticker", "name", "entry_time", "exit_time", "sample_time", "created_at", "exit_reason", "strategy", "reason", "source_file", "ai_win_score", "ai_stop_loss_score", "ai_model_version", } def select_feature_columns(df: pd.DataFrame, targets: Iterable[str] = LABEL_COLUMNS) -> list[str]: excluded = EXCLUDED_COLUMNS | set(targets) numeric_columns = [ column for column in df.columns if column not in excluded and pd.api.types.is_numeric_dtype(df[column]) ] return sorted(numeric_columns) def build_feature_matrix( df: pd.DataFrame, feature_columns: list[str], medians: dict[str, float] | None = None, ) -> tuple[pd.DataFrame, dict[str, float]]: features = df.reindex(columns=feature_columns) features = features.apply(pd.to_numeric, errors="coerce") if medians is None: medians = { column: float(value) if pd.notna(value) else 0.0 for column, value in features.median(numeric_only=True).items() } features = features.fillna(medians).fillna(0.0) return features, medians