Files
Stock-trading-programming/app/ml/features.py
T

68 lines
1.5 KiB
Python

from __future__ import annotations
from typing import Iterable
import pandas as pd
# Label columns; used as the default `targets` of select_feature_columns,
# which keeps them out of the returned feature list.
LABEL_COLUMNS = {"label_stop_loss", "label_win"}

# Exact column names that select_feature_columns never returns, whatever
# their dtype.
# NOTE(review): the grouping below is inferred from the names only (row
# identifiers, timestamps, trade outcome, earlier model output) -- confirm
# against the table schema.
EXCLUDED_COLUMNS = {
    # identifiers / descriptive fields
    "id",
    "trade_id",
    "ticker",
    "name",
    "strategy",
    "reason",
    "source_file",
    # dates and timestamps
    "date",
    "entry_time",
    "exit_time",
    "sample_time",
    "created_at",
    # trade outcome
    "exit_price",
    "exit_reason",
    "pnl",
    # previously stored model output
    "ai_win_score",
    "ai_stop_loss_score",
    "ai_model_version",
}

# Any column whose name starts with one of these prefixes is skipped by
# select_feature_columns (passed as a tuple to str.startswith).
EXCLUDED_PREFIXES = ("price_", "ret_", "mfe_", "mae_")
def select_feature_columns(df: pd.DataFrame, targets: Iterable[str] = LABEL_COLUMNS) -> list[str]:
    """Return the sorted names of the numeric columns of ``df`` usable as features.

    A column is kept when all of the following hold:

    * its name is not in ``EXCLUDED_COLUMNS`` and is not one of ``targets``;
    * its name does not start with any prefix in ``EXCLUDED_PREFIXES``;
    * ``pd.api.types.is_numeric_dtype`` reports its dtype as numeric.

    Args:
        df: Frame whose columns are inspected. Column labels are expected to
            be strings, since ``str.startswith`` is called on each label.
        targets: Label column names to leave out in addition to
            ``EXCLUDED_COLUMNS``. A single name may be given as a plain string.

    Returns:
        The selected column names, sorted.
    """
    # A bare string is itself an iterable of characters; wrap it so that
    # ``set(targets)`` excludes the named column rather than its letters.
    if isinstance(targets, str):
        targets = (targets,)

    excluded = EXCLUDED_COLUMNS | set(targets)
    numeric_columns = [
        column
        for column in df.columns
        if (
            column not in excluded
            and not column.startswith(EXCLUDED_PREFIXES)
            and pd.api.types.is_numeric_dtype(df[column])
        )
    ]
    return sorted(numeric_columns)
def build_feature_matrix(
    df: pd.DataFrame,
    feature_columns: list[str],
    medians: dict[str, float] | None = None,
) -> tuple[pd.DataFrame, dict[str, float]]:
    """Build a fully numeric, gap-free feature frame from ``df``.

    The frame is reindexed to exactly ``feature_columns`` (columns absent from
    ``df`` come back as all-missing), every column is coerced with
    ``pd.to_numeric(errors="coerce")`` so unparseable values become NaN, and
    the remaining gaps are filled first from ``medians`` and then with ``0.0``.

    Args:
        df: Source rows.
        feature_columns: Columns, in order, that the result must contain.
        medians: Per-column fill values. When ``None`` they are computed from
            the coerced frame, with ``0.0`` substituted for any median that is
            itself missing.

    Returns:
        A ``(features, medians)`` pair: the filled frame and the fill values
        that were applied (the caller's mapping when one was supplied).
    """
    matrix = df.reindex(columns=feature_columns).apply(pd.to_numeric, errors="coerce")

    if medians is None:
        medians = {}
        for name, median in matrix.median(numeric_only=True).items():
            # A column with no usable values has a NaN median; fall back to 0.
            medians[name] = 0.0 if pd.isna(median) else float(median)

    # Second fillna covers columns that have no entry in `medians`.
    filled = matrix.fillna(medians).fillna(0.0)
    return filled, medians