""" Export entry snapshots joined with trade outcomes for later model training. Usage: python scripts/export_training_dataset.py python scripts/export_training_dataset.py data/training_dataset.csv """ import csv import os import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) from app.db.models import get_conn, init_db DEFAULT_OUT = ROOT / "data" / "training_dataset.csv" def export_csv(out_path: Path): init_db() out_path.parent.mkdir(parents=True, exist_ok=True) query = """ WITH post AS ( SELECT trade_id, MAX(CASE WHEN elapsed_sec = 60 THEN current_price END) AS price_1m, MAX(CASE WHEN elapsed_sec = 60 THEN return_pct END) AS ret_1m, MAX(CASE WHEN elapsed_sec = 60 THEN mfe_pct END) AS mfe_1m, MAX(CASE WHEN elapsed_sec = 60 THEN mae_pct END) AS mae_1m, MAX(CASE WHEN elapsed_sec = 180 THEN current_price END) AS price_3m, MAX(CASE WHEN elapsed_sec = 180 THEN return_pct END) AS ret_3m, MAX(CASE WHEN elapsed_sec = 180 THEN mfe_pct END) AS mfe_3m, MAX(CASE WHEN elapsed_sec = 180 THEN mae_pct END) AS mae_3m, MAX(CASE WHEN elapsed_sec = 300 THEN current_price END) AS price_5m, MAX(CASE WHEN elapsed_sec = 300 THEN return_pct END) AS ret_5m, MAX(CASE WHEN elapsed_sec = 300 THEN mfe_pct END) AS mfe_5m, MAX(CASE WHEN elapsed_sec = 300 THEN mae_pct END) AS mae_5m, MAX(CASE WHEN elapsed_sec = 600 THEN current_price END) AS price_10m, MAX(CASE WHEN elapsed_sec = 600 THEN return_pct END) AS ret_10m, MAX(CASE WHEN elapsed_sec = 600 THEN mfe_pct END) AS mfe_10m, MAX(CASE WHEN elapsed_sec = 600 THEN mae_pct END) AS mae_10m FROM post_entry_snapshots GROUP BY trade_id ) SELECT s.trade_id, s.date, s.ticker, s.name, s.entry_time, s.current_price, s.entry_price, s.target_price, s.stop_price, s.today_open, s.prev_high, s.prev_low, s.prev_amount, s.volume, s.change_pct, s.market_sentiment, s.sentiment_score, s.risk_level, s.trade_allowed, s.hot_sectors, s.avoid_sectors, s.boosted_tickers, s.blacklist_tickers, s.ai_boosted, s.ai_win_score, s.ai_stop_loss_score, s.ai_model_version, s.position_size_multiplier, s.combined_multiplier, s.entry_reason, post.price_1m, post.ret_1m, post.mfe_1m, post.mae_1m, post.price_3m, post.ret_3m, post.mfe_3m, post.mae_3m, post.price_5m, post.ret_5m, post.mfe_5m, post.mae_5m, post.price_10m, post.ret_10m, post.mfe_10m, post.mae_10m, t.exit_time, t.exit_price, t.quantity, t.exit_reason, t.pnl, CASE WHEN t.pnl > 0 THEN 1 ELSE 0 END AS label_win, CASE WHEN t.exit_reason = 'SL' THEN 1 ELSE 0 END AS label_stop_loss FROM entry_snapshots s LEFT JOIN trades t ON t.id = s.trade_id LEFT JOIN post ON post.trade_id = s.trade_id ORDER BY s.date, s.entry_time, s.ticker """ with get_conn() as conn: conn.row_factory = None cur = conn.execute(query) headers = [d[0] for d in cur.description] rows = cur.fetchall() with out_path.open("w", newline="", encoding="utf-8-sig") as f: writer = csv.writer(f) writer.writerow(headers) writer.writerows(rows) print(f"exported {len(rows)} rows -> {out_path}") if __name__ == "__main__": target = Path(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_OUT if not target.is_absolute(): target = ROOT / target export_csv(target)