[2026-05-27] 포맷 후 복구 설치 스크립트 추가
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Collect daily Korean-market features for model training.
|
||||
|
||||
Outputs:
|
||||
data/external/daily/YYYYMMDD/stocks.csv
|
||||
data/external/daily/YYYYMMDD/indexes.csv
|
||||
"""
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Directory two levels above this file's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Base directory under which each day's output folder is created.
OUT_ROOT = ROOT.joinpath("data", "external", "daily")
|
||||
|
||||
|
||||
def _yyyymmdd(date_text: str | None) -> str:
|
||||
if date_text:
|
||||
return date_text.replace("-", "")
|
||||
return datetime.now().strftime("%Y%m%d")
|
||||
|
||||
|
||||
def _standardize_stock_ohlcv(df: pd.DataFrame, date_yyyymmdd: str) -> pd.DataFrame:
    """Rename a per-ticker OHLCV frame to the standard column layout.

    The index is moved into the first column and labelled ``ticker``. The
    columns after it are renamed purely by position to ``open``, ``high``,
    ``low``, ``close``, ``volume``, ``amount`` and ``change_pct`` (as many
    as are present). A leading ``date`` column holding ``date_yyyymmdd`` is
    added, and every column outside this set is dropped.

    Args:
        df: Raw frame whose index identifies the ticker.
        date_yyyymmdd: Value written into the ``date`` column of every row.

    Returns:
        A frame with columns ordered ``date``, ``ticker``, then the
        standard fields that exist.
    """
    standard = ("open", "high", "low", "close", "volume", "amount", "change_pct")

    frame = df.reset_index()
    names = list(frame.columns)
    # Positional mapping: first column is the former index, the rest follow
    # the standard order. Later entries win if a source name repeats.
    mapping = {names[0]: "ticker", **dict(zip(names[1:], standard))}

    frame = frame.rename(columns=mapping)
    frame.insert(0, "date", date_yyyymmdd)

    wanted = [col for col in ("date", "ticker", *standard) if col in frame.columns]
    return frame[wanted]
|
||||
|
||||
|
||||
def _standardize_index_row(row: dict, date_yyyymmdd: str, code: str, name: str) -> dict:
    """Build one standardized record for a market index from a raw OHLCV row.

    Values are matched to output keys by the source column label when any
    label is recognised. This keeps each value under the right key if the
    source adds, drops or reorders columns. In particular, a seventh column
    holding listed market cap is stored as ``market_cap`` rather than being
    mislabelled ``change_pct``. When no label is recognised, the values are
    assigned by position to ``open``, ``high``, ``low``, ``close``,
    ``volume``, ``amount`` and ``change_pct``, with extra values ignored.

    Args:
        row: Mapping of source column label to value for a single day.
        date_yyyymmdd: Date stored under ``date``.
        code: Index code stored under ``code``.
        name: Index name stored under ``name``.

    Returns:
        A dict starting with ``date``, ``code`` and ``name`` followed by
        the recognised fields in the order they appear in ``row``.
    """
    # NOTE(review): these Korean labels are the ones pykrx is believed to
    # use for OHLCV frames -- confirm against the installed pykrx version.
    known = {
        "시가": "open",
        "고가": "high",
        "저가": "low",
        "종가": "close",
        "거래량": "volume",
        "거래대금": "amount",
        "등락률": "change_pct",
        "상장시가총액": "market_cap",
    }
    positional = ("open", "high", "low", "close", "volume", "amount", "change_pct")

    fields = {known[label]: value for label, value in row.items() if label in known}
    if not fields:
        # Unknown labelling: keep the original position-based behavior.
        fields = dict(zip(positional, row.values()))

    return {"date": date_yyyymmdd, "code": code, "name": name, **fields}
|
||||
|
||||
|
||||
def collect_with_pykrx(date_yyyymmdd: str, out_dir: Path):
    """Fetch one day's stock and index data via pykrx and save it as CSV.

    Writes ``stocks.csv`` and ``indexes.csv`` into ``out_dir``. The
    directory itself is not created here. A failure while fetching an
    individual index is reported on stderr and that index is skipped.

    Args:
        date_yyyymmdd: Target date as ``YYYYMMDD``.
        out_dir: Directory that receives the two CSV files.

    Returns:
        A ``(stock_row_count, index_row_count)`` tuple.

    Raises:
        RuntimeError: If pykrx cannot be imported.
    """
    # Imported lazily so a missing dependency gives an actionable message.
    try:
        from pykrx import stock
    except ImportError as exc:
        raise RuntimeError("pykrx is not installed. Install requirements first.") from exc

    raw_quotes = stock.get_market_ohlcv_by_ticker(date_yyyymmdd, market="ALL")
    stocks = _standardize_stock_ohlcv(raw_quotes, date_yyyymmdd)
    stocks.to_csv(out_dir / "stocks.csv", index=False, encoding="utf-8-sig")

    # Index code -> display name, processed in this order.
    targets = {"1001": "KOSPI", "2001": "KOSDAQ"}
    index_rows = []
    for code, name in targets.items():
        try:
            frame = stock.get_index_ohlcv_by_date(date_yyyymmdd, date_yyyymmdd, code)
            if frame.empty:
                continue
            latest = frame.iloc[-1].to_dict()
            index_rows.append(_standardize_index_row(latest, date_yyyymmdd, code, name))
        except Exception as exc:
            # Best effort: one failing index must not abort the whole run.
            print(f"index fetch failed {name}: {exc}", file=sys.stderr)

    indexes = pd.DataFrame(index_rows)
    indexes.to_csv(out_dir / "indexes.csv", index=False, encoding="utf-8-sig")
    return len(stocks), len(index_rows)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Reads the optional ``--date`` argument, creates the dated output
    directory under ``OUT_ROOT``, runs the pykrx collection and prints a
    one-line summary of what was saved.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--date", help="YYYY-MM-DD or YYYYMMDD. Defaults to today.")
    options = cli.parse_args()

    date_yyyymmdd = _yyyymmdd(options.date)

    # The directory must exist before collection writes its CSV files.
    out_dir = OUT_ROOT / date_yyyymmdd
    out_dir.mkdir(parents=True, exist_ok=True)

    counts = collect_with_pykrx(date_yyyymmdd, out_dir)
    stock_count, index_count = counts
    print(f"saved daily features: stocks={stock_count}, indexes={index_count}, dir={out_dir}")


# Run the collector only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user