feat: add OHLCV market data updater

This commit is contained in:
2026-04-17 23:59:06 +08:00
parent 7239310be3
commit 3abc51e3e3
2 changed files with 183 additions and 0 deletions

View File

@@ -103,6 +103,66 @@ def _clean(data: pd.DataFrame) -> pd.DataFrame:
return data
def _clean_market_data(data: pd.DataFrame, field: str) -> pd.DataFrame:
    """Filter sparse columns out of a market panel and fill gaps where appropriate.

    Columns are kept only when more than half of their values are present;
    the number of removed columns is reported on stdout.

    For ``field == "volume"`` the filtered panel is returned untouched, so
    missing volume observations stay missing. For every other field the
    panel is forward-filled and rows that are still entirely NaN are removed.
    """
    coverage = data.notna().mean()
    keep = data.columns[coverage > 0.5]

    removed = set(data.columns).difference(keep)
    if removed:
        print(f"--- Dropped {len(removed)} tickers with >50% missing data ---")

    kept = data[keep]
    if field != "volume":
        # Carry the last known value forward, then discard rows with no data at all.
        kept = kept.ffill().dropna(how="all")
    return kept
def _merge_market_panel(existing: pd.DataFrame | None, new_data: pd.DataFrame) -> pd.DataFrame:
"""Merge new data into an existing cached panel, preserving old columns and dates."""
if existing is None or existing.empty:
merged = new_data.copy()
elif new_data.empty:
merged = existing.copy()
else:
merged = existing.combine_first(new_data)
merged.loc[new_data.index, new_data.columns] = new_data
merged = merged.sort_index()
merged = merged[~merged.index.duplicated(keep="last")]
return merged
def update_market_data(market: str, tickers: list[str], fields: list[str]) -> dict[str, pd.DataFrame]:
    """Refresh the cached panels for the requested Yahoo price/volume fields.

    Field names are matched case-insensitively against close, open, high, low
    and volume. All names are validated before any directory is created or
    any download is attempted.

    For each requested field, data starting ``365 * 10`` days before now is
    downloaded, cleaned, merged into whatever panel ``load`` returns for that
    market and field, and written as CSV to the path given by ``_data_path``.

    Returns:
        A dict mapping each lower-cased field name to its merged panel.

    Raises:
        ValueError: if a requested field is not one of the supported names.
    """
    yahoo_names = {
        "close": "Close",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "volume": "Volume",
    }

    # Single pass so the first unsupported name aborts before any I/O happens.
    requested = []
    for raw_name in fields:
        key = raw_name.lower()
        if key not in yahoo_names:
            raise ValueError(f"Unsupported market data field: {raw_name}")
        requested.append((key, yahoo_names[key]))

    os.makedirs(DATA_DIR, exist_ok=True)

    window_start = datetime.now() - timedelta(days=365 * 10)
    frames = _download(
        tickers,
        start=window_start.strftime("%Y-%m-%d"),
        fields=[column for _, column in requested],
    )

    panels = {}
    for key, column in requested:
        # A field missing from the download is treated as an empty panel.
        fresh = _clean_market_data(frames.get(column, pd.DataFrame()), key)
        cached = load(market, key)
        panel = _merge_market_panel(cached, fresh)

        target = _data_path(market, key)
        panel.to_csv(target)
        print(f"--- Saved {panel.shape[0]} days x {panel.shape[1]} tickers to {target} ---")
        panels[key] = panel
    return panels
def update(market: str, tickers: list[str],
with_open: bool = False) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
"""