chore: backtest engine fee model, metrics, and strategy fixes

- main.py: add IBKR-style tiered fee schedule (fee_base + fee_per_share), PIT universe support, and open-to-close execution improvements
- metrics.py: add raw_summary helper for JSON-safe metric export
- Misc strategy fixes: deprecation warnings, NaN handling

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-21 20:57:56 +08:00
parent 1f50253d13
commit 149a00c458
7 changed files with 154 additions and 74 deletions
--- a/strategies/ensemble_alpha.py
+++ b/strategies/ensemble_alpha.py
@@ -84,17 +84,6 @@ class EnsembleAlphaStrategy(Strategy):
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        signals = raw.div(row_sums, axis=0).fillna(0.0)

-        # === Tail-risk protection ===
-        if self.tail_protection:
-            # Portfolio equity proxy: equal-weight market return
-            mkt_ret = ret.mean(axis=1)
-            mkt_eq = (1 + mkt_ret).cumprod()
-            mkt_dd = mkt_eq / mkt_eq.cummax() - 1
-            in_tail = mkt_dd < self.tail_threshold
-            scale = pd.Series(1.0, index=data.index)
-            scale[in_tail] = self.tail_scale
-            signals = signals.mul(scale, axis=0)
-
        # === Monthly rebalance ===
        warmup = 252
        rebal_mask = pd.Series(False, index=data.index)
@@ -105,6 +94,20 @@ class EnsembleAlphaStrategy(Strategy):
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0

+        # === Tail-risk protection (rebal-gated; NaN-safe market mean) ===
+        # Apply AFTER ffill so the scale changes only at rebal points,
+        # otherwise daily flips force half-out/half-in trades that burn
+        # the account through fixed per-trade fees.
+        if self.tail_protection:
+            mkt_ret = ret.mean(axis=1, skipna=True)
+            mkt_eq = (1 + mkt_ret.fillna(0.0)).cumprod()
+            mkt_dd = mkt_eq / mkt_eq.cummax() - 1
+            in_tail = mkt_dd < self.tail_threshold
+            scale_raw = pd.Series(1.0, index=data.index)
+            scale_raw[in_tail] = self.tail_scale
+            scale = scale_raw.where(rebal_mask, np.nan).ffill().fillna(1.0)
+            signals = signals.mul(scale, axis=0)
+
        return signals.shift(1).fillna(0.0)


@@ -162,16 +165,6 @@ class EnhancedFactorComboStrategy(Strategy):
        row_sums = raw.sum(axis=1).replace(0, np.nan)
        signals = raw.div(row_sums, axis=0).fillna(0.0)

-        # Tail protection
-        if self.tail_protection:
-            mkt_ret = ret.mean(axis=1)
-            mkt_eq = (1 + mkt_ret).cumprod()
-            mkt_dd = mkt_eq / mkt_eq.cummax() - 1
-            in_tail = mkt_dd < -0.15
-            scale = pd.Series(1.0, index=data.index)
-            scale[in_tail] = 0.5
-            signals = signals.mul(scale, axis=0)
-
        # Monthly rebalance
        warmup = 252
        rebal_mask = pd.Series(False, index=data.index)
@@ -182,6 +175,17 @@ class EnhancedFactorComboStrategy(Strategy):
        signals = signals.ffill().fillna(0.0)
        signals.iloc[:warmup] = 0.0

+        # Tail protection (rebal-gated to avoid daily-flip turnover)
+        if self.tail_protection:
+            mkt_ret = ret.mean(axis=1, skipna=True)
+            mkt_eq = (1 + mkt_ret.fillna(0.0)).cumprod()
+            mkt_dd = mkt_eq / mkt_eq.cummax() - 1
+            in_tail = mkt_dd < -0.15
+            scale_raw = pd.Series(1.0, index=data.index)
+            scale_raw[in_tail] = 0.5
+            scale = scale_raw.where(rebal_mask, np.nan).ffill().fillna(1.0)
+            signals = signals.mul(scale, axis=0)
+
        return signals.shift(1).fillna(0.0)


@@ -230,31 +234,41 @@ class RiskManagedEnsembleStrategy(Strategy):
        # Step 1: Get raw signals from the ensemble (already shifted by 1)
        raw = self.ensemble.generate_signals(data)

-        # Step 2: Compute MARKET returns (equal-weight of all stocks)
-        daily_rets = data.pct_change().fillna(0.0)
-        mkt_rets = daily_rets.mean(axis=1)
+        # Step 2: Compute MARKET returns over valid (non-masked) columns.
+        # `daily_rets` keeps NaN for PIT-masked tickers so they don't dilute
+        # the cross-sectional mean to ~0.
+        daily_rets = data.pct_change()
+        mkt_rets = daily_rets.mean(axis=1, skipna=True)

        # Step 3: Market drawdown dampener
-        mkt_eq = (1 + mkt_rets).cumprod()
-        mkt_dd = mkt_eq / mkt_eq.cummax() - 1  # always ≤ 0
-        # Linear: at DD=0 → 1.0, at DD=-dd_denom → dd_floor
-        dd_scale = (1.0 + mkt_dd / self.dd_denom).clip(lower=self.dd_floor, upper=1.0)
-        dd_scale_lagged = dd_scale.shift(1).fillna(1.0)  # PIT
+        mkt_eq = (1 + mkt_rets.fillna(0.0)).cumprod()
+        mkt_dd = mkt_eq / mkt_eq.cummax() - 1
+        dd_scale_raw = (1.0 + mkt_dd / self.dd_denom).clip(
+            lower=self.dd_floor, upper=1.0,
+        )

-        # Step 4: Vol spike guard (uses portfolio's own vol for specificity)
+        # Step 4: Vol spike guard from portfolio returns (NaN-aware sum)
        if self.vol_spike_guard:
-            port_rets = (raw * daily_rets).sum(axis=1)
+            port_rets = (raw * daily_rets).sum(axis=1, min_count=1).fillna(0.0)
            short_vol = port_rets.rolling(self.vol_spike_window, min_periods=5).std() * np.sqrt(252)
            vol_90th = short_vol.rolling(self.vol_spike_lookback, min_periods=126).quantile(0.90)
            in_spike = short_vol > vol_90th
-            vol_scale = pd.Series(1.0, index=data.index)
-            vol_scale[in_spike] = self.vol_spike_floor
-            vol_scale_lagged = vol_scale.shift(1).fillna(1.0)  # PIT
+            vol_scale_raw = pd.Series(1.0, index=data.index)
+            vol_scale_raw[in_spike] = self.vol_spike_floor
        else:
-            vol_scale_lagged = 1.0
+            vol_scale_raw = pd.Series(1.0, index=data.index)
+
+        # Step 5: Combined scaling, sampled at the inner ensemble's rebal
+        # cadence so we don't trade in/out daily (which would incur huge
+        # fixed-fee costs).
+        combined = (dd_scale_raw * vol_scale_raw).shift(1).fillna(1.0)
+        rebal_freq = getattr(self.ensemble, "rebal_freq", 21)
+        warmup = 252
+        rebal_mask = pd.Series(False, index=data.index)
+        rebal_indices = list(range(warmup, len(data), rebal_freq))
+        rebal_mask.iloc[rebal_indices] = True
+        final_scale = combined.where(rebal_mask, np.nan).ffill().fillna(1.0)

-        # Step 5: Combined scaling
-        final_scale = dd_scale_lagged * vol_scale_lagged
        return raw.mul(final_scale, axis=0)


@@ -342,26 +356,31 @@ class SharpeBoostedEnsembleStrategy(Strategy):
        signals = signals.shift(1).fillna(0.0)  # PIT: 1-day execution lag

        # === Asymmetric vol scaling ===
-        # Only reduce exposure when vol is high AND returns are negative
-        # High vol + positive returns = riding a trend, don't cut
-        daily_rets = data.pct_change().fillna(0.0)
-        port_rets = (signals * daily_rets).sum(axis=1)
+        # NB: scales are RE-EVALUATED only on rebalance days. Daily flips of
+        # asym/dd scales would force half-in/half-out trades each session,
+        # burning the account through fixed per-trade fees ($2 US / $5 CN).
+        # Use cross-sectional mean of non-masked returns so PIT-masked
+        # NaN→0 fills don't dilute the market signal.
+        daily_rets = data.pct_change()
+        port_rets = (signals * daily_rets).sum(axis=1, min_count=1).fillna(0.0)
        short_vol = port_rets.rolling(20, min_periods=10).std() * np.sqrt(252)
        vol_median = short_vol.rolling(252, min_periods=126).median()
        recent_ret = port_rets.rolling(20, min_periods=10).sum()
        high_vol_neg = (short_vol > vol_median * 1.5) & (recent_ret < 0)
-        asym_scale = pd.Series(1.0, index=data.index)
-        asym_scale[high_vol_neg] = self.asym_vol_floor
-        signals = signals.mul(asym_scale.shift(1).fillna(1.0), axis=0)  # PIT
+        asym_scale_raw = pd.Series(1.0, index=data.index)
+        asym_scale_raw[high_vol_neg] = self.asym_vol_floor

-        # === Light market-DD dampener ===
-        # Uses market (not strategy) drawdown to avoid negative feedback loop
-        mkt_rets = daily_rets.mean(axis=1)
-        mkt_eq = (1 + mkt_rets).cumprod()
+        # === Market-DD dampener (rebal-gated, NaN-aware market mean) ===
+        mkt_rets = daily_rets.mean(axis=1, skipna=True)
+        mkt_eq = (1 + mkt_rets.fillna(0.0)).cumprod()
        mkt_dd = mkt_eq / mkt_eq.cummax() - 1
-        dd_scale = (1.0 + mkt_dd / self.dd_denom).clip(
+        dd_scale_raw = (1.0 + mkt_dd / self.dd_denom).clip(
            lower=self.dd_floor, upper=1.0
        )
-        signals = signals.mul(dd_scale.shift(1).fillna(1.0), axis=0)  # PIT
+
+        # Sample scales at rebal points only, then step-hold between rebals.
+        combined = (asym_scale_raw * dd_scale_raw).shift(1).fillna(1.0)
+        rebal_scale = combined.where(rebal_mask, np.nan).ffill().fillna(1.0)
+        signals = signals.mul(rebal_scale, axis=0)

        return signals