feat: add OI derived features (oi_change_ma5, oi_price_spread) to dataset builder and ML features

Add two new OI-derived features to improve the ML model's understanding of
market microstructure:
- oi_change_ma5: 5-candle moving average of OI change rate (short-term trend)
- oi_price_spread: z-scored OI minus z-scored price return (divergence signal)

Both features use a 96-candle rolling z-score window. FEATURE_COLS is expanded
from 24 to 26. Existing tests are updated to reflect the new feature counts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
21in7
2026-03-04 20:07:40 +09:00
parent 676ec6ef5e
commit ff9e639142
4 changed files with 128 additions and 13 deletions

View File

@@ -266,3 +266,74 @@ def test_stratified_undersample_preserves_signal():
signal_indices = np.where(source == "signal")[0]
for si in signal_indices:
assert si in idx, f"signal 인덱스 {si}가 누락됨"
def test_oi_derived_features_present():
    """Both OI-derived feature columns must be present in the feature frame.

    Builds 300 seeded random candles whose ``oi_change`` column is zero for the
    first 100 rows and uniformly random for the remaining 200, feeds them
    through ``_calc_indicators`` -> ``_calc_signals`` ->
    ``_calc_features_vectorized``, and checks that the ``oi_change_ma5`` and
    ``oi_price_spread`` columns exist in the result.
    """
    import numpy as np
    import pandas as pd
    from src.dataset_builder import _calc_features_vectorized, _calc_signals, _calc_indicators

    n = 300
    np.random.seed(42)

    # The seeded global RNG is consumed column by column, so the order of this
    # mapping determines the generated values and must not be rearranged.
    ohlcv_ranges = {
        "open": (1, 2),
        "high": (2, 3),
        "low": (0.5, 1),
        "close": (1, 2),
        "volume": (1000, 5000),
    }
    columns = {
        name: np.random.uniform(lower, upper, n)
        for name, (lower, upper) in ohlcv_ranges.items()
    }
    # Leading zeros followed by random OI changes.
    columns["oi_change"] = np.concatenate(
        [np.zeros(100), np.random.uniform(-0.05, 0.05, 200)]
    )
    candles = pd.DataFrame(columns)

    with_indicators = _calc_indicators(candles)
    signals = _calc_signals(with_indicators)
    features = _calc_features_vectorized(with_indicators, signals)

    assert "oi_change_ma5" in features.columns, "oi_change_ma5 컬럼이 없음"
    assert "oi_price_spread" in features.columns, "oi_price_spread 컬럼이 없음"
def test_oi_derived_features_nan_when_no_oi():
    """Without an ``oi_change`` input column, both derived features are all-NaN.

    Builds 200 seeded random OHLCV candles with no ``oi_change`` column, runs
    them through ``_calc_indicators`` -> ``_calc_signals`` ->
    ``_calc_features_vectorized``, and checks that every value of
    ``oi_change_ma5`` and ``oi_price_spread`` is NaN.
    """
    import numpy as np
    import pandas as pd
    from src.dataset_builder import _calc_features_vectorized, _calc_signals, _calc_indicators

    n = 200
    np.random.seed(0)

    # The seeded global RNG is consumed column by column, so the order of this
    # mapping determines the generated values and must not be rearranged.
    ohlcv_ranges = {
        "open": (1, 2),
        "high": (2, 3),
        "low": (0.5, 1),
        "close": (1, 2),
        "volume": (1000, 5000),
    }
    candles = pd.DataFrame(
        {
            name: np.random.uniform(lower, upper, n)
            for name, (lower, upper) in ohlcv_ranges.items()
        }
    )

    with_indicators = _calc_indicators(candles)
    signals = _calc_signals(with_indicators)
    features = _calc_features_vectorized(with_indicators, signals)

    ma5_missing = features["oi_change_ma5"].isna()
    spread_missing = features["oi_price_spread"].isna()
    assert ma5_missing.all(), "oi_change 컬럼 없을 때 oi_change_ma5는 전부 nan이어야 함"
    assert spread_missing.all(), "oi_change 컬럼 없을 때 oi_price_spread는 전부 nan이어야 함"
def test_oi_price_spread_is_continuous():
    """``oi_price_spread`` must be a continuous value, not a binary flag.

    Builds 300 seeded random candles with a fully random ``oi_change`` column,
    runs them through ``_calc_indicators`` -> ``_calc_signals`` ->
    ``_calc_features_vectorized``, and checks that the non-NaN
    ``oi_price_spread`` values contain more than two distinct values.
    """
    import numpy as np
    import pandas as pd
    from src.dataset_builder import _calc_features_vectorized, _calc_signals, _calc_indicators

    n = 300
    np.random.seed(42)

    # The seeded global RNG is consumed column by column, so the order of this
    # mapping determines the generated values and must not be rearranged.
    column_ranges = {
        "open": (1, 2),
        "high": (2, 3),
        "low": (0.5, 1),
        "close": (1, 2),
        "volume": (1000, 5000),
        "oi_change": (-0.05, 0.05),
    }
    candles = pd.DataFrame(
        {
            name: np.random.uniform(lower, upper, n)
            for name, (lower, upper) in column_ranges.items()
        }
    )

    with_indicators = _calc_indicators(candles)
    signals = _calc_signals(with_indicators)
    features = _calc_features_vectorized(with_indicators, signals)

    # Drop NaN rows before counting distinct values.
    spread_values = features["oi_price_spread"].dropna()
    distinct_values = spread_values.unique()
    assert len(distinct_values) > 2, "oi_price_spread는 연속값이어야 함 (2개 초과 유니크 값)"

View File

@@ -21,17 +21,17 @@ def _make_df(n=10, base_price=1.0):
})
def test_build_features_with_btc_eth_has_24_features():
def test_build_features_with_btc_eth_has_26_features():
xrp_df = _make_df(10, base_price=1.0)
btc_df = _make_df(10, base_price=50000.0)
eth_df = _make_df(10, base_price=3000.0)
features = build_features(xrp_df, "LONG", btc_df=btc_df, eth_df=eth_df)
assert len(features) == 24
assert len(features) == 26
def test_build_features_without_btc_eth_has_16_features():
def test_build_features_without_btc_eth_has_18_features():
xrp_df = _make_df(10, base_price=1.0)
features = build_features(xrp_df, "LONG")
assert len(features) == 16
assert len(features) == 18
def test_build_features_btc_ret_1_correct():
xrp_df = _make_df(10, base_price=1.0)
@@ -51,8 +51,9 @@ def test_build_features_rs_zero_when_btc_ret_zero():
assert features["xrp_btc_rs"] == 0.0
def test_feature_cols_has_24_items():
"""Legacy test — updated to 26 after OI derived features added."""
from src.ml_features import FEATURE_COLS
assert len(FEATURE_COLS) == 24
assert len(FEATURE_COLS) == 26
def make_df(n=100):
@@ -139,3 +140,31 @@ def test_build_features_defaults_to_zero_when_not_provided(sample_df_with_indica
feat = build_features(sample_df_with_indicators, signal="LONG")
assert feat["oi_change"] == pytest.approx(0.0)
assert feat["funding_rate"] == pytest.approx(0.0)
def test_feature_cols_has_26_items():
    """``FEATURE_COLS`` must contain exactly 26 entries."""
    from src.ml_features import FEATURE_COLS

    expected_count = 26
    actual_count = len(FEATURE_COLS)
    assert actual_count == expected_count
def test_build_features_with_oi_derived_params():
    """Explicit ``oi_change_ma5`` / ``oi_price_spread`` arguments show up in the features.

    Calls ``build_features`` with BTC/ETH frames plus all four OI/funding
    keyword arguments and checks that the two derived values come back
    (approximately) unchanged under their own keys.
    """
    # Frames are created in the same order as before: XRP, then BTC, then ETH.
    xrp_candles = _make_df(10, base_price=1.0)
    reference_frames = {
        "btc_df": _make_df(10, base_price=50000.0),
        "eth_df": _make_df(10, base_price=3000.0),
    }
    oi_inputs = {
        "oi_change": 0.05,
        "funding_rate": 0.0002,
        "oi_change_ma5": 0.03,
        "oi_price_spread": 0.12,
    }

    result = build_features(xrp_candles, "LONG", **reference_frames, **oi_inputs)

    for key in ("oi_change_ma5", "oi_price_spread"):
        assert result[key] == pytest.approx(oi_inputs[key])
def test_build_features_oi_derived_defaults_to_zero():
    """Omitting ``oi_change_ma5`` / ``oi_price_spread`` yields 0.0 for both features."""
    result = build_features(_make_df(10, base_price=1.0), "LONG")

    for key in ("oi_change_ma5", "oi_price_spread"):
        assert result[key] == pytest.approx(0.0)