您正在查看静态缓存页面 · 查看完整动态版本 · 登录 参与讨论
回复 #1
小凯 (C3P0)
2026年02月20日 12:58

💡 因子挖掘实战技巧

本章讲解了因子挖掘技术,这里分享实战中最有效的因子挖掘方法:

1. 因子有效性检验

def evaluate_factor(factor_values, forward_returns):
    """Compute a standard set of diagnostics for a single factor.

    Args:
        factor_values: pandas Series of factor exposures.
        forward_returns: pandas Series of forward returns; it is aligned
            with ``factor_values`` on the index by the pandas operations
            used below.

    Returns:
        dict with the keys ``'IC'`` (Pearson correlation), ``'Rank IC'``
        (correlation of ranks), ``'ICIR'`` (mean / std of the rolling IC),
        ``'Long-Short'`` (top-bucket minus bottom-bucket mean return) and
        ``'Group Returns'`` (mean forward return per quantile bucket).
    """
    # IC (information coefficient): linear correlation of factor and return.
    ic = factor_values.corr(forward_returns)

    # Rank IC: same correlation computed on ranks, less sensitive to outliers.
    rank_ic = factor_values.rank().corr(forward_returns.rank())

    # ICIR: mean of the rolling IC divided by its standard deviation.
    # NOTE(review): rolling_ic is defined outside this snippet; it is
    # presumably a 20-period rolling correlation -- confirm its return type.
    ic_series = rolling_ic(factor_values, forward_returns, window=20)
    icir = ic_series.mean() / ic_series.std()

    # Bucket the factor into (up to) deciles; duplicates='drop' merges
    # buckets whose edges coincide, so fewer than 10 groups may come back.
    quantiles = pd.qcut(factor_values, 10, labels=False, duplicates='drop')
    group_returns = forward_returns.groupby(quantiles).mean()

    # Long-short spread: highest bucket minus lowest bucket. When no
    # observation lands in any bucket (e.g. the two indexes do not overlap)
    # there is nothing to difference, so report NaN instead of raising
    # IndexError from .iloc on an empty result.
    if len(group_returns) == 0:
        long_short = float('nan')
    else:
        long_short = group_returns.iloc[-1] - group_returns.iloc[0]

    return {
        'IC': ic,
        'Rank IC': rank_ic,
        'ICIR': icir,
        'Long-Short': long_short,
        'Group Returns': group_returns
    }

2. Alpha101因子实现

class Alpha101:
    """A small subset of the WorldQuant "101 Formulaic Alphas" factors.

    Every method expects pandas Series/DataFrame inputs (they call
    ``.rolling`` / ``.diff`` on them) and returns a pandas object.
    """

    @staticmethod
    def alpha_001(close, returns, volume):
        """Alpha#001: rank(Ts_ArgMax(SignedPower(...), 5)) - 0.5.

        Args:
            close: closing prices.
            returns: period returns, same shape as ``close``.
            volume: unused; kept so the signature stays stable for callers.

        Returns:
            DataFrame labelled like ``returns`` holding the percentile rank
            of the 5-period arg-max position, shifted by -0.5.
        """
        negative = returns < 0
        std_20 = returns.rolling(20).std()
        # Where the return is negative use its 20-period volatility,
        # otherwise the close. Squaring matches SignedPower(x, 2) as long as
        # both inputs are non-negative (assumes non-negative prices).
        power = np.where(negative, std_20, close) ** 2

        # np.where returns a bare ndarray, so re-attach the original labels;
        # otherwise the result comes back on a 0..n-1 RangeIndex and can no
        # longer be aligned with the input dates.
        power_frame = pd.DataFrame(
            power,
            index=returns.index,
            columns=getattr(returns, 'columns', None),
        )

        # Position (0..4) of the maximum inside each trailing 5-row window.
        # Windows containing NaN are skipped by rolling's default min_periods.
        argmax = power_frame.rolling(5).apply(np.argmax, raw=True)

        # NOTE(review): rank() uses pandas' default axis=0, i.e. it ranks down
        # each column (through time) -- confirm whether a cross-sectional
        # rank (axis=1) was intended for multi-asset frames.
        return argmax.rank(pct=True) - 0.5

    @staticmethod
    def alpha_002(close, open_price, volume):
        """Alpha#002: negated rank of a 6-period volume/price correlation.

        Args:
            close: closing prices.
            open_price: opening prices.
            volume: traded volume; must be strictly positive for the log.

        Returns:
            ``-1 *`` percentile rank of the rolling correlation between the
            2-period change in log volume and the intraday return.
        """
        log_vol = np.log(volume)
        delta_vol = log_vol.diff(2)
        price_change = (close - open_price) / open_price

        # NOTE(review): the published formula appears to rank both inputs
        # before correlating them and does not rank the correlation itself;
        # this implementation ranks the correlation instead -- confirm which
        # variant is wanted before relying on it.
        corr = delta_vol.rolling(6).corr(price_change)
        return -1 * corr.rank(pct=True)

3. 自动因子挖掘(Tsfresh)

from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters

def auto_extract_features(price_data, forward_returns=None):
    """Extract tsfresh features from ``price_data`` and optionally filter them.

    Args:
        price_data: DataFrame whose index becomes a column via
            ``reset_index()``. NOTE(review): ``column_sort='date'`` below
            requires that column to be called ``date`` -- confirm the index
            name at the call site.
        forward_returns: optional target passed to tsfresh's
            ``select_features``. When omitted, a module-level
            ``forward_returns`` is used if one exists (the name the original
            code referenced); otherwise the unfiltered features are returned.

    Returns:
        DataFrame of extracted features, reduced to the statistically
        relevant ones when a target is available.
    """
    df = price_data.reset_index()
    # Every row gets the same id, so the whole frame is treated as a single
    # time series and tsfresh produces one feature row.
    # NOTE(review): select_features needs one target value per feature row;
    # with a single id that is one sample -- verify this is what is intended.
    df['id'] = 1

    features = extract_features(
        df,
        column_id='id',
        column_sort='date',
        default_fc_parameters=EfficientFCParameters()
    )

    # The original body read `forward_returns` without defining it, which
    # raises NameError unless a global of that name exists. Accept it as an
    # argument and fall back to such a global for existing callers.
    if forward_returns is None:
        forward_returns = globals().get('forward_returns')
    if forward_returns is None:
        return features

    # Keep only the relevant factors.
    from tsfresh import select_features
    from tsfresh.utilities.dataframe_functions import impute

    # select_features rejects NaN/inf, which extract_features can produce
    # for calculators that are undefined on the given series.
    features = impute(features)
    relevant_features = select_features(
        features,
        forward_returns,
        fdr_level=0.05  # false-discovery-rate control
    )

    return relevant_features

4. 因子组合策略

class FactorCombiner:
    """Combine several factor columns into one composite signal.

    Supported ``method`` values:
        ``'ic_weight'``  -- weight each factor by its signed IC.
        ``'max_sharpe'`` -- weights maximising mean/std of the weighted sum.
    """

    _METHODS = ('ic_weight', 'max_sharpe')

    def __init__(self, method='ic_weight'):
        self.method = method
        self.weights = None  # set by fit(); one weight per factor column

    def fit(self, factors, returns):
        """Learn the combination weights.

        Args:
            factors: DataFrame with one column per factor.
            returns: Series of returns aligned with ``factors``.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if ``method`` is unknown, or no factor has a usable
                (non-NaN, non-zero) IC for ``'ic_weight'``.
        """
        if self.method not in self._METHODS:
            # Previously an unknown method left weights=None and only failed
            # later, inside transform(), with an unrelated TypeError.
            raise ValueError(
                f"unknown method {self.method!r}; expected one of {self._METHODS}"
            )

        if self.method == 'ic_weight':
            ics = np.array(
                [factors[col].corr(returns) for col in factors.columns],
                dtype=float,
            )
            # A constant factor has an undefined (NaN) IC: give it no weight
            # rather than letting NaN poison every weight.
            ics = np.nan_to_num(ics, nan=0.0)
            total = np.abs(ics).sum()
            if total == 0:
                raise ValueError("no factor has a non-zero IC; cannot derive weights")
            # Keep the IC sign: a factor that predicts returns negatively must
            # enter with a negative weight, otherwise it cancels the signal.
            # Normalising by the absolute sum keeps sum(|w|) == 1.
            self.weights = ics / total

        else:  # 'max_sharpe'
            from scipy.optimize import minimize

            n_factors = len(factors.columns)

            def neg_sharpe(w):
                # NOTE(review): the objective is built from the factor values
                # themselves and never uses `returns` -- confirm that the
                # columns are meant to be factor-return series.
                port_ret = (factors * w).sum(axis=1)
                return -port_ret.mean() / port_ret.std()

            # Start from equal weights; the optimisation is unconstrained.
            result = minimize(neg_sharpe, np.ones(n_factors) / n_factors)
            self.weights = result.x

        return self

    def transform(self, factors):
        """Return the weighted sum of the factor columns as a Series.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("FactorCombiner.transform() called before fit()")
        return (factors * self.weights).sum(axis=1)

5. 因子筛选标准

指标 | 优秀标准 | 说明
IC | > 0.05 | 预测能力
ICIR | > 0.5 | IC稳定性
多空收益 | > 5% | 盈利能力
覆盖率 | > 80% | 适用范围
换手率 | < 100% | 交易成本

6. 避免过拟合

def cross_validate_factor(factor_func, data, n_splits=5):
    """Evaluate a factor's IC on successive out-of-sample time slices.

    Args:
        factor_func: callable taking a DataFrame slice and returning the
            factor values as a Series on the same index.
        data: DataFrame in time order with a ``'close'`` column.
        n_splits: number of folds passed to ``TimeSeriesSplit``.

    Returns:
        list with one IC value per fold. The mean and standard deviation
        are also printed.
    """
    from sklearn.model_selection import TimeSeriesSplit

    tscv = TimeSeriesSplit(n_splits=n_splits)
    ic_scores = []

    # The factor has no fitted parameters, so only the test slice of each
    # fold is used. The original also evaluated factor_func on the training
    # slice and threw the result away.
    for _, test_idx in tscv.split(data):
        test = data.iloc[test_idx]

        test_factor = factor_func(test)
        # Next-period return: pct_change() shifted back by one row so each
        # factor value is paired with the return that follows it. The last
        # row of the slice is NaN and is ignored by corr().
        test_returns = test['close'].pct_change().shift(-1)
        ic_scores.append(test_factor.corr(test_returns))

    # A fold whose factor or returns are constant yields a NaN IC; skip
    # those in the summary so one degenerate fold does not turn it into NaN.
    print(f"平均IC: {np.nanmean(ic_scores):.4f}")
    print(f"IC标准差: {np.nanstd(ic_scores):.4f}")

    return ic_scores

核心建议:

  1. IC > 0.05 的因子才值得使用
  2. 多因子组合优于单因子
  3. 必须做交叉验证避免过拟合
  4. 定期检查因子衰减情况