# 第10章:因子挖掘与Alpha策略
> 因子是量化交易的核心。本章将介绍因子库的使用方法与自动因子挖掘技术。
## 学习目标
- ✅ 理解因子的概念与分类
- ✅ 掌握Alpha101因子库
- ✅ 学会使用tsfresh自动挖掘因子
- ✅ 实现因子有效性检验
## 10.1 因子分类
### 按类型分类
- 价值因子:PE、PB、股息率
- 成长因子:营收增长率、利润增长率
- 质量因子:ROE、ROA
- 动量因子:过去N天收益率
- 波动因子:波动率、Beta
### 因子评价
def evaluate_factor(factor_values, forward_returns, n_groups=10):
    """Evaluate a factor against forward returns.

    Args:
        factor_values: pandas Series of factor exposures.
        forward_returns: pandas Series of forward returns; it is correlated
            with and grouped by ``factor_values``, so both are expected to
            share an index.
        n_groups: Number of quantile buckets requested for the grouped
            test. Defaults to 10 (deciles).

    Returns:
        dict with three entries:
            'IC': Pearson correlation of factor and forward returns.
            'Rank IC': correlation of the two series' ranks.
            'Group Returns': mean forward return per factor quantile bucket.
    """
    # IC (information coefficient)
    ic = factor_values.corr(forward_returns)

    # Rank IC: the same correlation, computed on ranks
    rank_ic = factor_values.rank().corr(forward_returns.rank())

    # Grouped test. duplicates='drop' merges identical quantile edges, so a
    # factor with many tied values yields fewer buckets instead of raising
    # "Bin edges must be unique".
    quantiles = pd.qcut(
        factor_values, n_groups, labels=False, duplicates='drop'
    )
    group_returns = forward_returns.groupby(quantiles).mean()

    return {
        'IC': ic,
        'Rank IC': rank_ic,
        'Group Returns': group_returns,
    }
## 10.2 Alpha101因子库
### WorldQuant Alpha101
class Alpha101:
    """A small subset of the WorldQuant Alpha101 factor formulas."""

    @staticmethod
    def alpha_001(close, returns):
        """Alpha#1: rank(Ts_ArgMax(SignedPower(...), 5)) - 0.5.

        Where ``returns`` is negative the 20-period rolling std of returns
        is used, otherwise ``close``; the value is squared, the position of
        the maximum inside each 5-row window is taken, and that position is
        percentile-ranked and shifted by 0.5.

        Args:
            close: Close prices (pandas Series/DataFrame or array-like with
                the same shape as ``returns``).
            returns: Period returns as a pandas Series or DataFrame.

        Returns:
            DataFrame labelled like ``returns`` (a Series input yields a
            single column named 0). ``rank`` runs with the pandas default
            axis, i.e. down each column.
        """
        cond = returns < 0
        std_20 = returns.rolling(20).std()

        # A plain square is used for SignedPower(x, 2); the two agree only
        # for non-negative x (std always is; close presumably is).
        base = np.where(cond, std_20, close)

        # np.where returns a bare ndarray, so re-attach the labels of
        # `returns`; otherwise the result cannot be aligned with the input
        # dates/columns.
        power = pd.DataFrame(
            base ** 2,
            index=returns.index,
            columns=getattr(returns, 'columns', None),
        )

        # raw=True hands each window over as an ndarray, which avoids
        # building a Series per window; argmax is positional either way.
        argmax = power.rolling(5).apply(
            lambda window: window.argmax(), raw=True
        )
        return argmax.rank(pct=True) - 0.5

    @staticmethod
    def alpha_002(close, open_price, volume):
        """Alpha#2 (simplified): -correlation(delta(log(volume), 2), (close-open)/open, 6).

        NOTE: the published formula applies rank() to both correlation
        inputs; this implementation correlates the raw series and does not
        rank them.

        Args:
            close: Close prices (pandas Series or DataFrame).
            open_price: Open prices, same shape as ``close``.
            volume: Traded volume (pandas Series or DataFrame).

        Returns:
            Negated 6-period rolling correlation between the 2-period change
            in log volume and the intraday return.
        """
        # log(0) is -inf; mask non-positive volume to NaN so those rows are
        # treated as missing by the rolling correlation instead of feeding
        # infinities into it.
        log_volume = np.log(volume.where(volume > 0))
        delta_log_vol = log_volume.diff(2)
        price_change = (close - open_price) / open_price
        return -delta_log_vol.rolling(6).corr(price_change)
## 10.3 自动因子挖掘
### tsfresh使用
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
def auto_feature_extraction(data):
    """Extract tsfresh features from ``data`` treated as one time series.

    The index is moved into the columns with ``reset_index`` and every row
    is tagged with the constant id 1, so tsfresh sees a single entity. Rows
    are ordered by the ``date`` column.

    Args:
        data: pandas DataFrame; presumably indexed by a level named
            ``date`` (or already holding a ``date`` column) -- confirm
            against callers.

    Returns:
        Whatever ``extract_features`` returns for the
        ``EfficientFCParameters`` feature set.
    """
    # One constant id => the whole frame is a single series for tsfresh.
    frame = data.reset_index().assign(id=1)

    # NOTE(review): the original comment states this can yield 5000+
    # features; the real count depends on the number of value columns.
    return extract_features(
        frame,
        column_id='id',
        column_sort='date',
        default_fc_parameters=EfficientFCParameters(),
    )
### 因子筛选
def select_factors(features, returns, threshold=0.05):
    """Keep the feature columns whose absolute IC exceeds ``threshold``.

    Args:
        features: pandas DataFrame with one candidate factor per column.
        returns: pandas Series each column is correlated against.
        threshold: Minimum absolute correlation (exclusive) for a column
            to be kept.

    Returns:
        ``features`` restricted to the kept columns, in original order.
        Columns whose correlation is NaN are dropped because the
        comparison is False for NaN.
    """
    keep = [
        name
        for name in features.columns
        if abs(features[name].corr(returns)) > threshold
    ]
    return features[keep]
## 10.4 因子组合
### IC加权
class FactorCombiner:
    """Blend several factor columns into one composite factor.

    Only ``method='ic_weight'`` is implemented; with any other method
    ``fit`` leaves ``weights`` unset.
    """

    def __init__(self, method='ic_weight'):
        self.method = method
        # Per-column weights as a NumPy array, ordered like the columns
        # passed to fit(); None until fit() has run.
        self.weights = None

    def fit(self, factors, forward_returns):
        """Learn one weight per factor column.

        For ``'ic_weight'`` each weight is the column's absolute
        correlation with ``forward_returns`` divided by the sum of all
        absolute correlations.

        Args:
            factors: pandas DataFrame, one factor per column.
            forward_returns: pandas Series the factors are scored against.

        Returns:
            self, so calls can be chained.
        """
        if self.method == 'ic_weight':
            ics = np.array(
                [
                    abs(factors[col].corr(forward_returns))
                    for col in factors.columns
                ],
                dtype=float,
            )
            # An undefined IC (e.g. a constant column) would otherwise turn
            # the sum, and with it every weight, into NaN.
            ics = np.where(np.isnan(ics), 0.0, ics)

            total = ics.sum()
            if total > 0:
                self.weights = ics / total
            elif ics.size:
                # No factor carries any signal: fall back to equal weights
                # rather than dividing by zero.
                self.weights = np.full(ics.size, 1.0 / ics.size)
            else:
                self.weights = ics
        return self

    def transform(self, factors):
        """Return the row-wise weighted sum of ``factors``.

        Weights are applied by column position, so ``factors`` must have
        its columns in the same order as the frame given to ``fit``.
        """
        return (factors * self.weights).sum(axis=1)
### 回归加权
from sklearn.linear_model import LinearRegression
def regression_combination(factors, returns):
    """Combine factors using linear-regression coefficients as weights.

    A ``LinearRegression`` is fitted of ``returns`` on ``factors``; each
    factor column is scaled by its coefficient and the scaled columns are
    summed row by row. The fitted intercept is not added.

    Args:
        factors: pandas DataFrame, one factor per column.
        returns: Regression target passed to ``LinearRegression.fit``.

    Returns:
        The row-wise sum of the coefficient-scaled factor columns.
    """
    fitted = LinearRegression().fit(factors, returns)
    scaled = factors * fitted.coef_
    return scaled.sum(axis=1)
## 10.5 实战案例
### 完整因子挖掘流程
# Step 1: generate candidate factors automatically from the price data.
features = auto_feature_extraction(price_data)

# Step 2: keep only the factors that pass the 0.03 IC threshold.
selected_features = select_factors(features, forward_returns, threshold=0.03)

# Step 3: fit the IC-weighted combiner, then blend the kept factors with it
# (fit() returns the combiner itself, so the calls can be chained).
combiner = FactorCombiner(method='ic_weight')
combined_factor = combiner.fit(selected_features, forward_returns).transform(
    selected_features
)

# Step 4: backtest signal -- 1 where the composite factor is above its
# median, else 0.
# NOTE(review): the median is taken over the full sample, so each signal
# depends on values from the whole series.
signals = combined_factor.gt(combined_factor.median()).astype(int)
---
*本文节选自《AI量化交易从入门到精通》第10章* *完整内容请访问代码仓:book_writing/part2_core/part10_alpha/README.md* *配套代码:egs_alpha/*