因子是量化交易的核心。本章将介绍因子库的使用方法以及自动因子挖掘技术。
def evaluate_factor(factor_values, forward_returns, n_groups=10):
    """Evaluate a factor against the returns that follow it.

    Args:
        factor_values: pandas Series of factor values.
        forward_returns: pandas Series of forward returns sharing the index
            of ``factor_values``.
        n_groups: Number of quantile buckets used for the grouped backtest.
            Defaults to 10 (deciles).

    Returns:
        dict with three entries:
            'IC': Pearson correlation between factor and forward returns.
            'Rank IC': correlation between the ranks of both series.
            'Group Returns': mean forward return per quantile bucket, a
                Series indexed by bucket number (0 = lowest factor values).
                Fewer than ``n_groups`` buckets are returned when ties make
                some quantile edges coincide.
    """
    # IC (information coefficient)
    ic = factor_values.corr(forward_returns)

    # Rank IC
    rank_ic = factor_values.rank().corr(forward_returns.rank())

    # Grouped backtest. Heavily tied factor values produce repeated quantile
    # edges, which makes qcut raise unless duplicate edges are dropped.
    quantiles = pd.qcut(
        factor_values, n_groups, labels=False, duplicates='drop'
    )
    group_returns = forward_returns.groupby(quantiles).mean()

    return {
        'IC': ic,
        'Rank IC': rank_ic,
        'Group Returns': group_returns,
    }
class Alpha101:
    """Factors modelled on the WorldQuant Alpha101 factor library."""

    @staticmethod
    def alpha_001(close, returns):
        """rank(Ts_ArgMax(SignedPower(returns < 0 ? stddev(returns, 20) : close, 2), 5)) - 0.5

        Args:
            close: pandas Series or DataFrame of close prices.
            returns: pandas object of returns, shaped like ``close``.

        Returns:
            DataFrame carrying the index (and, for DataFrame input, the
            columns) of ``close``. Series input yields a single column
            named 0. The first rows are NaN until a full 5-row window exists.

        NOTE(review): the rank is taken down each column (over time); the
        published formula is usually read as a cross-sectional rank --
        confirm which one is intended.
        """
        is_down = returns < 0
        vol_20 = returns.rolling(20).std()
        # SignedPower(x, 2) is computed as a plain square, which matches only
        # for non-negative x (assumes close prices are non-negative).
        squared = np.where(is_down, vol_20, close) ** 2
        # np.where returns a bare ndarray; re-attach the labels of `close` so
        # the result can be aligned with the input data.
        frame = pd.DataFrame(
            squared,
            index=getattr(close, 'index', None),
            columns=getattr(close, 'columns', None),
        )
        # raw=True hands each window to np.argmax as an ndarray instead of
        # building a Series per window; the resulting positions are the same.
        argmax = frame.rolling(5).apply(np.argmax, raw=True)
        return argmax.rank(pct=True) - 0.5

    @staticmethod
    def alpha_002(close, open_price, volume):
        """-1 * correlation(delta(log(volume), 2), (close - open) / open, 6)

        Simplified variant of Alpha#2: the published formula,
        -1 * correlation(rank(delta(log(volume), 2)), rank((close-open)/open), 6),
        wraps both inputs in rank(); this implementation correlates the raw
        series instead.

        Args:
            close: pandas Series of close prices.
            open_price: pandas Series of open prices.
            volume: pandas Series of traded volume.

        Returns:
            Negated 6-period rolling correlation between the 2-period change
            in log volume and the intraday return. NaN wherever a window is
            incomplete or touches a bar with non-positive volume.
        """
        # log() of zero volume is -inf; mask such bars to NaN so the affected
        # windows come out as NaN rather than feeding infinities into the
        # rolling correlation.
        log_volume = np.log(volume.where(volume > 0))
        delta_log_vol = log_volume.diff(2)
        price_change = (close - open_price) / open_price
        return -delta_log_vol.rolling(6).corr(price_change)
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
def auto_feature_extraction(data):
    """Extract features automatically from a time-indexed table with tsfresh.

    Args:
        data: pandas object whose index is the time axis, or which already
            has a 'date' column. An unnamed single-level index is treated as
            the 'date' axis.

    Returns:
        The feature table produced by ``tsfresh.extract_features`` using
        ``EfficientFCParameters``.

    NOTE(review): every row is assigned the same id (1), so tsfresh sees the
    whole input as one series -- confirm that the shape of the returned
    table is what the downstream per-row IC screening expects.
    """
    df = data.reset_index()
    # reset_index() names an unnamed index 'index' (or 'level_0'), which
    # would not match column_sort='date' below; rename it so an unnamed
    # index works too.
    if (
        'date' not in df.columns
        and data.index.nlevels == 1
        and data.index.name is None
    ):
        df = df.rename(columns={df.columns[0]: 'date'})
    # Single constant id: the whole table is one entity for tsfresh.
    df['id'] = 1

    # Extract features (per the original author's note this can yield
    # thousands of candidate factors).
    features = extract_features(
        df,
        column_id='id',
        column_sort='date',
        default_fc_parameters=EfficientFCParameters(),
    )
    return features
def select_factors(features, returns, threshold=0.05):
    """Keep the feature columns whose absolute IC exceeds ``threshold``.

    Args:
        features: DataFrame with one candidate factor per column.
        returns: Series of returns to correlate each column with.
        threshold: Minimum absolute correlation a column must exceed.

    Returns:
        ``features`` restricted to the surviving columns, in original order.
        Columns whose correlation is NaN never pass the comparison.
    """
    def clears_threshold(name):
        # Strict comparison; a NaN correlation compares False and is dropped.
        return abs(features[name].corr(returns)) > threshold

    kept = [name for name in features.columns if clears_threshold(name)]
    return features[kept]
class FactorCombiner:
    """Blend several factor columns into a single composite factor.

    Only ``method='ic_weight'`` is implemented: each column is weighted by
    its absolute IC against the forward returns, normalised to sum to 1.
    """

    def __init__(self, method='ic_weight'):
        """Store the weighting method; weights stay unset until fit()."""
        self.method = method
        # numpy array of per-column weights in fit-time column order;
        # None until fit() has run.
        self.weights = None

    def fit(self, factors, forward_returns):
        """Learn the per-column weights.

        Args:
            factors: DataFrame with one factor per column.
            forward_returns: Series of forward returns aligned with
                ``factors``.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if ``method`` is not supported, or if no column has
                a usable (non-NaN, non-zero) IC to normalise by.
        """
        if self.method != 'ic_weight':
            # Previously an unknown method left weights as None and only
            # failed later, inside transform(), with an opaque TypeError.
            raise ValueError(
                f"unsupported combination method: {self.method!r}"
            )

        abs_ics = np.array(
            [abs(factors[col].corr(forward_returns)) for col in factors.columns],
            dtype=float,
        )
        # A NaN IC (e.g. from a constant column) would otherwise turn every
        # weight into NaN through the normalising sum; give it zero weight.
        abs_ics = np.where(np.isnan(abs_ics), 0.0, abs_ics)

        total = abs_ics.sum()
        if total == 0:
            raise ValueError(
                "cannot derive IC weights: no factor has a non-zero IC"
            )

        self.weights = abs_ics / total
        return self

    def transform(self, factors):
        """Return the weighted row-wise sum of ``factors``.

        Args:
            factors: DataFrame whose columns are in the same order as the
                frame passed to fit() (weights are applied by position).

        Raises:
            RuntimeError: if called before fit().
        """
        if self.weights is None:
            raise RuntimeError("FactorCombiner.transform() called before fit()")
        return (factors * self.weights).sum(axis=1)
from sklearn.linear_model import LinearRegression
def regression_combination(factors, returns):
    """Combine factors using linear-regression coefficients as weights.

    Fits ``LinearRegression`` of ``returns`` on ``factors`` and returns the
    row-wise sum of each factor column multiplied by its fitted coefficient.
    Only the coefficients are used; the fitted intercept is not added.
    """
    coefficients = LinearRegression().fit(factors, returns).coef_
    weighted = factors.mul(coefficients, axis='columns')
    return weighted.sum(axis=1)
# 1. Generate candidate factors automatically
features = auto_feature_extraction(price_data)

# 2. Keep only the factors that pass the IC screen
selected_features = select_factors(features, forward_returns, threshold=0.03)

# 3. Combine the surviving factors (fit() returns the combiner itself)
combiner = FactorCombiner(method='ic_weight').fit(selected_features, forward_returns)
combined_factor = combiner.transform(selected_features)

# 4. Backtest signal: 1 where the composite is above its median, else 0
signals = combined_factor.gt(combined_factor.median()).astype(int)
本文节选自《AI量化交易从入门到精通》第10章
完整内容请访问代码仓:bookwriting/part2core/part10alpha/README.md
配套代码:egsalpha/