Reinforcement learning lets an AI learn trading strategies on its own. This chapter implements a complete RL trading system and reproduces a real case with a 53% annualized return.

Reinforcement learning learns an optimal policy by interacting with an environment:
Agent ──> Action ──> Environment
  ↑                       │
  └── Reward <── State <──┘
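In code, that feedback loop is just a step loop. Here is a minimal sketch, assuming a hypothetical gym-style environment with the classic API (reset returns an observation, step returns a 4-tuple, as the trading environment below does) and a random policy standing in for a trained agent:

import gym

def run_episode(env: gym.Env) -> float:
    """One pass through the agent-environment loop with a random policy."""
    obs = env.reset()                                # initial state
    done, total_reward = False, 0.0
    while not done:
        action = env.action_space.sample()           # agent picks an action (random here)
        obs, reward, done, info = env.step(action)   # environment returns next state + reward
        total_reward += reward                       # rewards accumulate into the episode return
    return total_reward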
Core elements: the agent, the environment, states, actions, and rewards. For trading, the first step is to wrap the market data in a gym-style environment:
import gym
import numpy as np
from gym import spaces


class StockTradingEnv(gym.Env):
    """Stock trading environment."""

    def __init__(self, df, initial_balance=100000):
        super().__init__()
        self.df = df
        self.initial_balance = initial_balance
        # Action space: buy / hold / sell (0 = hold, 1 = buy, 2 = sell)
        self.action_space = spaces.Discrete(3)
        # State space: a fixed-length feature vector
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32
        )

    def step(self, action):
        # Execute the action
        if action == 1:    # buy
            self._buy()
        elif action == 2:  # sell
            self._sell()
        # Compute the reward, then advance to the next bar
        reward = self._calculate_reward()
        self.current_step += 1
        self.done = self.current_step >= len(self.df) - 1
        return self._get_state(), reward, self.done, {}
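The excerpt references several helpers (`_buy`, `_sell`, `_calculate_reward`, `_get_state`) and a `reset` method that are not shown. Below is a minimal sketch of what they might look like, assuming a single instrument with a `close` column in `df`, all-in buys and full liquidation on sells, and a reward equal to the change in total portfolio value; all of these choices are assumptions for illustration, not the book's implementation:

    def reset(self):
        # Illustrative reset: flat position, full cash, start of the data
        self.balance = self.initial_balance
        self.shares = 0
        self.current_step = 0
        self.done = False
        self.prev_net_worth = self.initial_balance
        return self._get_state()

    def _buy(self):
        # Spend all available cash at the current close (no fees modeled)
        price = self.df['close'].iloc[self.current_step]
        qty = int(self.balance // price)
        self.balance -= qty * price
        self.shares += qty

    def _sell(self):
        # Liquidate the whole position at the current close
        price = self.df['close'].iloc[self.current_step]
        self.balance += self.shares * price
        self.shares = 0

    def _calculate_reward(self):
        # Reward = change in net worth (cash + position value) since the last step
        price = self.df['close'].iloc[self.current_step]
        net_worth = self.balance + self.shares * price
        reward = net_worth - self.prev_net_worth
        self.prev_net_worth = net_worth
        return reward

    def _get_state(self):
        # Illustrative 10-dim state: 8 recent closes scaled by the latest close,
        # plus cash and position value relative to the initial balance
        closes = self.df['close'].iloc[max(0, self.current_step - 7):self.current_step + 1].values
        prices = np.ones(8, dtype=np.float32)
        prices[-len(closes):] = closes / closes[-1]
        extras = [self.balance / self.initial_balance,
                  self.shares * closes[-1] / self.initial_balance]
        return np.concatenate([prices, extras]).astype(np.float32)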
import random
from collections import deque

import torch
import torch.nn as nn


class DQNetwork(nn.Module):
    """Q-network: maps a state to one Q-value per action."""

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim)
        )

    def forward(self, x):
        return self.fc(x)
class DQNAgent:
    """DQN agent with an epsilon-greedy policy and experience replay."""

    def __init__(self, state_dim, action_dim):
        self.action_dim = action_dim
        self.policy_net = DQNetwork(state_dim, action_dim)
        self.target_net = DQNetwork(state_dim, action_dim)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.memory = deque(maxlen=10000)

    def act(self, state, epsilon=0.1):
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        if random.random() < epsilon:
            return random.randrange(self.action_dim)
        with torch.no_grad():
            q_values = self.policy_net(state)
        return q_values.argmax().item()

    def replay(self, batch_size=64):
        # Experience-replay training step
        batch = random.sample(self.memory, batch_size)
        # training logic ...
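The training logic inside replay is elided in the excerpt. Here is a minimal sketch of a standard DQN update, assuming the memory holds (state, action, reward, next_state, done) tuples, numpy is imported as np, and the agent owns an optimizer (e.g. torch.optim.Adam over policy_net's parameters); none of that is shown above, so treat this as one possible implementation:

    def replay(self, batch_size=64, gamma=0.99):
        # Skip training until the buffer holds at least one full batch
        if len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.as_tensor(np.array(states), dtype=torch.float32)
        actions = torch.as_tensor(actions, dtype=torch.int64).unsqueeze(1)
        rewards = torch.as_tensor(rewards, dtype=torch.float32)
        next_states = torch.as_tensor(np.array(next_states), dtype=torch.float32)
        dones = torch.as_tensor(dones, dtype=torch.float32)

        # Q(s, a) for the actions actually taken
        q_values = self.policy_net(states).gather(1, actions).squeeze(1)
        # Bellman target from the frozen target network
        with torch.no_grad():
            next_q = self.target_net(next_states).max(1).values
            targets = rewards + gamma * next_q * (1 - dones)

        # Assumes self.optimizer was created in __init__ (not shown in the excerpt)
        loss = nn.functional.mse_loss(q_values, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # In practice the target network is synced with policy_net every N updates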
class ActorCritic(nn.Module):
    """Actor-critic network: a policy head and a value head over the same state input."""

    def __init__(self, state_dim, action_dim):
        super().__init__()
        # Actor (policy network): outputs action probabilities
        self.actor = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.Tanh(),
            nn.Linear(64, action_dim),
            nn.Softmax(dim=-1)
        )
        # Critic (value network): estimates the state value
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.Tanh(),
            nn.Linear(64, 1)
        )
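The excerpt stops at the two heads. One way to finish the module is to return an action distribution and a value estimate, and sample an action for a single (unbatched) state; this is an illustrative sketch, not the book's code:

    # Possible additions to ActorCritic (illustrative)
    def forward(self, state):
        probs = self.actor(state)    # action probabilities (policy head)
        value = self.critic(state)   # state-value estimate (value head)
        return probs, value

    def act(self, state):
        probs, value = self.forward(state)
        dist = torch.distributions.Categorical(probs)
        action = dist.sample()       # sample an action from the current policy
        return action.item(), dist.log_prob(action), value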
Total return: 53.1%
Annualized return: 53.1%
Max drawdown: -10.4%
Sharpe ratio: 2.17
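For reference, metrics like these can be computed from the backtest's equity curve with the standard definitions. Below is a sketch; the function name and the 252-trading-day annualization are assumptions for illustration, not the book's exact formulas:

import numpy as np

def performance_metrics(equity, periods_per_year=252, risk_free_rate=0.0):
    """Total/annualized return, max drawdown and Sharpe ratio from an equity curve."""
    equity = np.asarray(equity, dtype=float)
    returns = equity[1:] / equity[:-1] - 1.0

    total_return = equity[-1] / equity[0] - 1.0
    years = len(returns) / periods_per_year
    annualized_return = (1.0 + total_return) ** (1.0 / years) - 1.0

    running_peak = np.maximum.accumulate(equity)
    max_drawdown = np.min(equity / running_peak - 1.0)   # most negative drawdown

    excess = returns - risk_free_rate / periods_per_year
    sharpe = np.sqrt(periods_per_year) * excess.mean() / excess.std()

    return total_return, annualized_return, max_drawdown, sharpe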
from stable_baselines3 import PPO

# Create the environment
env = StockTradingEnv(df, initial_balance=1000000)

# Create the PPO agent
model = PPO('MlpPolicy', env, learning_rate=3e-4)

# Train
model.learn(total_timesteps=200000)

# Backtest
obs = env.reset()
for _ in range(len(df)):
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:   # stop when the environment reaches the end of the data
        break
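If you want to reuse the trained policy later, stable-baselines3 models can be saved to disk and reloaded; the file name below is just an example:

# Persist the trained PPO policy and reload it for later evaluation
model.save("ppo_stock_trading")                  # writes ppo_stock_trading.zip
model = PPO.load("ppo_stock_trading", env=env)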
This article is excerpted from Chapter 8 (featured chapter ⭐) of 《AI量化交易从入门到精通》 (AI Quantitative Trading from Beginner to Mastery).
Full content in the code repository: bookwriting/part2core/part8rl/README.md
Companion code: egstrade/rl/