<!DOCTYPE html><html lang="zh-CN"><head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>LSE(学习自进化)强化学习框架深度研究</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/js/all.min.js"></script>
<link href="https://fonts.googleapis.com/css2?family=Playfair+Display:ital,wght@0,400;0,700;1,400&family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet"/>
<script src="https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.min.js"></script>
<style>
/* Design tokens: the earth/ocean palette plus text and surface colours,
   exposed as custom properties so the component rules can share them. */
:root {
  --earth-primary: #8B7355;
  --earth-secondary: #D4C5B9;
  --earth-accent: #A0927B;
  --ocean-primary: #4A7C7E;
  --ocean-light: #7BA7A8;
  --text-primary: #2C2C2C;
  --text-secondary: #6B6B6B;
  --bg-primary: #FEFCFA;
  --bg-secondary: #F5F2ED;
}

/* Palette utilities.
   The markup uses Tailwind-style class names for the custom colours above,
   but no Tailwind theme extension is visible in this part of the file, so
   the CDN build would emit nothing for them and white text placed on these
   "backgrounds" would be unreadable. Defining them here keeps the classes
   working regardless. NOTE(review): if a tailwind.config is added later,
   keep its colour values in sync with the tokens above. */
.text-ocean-primary {
  color: var(--ocean-primary);
}
.text-earth-primary {
  color: var(--earth-primary);
}
.bg-ocean-primary {
  background-color: var(--ocean-primary);
}
/* Compound selector so this outranks the generic .bg-gradient-to-r utility,
   whose gradient stops are undefined for these custom colour names. */
.bg-gradient-to-r.from-ocean-light.to-ocean-primary {
  background-image: linear-gradient(to right, var(--ocean-light), var(--ocean-primary));
}
/* Page defaults: Inter body copy in the primary text colour on the
   off-white page background. */
body {
  color: var(--text-primary);
  background-color: var(--bg-primary);
  font-family: 'Inter', sans-serif;
}

/* Opt-in display serif (used on headings that carry this class). */
.serif-display {
  font-family: 'Playfair Display', serif;
}

/* Table-of-contents rail pinned to the left edge of the viewport; it scrolls
   independently when its content is taller than the screen. */
.toc-sidebar {
  position: fixed;
  top: 0;
  left: 0;
  z-index: 1000;
  width: 280px;
  height: 100vh;
  padding: 2rem 1.5rem;
  overflow-y: auto;
  color: #fff;
  background: linear-gradient(135deg, var(--earth-primary) 0%, var(--earth-accent) 100%);
}

/* The main column is offset by the rail width (280px) so the fixed rail
   does not cover it. */
.main-content {
  min-height: 100vh;
  margin-left: 280px;
}
/* Hero banner: diagonal gradient from light earth through earth to ocean.
   Overflow is clipped and the box is a positioning context. */
.hero-section {
  position: relative;
  overflow: hidden;
  color: #fff;
  background: linear-gradient(135deg, var(--earth-secondary) 0%, var(--earth-primary) 50%, var(--ocean-primary) 100%);
}

/* Dark, lightly blurred veil over the hero background. */
.hero-overlay {
  background: rgba(0, 0, 0, 0.2);
  backdrop-filter: blur(2px);
}

/* Two columns (2fr / 1fr) by two rows: the title fills the whole left column,
   the visual and stats panels stack in the right column. */
.bento-grid {
  display: grid;
  grid-template-columns: 2fr 1fr;
  grid-template-rows: auto auto;
  gap: 2rem;
  height: 60vh;
}

/* Left column, both rows; content is centred vertically. */
.hero-title {
  display: flex;
  flex-direction: column;
  justify-content: center;
  grid-column: 1;
  grid-row: 1 / span 2;
}

/* Right column, top row: frosted-glass panel. */
.hero-visual {
  grid-column: 2;
  grid-row: 1;
  border: 1px solid rgba(255, 255, 255, 0.2);
  border-radius: 1rem;
  background: linear-gradient(45deg, rgba(255, 255, 255, 0.1), rgba(255, 255, 255, 0.05));
  backdrop-filter: blur(10px);
}

/* Right column, bottom row: translucent stats panel. */
.hero-stats {
  grid-column: 2;
  grid-row: 2;
  padding: 1.5rem;
  border-radius: 1rem;
  background: rgba(255, 255, 255, 0.1);
}
/* Section heading wrapper with an ocean-coloured accent bar on the left. */
.section-header {
  margin: 3rem 0 2rem;
  padding-left: 1.5rem;
  border-left: 4px solid var(--ocean-primary);
}

/* Inline citation marker: dotted underline at rest, filled chip on hover. */
.citation-link {
  border-bottom: 1px dotted var(--ocean-primary);
  color: var(--ocean-primary);
  font-weight: 500;
  text-decoration: none;
}
.citation-link:hover {
  padding: 0.125rem 0.25rem;
  border-radius: 0.25rem;
  color: #fff;
  background-color: var(--ocean-light);
}

/* Call-out panel on an ocean gradient with white text. */
.highlight-box {
  margin: 2rem 0;
  padding: 2rem;
  border-radius: 1rem;
  color: #fff;
  background: linear-gradient(135deg, var(--ocean-light) 0%, var(--ocean-primary) 100%);
}

/* Framed monospace box used for formulas. */
.formula-box {
  margin: 1.5rem 0;
  padding: 1.5rem;
  border: 2px solid var(--earth-accent);
  border-radius: 0.75rem;
  background: var(--bg-secondary);
  font-family: 'Courier New', monospace;
}
/* Sidebar entry. The transparent left border reserves room for the
   hover/active marker so the text does not shift when it appears. */
.toc-link {
  display: block;
  padding: 0.5rem 0 0.5rem 1rem;
  border-left: 3px solid transparent;
  color: rgba(255, 255, 255, 0.8);
  text-decoration: none;
  transition: all 0.3s ease;
}

/* Hovered entry, or the entry carrying the .active class. */
.toc-link:hover,
.toc-link.active {
  border-left-color: var(--ocean-light);
  border-radius: 0.5rem;
  color: #fff;
  background: rgba(255, 255, 255, 0.1);
}

/* Group label shown above a run of sidebar entries. */
.toc-section {
  margin-top: 1rem;
  margin-bottom: 0.5rem;
  color: #fff;
  font-weight: 600;
}
/* Italic pull quote on a tinted card with an ocean accent bar.
   A single padding shorthand is used: the uniform 2rem is what actually
   applied before, because the shorthand came after a separate padding-left. */
.pull-quote {
  margin: 2rem 0;
  padding: 2rem;
  border-left: 4px solid var(--ocean-primary);
  border-radius: 0.75rem;
  color: var(--earth-primary);
  background: var(--bg-secondary);
  font-size: 1.25rem;
  font-style: italic;
  line-height: 1.6;
}
/* Mermaid diagram styles */

/* Card that frames a diagram. Overflow is clipped, and the box is a
   positioning context for the absolutely positioned control bar. */
.mermaid-container {
  position: relative;
  display: flex;
  justify-content: center;
  min-height: 300px;
  max-height: 800px;
  margin: 30px 0;
  padding: 30px;
  overflow: hidden;
  border: 2px solid #e5e7eb;
  border-radius: 12px;
  background: #fff;
  box-shadow: 0 8px 25px rgba(0, 0, 0, 0.08);
}

/* The diagram layer: centred, transformed around its centre, with animated
   transform changes and a grab cursor. */
.mermaid-container .mermaid {
  display: flex;
  justify-content: center;
  align-items: center;
  width: 100%;
  max-width: 100%;
  height: 100%;
  cursor: grab;
  transform-origin: center center;
  transition: transform 0.3s ease;
  touch-action: none; /* suppress default touch gestures on the diagram */
  -webkit-user-select: none; /* prevent text selection */
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
}

/* Rendered SVG is centred and never wider than its layer. */
.mermaid-container .mermaid svg {
  display: block;
  max-width: 100%;
  height: 100%;
  margin: 0 auto;
}

/* Cursor feedback while the pointer is pressed on the diagram. */
.mermaid-container .mermaid:active {
  cursor: grabbing;
}

/* Applies when the container carries the .zoomed class. */
.mermaid-container.zoomed .mermaid {
  width: 100%;
  height: 100%;
  cursor: grab;
}
/* Floating control bar in the top-right corner of the diagram card. */
.mermaid-controls {
  position: absolute;
  top: 15px;
  right: 15px;
  z-index: 20;
  display: flex;
  gap: 10px;
  padding: 8px;
  border-radius: 8px;
  background: rgba(255, 255, 255, 0.95);
  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}

/* Control button, at least 36px wide and 36px tall, content centred. */
.mermaid-control-btn {
  display: flex;
  align-items: center;
  justify-content: center;
  min-width: 36px;
  height: 36px;
  padding: 10px;
  border: 1px solid #d1d5db;
  border-radius: 6px;
  color: #374151;
  background: #fff;
  font-size: 14px;
  text-align: center;
  cursor: pointer;
  transition: all 0.2s ease;
}

/* Hover: blue accent and a 1px lift. */
.mermaid-control-btn:hover {
  border-color: #3b82f6;
  color: #3b82f6;
  background: #f8fafc;
  transform: translateY(-1px);
}

/* Pressed: slight shrink. */
.mermaid-control-btn:active {
  transform: scale(0.95);
}
/* Viewports up to 1024px wide: the sidebar moves off-canvas and the layout
   collapses to one column.
   The at-rule keyword must be a bare "@media". It had been wrapped in stray
   HTML markup, which a CSS parser reads as an invalid selector, so the whole
   block was discarded and none of these rules applied. */
@media (max-width: 1024px) {
  /* Slide the rail out of view; the .open class slides it back in
     (presumably toggled by script, which is not visible here). */
  .toc-sidebar {
    transform: translateX(-100%);
    transition: transform 0.3s ease;
  }
  .toc-sidebar.open {
    transform: translateX(0);
  }

  /* With the rail hidden, the content takes the full width. */
  .main-content {
    margin-left: 0;
  }

  /* Stack the hero pieces vertically: title, visual, stats. */
  .bento-grid {
    grid-template-columns: 1fr;
    grid-template-rows: auto auto auto;
    height: auto;
  }
  .hero-title {
    grid-column: 1;
    grid-row: 1;
  }
  .hero-visual {
    grid-column: 1;
    grid-row: 2;
  }
  .hero-stats {
    grid-column: 1;
    grid-row: 3;
  }

  /* Show only the reset control, moved to the bottom-right corner. */
  .mermaid-control-btn:not(.reset-zoom) {
    display: none;
  }
  .mermaid-controls {
    top: auto;
    bottom: 15px;
    right: 15px;
  }
}
/* Responsive adjustments for small screens (up to 768px wide).
   The at-rule keyword must be a bare "@media"; wrapped in HTML markup it is
   parsed as an invalid selector and the whole block is dropped. */
@media (max-width: 768px) {
  /* Side padding inside the hero grid. */
  .hero-section .bento-grid {
    padding-left: 1rem;
    padding-right: 1rem;
  }

  /* Scale the hero heading and lead paragraph down. */
  .hero-title h1 {
    font-size: 2.25rem;
  }
  .hero-title p {
    font-size: 1.125rem;
  }

  /* Stats grid becomes a single column. */
  .hero-stats .grid {
    grid-template-columns: 1fr;
  }
}
/* Viewports up to 480px wide: one more step down for the hero typography.
   The at-rule keyword must be a bare "@media"; wrapped in HTML markup it is
   parsed as an invalid selector and the whole block is dropped. */
@media (max-width: 480px) {
  .hero-title h1 {
    font-size: 1.75rem;
  }
  .hero-title p {
    font-size: 1rem;
  }
}
</style>
<base target="_blank">
</head>
<body>
<!-- Table of Contents Sidebar -->
<nav class="toc-sidebar" aria-label="目录导航">
  <!-- In-page table of contents.
       The document sets a default link target of _blank via the base element,
       so every same-page anchor here opts back into the current tab with
       target="_self"; otherwise each click would open a duplicate tab. -->
  <div class="mb-8">
    <h2 class="text-xl font-bold serif-display mb-2">目录导航</h2>
  </div>
  <div class="toc-section">框架概述</div>
  <!-- NOTE(review): no element with id="hero" is visible in this part of the
       file; confirm the target exists or retarget this entry. -->
  <a href="#hero" class="toc-link" target="_self">研究背景</a>
  <a href="#executive-summary" class="toc-link" target="_self">执行摘要</a>
  <div class="toc-section">核心技术</div>
  <a href="#single-step-rl" class="toc-link" target="_self">单步强化学习目标</a>
  <a href="#tree-search" class="toc-link" target="_self">树状引导搜索</a>
  <a href="#delta-reward" class="toc-link" target="_self">增量奖励机制</a>
  <div class="toc-section">实验验证</div>
  <a href="#experiments" class="toc-link" target="_self">性能突破</a>
  <a href="#ablation" class="toc-link" target="_self">关键消融实验</a>
  <div class="toc-section">结论展望</div>
  <a href="#conclusion" class="toc-link" target="_self">研究意义</a>
  <div class="mt-8 pt-8 border-t border-white/20">
    <p class="text-xs text-white/60">基于最新学术研究成果</p>
    <!-- NOTE(review): the cited sources carry 2026 identifiers; confirm the
         report year shown below. -->
    <p class="text-xs text-white/60 mt-1">2024年度技术报告</p>
  </div>
</nav>
<!-- Main Content -->
<main class="main-content">
<!-- Executive Summary -->
<section id="executive-summary" class="py-16 px-8 max-w-7xl mx-auto">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">执行摘要</h2>
</div>
<div class="grid lg:grid-cols-3 gap-8 mb-12">
<div class="lg:col-span-2">
<div class="pull-quote">
"LSE框架通过单步改进量奖励显式训练模型掌握自我进化技能,核心是将奖励定义为编辑前后的性能差值,配合树状UCB搜索实现测试时的灵活探索。"
</div>
<p class="text-lg leading-relaxed mb-6">
当前大语言模型普遍面临知识冻结和经验丢失的"静态出厂"瓶颈。本研究介绍的LSE(学习自进化)强化学习框架,将复杂的自我改进过程简化为单步强化学习目标,显式训练模型掌握进化技能。
</p>
<p class="text-lg leading-relaxed mb-6">
技术核心在于采用树状引导搜索平衡探索与利用,并引入基于增量(Delta)的奖励机制,仅对真实的性能进步给予正向反馈,有效避免了传统绝对分值奖励导致的优化陷阱。
</p>
</div>
<div class="bg-gray-50 p-6 rounded-lg">
<h3 class="text-xl font-semibold mb-4 text-gray-800">关键创新点</h3>
<ul class="space-y-3 text-gray-700">
<li class="flex items-start">
<i class="fas fa-lightbulb text-yellow-500 mt-1 mr-3"></i>
<span>单步强化学习目标简化</span>
</li>
<li class="flex items-start">
<i class="fas fa-tree text-green-500 mt-1 mr-3"></i>
<span>树状UCB搜索算法</span>
</li>
<li class="flex items-start">
<i class="fas fa-chart-line text-blue-500 mt-1 mr-3"></i>
<span>增量式奖励机制</span>
</li>
<li class="flex items-start">
<i class="fas fa-exchange-alt text-purple-500 mt-1 mr-3"></i>
<span>跨模型迁移能力</span>
</li>
</ul>
</div>
</div>
<div class="highlight-box">
<h3 class="text-2xl font-bold mb-4">突破性成果</h3>
<div class="grid md:grid-cols-2 gap-8">
<div>
<h4 class="text-xl font-semibold mb-3">性能超越</h4>
<p class="mb-4">经LSE训练的4B参数模型在SQL生成等任务上性能超越了GPT-5等顶级闭源模型,且具备跨模型迁移能力。</p>
<p>这为构建能够动态适应环境、具备自我迭代能力的智能系统提供了实证支持与技术路径。</p>
</div>
<div>
<h4 class="text-xl font-semibold mb-3">技术贡献</h4>
<p class="mb-4">实验表明,相同预算下改进量奖励变体始终优于绝对奖励变体,验证了增量奖励设计的有效性。</p>
<p>LSE最引人注目的发现是跨模型迁移能力:经LSE训练的4B模型生成的进化指令,可直接应用于7B模型,使其准确率提升6.7个百分点(57.7% → 64.4%)。</p>
</div>
</div>
</div>
</section>
<!-- Single-Step RL Objective -->
<section id="single-step-rl" class="py-16 px-8 max-w-7xl mx-auto bg-gray-50">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">单步强化学习目标的实现机制</h2>
</div>
<div class="grid lg:grid-cols-2 gap-12 mb-12">
<div>
<h3 class="text-2xl font-semibold mb-6">从多步到单步的简化策略</h3>
<p class="text-lg leading-relaxed mb-6">
大语言模型部署后面临的"静态出厂"瓶颈,根源在于传统训练范式将能力固化于参数空间,无法根据实际交互经验动态调整。现有自改进方法如Reflexion、TextGrad等完全依赖模型固有的推理能力,从未针对"如何根据失败案例修改指令"这一特定技能进行显式训练<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[58]</a>
<a href="https://chatpaper.com/zh-CN/paper/254630" class="citation-link">[67]</a>。
</p>
<div class="formula-box">
<h4 class="font-semibold mb-3">原始多步进化目标:</h4>
<div class="text-center text-lg">
max<sub>f<sub>ψ</sub></sub> Σ<sub>t=0</sub><sup>T</sup> γ<sup>t</sup> R̄(c<sub>t</sub>)
</div>
<p class="text-sm mt-3 text-gray-600">面临严重的长期信用分配困难</p>
</div>
</div>
<div>
<!-- Illustration for the single-step RL section. Generator metadata is kept
     in data-* attributes (size/aspect/query are not valid img attributes),
     and the image loads lazily because it sits below the fold. -->
<img src="https://kimi-web-img.moonshot.cn/img/img-blog.csdnimg.cn/79b2149e207f54f3f06a08c4a3e6bb5ecba3283c.jpeg" alt="强化学习智能体与环境交互示意图" class="w-full rounded-lg shadow-lg" data-size="medium" data-aspect="wide" data-query="强化学习智能体环境交互" referrerpolicy="no-referrer" loading="lazy" decoding="async" data-modified="1" data-score="0.00"/>
<div class="mt-6 p-6 bg-white rounded-lg shadow-md">
<h4 class="font-semibold mb-3 text-ocean-primary">LSE简化策略</h4>
<p class="text-gray-700">将时间范围压缩至 T=1,采用上下文赌博机框架,使每个编辑决策获得即时、明确的反馈信号。</p>
</div>
</div>
</div>
<div class="bg-white p-8 rounded-lg shadow-md mb-8">
<h3 class="text-2xl font-semibold mb-6">单步目标的数学表述</h3>
<div class="grid md:grid-cols-2 gap-8">
<div>
<p class="text-lg leading-relaxed mb-4">
LSE的单步强化学习目标具有精确的数学结构。自进化策略 f<sub>ψ</sub> 接收当前上下文 c<sub>0</sub> 和性能摘要 S<sub>0</sub> 作为输入,输出新上下文 c<sub>1</sub> ~ f<sub>ψ</sub>(·|c<sub>0</sub>, S<sub>0</sub>),并立即获得奖励反馈。
</p>
<p class="text-gray-600">
性能摘要 S<sub>0</sub> 通常包含验证集上的准确率、错误模式分析、代表性失败案例等结构化信息。
</p>
</div>
<div class="formula-box">
<h4 class="font-semibold mb-3">核心奖励函数:</h4>
<div class="text-center text-xl font-bold text-ocean-primary mb-4">
r<sub>LSE</sub> = R̄(c<sub>1</sub>) - R̄(c<sub>0</sub>)
</div>
<p class="text-sm text-gray-600">
改进量奖励设计具有深刻的激励相容性:即使当前上下文性能很高,只要存在改进空间,正向奖励仍然可能。
</p>
</div>
</div>
</div>
<div class="grid lg:grid-cols-3 gap-6">
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-lg font-semibold mb-3 text-earth-primary">
<i class="fas fa-cogs mr-2"></i>策略网络架构
</h4>
<p class="text-gray-700">
采用Qwen3-4B-Instruct作为主干模型,输入端融合当前上下文和性能摘要,通过温度采样调节探索程度。
</p>
</div>
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-lg font-semibold mb-3 text-earth-primary">
<i class="fas fa-database mr-2"></i>训练数据构建
</h4>
<p class="text-gray-700">
通过与环境交互动态生成上下文-奖励对序列,固定验证集D的规模通常为5-10个样本,每个样本评估8次生成取平均。
</p>
</div>
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-lg font-semibold mb-3 text-earth-primary">
<i class="fas fa-chart-line mr-2"></i>参数优化方法
</h4>
<p class="text-gray-700">
采用策略梯度方法,学习率1×10<sup>-5</sup>,每批次采样32个节点,基于开发集选择最优检查点。
</p>
</div>
</div>
</section>
<!-- Tree-Guided Search -->
<section id="tree-search" class="py-16 px-8 max-w-7xl mx-auto">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">树状引导搜索的操作机制</h2>
</div>
<div class="mb-12">
<div class="mermaid-container">
<div class="mermaid-controls">
  <!-- Diagram toolbar. Each button is icon-only, so it gets an explicit
       accessible name via aria-label (title alone is not reliably announced),
       and type="button" so it can never act as a form submit control. -->
  <button type="button" class="mermaid-control-btn zoom-in" title="放大" aria-label="放大">
    <i class="fas fa-search-plus"></i>
  </button>
  <button type="button" class="mermaid-control-btn zoom-out" title="缩小" aria-label="缩小">
    <i class="fas fa-search-minus"></i>
  </button>
  <button type="button" class="mermaid-control-btn reset-zoom" title="重置" aria-label="重置">
    <i class="fas fa-expand-arrows-alt"></i>
  </button>
  <button type="button" class="mermaid-control-btn fullscreen" title="全屏查看" aria-label="全屏查看">
    <i class="fas fa-expand"></i>
  </button>
</div>
<div class="mermaid" id="tree-diagram">
graph TD
A["根节点 c₀"] --> B["节点1 c₁"]
A --> C["节点2 c₂"]
A --> D["节点3 c₃"]
B --> E["节点4 c₄"]
B --> F["节点5 c₅"]
C --> G["节点6 c₆"]
D --> H["节点7 c₇"]
D --> I["节点8 c₈"]
style A fill:#4A7C7E,stroke:#2C2C2C,stroke-width:3px,color:#fff
style B fill:#7BA7A8,stroke:#2C2C2C,stroke-width:2px,color:#fff
style C fill:#7BA7A8,stroke:#2C2C2C,stroke-width:2px,color:#fff
style D fill:#7BA7A8,stroke:#2C2C2C,stroke-width:2px,color:#fff
style E fill:#A0927B,stroke:#2C2C2C,stroke-width:1px,color:#fff
style F fill:#A0927B,stroke:#2C2C2C,stroke-width:1px,color:#fff
style G fill:#A0927B,stroke:#2C2C2C,stroke-width:1px,color:#fff
style H fill:#A0927B,stroke:#2C2C2C,stroke-width:1px,color:#fff
style I fill:#A0927B,stroke:#2C2C2C,stroke-width:1px,color:#fff
</div>
</div>
</div>
<div class="grid lg:grid-cols-2 gap-12 mb-12">
<div>
<h3 class="text-2xl font-semibold mb-6">进化树的结构与维护</h3>
<p class="text-lg leading-relaxed mb-6">
LSE在测试阶段维护进化树 G,每个节点存储四元组 (c<sub>n</sub>, S<sub>n</sub>, R̄<sub>n</sub>, v<sub>n</sub>),共同支持高效搜索决策<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[58]</a>
<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[102]</a>。
</p>
<div class="bg-gray-50 p-6 rounded-lg">
<h4 class="font-semibold mb-4">节点信息存储</h4>
<div class="space-y-3">
<div class="flex justify-between">
<span class="font-medium">上下文内容 c<sub>n</sub></span>
<span class="text-gray-600">完整提示文本</span>
</div>
<div class="flex justify-between">
<span class="font-medium">性能摘要 S<sub>n</sub></span>
<span class="text-gray-600">验证集评估结果</span>
</div>
<div class="flex justify-between">
<span class="font-medium">平均奖励估计 R̄<sub>n</sub></span>
<span class="text-gray-600">固定验证集性能</span>
</div>
<div class="flex justify-between">
<span class="font-medium">访问计数 v<sub>n</sub></span>
<span class="text-gray-600">被选择扩展次数</span>
</div>
</div>
</div>
</div>
<div>
<!-- Illustration for the tree-search section. Generator metadata is kept in
     data-* attributes (size/aspect/query are not valid img attributes), and
     the image loads lazily because it sits below the fold. -->
<img src="https://kimi-web-img.moonshot.cn/img/developer.qcloudimg.com/3e75903161179559e4859ead10e0df251aabbbb4.jpg" alt="树搜索算法示意图" class="w-full rounded-lg shadow-lg mb-6" data-size="medium" data-aspect="wide" data-query="树搜索算法" referrerpolicy="no-referrer" loading="lazy" decoding="async" data-modified="1" data-score="0.00"/>
<div class="highlight-box">
<h4 class="text-xl font-semibold mb-3">树结构的核心优势</h4>
<p class="text-lg">
通过回溯高绩效祖先节点,系统能从失败探索中恢复,避免线性链的不可逆缺陷,实现"自愈"能力。
</p>
</div>
</div>
</div>
<div class="bg-white p-8 rounded-lg shadow-md mb-8">
<h3 class="text-2xl font-semibold mb-6">UCB算法的应用</h3>
<div class="grid md:grid-cols-2 gap-8">
<div class="formula-box">
<h4 class="font-semibold mb-3">UCB选择公式:</h4>
<div class="text-center text-lg mb-4">
n* = argmax<sub>n∈G</sub> (R̄<sub>n</sub> + C√(ln N/v<sub>n</sub>))
</div>
<div class="text-sm text-gray-600 space-y-1">
<div>• 利用项 R̄<sub>n</sub>:倾向历史表现好的节点</div>
<div>• 探索项 C√(ln N/v<sub>n</sub>):激励未充分探索节点</div>
</div>
</div>
<div>
<p class="text-lg leading-relaxed mb-4">
UCB算法由利用项和探索项组成,对数项确保探索奖励随总迭代次数缓慢增长,分母 v<sub>n</sub> 使访问次数较少的节点获得更大权重<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[58]</a>
<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[102]</a>。
</p>
<div class="bg-gray-50 p-4 rounded-lg">
<h5 class="font-semibold mb-2">参数C的调节</h5>
<p class="text-sm text-gray-700">
C=0退化为纯贪婪选择,C→∞接近均匀随机探索。实践中适中值(如C=2)表现最佳。
</p>
</div>
</div>
</div>
</div>
<div class="grid md:grid-cols-2 gap-8">
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-lg font-semibold mb-4 text-red-600">
<i class="fas fa-times-circle mr-2"></i>线性链策略缺陷
</h4>
<ul class="space-y-2 text-gray-700">
<li>• 单一路径,无错误恢复能力</li>
<li>• 错误累积导致性能崩盘</li>
<li>• 仅限于顺序局部搜索</li>
<li>• 无最优性保证</li>
</ul>
</div>
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-lg font-semibold mb-4 text-green-600">
<i class="fas fa-check-circle mr-2"></i>UCB树搜索优势
</h4>
<ul class="space-y-2 text-gray-700">
<li>• 分支树形,多路径并行</li>
<li>• 通过回溯快速恢复</li>
<li>• 全局自适应平衡探索</li>
<li>• 渐进最优(UCB理论)</li>
</ul>
</div>
</div>
<div class="pull-quote mt-8">
"BIRD Card Games数据集上的性能恢复曲线验证了树结构的核心优势:线性链因一次错误编辑准确率从~60%断崖跌至~20%,后续迭代在劣质基础上持续恶化;UCB树搜索遭遇类似挫折后,通过选择机制回溯到历史高值节点,数轮内重回~60%并继续提升。"
</div>
</section>
<!-- Delta Reward Mechanism -->
<section id="delta-reward" class="py-16 px-8 max-w-7xl mx-auto bg-gray-50">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">基于增量(Delta)的奖励机制</h2>
</div>
<div class="grid lg:grid-cols-2 gap-12 mb-12">
<div>
<h3 class="text-2xl font-semibold mb-6">绝对分值奖励的优化陷阱</h3>
<p class="text-lg leading-relaxed mb-6">
传统RL方法在自我进化场景中采用绝对分值奖励 r<sub>abs</sub> = R̄(c<sub>1</sub>),存在多重深层缺陷<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[58]</a>
<a href="https://www.wispaper.ai/en/blog/learning-to-self-evolve-20260321/zho" class="citation-link">[99]</a>。
</p>
<div class="space-y-4">
<div class="bg-red-50 border-l-4 border-red-400 p-4">
<h4 class="font-semibold text-red-800">高初始性能上下文的学习抑制</h4>
<p class="text-red-700 text-sm mt-1">
若策略发现R̄(c<sub>high</sub>) = 90%的上下文,此后任何修改都可能导致奖励下降,策略陷入"舒适区"。
</p>
</div>
<div class="bg-orange-50 border-l-4 border-orange-400 p-4">
<h4 class="font-semibold text-orange-800">任务难度差异导致的奖励偏差</h4>
<p class="text-orange-700 text-sm mt-1">
BIRD基准各数据库的Seed Prompt基线性能从52.3%到65.3%不等,绝对奖励使策略倾向"挑选容易任务"。
</p>
</div>
</div>
</div>
<div>
<h3 class="text-2xl font-semibold mb-6">改进量奖励的优势分析</h3>
<p class="text-lg leading-relaxed mb-6">
LSE的改进量奖励 r<sub>LSE</sub> = R̄(c<sub>1</sub>) - R̄(c<sub>0</sub>) 从根本上规避上述陷阱,具有以下关键优势<a href="https://arxiv.org/pdf/2603.18620" class="citation-link">[58]</a>
<a href="https://www.wispaper.ai/en/blog/learning-to-self-evolve-20260321/zho" class="citation-link">[99]</a>。
</p>
<div class="space-y-4">
<div class="bg-green-50 border-l-4 border-green-400 p-4">
<h4 class="font-semibold text-green-800">难度无关的公平比较</h4>
<p class="text-green-700 text-sm mt-1">
减去初始性能自动归一化任务难度,困难任务+5%与简单任务+5%获同等奖励。
</p>
</div>
<div class="bg-blue-50 border-l-4 border-blue-400 p-4">
<h4 class="font-semibold text-blue-800">持续探索的激励相容性</h4>
<p class="text-blue-700 text-sm mt-1">
不惩罚"从高处跌落"的尝试,只要新上下文比当前基础更好。
</p>
</div>
</div>
</div>
</div>
<div class="bg-white p-8 rounded-lg shadow-md mb-8">
<h3 class="text-2xl font-semibold mb-6">增量奖励的计算标准</h3>
<div class="grid md:grid-cols-2 gap-8">
<div>
<div class="formula-box">
<h4 class="font-semibold mb-3">平均奖励函数:</h4>
<div class="text-center text-lg mb-4">
R̄(c) = (1/|D|) Σ<sub>i=1</sub><sup>|D|</sup> 1[ŷ<sub>i</sub> = y<sub>i</sub>]
</div>
<p class="text-sm text-gray-600">
对于Text-to-SQL等生成任务,采用执行准确率而非字符串匹配,对语义等价但语法不同的SQL更具包容性。
</p>
</div>
</div>
<div>
<p class="text-lg leading-relaxed mb-4">
验证集D的固定性与代表性是奖励可比性基石。D规模通常5-10个样本,每样本评估8次生成取平均<a href="https://arxiv.org/html/2603.18620v1" class="citation-link">[71]</a>。
</p>
<div class="bg-gray-50 p-4 rounded-lg">
<h5 class="font-semibold mb-2">"真实性能进步"的评估维度</h5>
<ul class="text-sm text-gray-700 space-y-1">
<li>• 下游任务准确率的量化度量</li>
<li>• 多轮评估的方差控制</li>
<li>• 统计显著性检验的引入</li>
</ul>
</div>
</div>
</div>
</div>
<div class="highlight-box">
<h3 class="text-2xl font-bold mb-4">奖励机制的训练效果</h3>
<div class="grid md:grid-cols-2 gap-8">
<div>
<h4 class="text-xl font-semibold mb-3">消融对比结果</h4>
<div class="space-y-3">
<div class="flex justify-between items-center p-3 bg-red-100 rounded">
<span class="font-medium">A<sub>GRPO</sub> (绝对奖励)</span>
<span class="text-red-600 font-bold">~62%</span>
</div>
<div class="flex justify-between items-center p-3 bg-green-100 rounded">
<span class="font-medium">A<sub>LSE</sub> (改进量奖励)</span>
<span class="text-green-600 font-bold">~67%</span>
</div>
</div>
<p class="text-sm mt-3">
相同预算下改进量奖励变体始终优于绝对奖励变体<a href="https://www.wispaper.ai/zh/blog/learning-to-self-evolve-20260321/zho" class="citation-link">[105]</a>。
</p>
</div>
<div>
<h4 class="text-xl font-semibold mb-3">跨模型迁移能力</h4>
<div class="bg-white p-4 rounded-lg">
<div class="text-center mb-3">
<div class="text-3xl font-bold text-ocean-primary">+6.7%</div>
<div class="text-sm text-gray-600">性能提升 (57.7% → 64.4%)</div>
</div>
<p class="text-sm text-gray-700">
经LSE训练的4B模型生成的进化指令,可直接应用于7B模型Arctic-7B,无需任何额外训练<a href="https://arxiv.org/html/2603.18620v1" class="citation-link">[71]</a>
<a href="https://www.wispaper.ai/zh/blog/learning-to-self-evolve-20260321/zho" class="citation-link">[88]</a>。
</p>
</div>
</div>
</div>
</div>
</section>
<!-- Experiments -->
<section id="experiments" class="py-16 px-8 max-w-7xl mx-auto">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">实验验证与性能突破</h2>
</div>
<div class="bg-white p-8 rounded-lg shadow-md mb-12">
<h3 class="text-2xl font-semibold mb-6">BIRD基准上的准确率对比</h3>
<div class="grid md:grid-cols-2 gap-8 mb-8">
<div>
<h4 class="text-xl font-semibold mb-4">核心结果(增幅为相对Seed Prompt基线的相对提升)</h4>
<div class="space-y-4">
<div class="flex items-center justify-between p-4 bg-gray-50 rounded-lg">
<div>
<div class="font-semibold">Seed Prompt(原始)</div>
<div class="text-sm text-gray-600">基线方法</div>
</div>
<div class="text-right">
<div class="text-2xl font-bold">57.2%</div>
<div class="text-sm text-gray-500">—</div>
</div>
</div>
<div class="flex items-center justify-between p-4 bg-blue-50 rounded-lg">
<div>
<div class="font-semibold">Claude 3.5 Sonnet</div>
<div class="text-sm text-gray-600">~175B参数</div>
</div>
<div class="text-right">
<div class="text-2xl font-bold text-blue-600">64.5%</div>
<div class="text-sm text-gray-500">+12.8%</div>
</div>
</div>
<div class="flex items-center justify-between p-4 bg-green-50 rounded-lg">
<div>
<div class="font-semibold">GPT-5(自进化)</div>
<div class="text-sm text-gray-600">~1.8T参数</div>
</div>
<div class="text-right">
<div class="text-2xl font-bold text-green-600">65.2%</div>
<div class="text-sm text-gray-500">+14.0%</div>
</div>
</div>
<div class="flex items-center justify-between p-4 bg-gradient-to-r from-purple-50 to-pink-50 rounded-lg border-2 border-purple-300">
<div>
<div class="font-semibold text-purple-800">LSE (Qwen3-4B)</div>
<div class="text-sm text-purple-600">4B参数</div>
</div>
<div class="text-right">
<div class="text-2xl font-bold text-purple-600">67.3%</div>
<div class="text-sm text-purple-500 font-semibold">+17.7%</div>
</div>
</div>
</div>
</div>
<div>
<h4 class="text-xl font-semibold mb-4">分数据库详细结果(增幅为绝对百分点)</h4>
<div class="space-y-3">
<div class="flex justify-between items-center p-3 bg-gray-50 rounded">
<span class="font-medium">Financial</span>
<div class="text-right">
<span class="text-sm text-gray-600">56.8% → </span>
<span class="font-bold text-green-600">68.3%</span>
<span class="text-sm text-green-600 ml-2">+11.5%</span>
</div>
</div>
<div class="flex justify-between items-center p-3 bg-gray-50 rounded">
<span class="font-medium">Toxicology</span>
<div class="text-right">
<span class="text-sm text-gray-600">54.5% → </span>
<span class="font-bold">62.3%</span>
<span class="text-sm text-blue-600 ml-2">+7.8%</span>
</div>
</div>
<div class="flex justify-between items-center p-3 bg-gray-50 rounded">
<span class="font-medium">Codebase</span>
<div class="text-right">
<span class="text-sm text-gray-600">65.3% → </span>
<span class="font-bold">71.5%</span>
<span class="text-sm text-blue-600 ml-2">+6.2%</span>
</div>
</div>
<div class="flex justify-between items-center p-3 bg-gray-50 rounded">
<span class="font-medium">Formula 1</span>
<div class="text-right">
<span class="text-sm text-gray-600">52.3% → </span>
<span class="font-bold">57.0%</span>
<span class="text-sm text-blue-600 ml-2">+4.7%</span>
</div>
</div>
<div class="flex justify-between items-center p-3 bg-gray-50 rounded">
<span class="font-medium">Card Games</span>
<div class="text-right">
<span class="text-sm text-gray-600">59.5% → </span>
<span class="font-bold">63.0%</span>
<span class="text-sm text-blue-600 ml-2">+3.5%</span>
</div>
</div>
</div>
</div>
</div>
<div class="mt-8 p-6 bg-blue-50 rounded-lg">
<h4 class="text-lg font-semibold mb-3 text-blue-800">结果分析</h4>
<p class="text-blue-700">
这一结果的多重意义:参数效率——4B模型超越175B和1.8T级别顶级模型,挑战"规模即一切"共识;技能特异性——LSE将自我进化显式训练为可学习技能,非依赖通用推理;动态适应——测试时进化使模型针对特定数据库自适应调整<a href="https://www.wispaper.ai/en/blog/learning-to-self-evolve-20260321/zho" class="citation-link">[99]</a>
<a href="https://www.wispaper.ai/zh/blog/learning-to-self-evolve-20260321/zho" class="citation-link">[105]</a>。
</p>
</div>
</div>
<div class="grid lg:grid-cols-2 gap-8 mb-12">
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-xl font-semibold mb-4">跨数据库泛化表现</h4>
<!-- Illustration for the cross-database generalisation card. Generator
     metadata is kept in data-* attributes (size/aspect/query are not valid
     img attributes), and the image loads lazily because it sits below the fold. -->
<img src="https://kimi-web-img.moonshot.cn/img/mysql.taobao.org/d805cb6c16576e662788d4a401a71e455983933e.png" alt="数据库查询优化示意图" class="w-full rounded-lg mb-4" data-size="medium" data-aspect="wide" data-query="数据库查询优化" referrerpolicy="no-referrer" loading="lazy" decoding="async" data-modified="1" data-score="0.00"/>
<p class="text-gray-700 mb-4">
LSE的领域自适应能力体现在动态进化轨迹中:面对新数据库,策略从通用种子提示出发,通过多轮迭代逐步积累领域特定知识。
</p>
<div class="bg-gray-50 p-4 rounded-lg">
<h5 class="font-semibold mb-2">效率对比</h5>
<div class="space-y-2 text-sm">
<div class="flex justify-between">
<span>静态微调</span>
<span class="text-red-600">高成本,需重训练</span>
</div>
<div class="flex justify-between">
<span>LSE进化</span>
<span class="text-green-600 font-semibold">低成本,即时适应</span>
</div>
</div>
</div>
</div>
<div class="bg-white p-6 rounded-lg shadow-md">
<h4 class="text-xl font-semibold mb-4">领域自适应关键能力</h4>
<div class="space-y-4">
<div class="flex items-start">
<i class="fas fa-search text-blue-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">识别常见模式</div>
<div class="text-sm text-gray-600">日期处理、聚合函数使用</div>
</div>
</div>
<div class="flex items-start">
<i class="fas fa-plus-circle text-green-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">添加针对性示例</div>
<div class="text-sm text-gray-600">根据错误模式补充例证</div>
</div>
</div>
<div class="flex items-start">
<i class="fas fa-cog text-purple-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">调整错误处理</div>
<div class="text-sm text-gray-600">优化异常情况的应对策略</div>
</div>
</div>
</div>
<div class="mt-4 p-4 bg-green-50 rounded-lg">
<div class="text-center">
<div class="text-2xl font-bold text-green-600">O(|D|×T)</div>
<div class="text-sm text-green-700">单次适应开销</div>
</div>
</div>
</div>
</div>
</section>
<!-- Ablation Studies -->
<section id="ablation" class="py-16 px-8 max-w-7xl mx-auto bg-gray-50">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">关键消融实验</h2>
</div>
<div class="grid lg:grid-cols-2 gap-8 mb-12">
<div class="bg-white p-6 rounded-lg shadow-md">
<h3 class="text-xl font-semibold mb-4">进化轮次的边际效益分析</h3>
<div class="space-y-4">
<div class="border-l-4 border-green-400 pl-4">
<div class="font-semibold text-green-800">0→5轮</div>
<div class="text-sm text-green-700">+8%提升,+1.6%/轮</div>
<div class="text-xs text-green-600">快速捕获明显改进空间</div>
</div>
<div class="border-l-4 border-blue-400 pl-4">
<div class="font-semibold text-blue-800">5→10轮</div>
<div class="text-sm text-blue-700">+3%提升,+0.6%/轮</div>
<div class="text-xs text-blue-600">进入精细优化阶段</div>
</div>
<div class="border-l-4 border-yellow-400 pl-4">
<div class="font-semibold text-yellow-800">10→15轮</div>
<div class="text-sm text-yellow-700">+1%提升,+0.2%/轮</div>
<div class="text-xs text-yellow-600">边际效益递减</div>
</div>
<div class="border-l-4 border-red-400 pl-4">
<div class="font-semibold text-red-800">15→20轮</div>
<div class="text-sm text-red-700">+0.5%提升,+0.1%/轮</div>
<div class="text-xs text-red-600">接近收敛,波动增大</div>
</div>
</div>
<div class="mt-4 p-4 bg-purple-50 rounded-lg">
<div class="text-center">
<div class="text-2xl font-bold text-purple-600">25轮</div>
<div class="text-sm text-purple-700">标准配置,平衡成本与收益</div>
</div>
</div>
</div>
<div class="bg-white p-6 rounded-lg shadow-md">
<h3 class="text-xl font-semibold mb-4">实验配置对比</h3>
<div class="space-y-4">
<div class="bg-gray-50 p-4 rounded-lg">
<h5 class="font-semibold mb-2">奖励设计对比</h5>
<div class="space-y-2 text-sm">
<div class="flex justify-between">
<span>绝对奖励 (GRPO)</span>
<span class="text-red-600">~62%,早熟收敛</span>
</div>
<div class="flex justify-between">
<span>改进量奖励 (LSE)</span>
<span class="text-green-600 font-semibold">~67%,持续探索</span>
</div>
</div>
</div>
<div class="bg-gray-50 p-4 rounded-lg">
<h5 class="font-semibold mb-2">搜索策略对比</h5>
<div class="space-y-2 text-sm">
<div class="flex justify-between">
<span>线性链</span>
<span class="text-red-600">错误累积,无法恢复</span>
</div>
<div class="flex justify-between">
<span>UCB树搜索</span>
<span class="text-green-600 font-semibold">自愈能力,稳定收敛</span>
</div>
</div>
</div>
<div class="bg-gray-50 p-4 rounded-lg">
<h5 class="font-semibold mb-2">迁移能力验证</h5>
<div class="space-y-2 text-sm">
<div class="flex justify-between">
<span>同架构不同规模</span>
<span class="text-blue-600">验证中</span>
</div>
<div class="flex justify-between">
<span>不同架构 (4B→7B)</span>
<span class="text-green-600 font-semibold">+6.7%</span>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="bg-white p-8 rounded-lg shadow-md">
<h3 class="text-2xl font-semibold mb-6">实验结果深度分析</h3>
<div class="grid md:grid-cols-3 gap-6">
<div class="text-center">
<div class="w-16 h-16 bg-green-100 rounded-full flex items-center justify-center mx-auto mb-4">
<i class="fas fa-trophy text-2xl text-green-600"></i>
</div>
<h4 class="font-semibold mb-2">参数效率突破</h4>
<p class="text-sm text-gray-600">
4B模型超越175B和1.8T级别模型,挑战"规模即一切"共识
</p>
</div>
<div class="text-center">
<div class="w-16 h-16 bg-blue-100 rounded-full flex items-center justify-center mx-auto mb-4">
<i class="fas fa-cogs text-2xl text-blue-600"></i>
</div>
<h4 class="font-semibold mb-2">技能特异性</h4>
<p class="text-sm text-gray-600">
将自我进化显式训练为可学习技能,非依赖通用推理
</p>
</div>
<div class="text-center">
<div class="w-16 h-16 bg-purple-100 rounded-full flex items-center justify-center mx-auto mb-4">
<i class="fas fa-sync-alt text-2xl text-purple-600"></i>
</div>
<h4 class="font-semibold mb-2">动态适应能力</h4>
<p class="text-sm text-gray-600">
测试时进化使模型针对特定数据库自适应调整
</p>
</div>
</div>
</div>
</section>
<!-- Conclusion -->
<section id="conclusion" class="py-16 px-8 max-w-7xl mx-auto">
<div class="section-header">
<h2 class="text-3xl font-bold serif-display">研究意义与未来展望</h2>
</div>
<div class="grid lg:grid-cols-2 gap-12 mb-12">
<div>
<h3 class="text-2xl font-semibold mb-6">技术贡献总结</h3>
<div class="space-y-6">
<div class="flex items-start">
<div class="w-8 h-8 bg-ocean-primary rounded-full flex items-center justify-center mr-4 mt-1">
<span class="text-white font-bold text-sm">1</span>
</div>
<div>
<h4 class="font-semibold mb-2">单步强化学习目标</h4>
<p class="text-gray-700">
将复杂的多步自我进化过程简化为单步强化学习目标,大幅降低学习难度,同时保证训练可行性。
</p>
</div>
</div>
<div class="flex items-start">
<div class="w-8 h-8 bg-ocean-primary rounded-full flex items-center justify-center mr-4 mt-1">
<span class="text-white font-bold text-sm">2</span>
</div>
<div>
<h4 class="font-semibold mb-2">树状UCB搜索算法</h4>
<p class="text-gray-700">
通过上置信界算法平衡探索与利用,实现测试时的灵活探索,具备错误恢复和自愈能力。
</p>
</div>
</div>
<div class="flex items-start">
<div class="w-8 h-8 bg-ocean-primary rounded-full flex items-center justify-center mr-4 mt-1">
<span class="text-white font-bold text-sm">3</span>
</div>
<div>
<h4 class="font-semibold mb-2">增量式奖励机制</h4>
<p class="text-gray-700">
基于性能差值的奖励设计避免优化陷阱,激励持续探索,与自我进化的根本目标高度一致。
</p>
</div>
</div>
</div>
</div>
<div>
<!-- Illustration for the conclusion section. Generator metadata is kept in
     data-* attributes (size/aspect/query are not valid img attributes), and
     the image loads lazily because it sits below the fold. -->
<img src="https://kimi-web-img.moonshot.cn/img/msimg.bioon.com/4de98904a3ebb86f19650bac2d893477f72c546c.jpg" alt="人工智能自我进化技术示意图" class="w-full rounded-lg shadow-lg mb-6" data-size="medium" data-aspect="wide" data-query="人工智能自我进化" referrerpolicy="no-referrer" loading="lazy" decoding="async" data-modified="1" data-score="0.00"/>
<div class="bg-gradient-to-r from-ocean-light to-ocean-primary p-6 rounded-lg text-white">
<h4 class="text-xl font-semibold mb-3">突破性发现</h4>
<p class="text-lg">
经LSE训练的4B参数模型在SQL生成任务上性能超越了GPT-5等顶级闭源模型,且具备跨模型迁移能力,为构建能够动态适应环境、具备自我迭代能力的智能系统提供了实证支持。
</p>
</div>
</div>
</div>
<div class="bg-white p-8 rounded-lg shadow-md mb-12">
<h3 class="text-2xl font-semibold mb-6">应用前景</h3>
<div class="grid md:grid-cols-2 gap-8">
<div>
<h4 class="text-lg font-semibold mb-4 text-ocean-primary">即时应用领域</h4>
<div class="space-y-4">
<div class="flex items-start">
<i class="fas fa-database text-blue-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">数据库查询优化</div>
<div class="text-sm text-gray-600">Text-to-SQL任务的持续改进</div>
</div>
</div>
<div class="flex items-start">
<i class="fas fa-code text-green-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">代码生成与优化</div>
<div class="text-sm text-gray-600">编程辅助工具的自我进化</div>
</div>
</div>
<div class="flex items-start">
<i class="fas fa-comments text-purple-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">对话系统优化</div>
<div class="text-sm text-gray-600">聊天机器人的自适应改进</div>
</div>
</div>
</div>
</div>
<div>
<h4 class="text-lg font-semibold mb-4 text-ocean-primary">长期发展方向</h4>
<div class="space-y-4">
<div class="flex items-start">
<i class="fas fa-robot text-red-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">自主AI系统</div>
<div class="text-sm text-gray-600">具备自我迭代能力的智能体</div>
</div>
</div>
<div class="flex items-start">
<i class="fas fa-sync-alt text-orange-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">持续学习系统</div>
<div class="text-sm text-gray-600">动态适应环境变化的AI</div>
</div>
</div>
<div class="flex items-start">
<i class="fas fa-network-wired text-teal-500 mt-1 mr-3"></i>
<div>
<div class="font-medium">分布式进化</div>
<div class="text-sm text-gray-600">多模型协同进化框架</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="highlight-box">
<h3 class="text-2xl font-bold mb-4">研究展望</h3>
<div class="grid md:grid-cols-2 gap-8">
<div>
<h4 class="text-xl font-semibold mb-3">技术深化方向</h4>
<ul class="space-y-2 text-white">
<li>• 探索更复杂的树搜索算法变体</li>
<li>• 研究多目标优化的奖励机制</li>
<li>• 开发自适应的进化终止策略</li>
<li>• 扩展到多模态任务场景</li>
</ul>
</div>
<div>
<h4 class="text-xl font-semibold mb-3">应用拓展方向</h4>
<ul class="space-y-2 text-white">
<li>• 构建开源的提示优化服务平台</li>
<li>• 为企业提供定制化的进化解决方案</li>
<li>• 建立跨模型的进化技能共享机制</li>
<li>• 推动AI自我进化技术的标准化</li>
</ul>
</div>
</div>
</div>
<div class="pull-quote mt-8">
"LSE框架成功实现了'小模型服务大模型'的新范式,为闭源模型性能提升提供了新途径,也为构建能够动态适应环境、具备自我迭代能力的下一代智能系统奠定了坚实基础。"
</div>
</section>
<!-- Footer -->
<footer class="bg-gray-900 text-white py-12 px-8">
<div class="max-w-7xl mx-auto">
<div class="grid md:grid-cols-3 gap-8">
<div>
<h3 class="text-xl font-bold mb-4">主要参考文献</h3>
<div class="space-y-2 text-sm">
<a href="https://arxiv.org/pdf/2603.18620" class="block text-gray-300 hover:text-white">
[58] LSE: Learning to Self-Evolve (arXiv:2603.18620)
</a>
<a href="https://arxiv.org/html/2603.18620v1" class="block text-gray-300 hover:text-white">
[71] LSE Framework Technical Report
</a>
<a href="https://www.wispaper.ai/en/blog/learning-to-self-evolve-20260321/zho" class="block text-gray-300 hover:text-white">
[99] Learning to Self-Evolve Analysis
</a>
</div>
</div>
<div>
<h3 class="text-xl font-bold mb-4">相关资源</h3>
<div class="space-y-2 text-sm">
<a href="https://chatpaper.com/zh-CN/paper/254630" class="block text-gray-300 hover:text-white">
[67] ChatPaper Analysis
</a>
<a href="http://lonepatient.top/2026/03/20/arxiv_papers_2026-03-20" class="block text-gray-300 hover:text-white">
[117] arXiv Papers Collection
</a>
<a href="https://www.wispaper.ai/zh/blog/learning-to-self-evolve-20260321/zho" class="block text-gray-300 hover:text-white">
[105] 中文技术分析
</a>
</div>
</div>
<div>
<h3 class="text-xl font-bold mb-4">技术标签</h3>
<div class="flex flex-wrap gap-2">
<span class="px-3 py-1 bg-ocean-primary rounded-full text-xs">Self-Evolution</span>
<span class="px-3 py-1 bg-earth-primary rounded-full text-xs">Reinforcement Learning</span>
<span class="px-3 py-1 bg-ocean-light rounded-full text-xs">Tree Search</span>
<span class="px-3 py-1 bg-earth-accent rounded-full text-xs">Delta Reward</span>
<span class="px-3 py-1 bg-gray-700 rounded-full text-xs">Text-to-SQL</span>
</div>
</div>
</div>
<div class="border-t border-gray-700 mt-8 pt-8 text-center text-gray-400">
<p>© 2026 LSE Framework Research. All rights reserved.</p>
</div>
</div>
</footer>
</main>
<script>
// Mermaid setup: apply the page's earth/ocean palette on top of the 'base' theme.
// Everything lives in a block scope so the helper constants do not leak into
// the global script scope.
{
  // Colours reused across several theme slots.
  const ocean = '#4A7C7E';
  const earthAccent = '#A0927B';
  const earthLight = '#D4C5B9';
  const pageBackground = '#FEFCFA';
  const panelBackground = '#F5F2ED';
  const ink = '#2C2C2C';
  const mutedInk = '#6B6B6B';
  const white = '#ffffff';

  // Theme variable overrides handed to Mermaid.
  const themeVariables = {
    primaryColor: ocean,
    primaryTextColor: white,
    primaryBorderColor: ink,
    lineColor: mutedInk,
    secondaryColor: earthAccent,
    tertiaryColor: earthLight,
    background: pageBackground,
    mainBkg: ocean,
    secondBkg: earthAccent,
    tertiaryBkg: earthLight,
    nodeBkg: ocean,
    nodeTextColor: white,
    edgeLabelBackground: pageBackground,
    clusterBkg: panelBackground,
    clusterBorder: earthAccent,
    defaultLinkColor: mutedInk,
    titleColor: ink,
    edgeLabelColor: ink,
    fontFamily: 'Inter, sans-serif',
    fontSize: '14px'
  };

  // Flowchart-specific rendering options.
  const flowchart = {
    useMaxWidth: false,
    htmlLabels: true,
    curve: 'basis',
    padding: 20
  };

  mermaid.initialize({
    startOnLoad: true,
    theme: 'base',
    themeVariables,
    flowchart,
    securityLevel: 'loose'
  });
}
// Initialize Mermaid Controls for zoom and pan
/**
 * Attach zoom, pan, pinch and fullscreen behaviour to every `.mermaid-container`.
 *
 * For each container the function looks up its `.mermaid` child and the
 * optional `.zoom-in`, `.zoom-out`, `.reset-zoom` and `.fullscreen` buttons,
 * then keeps a per-container scale/translation that is applied as a CSS
 * transform on the `.mermaid` element. Scale is always clamped to
 * [MIN_SCALE, MAX_SCALE].
 *
 * Containers without a `.mermaid` child are skipped, so one malformed
 * container cannot abort setup for the others (or for the rest of the script).
 */
function initializeMermaidControls() {
  const MIN_SCALE = 0.3;
  const MAX_SCALE = 4;
  const BUTTON_ZOOM_STEP = 1.25;
  // How long after the last touch ends mouse events remain ignored.
  const TOUCH_RESET_DELAY_MS = 100;

  // Keep a scale value inside the supported zoom range.
  const clampScale = (value) => Math.min(Math.max(value, MIN_SCALE), MAX_SCALE);

  // Distance in pixels between two touch points.
  function getTouchDistance(touch1, touch2) {
    return Math.hypot(
      touch2.clientX - touch1.clientX,
      touch2.clientY - touch1.clientY
    );
  }

  const containers = document.querySelectorAll('.mermaid-container');
  containers.forEach(container => {
    const mermaidElement = container.querySelector('.mermaid');
    // Nothing to transform: bail out instead of throwing on a null element.
    if (!mermaidElement) {
      return;
    }

    // Current view state.
    let scale = 1;
    let translateX = 0;
    let translateY = 0;
    // Pointer position minus translation at the moment a drag started.
    let startX = 0;
    let startY = 0;
    let isDragging = false;
    // Touch-related state.
    let isTouch = false;
    let isPinching = false;
    let initialDistance = 0;
    let initialScale = 1;
    let touchResetTimer = null;

    // Zoom controls (each one is optional).
    const zoomInBtn = container.querySelector('.zoom-in');
    const zoomOutBtn = container.querySelector('.zoom-out');
    const resetBtn = container.querySelector('.reset-zoom');
    const fullscreenBtn = container.querySelector('.fullscreen');

    // Push the current state to the DOM: transform, 'zoomed' marker class, cursor.
    function updateTransform() {
      mermaidElement.style.transform = `translate(${translateX}px, ${translateY}px) scale(${scale})`;
      if (scale > 1) {
        container.classList.add('zoomed');
      } else {
        container.classList.remove('zoomed');
      }
      mermaidElement.style.cursor = isDragging ? 'grabbing' : 'grab';
    }

    // Drop any pan offset.
    function resetTranslation() {
      translateX = 0;
      translateY = 0;
    }

    // Remember where a drag started relative to the current translation.
    function beginDragAt(point) {
      startX = point.clientX - translateX;
      startY = point.clientY - translateY;
    }

    // Re-enable mouse handling shortly after the last touch ends. A single
    // timer is kept so that an older timer cannot flip `isTouch` back to
    // false in the middle of a newer touch gesture.
    function scheduleTouchReset() {
      clearTimeout(touchResetTimer);
      touchResetTimer = setTimeout(() => {
        isTouch = false;
      }, TOUCH_RESET_DELAY_MS);
    }

    if (zoomInBtn) {
      zoomInBtn.addEventListener('click', () => {
        scale = clampScale(scale * BUTTON_ZOOM_STEP);
        updateTransform();
      });
    }
    if (zoomOutBtn) {
      zoomOutBtn.addEventListener('click', () => {
        scale = clampScale(scale / BUTTON_ZOOM_STEP);
        // At or below natural size the pan offset is discarded.
        if (scale <= 1) {
          resetTranslation();
        }
        updateTransform();
      });
    }
    if (resetBtn) {
      resetBtn.addEventListener('click', () => {
        scale = 1;
        resetTranslation();
        updateTransform();
      });
    }
    if (fullscreenBtn) {
      fullscreenBtn.addEventListener('click', () => {
        // Standard API first, then vendor-prefixed fallbacks.
        if (container.requestFullscreen) {
          container.requestFullscreen();
        } else if (container.webkitRequestFullscreen) {
          container.webkitRequestFullscreen();
        } else if (container.msRequestFullscreen) {
          container.msRequestFullscreen();
        }
      });
    }

    // Mouse events
    mermaidElement.addEventListener('mousedown', (e) => {
      if (isTouch) return; // a touch gesture is active or just ended: ignore mouse events
      isDragging = true;
      beginDragAt(e);
      updateTransform();
      e.preventDefault();
    });
    document.addEventListener('mousemove', (e) => {
      if (isDragging && !isTouch) {
        translateX = e.clientX - startX;
        translateY = e.clientY - startY;
        updateTransform();
      }
    });
    // Releasing the button or leaving the document both end a mouse drag.
    function endMouseDrag() {
      if (isDragging && !isTouch) {
        isDragging = false;
        updateTransform();
      }
    }
    document.addEventListener('mouseup', endMouseDrag);
    document.addEventListener('mouseleave', endMouseDrag);

    // Touch events
    mermaidElement.addEventListener('touchstart', (e) => {
      isTouch = true;
      // A reset scheduled by a previous gesture must not fire during this one.
      clearTimeout(touchResetTimer);
      if (e.touches.length === 1) {
        // One finger: drag
        isPinching = false;
        isDragging = true;
        beginDragAt(e.touches[0]);
      } else if (e.touches.length === 2) {
        // Two fingers: pinch zoom
        isPinching = true;
        isDragging = false;
        initialDistance = getTouchDistance(e.touches[0], e.touches[1]);
        initialScale = scale;
      }
      e.preventDefault();
    }, { passive: false });
    mermaidElement.addEventListener('touchmove', (e) => {
      if (e.touches.length === 1 && isDragging && !isPinching) {
        // One finger: drag
        const touch = e.touches[0];
        translateX = touch.clientX - startX;
        translateY = touch.clientY - startY;
        updateTransform();
      } else if (e.touches.length === 2 && isPinching) {
        // Two fingers: scale relative to the finger distance at pinch start
        const currentDistance = getTouchDistance(e.touches[0], e.touches[1]);
        if (initialDistance > 0) {
          scale = clampScale(initialScale * (currentDistance / initialDistance));
          updateTransform();
        }
      }
      e.preventDefault();
    }, { passive: false });
    mermaidElement.addEventListener('touchend', (e) => {
      if (e.touches.length === 0) {
        // All fingers lifted: reset gesture state
        isDragging = false;
        isPinching = false;
        initialDistance = 0;
        // Delay clearing isTouch so mouse events that follow immediately are ignored
        scheduleTouchReset();
      } else if (e.touches.length === 1 && isPinching) {
        // Went from two fingers to one: switch to drag mode
        isPinching = false;
        isDragging = true;
        beginDragAt(e.touches[0]);
      }
      updateTransform();
    });
    mermaidElement.addEventListener('touchcancel', () => {
      isDragging = false;
      isPinching = false;
      initialDistance = 0;
      scheduleTouchReset();
      updateTransform();
    });

    // Wheel zoom. The listener is explicitly non-passive because it calls
    // preventDefault() to stop the wheel from scrolling while over the diagram.
    container.addEventListener('wheel', (e) => {
      e.preventDefault();
      const factor = e.deltaY > 0 ? 0.9 : 1.1;
      const newScale = clampScale(scale * factor);
      if (newScale !== scale) {
        // Scale the pan offset by the same ratio so the zoom stays anchored
        // on the element's transform origin.
        const ratio = newScale / scale;
        translateX = translateX * ratio;
        translateY = translateY * ratio;
        scale = newScale;
        if (scale <= 1) {
          resetTranslation();
        }
        updateTransform();
      }
    }, { passive: false });

    // Initialize display
    updateTransform();
  });
}
// Initialize mermaid controls
initializeMermaidControls();
// Smooth scrolling for TOC links
// Only same-page fragment links (href="#id") are intercepted. A .toc-link with
// no href or a non-fragment href keeps the browser's default behaviour instead
// of being swallowed by preventDefault().
document.querySelectorAll('.toc-link').forEach(link => {
  link.addEventListener('click', function(e) {
    const href = this.getAttribute('href');
    if (!href || href.charAt(0) !== '#') {
      return;
    }
    e.preventDefault();
    const targetElement = document.getElementById(href.substring(1));
    if (targetElement) {
      // Honour the user's reduced-motion preference: jump instead of animating.
      const reduceMotion = Boolean(window.matchMedia) &&
        window.matchMedia('(prefers-reduced-motion: reduce)').matches;
      targetElement.scrollIntoView({
        behavior: reduceMotion ? 'auto' : 'smooth',
        block: 'start'
      });
    }
  });
});
// Scroll spy: mark the TOC link whose section currently spans the line
// 100px below the top of the viewport. If several sections match, the last
// one in document order wins; if none match, the comparison target is '#'.
window.addEventListener('scroll', () => {
  const probeY = 100;
  let activeId = '';
  for (const sectionEl of document.querySelectorAll('section[id]')) {
    const { top, bottom } = sectionEl.getBoundingClientRect();
    if (top <= probeY && bottom >= probeY) {
      activeId = sectionEl.id;
    }
  }
  const activeHref = '#' + activeId;
  for (const tocLink of document.querySelectorAll('.toc-link')) {
    const isCurrent = tocLink.getAttribute('href') === activeHref;
    tocLink.classList.toggle('active', isCurrent);
  }
});
// Mobile menu toggle: flips the 'open' class on the TOC sidebar.
function toggleTOC() {
  document.querySelector('.toc-sidebar').classList.toggle('open');
}
// Add responsive behavior for small screens.
// The check runs once at script execution time; the button is only created
// when the viewport is at most 1024px wide at that moment.
if (window.innerWidth <= 1024) {
  // Create hamburger menu button
  const menuButton = document.createElement('button');
  menuButton.type = 'button';
  // The button shows only an icon, so give it an accessible name.
  menuButton.setAttribute('aria-label', '切换目录');
  menuButton.innerHTML = '<i class="fas fa-bars"></i>';
  menuButton.className = 'fixed top-4 left-4 z-50 bg-gray-800 text-white p-3 rounded-lg shadow-lg';
  menuButton.addEventListener('click', toggleTOC);
  document.body.appendChild(menuButton);

  // Close sidebar when clicking outside.
  // The button reference comes from the closure above. The handler is attached
  // with addEventListener, so there is no onclick attribute for a selector such
  // as button[onclick="toggleTOC()"] to match; that lookup returned null and
  // made this handler throw.
  document.addEventListener('click', function(e) {
    const sidebar = document.querySelector('.toc-sidebar');
    if (!sidebar || !sidebar.classList.contains('open')) {
      return;
    }
    const clickedInsideSidebar = sidebar.contains(e.target);
    // contains() also covers clicks on the icon rendered inside the button.
    const clickedMenuButton = menuButton.contains(e.target);
    if (!clickedInsideSidebar && !clickedMenuButton) {
      sidebar.classList.remove('open');
    }
  });
}
</script>
</body></html>