Loading...
正在加载...
请稍候

A Cookbook for Building Self-Evolving Agents

✨步子哥 (steper) 2025年11月15日 10:42
<!DOCTYPE html><html lang="en"><head> <meta charset="UTF-8"/> <meta name="viewport" content="width=device-width, initial-scale=1.0"/> <title>A Cookbook for Building Self-Evolving Agents</title> <script src="https://cdn.tailwindcss.com"></script> <link rel="preconnect" href="https://fonts.googleapis.com"/> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin=""/> <link href="https://fonts.googleapis.com/css2?family=Playfair+Display:ital,wght@0,400;0,600;0,700;1,400;1,600&amp;family=Inter:wght@300;400;500;600;700&amp;display=swap" rel="stylesheet"/> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"/> <script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script> <script> tailwind.config = { theme: { extend: { fontFamily: { 'serif': ['Playfair Display', 'serif'], 'sans': ['Inter', 'sans-serif'], }, colors: { 'primary': '#1a1a1a', 'secondary': '#4a4a4a', 'accent': '#0d9488', 'muted': '#6b7280', 'background': '#fefefe', 'surface': '#f8fafc', 'border': '#e5e7eb', } } } } </script> <style> .hero-gradient { background: linear-gradient(135deg, #0f172a 0%, #1e293b 50%, #334155 100%); } .text-shadow { text-shadow: 0 2px 4px rgba(0,0,0,0.3); } .glass-effect { backdrop-filter: blur(10px); background: rgba(255, 255, 255, 0.1); border: 1px solid rgba(255, 255, 255, 0.2); } .toc-fixed { position: fixed; top: 0; left: 0; height: 100vh; width: 280px; background: #fefefe; border-right: 1px solid #e5e7eb; overflow-y: auto; z-index: 1000; padding: 2rem 1.5rem; } .content-offset { margin-left: 280px; } .toc-link { transition: all 0.2s ease; } .toc-link:hover { color: #0d9488; padding-left: 0.5rem; } .citation-link { color: #0d9488; text-decoration: none; font-weight: 500; } .citation-link:hover { text-decoration: underline; } .section-divider { background: linear-gradient(90deg, #0d9488 0%, transparent 100%); height: 2px; margin: 3rem 0; } .mermaid-container { display: flex; justify-content: center; 
min-height: 300px; max-height: 800px; background: #ffffff; border: 2px solid #e5e7eb; border-radius: 12px; padding: 30px; margin: 30px 0; box-shadow: 0 8px 25px rgba(0, 0, 0, 0.08); position: relative; overflow: hidden; } .mermaid-container .mermaid { width: 100%; max-width: 100%; height: 100%; cursor: grab; transition: transform 0.3s ease; transform-origin: center center; display: flex; justify-content: center; align-items: center; touch-action: none; -webkit-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; } .mermaid-container .mermaid svg { max-width: 100%; height: 100%; display: block; margin: 0 auto; } .mermaid-container .mermaid:active { cursor: grabbing; } .mermaid-container.zoomed .mermaid { height: 100%; width: 100%; cursor: grab; } .mermaid-controls { position: absolute; top: 15px; right: 15px; display: flex; gap: 10px; z-index: 20; background: rgba(255, 255, 255, 0.95); padding: 8px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); } .mermaid-control-btn { background: #ffffff; border: 1px solid #d1d5db; border-radius: 6px; padding: 10px; cursor: pointer; transition: all 0.2s ease; color: #374151; font-size: 14px; min-width: 36px; height: 36px; text-align: center; display: flex; align-items: center; justify-content: center; } .mermaid-control-btn:hover { background: #f8fafc; border-color: #3b82f6; color: #3b82f6; transform: translateY(-1px); } .mermaid-control-btn:active { transform: scale(0.95); } /* Enhanced mermaid styling with better contrast for different node types */ .mermaid svg { max-width: none !important; height: auto !important; font-family: 'Inter', sans-serif !important; background: transparent !important; } /* Primary nodes - teal theme */ .mermaid .node.primary rect, .mermaid .node.primary circle, .mermaid .node.primary ellipse, .mermaid .node.primary polygon { fill: #0d9488 !important; stroke: #0f766e !important; stroke-width: 2.5px !important; filter: drop-shadow(0 2px 4px rgba(13, 148, 
136, 0.3)); } .mermaid .node.primary .label { color: #ffffff !important; font-weight: 600 !important; font-size: 14px !important; text-shadow: 0 1px 2px rgba(0, 0, 0, 0.2); } /* Secondary nodes - light teal theme */ .mermaid .node.secondary rect, .mermaid .node.secondary circle, .mermaid .node.secondary ellipse, .mermaid .node.secondary polygon { fill: #5eead4 !important; stroke: #14b8a6 !important; stroke-width: 2px !important; filter: drop-shadow(0 2px 4px rgba(94, 234, 212, 0.3)); } .mermaid .node.secondary .label { color: #134e4a !important; font-weight: 600 !important; font-size: 13px !important; } /* Tertiary nodes - light gray theme */ .mermaid .node.tertiary rect, .mermaid .node.tertiary circle, .mermaid .node.tertiary ellipse, .mermaid .node.tertiary polygon { fill: #f1f5f9 !important; stroke: #64748b !important; stroke-width: 1.5px !important; filter: drop-shadow(0 2px 4px rgba(100, 116, 139, 0.2)); } .mermaid .node.tertiary .label { color: #334155 !important; font-weight: 500 !important; font-size: 12px !important; } /* Default node styling */ .mermaid .node rect, .mermaid .node circle, .mermaid .node ellipse, .mermaid .node polygon { fill: #ffffff !important; stroke: #0d9488 !important; stroke-width: 2px !important; filter: drop-shadow(0 2px 4px rgba(13, 148, 136, 0.2)); } .mermaid .node .label { color: #1a1a1a !important; font-weight: 600 !important; font-size: 13px !important; text-shadow: 0 1px 2px rgba(255, 255, 255, 0.8); } /* Edge styling */ .mermaid .edgePath .path { stroke: #64748b !important; stroke-width: 2.5px !important; filter: drop-shadow(0 1px 2px rgba(100, 116, 139, 0.3)); } .mermaid .edgeLabel { background-color: rgba(255, 255, 255, 0.95) !important; color: #374151 !important; font-weight: 500 !important; font-size: 12px !important; padding: 6px 10px !important; border-radius: 6px !important; border: 1px solid #e5e7eb !important; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important; } /* Timeline specific styling */ .mermaid .section { 
fill: #f1f5f9 !important; stroke: #d1d5db !important; } .mermaid .section0 { fill: #ecfdf5 !important; stroke: #10b981 !important; } .mermaid .section1 { fill: #eff6ff !important; stroke: #3b82f6 !important; } .mermaid .section2 { fill: #fef3c7 !important; stroke: #f59e0b !important; } .mermaid .section3 { fill: #fce7f3 !important; stroke: #ec4899 !important; } .mermaid .cScale0, .mermaid .cScale1, .mermaid .cScale2, .mermaid .cScale3 { fill: #0d9488 !important; stroke: #0f766e !important; } .mermaid .cScale0 .label, .mermaid .cScale1 .label, .mermaid .cScale2 .label, .mermaid .cScale3 .label { color: #ffffff !important; font-weight: 600 !important; } @media (max-width: 1024px) { .toc-fixed { transform: translateX(-100%); transition: transform 0.3s ease; } .toc-fixed.open { transform: translateX(0); } .content-offset { margin-left: 0; } /* Responsive mermaid controls */ .mermaid-control-btn:not(.reset-zoom) { display: none; } .mermaid-controls { top: auto; bottom: 15px; right: 15px; } } @media (max-width: 768px) { section.relative h1 { font-size: 1.5rem; } section.relative p { font-size: 1rem; } } /* Prevent horizontal overflow on small screens */ body { overflow-x: hidden; } </style> <base target="_blank"> </head> <body class="font-sans text-primary bg-background leading-relaxed overflow-x-hidden break-words"> <!-- Toggle button for small screens --> <button id="toc-toggle" class="fixed top-4 left-4 z-50 p-2 bg-accent text-white rounded-lg shadow-lg md:hidden"> <i class="fas fa-bars"></i> </button> <!-- Fixed Table of Contents --> <nav class="toc-fixed" id="toc"> <div class="mb-8"> <h2 class="text-lg font-bold text-primary mb-4">Table of Contents</h2> <div class="space-y-2 text-sm"> <a href="#introduction" class="toc-link block text-muted hover:text-accent transition-colors">Introduction</a> <a href="#framework" class="toc-link block text-muted hover:text-accent transition-colors">1. 
The Self-Evolving Agent Framework</a> <div class="ml-4 space-y-1"> <a href="#core-challenge" class="toc-link block text-muted hover:text-accent transition-colors text-xs">1.1 The Core Challenge</a> <a href="#self-evolving-loop" class="toc-link block text-muted hover:text-accent transition-colors text-xs">1.2 The Self-Evolving Loop</a> <a href="#use-case" class="toc-link block text-muted hover:text-accent transition-colors text-xs">1.3 Healthcare Use Case</a> </div> <a href="#manual-optimization" class="toc-link block text-muted hover:text-accent transition-colors">2. Manual Prompt Optimization</a> <div class="ml-4 space-y-1"> <a href="#platform-workflow" class="toc-link block text-muted hover:text-accent transition-colors text-xs">2.1 Platform Workflow</a> <a href="#step-by-step" class="toc-link block text-muted hover:text-accent transition-colors text-xs">2.2 Step-by-Step Process</a> </div> <a href="#automated-healing" class="toc-link block text-muted hover:text-accent transition-colors">3. Automated Self-Healing</a> <div class="ml-4 space-y-1"> <a href="#system-architecture" class="toc-link block text-muted hover:text-accent transition-colors text-xs">3.1 System Architecture</a> <a href="#evaluation-suite" class="toc-link block text-muted hover:text-accent transition-colors text-xs">3.2 Evaluation Suite</a> <a href="#orchestration" class="toc-link block text-muted hover:text-accent transition-colors text-xs">3.3 Orchestration</a> </div> <a href="#advanced-strategies" class="toc-link block text-muted hover:text-accent transition-colors">4. Advanced Strategies</a> <div class="ml-4 space-y-1"> <a href="#model-evaluation" class="toc-link block text-muted hover:text-accent transition-colors text-xs">4.1 Model Evaluation</a> <a href="#gepa" class="toc-link block text-muted hover:text-accent transition-colors text-xs">4.2 GEPA Framework</a> </div> <a href="#appendix" class="toc-link block text-muted hover:text-accent transition-colors">5. 
Appendix</a> </div> </div> </nav> <!-- Main Content --> <main class="content-offset"> <!-- Hero Section --> <section class="hero-gradient relative overflow-hidden"> <div class="absolute inset-0 bg-black/20"></div> <img src="https://kimi-web-img.moonshot.cn/img/smythos.com/77275013d45c2dcb10d40abd4ce7ffa04db91d9c.jpg" alt="Abstract representation of an AI agent improvement loop" class="absolute inset-0 w-full h-full object-cover opacity-10" size="wallpaper" aspect="wide" query="AI agent self improvement abstract" referrerpolicy="no-referrer" data-modified="1" data-score="0.00"/> <div class="relative z-10 container mx-auto px-8 py-16 md:py-24"> <div class="grid grid-cols-1 md:grid-cols-12 gap-8 items-center"> <!-- Title and Subtitle --> <div class="md:col-span-8"> <h1 class="font-serif text-3xl md:text-6xl font-bold text-white mb-6 text-shadow italic"> A Cookbook for Building <span class="block text-accent">Self-Evolving Agents</span> </h1> <p class="text-lg md:text-2xl text-gray-200 mb-8 leading-relaxed"> A Framework for Continuous Improvement in Production </p> <div class="flex items-center space-x-4 text-gray-300"> <span class="flex items-center"> <i class="fas fa-robot mr-2 text-accent"></i> AI Systems </span> <span class="flex items-center"> <i class="fas fa-sync-alt mr-2 text-accent"></i> Continuous Learning </span> </div> </div> <!-- Key Highlights --> <div class="md:col-span-4 mt-8 md:mt-0"> <div class="glass-effect rounded-lg p-6 backdrop-blur-sm"> <h3 class="text-white font-semibold mb-4">What You&#39;ll Learn</h3> <ul class="space-y-3 text-sm text-gray-200"> <li class="flex items-start"> <i class="fas fa-check-circle text-accent mr-3 mt-1"></i> Diagnose why autonomous agents fall short of production readiness </li> <li class="flex items-start"> <i class="fas fa-check-circle text-accent mr-3 mt-1"></i> Compare three prompt-optimization strategies </li> <li class="flex items-start"> <i class="fas fa-check-circle text-accent mr-3 mt-1"></i> Assemble a 
self-healing workflow with human review and LLM evals </li> </ul> </div> </div> </div> </div> </section> <!-- Introduction --> <section id="introduction" class="py-16 bg-background"> <div class="container mx-auto px-8 max-w-4xl"> <div class="prose prose-lg max-w-none"> <p class="text-xl text-muted leading-relaxed mb-8"> This cookbook provides a practical framework for building self-evolving agents that can learn from their mistakes and improve their performance over time. By combining human feedback, automated evaluation using an &#34;LLM-as-a-judge,&#34; and iterative prompt optimization, you can move beyond brittle proof-of-concept demos to create robust, production-ready systems. </p> <div class="grid grid-cols-1 md:grid-cols-3 gap-8 my-12"> <div class="bg-surface p-6 rounded-lg border border-border"> <i class="fas fa-microscope text-accent text-2xl mb-4"></i> <h3 class="font-semibold text-primary mb-2">ML/AI Engineers</h3> <p class="text-sm text-muted">Move beyond toy demos with executable artifacts for production pipelines</p> </div> <div class="bg-surface p-6 rounded-lg border border-border"> <i class="fas fa-users text-accent text-2xl mb-4"></i> <h3 class="font-semibold text-primary mb-2">Product Teams</h3> <p class="text-sm text-muted">Adapt internal tooling with accuracy, auditability, and rapid iteration</p> </div> <div class="bg-surface p-6 rounded-lg border border-border"> <i class="fas fa-cogs text-accent text-2xl mb-4"></i> <h3 class="font-semibold text-primary mb-2">Solution Architects</h3> <p class="text-sm text-muted">Design systems that learn and improve autonomously in production</p> </div> </div> </div> </div> </section> <div class="section-divider"></div> <!-- Section 1: The Self-Evolving Agent Framework --> <section id="framework" class="py-16"> <div class="container mx-auto px-8 max-w-4xl"> <h2 class="font-serif text-4xl font-bold text-primary mb-8">1. 
The Self-Evolving Agent Framework</h2> <div id="core-challenge" class="mb-16"> <h3 class="font-serif text-2xl font-semibold text-primary mb-6">1.1 The Core Challenge: Overcoming the Post-Proof-of-Concept Plateau</h3> <p class="text-lg text-muted mb-6"> A significant and recurring challenge in the development of agentic systems is the plateau in performance and reliability that often follows an initial proof-of-concept. While early demonstrations can showcase the potential of Large Language Models (LLMs) to automate complex tasks, these systems frequently fall short of production readiness. </p> <div class="bg-amber-50 border-l-4 border-amber-400 p-6 mb-8"> <div class="flex"> <i class="fas fa-exclamation-triangle text-amber-400 mr-3 mt-1"></i> <div> <h4 class="font-semibold text-amber-800 mb-2">The Critical Gap</h4> <p class="text-amber-700">The core issue lies in their inability to autonomously diagnose and correct failures, particularly the edge cases that emerge when exposed to the full complexity and variability of real-world data.</p> </div> </div> </div> <p class="text-lg text-muted mb-6"> This dependency on human intervention for continuous diagnosis and correction creates a bottleneck, hindering scalability and long-term viability. The <strong>self-evolving loop</strong> addresses this critical gap by introducing a repeatable and structured retraining loop designed to capture failures, learn from feedback, and iteratively promote improvements back into the production workflow. 
</p> </div> <div id="self-evolving-loop" class="mb-16"> <h3 class="font-serif text-2xl font-semibold text-primary mb-6">1.2 The Self-Evolving Loop: An Iterative Cycle of Feedback and Refinement</h3> <!-- Self-Evolving Loop Diagram --> <div class="bg-surface p-8 rounded-lg border border-border mb-8"> <h4 class="font-semibold text-primary mb-4">The Self-Evolving Loop Architecture</h4> <div class="mermaid-container"> <div class="mermaid-controls"> <button class="mermaid-control-btn zoom-in" title="放大"> <i class="fas fa-search-plus"></i> </button> <button class="mermaid-control-btn zoom-out" title="缩小"> <i class="fas fa-search-minus"></i> </button> <button class="mermaid-control-btn reset-zoom" title="重置"> <i class="fas fa-expand-arrows-alt"></i> </button> <button class="mermaid-control-btn fullscreen" title="全屏查看"> <i class="fas fa-expand"></i> </button> </div> <div class="mermaid" id="mermaid-1"> graph TD A[&#34;Baseline Agent&#34;] --&gt; B[&#34;Generate Output&#34;] B --&gt; C[&#34;Human Feedback&#34;] B --&gt; D[&#34;LLM-as-Judge&#34;] C --&gt; E[&#34;Evals &amp; Aggregated Score&#34;] D --&gt; E E --&gt; F{&#34;Score &gt; Threshold?&#34;} F --&gt;|&#34;No&#34;| G[&#34;Prompt Optimization&#34;] F --&gt;|&#34;Yes&#34;| H[&#34;Update Baseline Agent&#34;] G --&gt; I[&#34;Generate New Prompt&#34;] I --&gt; A H --&gt; A style A fill:#fefefe,stroke:#0d9488,stroke-width:3px,color:#1a1a1a style B fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a style C fill:#f0fdf4,stroke:#16a34a,stroke-width:2px,color:#1a1a1a style D fill:#f0fdf4,stroke:#16a34a,stroke-width:2px,color:#1a1a1a style E fill:#fffbeb,stroke:#d97706,stroke-width:2px,color:#1a1a1a style F fill:#fef3c7,stroke:#d97706,stroke-width:3px,color:#1a1a1a style G fill:#fdf2f8,stroke:#be185d,stroke-width:2px,color:#1a1a1a style H fill:#ecfdf5,stroke:#059669,stroke-width:3px,color:#1a1a1a style I fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a </div> </div> </div> <p class="text-lg text-muted mb-6"> 
The central innovation of this cookbook is the <strong>&#34;self-evolving loop,&#34;</strong> a systematic and iterative process designed to enable continuous, autonomous improvement of an AI agent. This loop is engineered to move agentic systems beyond static, pre-programmed behaviors and into a state of dynamic learning and adaptation. </p> <!-- Five Stages --> <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6 my-8"> <div class="bg-gradient-to-br from-blue-50 to-blue-100 p-6 rounded-lg border border-blue-200"> <div class="text-center mb-4"> <i class="fas fa-play-circle text-3xl text-blue-600"></i> </div> <h4 class="font-semibold text-blue-900 mb-2">1. Baseline Agent</h4> <p class="text-sm text-blue-700">Establish the initial benchmark with a deliberately simple agent</p> </div> <div class="bg-gradient-to-br from-green-50 to-green-100 p-6 rounded-lg border border-green-200"> <div class="text-center mb-4"> <i class="fas fa-comments text-3xl text-green-600"></i> </div> <h4 class="font-semibold text-green-900 mb-2">2. Feedback Collection</h4> <p class="text-sm text-green-700">Gather structured feedback from humans and LLM-as-a-judge</p> </div> <div class="bg-gradient-to-br from-yellow-50 to-yellow-100 p-6 rounded-lg border border-yellow-200"> <div class="text-center mb-4"> <i class="fas fa-chart-line text-3xl text-yellow-600"></i> </div> <h4 class="font-semibold text-yellow-900 mb-2">3. Evaluation &amp; Scoring</h4> <p class="text-sm text-yellow-700">Measure performance using specialized graders</p> </div> <div class="bg-gradient-to-br from-purple-50 to-purple-100 p-6 rounded-lg border border-purple-200"> <div class="text-center mb-4"> <i class="fas fa-magic text-3xl text-purple-600"></i> </div> <h4 class="font-semibold text-purple-900 mb-2">4. 
Prompt Optimization</h4> <p class="text-sm text-purple-700">Generate improved instructions based on feedback</p> </div> <div class="bg-gradient-to-br from-teal-50 to-teal-100 p-6 rounded-lg border border-teal-200"> <div class="text-center mb-4"> <i class="fas fa-arrow-up text-3xl text-teal-600"></i> </div> <h4 class="font-semibold text-teal-900 mb-2">5. Updated Agent</h4> <p class="text-sm text-teal-700">Promote the best-performing version to production</p> </div> </div> </div> <div id="use-case" class="mb-16"> <h3 class="font-serif text-2xl font-semibold text-primary mb-6">1.3 Use Case: Healthcare Regulatory Documentation</h3> <div class="bg-surface p-8 rounded-lg border border-border mb-8"> <img src="https://kimi-web-img.moonshot.cn/img/pic.616pic.com/9aa9cd44d240b614c68607c2b53e4a406070db0d.jpg" alt="Pharmaceutical regulatory documents on a desk" class="w-full h-48 object-cover rounded-lg mb-6" size="medium" aspect="wide" style="photo" query="pharmaceutical regulatory documents" referrerpolicy="no-referrer" data-modified="1" data-score="0.00"/> <p class="text-lg text-muted mb-6"> To ground the abstract concepts in a concrete, real-world scenario, this cookbook focuses on a challenging and high-stakes use case: the drafting of regulatory documents for the pharmaceutical industry. This domain demands an exceptionally high degree of accuracy, precision, and compliance. 
</p> <div class="grid grid-cols-1 md:grid-cols-2 gap-6"> <div> <h4 class="font-semibold text-primary mb-3">Baseline Agent Architecture</h4> <ul class="space-y-2 text-muted"> <li class="flex items-start"> <i class="fas fa-file-alt text-accent mr-2 mt-1"></i> <span><strong>Summarizer:</strong> Creates scientific and concise summaries</span> </li> <li class="flex items-start"> <i class="fas fa-shield-alt text-accent mr-2 mt-1"></i> <span><strong>Compliance Checker:</strong> Evaluates against FDA 21 CFR Part 11</span> </li> </ul> </div> <div> <h4 class="font-semibold text-primary mb-3">Dataset</h4> <ul class="space-y-2 text-muted"> <li class="flex items-start"> <i class="fas fa-database text-accent mr-2 mt-1"></i> <span><strong>Source:</strong> Sample CMC Section for Hyperpolarized Pyruvate (13C) Injection</span> </li> <li class="flex items-start"> <i class="fas fa-list-ol text-accent mr-2 mt-1"></i> <span><strong>Size:</strong> ~70 sections of technical documentation</span> </li> </ul> </div> </div> </div> </div> </div> </section> <div class="section-divider"></div> <!-- Section 2: Manual Prompt Optimization --> <section id="manual-optimization" class="py-16 bg-surface"> <div class="container mx-auto px-8 max-w-4xl"> <h2 class="font-serif text-4xl font-bold text-primary mb-8">2. Manual Prompt Optimization with OpenAI Evals</h2> <div id="platform-workflow" class="mb-16"> <h3 class="font-serif text-2xl font-semibold text-primary mb-6">2.1 Workflow Overview</h3> <p class="text-lg text-muted mb-8"> The OpenAI Evals platform provides a powerful and intuitive web-based interface for the manual optimization and evaluation of prompts. This approach is particularly well-suited for rapid prototyping and close collaboration with subject matter experts. 
</p> <!-- Platform Interface Workflow --> <div class="bg-white p-8 rounded-lg border border-border mb-8"> <img src="https://kimi-web-img.moonshot.cn/img/s3.amazonaws.com/c2bd42c94bb1f04f205fda3e5c5d4bc7de69c335.png" alt="OpenAI Evals platform user interface" class="w-full h-64 object-cover rounded-lg mb-6" size="medium" aspect="wide" style="photo" query="OpenAI Evals platform interface" referrerpolicy="no-referrer" data-modified="1" data-score="0.00"/> <div class="grid grid-cols-1 md:grid-cols-2 gap-8"> <div> <h4 class="font-semibold text-primary mb-4">Key Features</h4> <ul class="space-y-3 text-muted"> <li class="flex items-start"> <i class="fas fa-upload text-accent mr-3 mt-1"></i> <span>Dataset upload and exploration</span> </li> <li class="flex items-start"> <i class="fas fa-cog text-accent mr-3 mt-1"></i> <span>Prompt configuration with variables</span> </li> <li class="flex items-start"> <i class="fas fa-play text-accent mr-3 mt-1"></i> <span>Batch output generation</span> </li> </ul> </div> <div> <h4 class="font-semibold text-primary mb-4">Optimization Tools</h4> <ul class="space-y-3 text-muted"> <li class="flex items-start"> <i class="fas fa-star text-accent mr-3 mt-1"></i> <span>Structured feedback collection</span> </li> <li class="flex items-start"> <i class="fas fa-magic text-accent mr-3 mt-1"></i> <span>Automated prompt optimization</span> </li> <li class="flex items-start"> <i class="fas fa-chart-bar text-accent mr-3 mt-1"></i> <span>Performance comparison across versions</span> </li> </ul> </div> </div> </div> </div> <div id="step-by-step" class="mb-16"> <h3 class="font-serif text-2xl font-semibold text-primary mb-6">2.2 Step-by-Step Process</h3> <!-- Process Table --> <div class="overflow-x-auto mb-8"> <table class="w-full bg-white rounded-lg border border-border"> <thead class="bg-surface"> <tr> <th class="px-6 py-4 text-left font-semibold text-primary">Step</th> <th class="px-6 py-4 text-left font-semibold text-primary">Action</th> <th class="px-6 
py-4 text-left font-semibold text-primary">Description</th> </tr> </thead> <tbody class="divide-y divide-border"> <tr> <td class="px-6 py-4 font-semibold text-accent">1</td> <td class="px-6 py-4 font-medium">Upload Dataset</td> <td class="px-6 py-4 text-muted">Upload CSV containing inputs for the agent</td> </tr> <tr> <td class="px-6 py-4 font-semibold text-accent">2</td> <td class="px-6 py-4 font-medium">Explore Data</td> <td class="px-6 py-4 text-muted">Verify data is properly formatted and complete</td> </tr> <tr> <td class="px-6 py-4 font-semibold text-accent">3</td> <td class="px-6 py-4 font-medium">Configure Prompt</td> <td class="px-6 py-4 text-muted">Define system prompt, user template, and model settings</td> </tr> <tr> <td class="px-6 py-4 font-semibold text-accent">4</td> <td class="px-6 py-4 font-medium">Generate Outputs</td> <td class="px-6 py-4 text-muted">Run prompt against dataset to create baseline</td> </tr> <tr> <td class="px-6 py-4 font-semibold text-accent">5</td> <td class="px-6 py-4 font-medium">Review &amp; Evaluate</td> <td class="px-6 py-4 text-muted">Provide structured feedback with ratings and comments</td> </tr> <tr> <td class="px-6 py-4 font-semibold text-accent">6</td> <td class="px-6 py-4 font-medium">Optimize Prompt</td> <td class="px-6 py-4 text-muted">Use automated optimization based on feedback</td> </tr> <tr> <td class="px-6 py-4 font-semibold text-accent">7</td> <td class="px-6 py-4 font-medium">Iterate &amp; Compare</td> <td class="px-6 py-4 text-muted">Repeat cycle until performance is satisfactory</td> </tr> </tbody> </table> </div> <div class="bg-blue-50 border-l-4 border-blue-400 p-6"> <div class="flex"> <i class="fas fa-lightbulb text-blue-400 mr-3 mt-1"></i> <div> <h4 class="font-semibold text-blue-800 mb-2">Pro Tip</h4> <p class="text-blue-700">Start with a very simple prompt like &#34;summarize&#34; to clearly demonstrate the power of the optimization process. 
The platform&#39;s ability to evolve from minimal starting points is remarkable.</p> </div> </div> </div> </div> </div> </section> <div class="section-divider"></div> <!-- Section 3: Automated Self-Healing --> <section id="automated-healing" class="py-16"> <div class="container mx-auto px-8 max-w-4xl"> <h2 class="font-serif text-4xl font-bold text-primary mb-8">3. Automated Self-Healing Loop</h2> <div id="system-architecture" class="mb-16"> <h3 class="font-serif text-2xl font-semibold text-primary mb-6">3.1 System Architecture</h3> <p class="text-lg text-muted mb-8"> This section introduces a fully automated, programmatic approach to the self-evolving loop, eliminating the need for any user interface. This API-driven workflow is designed for scalability and is well-suited for integration into production pipelines and CI/CD environments. </p> <!-- System Components --> <div class="grid grid-cols-1 md:grid-cols-2 gap-8 mb-8"> <div class="bg-white p-6 rounded-lg border border-border"> <div class="flex items-center mb-4"> <i class="fas fa-robot text-2xl text-accent mr-3"></i> <h4 class="font-semibold text-primary">Summarization Agent</h4> </div> <p class="text-muted text-sm">Primary agent performing the document summarization task</p> </div> <div class="bg-white p-6 rounded-lg border border-border"> <div class="flex items-center mb-4"> <i class="fas fa-magic text-2xl text-accent mr-3"></i> <h4 class="font-semibold text-primary">Metaprompt Agent</h4> </div> <p class="text-muted text-sm">Separate agent responsible for prompt optimization</p> </div> <div class="bg-white p-6 rounded-lg border border-border"> <div class="flex items-center mb-4"> <i class="fas fa-chart-bar text-2xl text-accent mr-3"></i> <h4 class="font-semibold text-primary">Evaluation Suite</h4> </div> <p class="text-muted text-sm">Collection of specialized graders for quality assessment</p> </div> <div class="bg-white p-6 rounded-lg border border-border"> <div class="flex items-center mb-4"> <i class="fas 
fa-cogs text-2xl text-accent mr-3" aria-hidden="true"></i>
                <h4 class="font-semibold text-primary">Orchestration Logic</h4>
              </div>
              <p class="text-muted text-sm">Python functions managing the feedback loop workflow</p>
            </div>
          </div>
        </div>

        <!-- 3.2: grader thresholds table followed by the evaluation flow diagram -->
        <div id="evaluation-suite" class="mb-16">
          <h3 class="font-serif text-2xl font-semibold text-primary mb-6">3.2 Building the Evaluation Suite</h3>

          <!-- Graders Table (column headers carry scope="col" so assistive tech can associate cells) -->
          <div class="overflow-x-auto mb-8">
            <table class="w-full bg-white rounded-lg border border-border">
              <thead class="bg-surface">
                <tr>
                  <th scope="col" class="px-6 py-4 text-left font-semibold text-primary">Grader</th>
                  <th scope="col" class="px-6 py-4 text-left font-semibold text-primary">Type</th>
                  <th scope="col" class="px-6 py-4 text-left font-semibold text-primary">Pass Threshold</th>
                  <th scope="col" class="px-6 py-4 text-left font-semibold text-primary">What It Checks</th>
                </tr>
              </thead>
              <tbody class="divide-y divide-border">
                <tr>
                  <td class="px-6 py-4 font-medium text-blue-700">Chemical Name Preservation</td>
                  <td class="px-6 py-4 text-muted">Python</td>
                  <td class="px-6 py-4 font-semibold text-blue-600">0.8</td>
                  <td class="px-6 py-4 text-muted">Ensures all chemical names appear in summary</td>
                </tr>
                <tr>
                  <td class="px-6 py-4 font-medium text-green-700">Summary Length Adherence</td>
                  <td class="px-6 py-4 text-muted">Python</td>
                  <td class="px-6 py-4 font-semibold text-green-600">0.85</td>
                  <td class="px-6 py-4 text-muted">Measures deviation from 100-word target</td>
                </tr>
                <tr>
                  <td class="px-6 py-4 font-medium text-yellow-700">Semantic Similarity</td>
                  <td class="px-6 py-4 text-muted">Cosine Similarity</td>
                  <td class="px-6 py-4 font-semibold text-yellow-600">0.85</td>
                  <td class="px-6 py-4 text-muted">Calculates semantic overlap with source</td>
                </tr>
                <tr>
                  <td class="px-6 py-4 font-medium text-purple-700">Holistic Quality Assessment</td>
                  <td class="px-6 py-4 text-muted">LLM-as-a-Judge</td>
                  <td class="px-6 py-4 font-semibold text-purple-600">0.85</td>
                  <td class="px-6 py-4 text-muted">Rubric-driven score from evaluator model</td>
                </tr>
              </tbody>
            </table>
          </div>

          <!-- Evaluation Process Flow -->
          <div class="bg-surface p-8 rounded-lg border border-border mb-8">
            <h4 class="font-semibold text-primary mb-6">Evaluation Process Flow</h4>
            <div class="mermaid-container">
              <!-- Icon-only controls: type="button" prevents accidental form submission,
                   aria-label supplies the accessible name, icons are hidden from assistive tech. -->
              <div class="mermaid-controls">
                <button type="button" class="mermaid-control-btn zoom-in" title="放大" aria-label="Zoom in">
                  <i class="fas fa-search-plus" aria-hidden="true"></i>
                </button>
                <button type="button" class="mermaid-control-btn zoom-out" title="缩小" aria-label="Zoom out">
                  <i class="fas fa-search-minus" aria-hidden="true"></i>
                </button>
                <button type="button" class="mermaid-control-btn reset-zoom" title="重置" aria-label="Reset zoom">
                  <i class="fas fa-expand-arrows-alt" aria-hidden="true"></i>
                </button>
                <button type="button" class="mermaid-control-btn fullscreen" title="全屏查看" aria-label="View fullscreen">
                  <i class="fas fa-expand" aria-hidden="true"></i>
                </button>
              </div>
              <!-- Mermaid source: keep one statement per line. The flowchart parser requires a
                   newline (or ";") after the "graph" header and between statements. -->
              <div class="mermaid" id="mermaid-2">
                graph LR
                  A[&#34;Agent Output&#34;] --&gt; B[&#34;Chemical Grader&#34;]
                  A --&gt; C[&#34;Length Grader&#34;]
                  A --&gt; D[&#34;Similarity Grader&#34;]
                  A --&gt; E[&#34;LLM Judge&#34;]
                  B --&gt; F[&#34;Chemical Score: 0.8&#34;]
                  C --&gt; G[&#34;Length Score: 0.85&#34;]
                  D --&gt; H[&#34;Similarity Score: 0.9&#34;]
                  E --&gt; I[&#34;Quality Score: 0.85&#34;]
                  F --&gt; J[&#34;Aggregate Score: 0.85&#34;]
                  G --&gt; J
                  H --&gt; J
                  I --&gt; J
                  style A fill:#fefefe,stroke:#0d9488,stroke-width:3px,color:#1a1a1a
                  style J fill:#f0f9ff,stroke:#0369a1,stroke-width:3px,color:#1a1a1a
                  style B fill:#f0fdf4,stroke:#16a34a,stroke-width:2px,color:#1a1a1a
                  style C fill:#f0fdf4,stroke:#16a34a,stroke-width:2px,color:#1a1a1a
                  style D fill:#f0fdf4,stroke:#16a34a,stroke-width:2px,color:#1a1a1a
                  style E fill:#f0fdf4,stroke:#16a34a,stroke-width:2px,color:#1a1a1a
                  style F fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#1a1a1a
                  style G fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#1a1a1a
                  style H fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#1a1a1a
                  style I fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#1a1a1a
              </div>
            </div>
          </div>
        </div>

        <!-- 3.3: orchestration overview and monitoring feature lists -->
        <div id="orchestration" class="mb-16">
          <h3 class="font-serif text-2xl font-semibold text-primary mb-6">3.3 Orchestration and Monitoring</h3>
          <p class="text-lg text-muted mb-8">
            The orchestration logic brings together all components and coordinates their actions to create a seamless, automated workflow. This includes agent versioning, feedback translation, and promotion decisions.
          </p>

          <!-- Monitoring Dashboard -->
          <div class="bg-surface p-8 rounded-lg border border-border">
            <!-- Below-the-fold illustration: lazy-loaded; invalid style="photo" and the
                 non-standard size/aspect/query attributes were dropped. -->
            <img class="w-full h-48 object-cover rounded-lg mb-6" src="https://kimi-web-img.moonshot.cn/img/images.klipfolio.com/6ca28218882fe0b3b416a84b7aafc850f4f33bd7.png" alt="Monitoring dashboard with metrics and graphs" loading="lazy" decoding="async" referrerpolicy="no-referrer" data-modified="1" data-score="0.00"/>
            <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
              <div>
                <h4 class="font-semibold text-primary mb-4">Observability Features</h4>
                <ul class="space-y-3 text-muted">
                  <li class="flex items-start">
                    <i class="fas fa-chart-line text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span><strong>Dashboard Tracing:</strong> Real-time workflow visualization</span>
                  </li>
                  <li class="flex items-start">
                    <i class="fas fa-history text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span><strong>Version History:</strong> Complete prompt evolution tracking</span>
                  </li>
                  <li class="flex items-start">
                    <i class="fas fa-clock text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span><strong>Performance Metrics:</strong> Latency and throughput monitoring</span>
                  </li>
                </ul>
              </div>
              <div>
                <h4 class="font-semibold text-primary mb-4">Production Monitoring</h4>
                <ul class="space-y-3 text-muted">
                  <li class="flex items-start">
                    <i class="fas fa-sync-alt text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span><strong>Continuous Monitoring:</strong> Scheduled re-evaluation</span>
                  </li>
                  <li class="flex items-start">
                    <i class="fas fa-exclamation-triangle text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span><strong>Drift Detection:</strong> Performance degradation alerts</span>
                  </li>
                  <li class="flex items-start">
                    <i class="fas fa-redo text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span><strong>Auto-Recovery:</strong> Automatic rollback to stable versions</span>
                  </li>
                </ul>
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>

    <div class="section-divider"></div>

    <!-- Section 4: Advanced Strategies -->
    <section id="advanced-strategies" class="py-16 bg-surface">
      <div class="container mx-auto px-8 max-w-4xl">
        <h2 class="font-serif text-4xl font-bold text-primary mb-8">4. Advanced Optimization Strategies</h2>

        <div id="model-evaluation" class="mb-16">
          <h3 class="font-serif text-2xl font-semibold text-primary mb-6">4.1 Model Evaluation and Selection</h3>
          <p class="text-lg text-muted mb-8">
            The self-evolving loop can be extended beyond prompt optimization to include the evaluation and selection of different model candidates, automatically finding the optimal balance between performance and cost.
          </p>

          <!-- Model Comparison -->
          <div class="bg-white p-8 rounded-lg border border-border mb-8">
            <h4 class="font-semibold text-primary mb-6">Model Comparison Workflow</h4>
            <div class="mermaid-container">
              <!-- Same control set as the other diagrams; see accessible names on each button. -->
              <div class="mermaid-controls">
                <button type="button" class="mermaid-control-btn zoom-in" title="放大" aria-label="Zoom in">
                  <i class="fas fa-search-plus" aria-hidden="true"></i>
                </button>
                <button type="button" class="mermaid-control-btn zoom-out" title="缩小" aria-label="Zoom out">
                  <i class="fas fa-search-minus" aria-hidden="true"></i>
                </button>
                <button type="button" class="mermaid-control-btn reset-zoom" title="重置" aria-label="Reset zoom">
                  <i class="fas fa-expand-arrows-alt" aria-hidden="true"></i>
                </button>
                <button type="button" class="mermaid-control-btn fullscreen" title="全屏查看" aria-label="View fullscreen">
                  <i class="fas fa-expand" aria-hidden="true"></i>
                </button>
              </div>
              <!-- Mermaid source: one statement per line (required by the flowchart parser). -->
              <div class="mermaid" id="mermaid-3">
                graph TD
                  A[&#34;Improved Prompt&#34;] --&gt; B[&#34;Evaluate with GPT-5&#34;]
                  A --&gt; C[&#34;Evaluate with GPT-5-mini&#34;]
                  A --&gt; D[&#34;Evaluate with GPT-5-nano&#34;]
                  B --&gt; E[&#34;Score: 0.92&#34;]
                  C --&gt; F[&#34;Score: 0.88&#34;]
                  D --&gt; G[&#34;Score: 0.85&#34;]
                  E --&gt; H{&#34;Select Best Model&#34;}
                  F --&gt; H
                  G --&gt; H
                  H --&gt; I[&#34;GPT-5 Selected&#34;]
                  H --&gt; J[&#34;Cost Analysis: $0.12/query&#34;]
                  H --&gt; K[&#34;Performance: +8% improvement&#34;]
                  style A fill:#fefefe,stroke:#0d9488,stroke-width:3px,color:#1a1a1a
                  style I fill:#ecfdf5,stroke:#059669,stroke-width:3px,color:#1a1a1a
                  style B fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a
                  style C fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a
                  style D fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a
                  style E fill:#ecfdf5,stroke:#16a34a,stroke-width:2px,color:#1a1a1a
                  style F fill:#fef3c7,stroke:#d97706,stroke-width:2px,color:#1a1a1a
                  style G fill:#fee2e2,stroke:#dc2626,stroke-width:2px,color:#1a1a1a
                  style H fill:#f0f9ff,stroke:#0369a1,stroke-width:3px,color:#1a1a1a
                  style J fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a
                  style K fill:#f0f9ff,stroke:#0369a1,stroke-width:2px,color:#1a1a1a
              </div>
            </div>
          </div>
        </div>

        <!-- 4.2: GEPA overview -->
        <div id="gepa" class="mb-16">
          <h3 class="font-serif text-2xl font-semibold text-primary mb-6">4.2 Prompt Optimization with Genetic-Pareto (GEPA)</h3>
          <p class="text-lg text-muted mb-8">
            The Genetic-Pareto (GEPA) framework represents a more advanced approach to prompt optimization, employing an evolutionary process with reflective, language-based updates to find robust, generalized prompts.
</p>

          <!-- GEPA benefits card: illustration, two-column benefit list, citation -->
          <div class="bg-white p-8 rounded-lg border border-border mb-8">
            <!-- Below-the-fold illustration: lazy-loaded; the non-standard
                 size/aspect/query attributes were dropped. -->
            <img class="w-full h-40 object-cover rounded-lg mb-6" src="https://kimi-web-img.moonshot.cn/img/media.springernature.com/913e169745aaae72d604de7f643cfdb8e7663be4.png" alt="Abstract representation of evolutionary algorithm concept" loading="lazy" decoding="async" referrerpolicy="no-referrer" data-modified="1" data-score="0.00"/>
            <div class="mb-6">
              <h4 class="font-semibold text-primary mb-4">GEPA Framework Benefits</h4>
              <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
                <div class="space-y-3">
                  <div class="flex items-start">
                    <i class="fas fa-brain text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span class="text-muted"><strong>Reflective Evolution:</strong> Analyzes performance and proposes intelligent improvements</span>
                  </div>
                  <div class="flex items-start">
                    <i class="fas fa-shield-alt text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span class="text-muted"><strong>Generalization:</strong> Uses training/validation sets to prevent overfitting</span>
                  </div>
                </div>
                <div class="space-y-3">
                  <div class="flex items-start">
                    <i class="fas fa-dna text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span class="text-muted"><strong>Evolutionary Approach:</strong> Samples trajectories and reflects on feedback</span>
                  </div>
                  <div class="flex items-start">
                    <i class="fas fa-certificate text-accent mr-3 mt-1" aria-hidden="true"></i>
                    <span class="text-muted"><strong>Empirical Evidence:</strong> Clear performance validation across datasets</span>
                  </div>
                </div>
              </div>
            </div>
            <div class="bg-gray-50 p-6 rounded-lg">
              <p class="text-sm text-muted">
                <strong>Citation:</strong>
                <!-- rel="noopener noreferrer": the new tab must not get a window.opener handle -->
                <a href="https://arxiv.org/abs/2507.19457" class="citation-link" target="_blank" rel="noopener noreferrer">
                  GEPA: Reflective Prompt Evolution Can Outperform Reinforcement Learning
                </a>
                by Agrawal et al.
              </p>
            </div>
          </div>
        </div>
      </div>
    </section>

    <div class="section-divider"></div>

    <!-- Section 5: Appendix -->
    <section id="appendix" class="py-16">
      <div class="container mx-auto px-8 max-w-4xl">
        <h2 class="font-serif text-4xl font-bold text-primary mb-8">5. Appendix</h2>

        <div class="mb-16">
          <h3 class="font-serif text-2xl font-semibold text-primary mb-6">5.1 Example Prompts from Each Optimization Method</h3>

          <!-- Prompt Examples.
               The <pre> bodies are whitespace-sensitive: text starts right after the opening
               tag and every line is flush-left, so each instruction renders on its own line. -->
          <div class="space-y-8">
            <div class="bg-white border border-border rounded-lg overflow-hidden">
              <div class="bg-gray-50 px-6 py-4 border-b border-border">
                <h4 class="font-semibold text-primary">Initial Baseline Prompt</h4>
              </div>
              <div class="p-6">
                <pre class="text-sm text-muted bg-gray-50 p-4 rounded overflow-x-auto">You are a summarization assistant.
Given a section of text, produce a summary.</pre>
              </div>
            </div>

            <div class="bg-white border border-border rounded-lg overflow-hidden">
              <div class="bg-blue-50 px-6 py-4 border-b border-border">
                <h4 class="font-semibold text-primary">OpenAI Platform Optimizer Output</h4>
              </div>
              <div class="p-6">
                <pre class="text-sm text-muted bg-blue-50 p-4 rounded overflow-x-auto">You are a summarization assistant.
Task: Summarize the provided text concisely and accurately.
Output requirements:
- Output only the summary. Do not add titles, labels (e.g., &#34;Summary:&#34;), prefaces, or commentary.
- Preserve the document&#39;s structure. If multiple sections/subsections appear, summarize each one.
- Use a numbered list for sections/subsections (use their numbers/titles when present).
- Under each, use short dash bullets for key points.
- If there is only a single short section, return a brief bullet list or 1-2 concise sentences.
- Split any inline lists into separate bullets.
- Use plain, simple language. Keep bullets tight (ideally one line each). Remove redundancy.
- Include important quantitative details (values, units, conditions) and constraints. Do not invent information.
- Keep formatting simple: plain text, &#34;1.&#34; numbering and &#34;-&#34; bullets only. No tables or special markup.
- Retain exact technical terms/notation from the source (e.g., chemical names, isotopic labels).
- If a section is explicitly marked &#34;Not applicable,&#34; include that status; otherwise do not add it.</pre>
              </div>
            </div>

            <div class="bg-white border border-border rounded-lg overflow-hidden">
              <div class="bg-green-50 px-6 py-4 border-b border-border">
                <h4 class="font-semibold text-primary">Static Metaprompt Output</h4>
              </div>
              <div class="p-6">
                <pre class="text-sm text-muted bg-green-50 p-4 rounded overflow-x-auto">You are a technical summarization assistant for scientific and regulatory documentation. Your task is to generate a concise, comprehensive, and fully detailed summary of any scientific, technical, or regulatory text provided. Strictly adhere to the following instructions:

---

**1. Complete and Exact Information Inclusion**
- Capture *every* explicit fact, technical value, specification, quantity, measurement, regulatory reference, entity, process, site, and contextual detail verbatim from the source text.
- Do not omit or generalize any explicit information, no matter how minor.

**2. Precise Terminology and Named Entity Retention**
- Reproduce all names of chemicals, drugs, mixtures, buffer components, devices, companies, institutions, regulatory standards, section numbers, and procedural labels *exactly as stated*.
- Report all quantities, measurements, concentrations, ratios, masses, volumes, compositions, pH values, and units precisely as given.
- Do not paraphrase, rename, substitute, or simplify any term or value.

...
[additional detailed instructions]
...</pre>
              </div>
            </div>

            <div class="bg-white border border-border rounded-lg overflow-hidden">
              <div class="bg-purple-50 px-6 py-4 border-b border-border">
                <h4 class="font-semibold text-primary">GEPA Optimizer Output</h4>
              </div>
              <div class="p-6">
                <pre class="text-sm text-muted bg-purple-50 p-4 rounded overflow-x-auto">You are a domain-aware summarization assistant for technical pharmaceutical texts. Given a &#34;section&#34; of text, produce a concise, single-paragraph summary that preserves key technical facts and exact nomenclature.

Length and format
- Write 1–3 sentences totaling about 45–70 words (target ~60; never exceed 90).
- Use one paragraph; no bullets, headings, tables, or heavy formatting.

Exact names and notation
- Include every chemical name that appears in the section at least once, using the exact original spelling, capitalization, punctuation, isotopic labels, brackets, hyphens, salts, buffer names, and parenthetical qualifiers...

...
[highly detailed domain-specific instructions]
...

Self-check before finalizing
- Does the paragraph contain every distinct chemical name exactly as written in the section?
- Is the summary 45–70 words (≤90), in a single paragraph?
- Are the most critical process/regulatory/testing details preserved?</pre>
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>

    <!-- Footer -->
    <footer class="bg-primary text-white py-12">
      <div class="container mx-auto px-8 max-w-4xl">
        <div class="grid grid-cols-1 md:grid-cols-3 gap-8">
          <div>
            <h3 class="font-serif text-xl font-semibold mb-4">Contributors</h3>
            <ul class="space-y-2 text-gray-300 text-sm">
              <li>Calvin Maguranis</li>
              <li>Fanny Perraudeau</li>
              <li>Giorgio Saladino</li>
              <li>Shikhar Kwatra</li>
              <li>Valentina Frenkel</li>
            </ul>
          </div>
          <div>
            <h3 class="font-serif text-xl font-semibold mb-4">Citations</h3>
            <p class="text-gray-300 text-sm">
              <!-- rel="noopener noreferrer": the new tab must not get a window.opener handle -->
              <a href="https://arxiv.org/abs/2507.19457" class="citation-link text-accent" target="_blank" rel="noopener noreferrer">
                GEPA: Reflective Prompt Evolution Can Outperform Reinforcement Learning
              </a>
            </p>
          </div>
          <div>
            <h3 class="font-serif text-xl font-semibold mb-4">Resources</h3>
            <!-- NOTE(review): these three links are placeholders (href="#"); replace with the
                 real destinations when known. The smooth-scroll script below ignores them. -->
            <ul class="space-y-2 text-gray-300 text-sm">
              <li>
                <a href="#" class="citation-link text-accent">OpenAI Evals Platform</a>
              </li>
              <li>
                <a href="#" class="citation-link text-accent">Agents SDK Documentation</a>
              </li>
              <li>
                <a href="#" class="citation-link text-accent">Sample Dataset</a>
              </li>
            </ul>
          </div>
        </div>
        <div class="border-t border-gray-700 mt-8 pt-8 text-center text-gray-400 text-sm">
          <p>A joint collaboration between Bain and OpenAI</p>
        </div>
      </div>
    </footer>
  </main>

  <script>
    // Initialize Mermaid with enhanced configuration.
    // NOTE(review): securityLevel 'loose' lets diagram labels contain HTML and click
    // handlers. Acceptable only while every diagram on the page is author-written.
    mermaid.initialize({
      startOnLoad: true,
      theme: 'base',
      themeVariables: {
        primaryColor: '#fefefe',
        primaryTextColor: '#1a1a1a',
        primaryBorderColor: '#0d9488',
        lineColor: '#64748b',
        secondaryColor: '#f1f5f9',
        tertiaryColor: '#fef3c7',
        background: '#fefefe',
        mainBkg: '#fefefe',
        secondBkg: '#f0f9ff',
        tertiaryBkg: '#f1f5f9',
        nodeBorder: '#0d9488',
        clusterBkg: '#f8fafc',
        edgeLabelBackground: '#ffffff',
        nodeTextColor: '#1a1a1a'
      },
      flowchart: {
        useMaxWidth: true,
        htmlLabels: true,
        curve: 'basis',
        padding: 30,
        nodeSpacing: 50,
        rankSpacing: 80,
        diagramPadding: 20
      },
      timeline: {
        useMaxWidth: true,
        padding: 30,
        axisFormat: '%Y-%m-%d'
      },
      gantt: {
        useMaxWidth: true,
        padding: 30
      },
      fontFamily: 'Inter, sans-serif',
      fontSize: 14,
      securityLevel: 'loose'
    });

    /**
     * Wires zoom, pan, pinch and fullscreen behaviour onto every `.mermaid-container`.
     *
     * Each container keeps its own state (scale, translation, drag/pinch flags) in the
     * closure below and applies it as a CSS transform on its `.mermaid` child.
     * Scale is clamped to the range [0.3, 4]; whenever a button or wheel zoom brings
     * the scale to 1 or below, the pan offset is reset to the origin.
     */
    function initializeMermaidControls() {
      const containers = document.querySelectorAll('.mermaid-container');

      containers.forEach(container => {
        const mermaidElement = container.querySelector('.mermaid');
        // A container without a diagram has nothing to transform; skip it so one
        // malformed container cannot stop the remaining ones from being initialised.
        if (!mermaidElement) {
          return;
        }

        let scale = 1;
        let isDragging = false;
        let startX, startY, translateX = 0, translateY = 0;

        // Touch-related state
        let isTouch = false;
        let initialDistance = 0;
        let initialScale = 1;
        let isPinching = false;

        // Zoom controls (each one is optional)
        const zoomInBtn = container.querySelector('.zoom-in');
        const zoomOutBtn = container.querySelector('.zoom-out');
        const resetBtn = container.querySelector('.reset-zoom');
        const fullscreenBtn = container.querySelector('.fullscreen');

        // Pushes the current state to the DOM: transform, `zoomed` class and cursor.
        function updateTransform() {
          mermaidElement.style.transform = `translate(${translateX}px, ${translateY}px) scale(${scale})`;

          if (scale > 1) {
            container.classList.add('zoomed');
          } else {
            container.classList.remove('zoomed');
          }

          mermaidElement.style.cursor = isDragging ? 'grabbing' : 'grab';
        }

        if (zoomInBtn) {
          zoomInBtn.addEventListener('click', () => {
            scale = Math.min(scale * 1.25, 4);
            updateTransform();
          });
        }

        if (zoomOutBtn) {
          zoomOutBtn.addEventListener('click', () => {
            scale = Math.max(scale / 1.25, 0.3);
            if (scale <= 1) {
              translateX = 0;
              translateY = 0;
            }
            updateTransform();
          });
        }

        if (resetBtn) {
          resetBtn.addEventListener('click', () => {
            scale = 1;
            translateX = 0;
            translateY = 0;
            updateTransform();
          });
        }

        if (fullscreenBtn) {
          fullscreenBtn.addEventListener('click', () => {
            // Standard API first, then the vendor-prefixed fallbacks.
            if (container.requestFullscreen) {
              container.requestFullscreen();
            } else if (container.webkitRequestFullscreen) {
              container.webkitRequestFullscreen();
            } else if (container.msRequestFullscreen) {
              container.msRequestFullscreen();
            }
          });
        }

        // Mouse events
        mermaidElement.addEventListener('mousedown', (e) => {
          if (isTouch) return; // a touch gesture is in progress: ignore emulated mouse events
          isDragging = true;
          startX = e.clientX - translateX;
          startY = e.clientY - translateY;
          updateTransform(); // also switches the cursor to 'grabbing'
          e.preventDefault();
        });

        // Move/up are bound on the document so a drag keeps working when the pointer
        // leaves the diagram.
        document.addEventListener('mousemove', (e) => {
          if (isDragging && !isTouch) {
            translateX = e.clientX - startX;
            translateY = e.clientY - startY;
            updateTransform();
          }
        });

        // Ends a mouse drag; shared by mouseup and mouseleave.
        function endMouseDrag() {
          if (isDragging && !isTouch) {
            isDragging = false;
            updateTransform(); // also switches the cursor back to 'grab'
          }
        }

        document.addEventListener('mouseup', endMouseDrag);
        document.addEventListener('mouseleave', endMouseDrag);

        // Distance between two touch points
        function getTouchDistance(touch1, touch2) {
          return Math.hypot(
            touch2.clientX - touch1.clientX,
            touch2.clientY - touch1.clientY
          );
        }

        // Touch events
        mermaidElement.addEventListener('touchstart', (e) => {
          isTouch = true;

          if (e.touches.length === 1) {
            // One finger: pan
            isPinching = false;
            isDragging = true;

            const touch = e.touches[0];
            startX = touch.clientX - translateX;
            startY = touch.clientY - translateY;
          } else if (e.touches.length === 2) {
            // Two fingers: pinch zoom
            isPinching = true;
            isDragging = false;

            initialDistance = getTouchDistance(e.touches[0], e.touches[1]);
            initialScale = scale;
          }

          e.preventDefault();
        }, { passive: false });

        mermaidElement.addEventListener('touchmove', (e) => {
          if (e.touches.length === 1 && isDragging && !isPinching) {
            // One finger: pan
            const touch = e.touches[0];
            translateX = touch.clientX - startX;
            translateY = touch.clientY - startY;
            updateTransform();
          } else if (e.touches.length === 2 && isPinching) {
            // Two fingers: pinch zoom
            const currentDistance = getTouchDistance(e.touches[0], e.touches[1]);

            if (initialDistance > 0) {
              scale = Math.min(Math.max(
                initialScale * (currentDistance / initialDistance),
                0.3
              ), 4);
              updateTransform();
            }
          }

          e.preventDefault();
        }, { passive: false });

        mermaidElement.addEventListener('touchend', (e) => {
          if (e.touches.length === 0) {
            // All fingers lifted: reset gesture state
            isDragging = false;
            isPinching = false;
            initialDistance = 0;

            // Clear isTouch after a short delay so the mouse events that follow a
            // touch do not start a drag.
            setTimeout(() => {
              isTouch = false;
            }, 100);
          } else if (e.touches.length === 1 && isPinching) {
            // Went from two fingers to one: switch from pinch to pan
            isPinching = false;
            isDragging = true;

            const touch = e.touches[0];
            startX = touch.clientX - translateX;
            startY = touch.clientY - translateY;
          }

          updateTransform();
        });

        mermaidElement.addEventListener('touchcancel', () => {
          isDragging = false;
          isPinching = false;
          initialDistance = 0;

          setTimeout(() => {
            isTouch = false;
          }, 100);

          updateTransform();
        });

        // Wheel zoom. The pan offset is scaled by the same factor as the zoom so the
        // visible area keeps its position relative to the diagram.
        // passive: false is stated explicitly because the handler calls preventDefault().
        container.addEventListener('wheel', (e) => {
          e.preventDefault();

          const delta = e.deltaY > 0 ? 0.9 : 1.1;
          const newScale = Math.min(Math.max(scale * delta, 0.3), 4);

          if (newScale !== scale) {
            const scaleDiff = newScale / scale;
            translateX = translateX * scaleDiff;
            translateY = translateY * scaleDiff;
            scale = newScale;

            if (scale <= 1) {
              translateX = 0;
              translateY = 0;
            }

            updateTransform();
          }
        }, { passive: false });

        // Initialize display
        updateTransform();
      });
    }

    // Initialize the controls when the DOM is loaded
    document.addEventListener('DOMContentLoaded', function() {
      initializeMermaidControls();
    });

    // Smooth scrolling for anchor links
    document.querySelectorAll('a[href^="#"]').forEach(anchor => {
      anchor.addEventListener('click', function (e) {
        e.preventDefault();

        const hash = this.getAttribute('href');
        // A bare "#" (placeholder link) names no element. Passing it to querySelector
        // throws a SyntaxError, so stop here.
        if (!hash || hash.length < 2) {
          return;
        }

        // getElementById needs no CSS escaping, so ids that are not valid selectors
        // (for example ones starting with a digit) still resolve.
        const target = document.getElementById(hash.slice(1));
        if (target) {
          target.scrollIntoView({
            behavior: 'smooth',
            block: 'start'
          });
        }
      });
    });

    // Highlight current section in TOC.
    // The section list is static, so it is looked up once instead of on every scroll event.
    const tocSections = document.querySelectorAll('section[id]');

    window.addEventListener('scroll', () => {
      const scrollPos = window.scrollY + 100;

      tocSections.forEach(section => {
        const top = section.offsetTop;
        const bottom = top + section.offsetHeight;
        const id = section.getAttribute('id');
        const link = document.querySelector(`a[href="#${id}"]`);

        if (link && scrollPos >= top && scrollPos <= bottom) {
          document.querySelectorAll('.toc-link').forEach(l => l.classList.remove('text-accent', 'font-semibold'));
          link.classList.add('text-accent', 'font-semibold');
        }
      });
    }, { passive: true });

    // Toggle TOC on small screens.
    // Both elements are looked up defensively: if either is missing, the toggle is
    // skipped instead of throwing and aborting the rest of this script.
    const tocToggle = document.getElementById('toc-toggle');
    const toc = document.getElementById('toc');

    if (tocToggle && toc) {
      tocToggle.addEventListener('click', () => {
        toc.classList.toggle('open');
      });
    }

    // Close TOC when clicking on a link (on small screens)
    document.querySelectorAll('.toc-link').forEach(link => {
      link.addEventListener('click', () => {
        if (toc && window.innerWidth <= 1024) {
          toc.classList.remove('open');
        }
      });
    });
  </script>
</body></html>

讨论回复

0 条回复

还没有人回复,快来发表你的看法吧!