
Commit a010aa4

feat: add agent runtime with turn loop and run command (#3)
* feat: add agent runtime with turn loop, session buffer, and run command

  Implement the core agent turn loop (Step 4) that ties providers and tools together. Adds an in-memory session buffer with system prompt injection, streaming response accumulation with fragmented tool call reassembly, safety-tier-aware tool dispatch (ReadOnly parallel, SideEffecting sequential), a context budget warning, and a `yantra run "prompt"` CLI subcommand. Includes 9 test cases covering all runtime paths with race detection.

* fix: wire WorkspaceDir, extend turn timeout to cover tool dispatch

  P1: ToolExecutionContext.WorkspaceDir was never set, causing all built-in file tools to fail security policy checks. Now wired through AgentRuntime via a workspaceDir field set at construction.

  P1: The turn timeout only covered provider streaming — dispatchTools ran on the parent context with no deadline. Now turnCtx covers both phases. Added classifyError to normalize context.DeadlineExceeded to ErrTimeout (turn budget exceeded) vs ErrCancelled (parent cancelled). Added TestRun_TurnTimeout and TestRun_TurnTimeoutDuringToolExecution (11 tests total, all passing with -race).

* fix: address Qodo review — dispatch ordering, signal handling, duplicate progress

  1. Tool dispatch ordering: rewrite dispatchTools to use a contiguous-block approach — iterate in model-provided order, accumulate contiguous ReadOnly calls into parallel blocks, and flush before any SideEffecting call. Preserves write_file → read_file ordering correctness.
  2. CLI signal handling: wire signal.NotifyContext (SIGINT/SIGTERM) into the root command via ExecuteContext, and pass cmd.Context() into runAgent so Ctrl-C propagates into provider streaming and tool execution.
  3. Duplicate progress: remove the ProgressToolExecution emission from runtime.executeTool — ToolRegistry.Execute already emits it.
* docs: update README and architecture docs for runtime layer

  - README: update the architecture diagram (runtime no longer planned), add yantra run usage to the quick start, add a runtime section explaining the turn loop, and add runtime/ to the project structure.
  - architecture.md: add Layer 4 (Runtime) covering the session buffer, turn loop, stream accumulation, tool dispatch ordering, error handling, and context budget. Update the "what's next" table. Fix the safety tier description to reflect contiguous-block parallel dispatch.
  - config.md: clarify that turn_timeout_secs covers both streaming and tools.
  - tools.md: update the SafetyTier dispatch description for accuracy.
1 parent af88b64 commit a010aa4

8 files changed

Lines changed: 1231 additions & 39 deletions


README.md

Lines changed: 36 additions & 4 deletions
@@ -14,9 +14,11 @@ Think of it as building your own Claude Code / Cursor agent from scratch.
 ┌─────────────────────────────────────────────┐
 │                     CLI                     │
 │             cmd/yantra/main.go              │
+│         yantra init | run | version         │
 ├─────────────────────────────────────────────┤
-│              Runtime (Step 4)               │
-│        the agent turn loop (planned)        │
+│                   Runtime                   │
+│          agent turn loop + session          │
+│    stream → think → act → observe → loop    │
 ├──────────────┬──────────────┬───────────────┤
 │   Provider   │    Tools     │    Memory     │
 │    Layer     │   System     │   (Step 5)    │
@@ -41,14 +43,25 @@ go build ./...
 # Generate default config
 go run ./cmd/yantra init
 
-# Edit yantra.toml with your API keys
+# Edit yantra.toml — set your API key
 $EDITOR yantra.toml
+
+# Set your provider API key
+export OPENAI_API_KEY=sk-...
+# Or for Anthropic:
+export ANTHROPIC_API_KEY=sk-ant-...
+
+# Run the agent
+go run ./cmd/yantra run "What is 2+2? Answer briefly."
+
+# Run with a custom system prompt and workspace
+go run ./cmd/yantra run --system "You are a Go expert" --workspace ./myproject "add tests for main.go"
 ```
 
 ## Project structure
 
 ```
-cmd/yantra/      CLI entry point (init, version, start, serve, tui)
+cmd/yantra/      CLI entry point (init, run, version, start, serve, tui)
 internal/
   types/         Shared interfaces and data types
     config.go    Configuration structs + defaults
@@ -67,6 +80,9 @@ internal/
     anthropic.go Anthropic Messages API
     gemini.go    Google Gemini GenerateContent
     reliable.go  Retry wrapper with exponential backoff
+  runtime/       Agent turn loop
+    session.go   In-memory conversation buffer
+    runtime.go   AgentRuntime, Run(), stream accumulation, tool dispatch
   tool/          Tool system
     schema.go    JSON Schema builder helpers
     security.go  SecurityPolicy + WorkspacePolicy
@@ -119,6 +135,22 @@ All tool execution goes through a `SecurityPolicy`:
 - **Operator blocking**: `|`, `&&`, `||`, `;`, `>` blocked by default (configurable)
 - Deny always overrides allow
 
+## Runtime
+
+The runtime is the core agent loop that ties providers and tools together:
+
+1. User message is added to an in-memory session
+2. Session context (system prompt + messages + tool schemas) is streamed to the provider
+3. Response is accumulated, including fragmented tool call deltas
+4. If the LLM returns tool calls, they're dispatched respecting safety tiers:
+   - **ReadOnly** tools in a contiguous block run in parallel
+   - **SideEffecting/Privileged** tools run sequentially at their original position
+   - Model-provided tool call order is preserved (e.g., `write_file` before `read_file`)
+5. Tool results are appended to the session, and the loop repeats
+6. When the LLM responds with text only (no tool calls), the loop ends
+
+The turn timeout covers both provider streaming and tool execution as a single budget. Ctrl-C (SIGINT/SIGTERM) propagates cleanly into the runtime via context cancellation.
+
 ## Tests
 
 ```bash
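The six-step loop documented in this README section can be sketched as a stub, with a fake provider standing in for the streaming layer. All names and types here are illustrative, not the real `internal/runtime` API.

```go
package main

import "fmt"

// Response is a stub of what stream accumulation produces per turn.
type Response struct {
	Content   string
	ToolCalls []string
}

// runTurnLoop drives think, act, observe until the model answers with
// text only (done) or the turn budget is exhausted.
func runTurnLoop(provider func(history []string) Response, user string, maxTurns int) (string, int, error) {
	history := []string{"user: " + user} // step 1: user message in session
	for turn := 1; turn <= maxTurns; turn++ {
		resp := provider(history) // steps 2-3: stream + accumulate
		if len(resp.ToolCalls) == 0 {
			return resp.Content, turn, nil // step 6: text-only, loop ends
		}
		for _, call := range resp.ToolCalls { // steps 4-5: dispatch, observe
			history = append(history, "tool result: "+call)
		}
	}
	return "", maxTurns, fmt.Errorf("max turns (%d) reached", maxTurns)
}

func main() {
	// Fake provider: one tool call on the first turn, then a final answer.
	provider := func(history []string) Response {
		if len(history) == 1 {
			return Response{ToolCalls: []string{"read_file"}}
		}
		return Response{Content: "done"}
	}
	answer, turns, _ := runTurnLoop(provider, "fix the bug", 8)
	fmt.Println(answer, turns) // done 2
}
```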

cmd/yantra/main.go

Lines changed: 84 additions & 2 deletions
@@ -1,9 +1,17 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"os"
-
+	"os/signal"
+	"path/filepath"
+	"syscall"
+
+	"github.com/hackertron/Yantra/internal/provider"
+	"github.com/hackertron/Yantra/internal/runtime"
+	"github.com/hackertron/Yantra/internal/tool"
+	"github.com/hackertron/Yantra/internal/types"
 	"github.com/spf13/cobra"
 )
 
@@ -30,13 +38,17 @@ Single binary. Zero config to get started.`,
 
 	root.AddCommand(
 		initCmd(),
+		runCmd(),
 		startCmd(),
 		tuiCmd(),
 		serveCmd(),
 		versionCmd(),
 	)
 
-	if err := root.Execute(); err != nil {
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+
+	if err := root.ExecuteContext(ctx); err != nil {
 		fmt.Fprintf(os.Stderr, "error: %v\n", err)
 		os.Exit(1)
 	}
@@ -165,6 +177,76 @@ provider = "duckduckgo"
 	return nil
 }
 
+func runCmd() *cobra.Command {
+	var systemPrompt string
+	var workspace string
+
+	cmd := &cobra.Command{
+		Use:   "run [prompt]",
+		Short: "Run a single agent turn loop with the given prompt",
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return runAgent(cmd.Context(), args[0], systemPrompt, workspace)
+		},
+	}
+	cmd.Flags().StringVar(&systemPrompt, "system", "You are a helpful AI assistant with access to tools.", "system prompt")
+	cmd.Flags().StringVar(&workspace, "workspace", ".", "workspace directory for tool execution")
+	return cmd
+}
+
+func runAgent(ctx context.Context, prompt, systemPrompt, workspace string) error {
+	cfg, err := types.LoadConfig(configPath)
+	if err != nil {
+		return fmt.Errorf("loading config: %w", err)
+	}
+
+	p, err := provider.BuildFromConfig(cfg)
+	if err != nil {
+		return fmt.Errorf("building provider: %w", err)
+	}
+	p = provider.NewReliable(p, provider.DefaultReliableConfig())
+
+	policy := tool.NewWorkspacePolicy(cfg.Tools.Shell)
+	reg := tool.NewRegistry(policy)
+	if err := tool.RegisterBuiltins(reg, cfg.Tools); err != nil {
+		return fmt.Errorf("registering tools: %w", err)
+	}
+
+	absWorkspace, err := filepath.Abs(workspace)
+	if err != nil {
+		return fmt.Errorf("resolving workspace: %w", err)
+	}
+
+	rt := runtime.New(p, reg, cfg.Runtime, absWorkspace)
+
+	progress := make(chan types.ProgressEvent, 32)
+	go func() {
+		for ev := range progress {
+			if ev.Tool != "" {
+				fmt.Fprintf(os.Stderr, "[%s] %s: %s\n", ev.Kind, ev.Tool, ev.Message)
+			} else {
+				fmt.Fprintf(os.Stderr, "[%s] %s\n", ev.Kind, ev.Message)
+			}
+		}
+	}()
+
+	result, err := rt.Run(ctx, systemPrompt, prompt, progress)
+	close(progress)
+	if err != nil {
+		return fmt.Errorf("agent run failed: %w", err)
+	}
+
+	fmt.Println(result.FinalContent)
+	fmt.Fprintf(os.Stderr, "\n--- stats ---\n")
+	fmt.Fprintf(os.Stderr, "turns: %d\n", result.TurnsUsed)
+	fmt.Fprintf(os.Stderr, "tokens: %d prompt, %d completion, %d total\n",
+		result.TotalUsage.PromptTokens,
+		result.TotalUsage.CompletionTokens,
+		result.TotalUsage.TotalTokens,
+	)
+	return nil
+}
+
 func runStart(cmd *cobra.Command, args []string) error {
 	fmt.Println("Starting Yantra daemon...")
 	// TODO: implement daemon startup

docs/architecture.md

Lines changed: 89 additions & 31 deletions
@@ -38,6 +38,7 @@ Everything in Yantra exists to make this loop work well:
 - **Tools** give the LLM hands
 - **Security** prevents the LLM from doing damage
 - **Config** makes it all customizable
+- **Runtime** runs the think → act → observe loop
 - **Memory** (planned) lets the agent remember across sessions
 - **Gateway** (planned) lets you control it remotely
 
@@ -129,10 +130,10 @@ const (
 )
 ```
 
-These tiers inform the runtime how to handle tools:
-- **ReadOnly** tools can run in parallel safely
-- **SideEffecting** tools should run sequentially (they change state)
-- **Privileged** tools need extra checks and may require user confirmation
+These tiers inform the runtime how to dispatch tools:
+- **ReadOnly** tools run in parallel when contiguous in the call list
+- **SideEffecting** tools run sequentially (they change state)
+- **Privileged** tools run sequentially and may require user confirmation in future
 
 ### Configuration
 
@@ -385,49 +386,106 @@ type ToolExecutionContext struct {
 `WorkspaceDir` is the most important — it's the root directory for all file operations. `Progress` is an optional channel for emitting status updates (the gateway can forward these to the UI).
 
 
-## How the pieces connect
+## Layer 4: Runtime (`internal/runtime/`)
+
+The runtime is the brain — it ties providers and tools together in a turn loop.
 
-Here's how everything flows when the runtime (Step 4) is built:
+### Session buffer
+
+`Session` is an in-memory conversation buffer. The system prompt is stored separately and injected by `Context()` when building the payload for the provider. This keeps the message list clean for turn counting and future summarization.
+
+```go
+session := NewSession("You are a helpful assistant.", toolSchemas)
+session.Append(Message{Role: "user", Content: "fix the bug"})
+
+ctx := session.Context()
+// → Messages: [system prompt, user message]
+// → Tools: [read_file, write_file, ...]
+```
+
+### The turn loop
+
+`AgentRuntime.Run()` is the main entry point:
 
 ```
 1. User runs: yantra run "add error handling to server.go"
 
-2. CLI loads config (yantra.toml + env vars)
-   → YantraConfig
+2. CLI loads config, builds provider + registry + runtime
+
+3. TURN LOOP (up to MaxTurns):
+   a. Per-turn timeout covers streaming + tool dispatch
+   b. Stream provider response, accumulate text + tool call deltas
+   c. If tool calls present:
+      - Dispatch respecting safety tiers and model-provided order
+      - Contiguous ReadOnly calls run in parallel
+      - SideEffecting/Privileged calls run sequentially at original position
+      - Tool results appended to session
+   d. If text-only response → return result (done)
+   e. Check context budget (log warning if approaching limit)
+
+4. Return: FinalContent, TurnsUsed, TotalUsage
+```
+
+### Stream accumulation
 
-3. Build provider from config
-   → ReliableProvider(OpenAIProvider{model: "gpt-4o"})
+The provider returns a channel of `StreamItem`. The runtime's `collectStream()` method:
+- Accumulates `StreamText` into the response content
+- Reassembles `StreamToolCallDelta` fragments into complete `ToolCall` objects (keyed by index)
+- Captures final `Usage` from the `StreamDone` event
+- Propagates `StreamError` as a Go error
 
-4. Create tool registry with workspace policy
-   → RegisterBuiltins(registry, config.Tools)
-   → registry has: read_file, write_file, list_files, shell_exec, web_fetch
+Tool call deltas arrive in chunks — the first delta for an index carries `ID` + `Name`, subsequent deltas append to `Arguments` via a `strings.Builder`. This handles all three providers (OpenAI, Anthropic, Gemini) uniformly.
 
-5. Get tool schemas for LLM
-   → registry.Schemas(nil) → []FunctionDecl
+### Tool dispatch ordering
 
-6. Build initial messages
-   → [system prompt, user message]
+Tools are dispatched in model-provided order with parallelism for contiguous ReadOnly blocks:
 
-7. AGENT LOOP:
-   a. Call provider.Complete(ctx, &Context{Messages, Tools})
-   b. LLM returns Message with ToolCalls
-   c. For each ToolCall:
-      - registry.Execute(ctx, name, args, execCtx)
-      - Policy check → timeout → execute → truncate
-      - Create tool result Message
-   d. Append assistant message + tool results to history
-   e. Check budget (turns, tokens, cost)
-   f. Go to step a
+```
+Call order from LLM: [read_file, read_file, write_file, read_file]
+                      ├─ parallel ─┤  sequential   sequential
+
+Block 1: read_file + read_file → parallel (both ReadOnly)
+Block 2: write_file            → sequential (SideEffecting)
+Block 3: read_file             → sequential (ReadOnly, but after a side effect)
+```
+
+This preserves correctness for patterns like `write_file → read_file` (verify what was written) while maximizing parallelism where safe.
+
+### Error handling
+
+The runtime classifies errors:
+- Parent context cancelled → `ErrCancelled` (user pressed Ctrl-C)
+- Turn context deadline exceeded → `ErrTimeout` (turn budget exhausted)
+- Max turns reached → `ErrMaxTurns`
+- Tool execution errors → placed in message content (the LLM sees them and can recover)
+
+### Context budget
 
-8. LLM returns text-only response → done
-   → Print final answer to user
+After each tool dispatch, the runtime estimates token usage (chars/4) and logs a warning if the session is approaching the context limit (`TriggerRatio * MaxContextTokens`). Actual summarization is deferred to Step 5 (Memory).
+
+## How the pieces connect
+
+```
+yantra run "add error handling to server.go"
+
+├── LoadConfig() → YantraConfig
+├── BuildFromConfig() → ReliableProvider(OpenAIProvider)
+├── NewWorkspacePolicy() → SecurityPolicy
+├── NewRegistry() + RegisterBuiltins() → ToolRegistry
└── runtime.New() + Run() → AgentRuntime turn loop

    ├── Session.Context() → system prompt + messages + tool schemas
    ├── provider.Stream() → channel of StreamItem
    ├── collectStream() → assembled Response with ToolCalls
    ├── dispatchTools() → tool results (parallel ReadOnly, sequential others)
    ├── checkContextBudget() → warning if approaching limit
    └── loop until text-only response or MaxTurns
 ```
 
 ## What's next
 
 | Step | What | Purpose |
 |------|------|---------|
-| 4 | Runtime | The agent turn loop — the brain that ties providers + tools together |
-| 5 | Memory | Persistent vector DB for cross-session recall |
+| 5 | Memory | Persistent vector DB for cross-session recall + rolling summarization |
 | 6 | Gateway | WebSocket server for remote control |
 | 7 | Multi-agent | Specialist subagents with delegation |
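The delta reassembly that the stream accumulation notes describe (first delta carries `ID` + `Name`, later deltas append `Arguments`, keyed by index, accumulated in a `strings.Builder`) can be sketched as follows. Types are simplified and names are illustrative; the real `collectStream()` also handles text, usage, and stream errors.

```go
package main

import (
	"fmt"
	"strings"
)

// Delta is a simplified stream fragment: the first delta for an index
// carries ID and Name; later ones only append argument text.
type Delta struct {
	Index int
	ID    string
	Name  string
	Args  string
}

type ToolCall struct {
	ID, Name, Arguments string
}

// assemble reassembles fragmented tool calls, keyed by index, using a
// strings.Builder per call to accumulate the argument JSON.
func assemble(deltas []Delta) []ToolCall {
	ids := map[int]string{}
	names := map[int]string{}
	args := map[int]*strings.Builder{}
	maxIdx := -1
	for _, d := range deltas {
		if d.Index > maxIdx {
			maxIdx = d.Index
		}
		if _, ok := args[d.Index]; !ok {
			args[d.Index] = &strings.Builder{}
		}
		if d.ID != "" {
			ids[d.Index] = d.ID
		}
		if d.Name != "" {
			names[d.Index] = d.Name
		}
		args[d.Index].WriteString(d.Args)
	}
	calls := make([]ToolCall, 0, maxIdx+1)
	for i := 0; i <= maxIdx; i++ {
		call := ToolCall{ID: ids[i], Name: names[i]}
		if b := args[i]; b != nil {
			call.Arguments = b.String()
		}
		calls = append(calls, call)
	}
	return calls
}

func main() {
	calls := assemble([]Delta{
		{Index: 0, ID: "c1", Name: "read_file", Args: `{"path":`},
		{Index: 0, Args: `"main.go"}`},
	})
	fmt.Printf("%+v\n", calls[0]) // {ID:c1 Name:read_file Arguments:{"path":"main.go"}}
}
```

Keying by index rather than ID is what lets the same accumulator serve providers whose later deltas omit the ID entirely.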

docs/config.md

Lines changed: 1 addition & 1 deletion
@@ -112,7 +112,7 @@ max_cost = 0.0 # Max dollar cost (0 = unlimited)
 
 **max_turns** prevents infinite loops. If the LLM keeps calling tools without converging on an answer, this stops it.
 
-**turn_timeout_secs** is the timeout for a single turn (LLM call + tool executions). Not per-tool — that's the tool's own Timeout().
+**turn_timeout_secs** is the timeout for a single turn. It covers both the provider streaming phase and tool execution as one budget. Individual tools also have their own Timeout() applied by the registry.
 
 **max_cost** tracks token usage cost and stops if exceeded. Useful for preventing runaway spend.

docs/tools.md

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ One of three values:
 - `SideEffecting` — changes state (writing files, making HTTP requests)
 - `Privileged` — potentially dangerous (running shell commands)
 
-The runtime uses these to decide execution strategy. ReadOnly tools can run in parallel. SideEffecting tools run sequentially. Privileged tools might prompt the user for confirmation.
+The runtime uses these to decide execution strategy. Contiguous ReadOnly tools run in parallel; SideEffecting and Privileged tools run sequentially at their original position in the call list. This preserves model-provided ordering for cross-tool dependencies (e.g., `write_file` then `read_file`) while maximizing parallelism where safe.
 
 ### Timeout()
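The contiguous-block dispatch strategy described above can be sketched as a pure partition step: runs of consecutive ReadOnly calls become one parallel block, and every other call becomes its own sequential block at its original position. The `SafetyTier` names match the docs; everything else is illustrative.

```go
package main

import "fmt"

type SafetyTier int

const (
	ReadOnly SafetyTier = iota
	SideEffecting
	Privileged
)

type Call struct {
	Name string
	Tier SafetyTier
}

// blocks partitions calls in model-provided order: contiguous ReadOnly
// calls accumulate into one parallel block, flushed before any
// non-ReadOnly call, which then forms a sequential block of its own.
func blocks(calls []Call) [][]Call {
	var out [][]Call
	var ro []Call // pending contiguous ReadOnly block
	flush := func() {
		if len(ro) > 0 {
			out = append(out, ro)
			ro = nil
		}
	}
	for _, c := range calls {
		if c.Tier == ReadOnly {
			ro = append(ro, c)
			continue
		}
		flush()
		out = append(out, []Call{c})
	}
	flush()
	return out
}

func main() {
	calls := []Call{
		{"read_file", ReadOnly}, {"read_file", ReadOnly},
		{"write_file", SideEffecting}, {"read_file", ReadOnly},
	}
	for i, b := range blocks(calls) {
		fmt.Println(i, len(b)) // 0 2, then 1 1, then 2 1
	}
}
```

Running each block to completion before the next one starts is what keeps a `read_file` issued after a `write_file` from observing stale contents.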
