async def demo_memory():
    """Demo 4: show a note stored in one session reappearing in the next.

    Session 1 runs a scripted brain that requests the ``remember`` tool while
    its ``ToolContext`` carries a ``MemoryStore`` backed by a file in the
    system temp directory. Session 2 opens a second ``MemoryStore`` on the
    same path, builds a new system prompt from whatever it reads back, and
    runs a separate engine with a fresh ``VirtualFS`` and context.

    All results are reported with ``print``; nothing is returned.
    """
    explain(
        "DEMO 4 — Memory: persistent MEMORY.md across sessions",
        "Long-term memory persists between runs by being saved to MEMORY.md.\n"
        "During session 1 the agent stores a user preference; when a completely\n"
        "new session 2 starts (fresh engine, fresh transcript) that stored memory\n"
        "gets injected into the system prompt, so the agent already 'knows' the\n"
        "user.")

    def prompt_for(store):
        """Build the system prompt around the store's current contents."""
        return assemble_system_prompt(
            base=BASE_SYSTEM, project_context="", memory=store.read(),
            skills_summary="(none)", tool_names=registry.names())

    mem_path = os.path.join(tempfile.gettempdir(), "oh_demo4_MEMORY.md")
    memory = MemoryStore(mem_path)
    # NOTE(review): reset() presumably clears notes left by an earlier run of
    # this demo — confirm against MemoryStore.
    memory.reset()
    registry = build_registry()

    print(" ── Session 1 ──")
    ctx1 = ToolContext(vfs=VirtualFS(), memory=memory, skills=SkillLibrary())
    s1 = [
        Use("I'll remember the user's stated preferences.",
            [("remember", {"note": "User prefers metric units and concise answers."})]),
        lambda m: Say("Noted your preferences for next time."),
    ]
    eng1 = QueryEngine(brain=ScriptedBrain(s1), registry=registry, ctx=ctx1,
                       perms=PermissionChecker(PermissionMode.AUTO),
                       hooks=HookManager(),
                       system_prompt=prompt_for(memory))
    await eng1.run("Remember that I like metric units and short answers.")
    # The newline escape puts the file contents on their own lines.
    print(f" MEMORY.md is now:\n{textwrap.indent(memory.read(), ' ')}")

    print("\n ── Session 2 (new session, memory reloaded from disk) ──")
    # A second store object on the same path: nothing is shared with session 1
    # except the file itself.
    memory2 = MemoryStore(mem_path)
    ctx2 = ToolContext(vfs=VirtualFS(), memory=memory2, skills=SkillLibrary())
    sysprompt2 = prompt_for(memory2)
    print(" The new system prompt already contains:")
    print(textwrap.indent("## Long-term memory (MEMORY.md)\n" + memory2.read(),
                          " "))
    s2 = [lambda m: Say("Since you prefer metric and brevity: it's about 5 km. 🙂")]
    eng2 = QueryEngine(brain=ScriptedBrain(s2), registry=registry, ctx=ctx2,
                       perms=PermissionChecker(PermissionMode.AUTO),
                       hooks=HookManager(), system_prompt=sysprompt2)
    final = await eng2.run("How far is a 5000 meter run, roughly?")
    print(f"\n FINAL: {final}")
    print("\n TAKEAWAY: state that should outlive a conversation goes to memory, "
          "then is re-injected at the start of future sessions.")
async def demo_compaction():
    """Demo 5: compact an oversized transcript and report the size change.

    Builds a 17-message transcript (one user message followed by eight
    assistant/tool pairs), measures it with ``estimate_messages_tokens``,
    passes it through ``maybe_compact`` with a deliberately small
    ``max_tokens`` budget, and prints the before/after message and token
    counts together with the content of the second message of the result.

    All results are reported with ``print``; nothing is returned.
    """
    explain(
        "DEMO 5 — Context auto-compaction (multi-day sessions without overflow)",
        "As a session grows, the transcript can exceed the context window limit.\n"
        "Auto-compaction condenses the older middle portion of the conversation\n"
        "into a brief summary while keeping the original task and the most recent\n"
        "turns intact — so long-running agents can continue without interruption.\n"
        "(Here we force a small threshold to trigger it; in the real OpenHarness\n"
        "the model itself writes the summary.)")

    msgs = [Message(role="user", content="Build and verify a data pipeline.")]
    for i in range(8):
        # Each step is an assistant turn issuing one shell call plus its result.
        msgs.append(Message(role="assistant", content=f"Step {i}: doing work...",
                            tool_calls=[ToolCall(f"c{i}", "shell",
                                                 {"command": f"process chunk {i}"})]))
        # The tool output is repeated four times to inflate the token estimate.
        msgs.append(Message(role="tool", name="shell", tool_call_id=f"c{i}",
                            content=f"chunk {i} processed: 1000 rows ok " * 4))

    before = estimate_messages_tokens(msgs)
    print(f" Before: {len(msgs)} messages, ~{before} tokens")
    compacted = maybe_compact(msgs, max_tokens=300, keep_last=4)
    after = estimate_messages_tokens(compacted)
    print(f" After: {len(compacted)} messages, ~{after} tokens "
          f"({100 * (before - after) // before}% smaller)")
    print("\n The injected summary message:")
    # NOTE(review): assumes the summary sits at index 1, right after the
    # original task message — confirm against maybe_compact.
    print(textwrap.indent(compacted[1].content, " "))
    print("\n TAKEAWAY: the harness manages the context window so the agent can "
          "run far longer than a single window allows.")
async def demo_multi_agent():
    """Demo 6: a lead agent delegates work to subagents via a ``spawn`` callback.

    The lead engine's ``ToolContext`` carries a ``spawn(role, task)``
    coroutine. Each call looks up a profile factory for the role, builds a
    child ``QueryEngine`` with its own scripted brain and tool registry, runs
    it on the task, and records the outcome in the local ``team`` list. The
    lead script requests two researcher spawns in one turn, then one writer
    spawn; per the demo text the engine is expected to run same-turn calls
    concurrently.

    All results are reported with ``print``; nothing is returned.
    """
    explain(
        "DEMO 6 — Swarm coordination: spawning parallel subagents",
        "A lead agent breaks down a task and assigns pieces to specialized\n"
        "subagents. Each subagent operates as its OWN harness (own loop, own\n"
        "brain, own tools). Two researchers execute IN PARALLEL (launched in the\n"
        "same turn → asyncio.gather), then a writer combines their findings. The\n"
        "team registry keeps a record of who handled what.")

    def researcher_profile():
        """Return a (brain, registry) pair for a web-search researcher."""
        reg = build_registry([WebSearchTool])
        script = [
            # Placeholder query; spawn() overwrites this step with the real task.
            Use("Researching via web search.",
                [("web_search", {"query": "PLACEHOLDER"})]),
            lambda m: Say("Summary: " +
                          short(last_tool_results(m)[0]["content"], 160)),
        ]
        return ScriptedBrain(script), reg

    def writer_profile():
        """Return a (brain, registry) pair for the synthesis writer."""
        reg = build_registry([WriteFileTool])
        script = [lambda m: Say("Synthesized brief combining both research notes "
                                "into a coherent paragraph.")]
        return ScriptedBrain(script), reg

    profiles = {"researcher": researcher_profile, "writer": writer_profile}
    # The filesystem, memory store and skills are shared by the lead and
    # every subagent.
    vfs = VirtualFS()
    memory = MemoryStore(os.path.join(tempfile.gettempdir(), "oh_d6.md"))
    skills = SkillLibrary()
    team: list = []  # one dict per finished subagent run

    def make_spawn():
        """Create the spawn coroutine handed to the tool context."""
        async def spawn(role: str, task: str) -> str:
            """Run one subagent of ``role`` on ``task`` and return its result."""
            factory = profiles.get(role)
            if not factory:
                return f"(no such role: {role})"
            child_brain, child_reg = factory()
            if role == "researcher" and child_brain.script:
                # Replace the placeholder search step with the actual task.
                child_brain.script[0] = Use(f"Researching: {task}",
                                            [("web_search", {"query": task})])
            # Children receive the same spawn callback as the lead.
            child_ctx = ToolContext(vfs=vfs, memory=memory, skills=skills,
                                    spawn=spawn)
            child_engine = QueryEngine(
                brain=child_brain, registry=child_reg, ctx=child_ctx,
                perms=PermissionChecker(PermissionMode.AUTO),
                hooks=HookManager(), system_prompt="(subagent)",
                approve=auto_approve, max_turns=6)
            print(f" 🧑🔧 spawned [{role}] for: {short(task, 60)}")
            result = await child_engine.run(task, on_event=None)
            team.append({"role": role, "task": task, "result": result})
            return result
        return spawn

    ctx = ToolContext(vfs=vfs, memory=memory, skills=skills, spawn=make_spawn())
    registry = build_registry()
    lead_script = [
        Use("I'll split this: research vector databases AND agent harnesses in "
            "parallel, then have a writer combine the findings.",
            [("spawn_agent", {"role": "researcher",
                              "task": "vector database for RAG"}),
             ("spawn_agent", {"role": "researcher",
                              "task": "agent harness design"})]),
        Use("Both research notes are in — delegating synthesis to the writer.",
            [("spawn_agent", {"role": "writer",
                              "task": "combine the two research notes"})]),
        lambda m: Say("Coordination complete: 2 researchers (parallel) + 1 "
                      "writer produced a combined brief."),
    ]
    engine = QueryEngine(brain=ScriptedBrain(lead_script), registry=registry,
                         ctx=ctx, perms=PermissionChecker(PermissionMode.AUTO),
                         hooks=HookManager(), system_prompt="(lead agent)",
                         max_turns=8)
    print("\n[running the lead agent]\n")
    # perf_counter is monotonic, so the interval cannot be skewed by
    # wall-clock adjustments.
    t0 = time.perf_counter()
    final = await engine.run("Produce a short brief on building RAG agents.")
    dt = time.perf_counter() - t0
    print(f"\n FINAL: {final}")
    print(f"\n Team registry ({len(team)} subagent runs, total {dt:.3f}s):")
    # NOTE(review): the body of this loop was missing from the file; this
    # per-entry line is a minimal reconstruction — confirm the wording.
    for entry in team:
        print(f"   [{entry['role']}] {short(entry['task'], 40)} → "
              f"{short(str(entry['result']), 70)}")
async def demo_real_provider():
    """Demo 7: run the same harness against a live model when one is configured.

    Prints setup instructions, then asks ``make_real_brain`` for a live brain.
    If it returns ``None`` the demo prints a notice and returns early.
    Otherwise it wraps the brain in ``RetryingBrain``, runs one coding task
    through a ``QueryEngine`` with a ``CostMeter``, and prints the final
    answer, the virtual file tree and the cost summary.

    All results are reported with ``print``; nothing is returned.
    """
    explain(
        "DEMO 7 — Switch to a LIVE model (Anthropic / OpenAI-compatible)",
        "Every previous example relied on a simulated decision-maker — no API\n"
        "charges, no external calls. Activating a genuine model requires changing\n"
        "just ONE component: the reasoning core. The execution framework, tool\n"
        "definitions, access controls, event hooks, knowledge modules, storage,\n"
        "and orchestration layer all stay exactly as they are. This modularity is\n"
        "the fundamental advantage of a harness architecture: the underlying\n"
        "model becomes a swappable part.")
    # NOTE(review): the OPENAI_BASE_URL example value was truncated in the
    # file; Ollama's local OpenAI-compatible endpoint is filled in to match
    # the "including local Ollama" comment — confirm the intended URL.
    print(textwrap.dedent("""
        To execute the SAME harness on a real model, configure these environment
        variables and restart (compatible with any OpenAI- or Anthropic-aligned
        endpoint that OpenHarness recognizes: Claude, GPT, Kimi, GLM, DeepSeek,
        Qwen, Groq, Ollama, OpenRouter, and more):
        import os
        os.environ["USE_REAL_LLM"] = "1"
        # --- Anthropic-style ---
        os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."
        os.environ["MODEL"] = "claude-sonnet-4-6"
        # --- or OpenAI-style (including local Ollama) ---
        # os.environ["OPENAI_API_KEY"] = "sk-..."
        # os.environ["OPENAI_BASE_URL"] = "http://localhost:11434/v1"
        # os.environ["MODEL"] = "llama-3.3-70b"
        Then construct the engine with the live brain instead of the mock:
        brain = make_real_brain(system=system_prompt) or ScriptedBrain([...])
        engine = QueryEngine(brain=brain, registry=registry, ctx=ctx, ...)
        await engine.run("Refactor utils.py and add tests.")
        """))

    sysprompt = assemble_system_prompt(
        base=BASE_SYSTEM, project_context="", memory="",
        skills_summary="(none)", tool_names=build_registry().names())
    real = make_real_brain(system=sysprompt)
    if real is None:
        # No live provider available: explain how to enable one and stop here.
        print(" [USE_REAL_LLM not configured — remaining on the mock brain. "
              "Set the environment variables above and restart to activate live mode.]")
        return

    print(f" [LIVE] Active provider: {real.api_format} / {real.model}\n")
    vfs = VirtualFS()
    ctx = ToolContext(vfs=vfs, memory=MemoryStore(
                          os.path.join(tempfile.gettempdir(), "oh_real.md")),
                      skills=SkillLibrary(), canned_answers={})
    engine = QueryEngine(
        brain=RetryingBrain(real), registry=build_registry(), ctx=ctx,
        perms=PermissionChecker(PermissionMode.AUTO), hooks=HookManager(),
        system_prompt=sysprompt, cost=CostMeter(real.model), max_turns=12)
    # The "<name>" placeholder tells the model what greet() should interpolate.
    final = await engine.run(
        "Create greet.py with a function greet(name) that returns "
        "'Hello, <name>!', then write and run a quick test to prove it works.")
    print(f"\n FINAL: {final}")
    print(f"\n Files:\n{vfs.tree()}")
    print(f"\n 💰 {engine.cost.summary()}")
async def main():
    """Print the walkthrough overview, run the seven demos in order, then recap."""
    banner("OpenHarness From Scratch — guided walkthrough")
    # dedent() strips the common source indentation; rstrip() drops the
    # trailing newline so no blank line follows the diagram.
    overview = textwrap.dedent("""
        We will construct the harness one subsystem at a time:
        1. The agent loop (tools, run/verify/fix, retries, cost)
        2. Permissions (modes, sensitive paths, rules, hook veto)
        3. Skills (on-demand knowledge)
        4. Memory (persistent MEMORY.md across sessions)
        5. Compaction (surviving long sessions)
        6. Multi-agent (parallel subagent delegation)
        7. Real provider (one-line swap to a live model)
        Architecture (what each piece is responsible for):
        User prompt
        │
        ▼
        QueryEngine ──► LLM brain (mock or real) "WHAT to do"
        │ ▲ │ tool_use
        │ └────────────┘
        ▼
        For each tool call: Permission ─► PreHook ─► Execute ─► PostHook
        │ │ │ │
        deny/ask veto/edit sandbox redact
        │
        ▼
        Tool result ──► back into the transcript ──► loop
        """).rstrip()
    print(overview)

    # Each demo is awaited to completion before the next one starts.
    demos = (
        demo_agent_loop,
        demo_permissions,
        demo_skills,
        demo_memory,
        demo_compaction,
        demo_multi_agent,
        demo_real_provider,
    )
    for run_demo in demos:
        await run_demo()

    banner("All demos complete 🎉")
    recap = textwrap.dedent("""
        You just assembled the core of an agent harness:
        • a streaming tool-call loop with retries & cost tracking
        • type-validated, self-describing tools
        • layered governance (permission modes + lifecycle hooks)
        • on-demand skills and persistent memory
        • context auto-compaction
        • nested multi-agent coordination
        • a one-line swap to a real LLM provider
        To explore further, examine the full project:
        (43+ tools, plugin ecosystem, MCP client, React/Ink TUI, the `oh` CLI,
        and the `ohmo` personal agent). "The model is the agent; the code is the
        harness."
        """)
    print(recap)
# Entry point: build the main() coroutine and hand it to run_async, which is
# defined elsewhere in this file. This runs whenever the module is executed.
# NOTE(review): presumably run_async copes with an already-running event loop
# (e.g. in a notebook) — confirm against its definition.
run_async(main())