Create Completion

POST

api

completions

Create Completion

curl --request POST \
  --url https://api.example.com/api/v1/completions \
  --header 'Content-Type: application/json' \
  --data '
{
  "prompt": "<string>",
  "mode": "<string>",
  "stream": true,
  "conversation_id": "<string>",
  "skills": [
    "<string>"
  ],
  "tools": [
    {
      "name": "<string>",
      "description": "<string>",
      "input_schema": {}
    }
  ],
  "persona": {},
  "temperature": 123,
  "max_iterations": 123,
  "model_overrides": {},
  "format": "<string>",
  "metadata": {},
  "component_library": "<string>"
}
'

import requests

url = "https://api.example.com/api/v1/completions"

payload = {
    "prompt": "<string>",
    "mode": "<string>",
    "stream": True,
    "conversation_id": "<string>",
    "skills": ["<string>"],
    "tools": [
        {
            "name": "<string>",
            "description": "<string>",
            "input_schema": {}
        }
    ],
    "persona": {},
    "temperature": 123,
    "max_iterations": 123,
    "model_overrides": {},
    "format": "<string>",
    "metadata": {},
    "component_library": "<string>"
}
headers = {"Content-Type": "application/json"}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: JSON.stringify({
    prompt: '<string>',
    mode: '<string>',
    stream: true,
    conversation_id: '<string>',
    skills: ['<string>'],
    tools: [{name: '<string>', description: '<string>', input_schema: {}}],
    persona: {},
    temperature: 123,
    max_iterations: 123,
    model_overrides: {},
    format: '<string>',
    metadata: {},
    component_library: '<string>'
  })
};

fetch('https://api.example.com/api/v1/completions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.example.com/api/v1/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'prompt' => '<string>',
    'mode' => '<string>',
    'stream' => true,
    'conversation_id' => '<string>',
    'skills' => [
        '<string>'
    ],
    'tools' => [
        [
                'name' => '<string>',
                'description' => '<string>',
                'input_schema' => [
                                
                ]
        ]
    ],
    'persona' => [
        
    ],
    'temperature' => 123,
    'max_iterations' => 123,
    'model_overrides' => [
        
    ],
    'format' => '<string>',
    'metadata' => [
        
    ],
    'component_library' => '<string>'
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample Create Completion request and prints the raw
// response body to standard output.
//
// A failure while building the request, sending it, or reading the response
// is printed and ends the program early rather than being silently discarded.
func main() {

	url := "https://api.example.com/api/v1/completions"

	payload := strings.NewReader("{\n  \"prompt\": \"<string>\",\n  \"mode\": \"<string>\",\n  \"stream\": true,\n  \"conversation_id\": \"<string>\",\n  \"skills\": [\n    \"<string>\"\n  ],\n  \"tools\": [\n    {\n      \"name\": \"<string>\",\n      \"description\": \"<string>\",\n      \"input_schema\": {}\n    }\n  ],\n  \"persona\": {},\n  \"temperature\": 123,\n  \"max_iterations\": 123,\n  \"model_overrides\": {},\n  \"format\": \"<string>\",\n  \"metadata\": {},\n  \"component_library\": \"<string>\"\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so the body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.example.com/api/v1/completions")
  .header("Content-Type", "application/json")
  .body("{\n  \"prompt\": \"<string>\",\n  \"mode\": \"<string>\",\n  \"stream\": true,\n  \"conversation_id\": \"<string>\",\n  \"skills\": [\n    \"<string>\"\n  ],\n  \"tools\": [\n    {\n      \"name\": \"<string>\",\n      \"description\": \"<string>\",\n      \"input_schema\": {}\n    }\n  ],\n  \"persona\": {},\n  \"temperature\": 123,\n  \"max_iterations\": 123,\n  \"model_overrides\": {},\n  \"format\": \"<string>\",\n  \"metadata\": {},\n  \"component_library\": \"<string>\"\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.example.com/api/v1/completions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request.body = "{\n  \"prompt\": \"<string>\",\n  \"mode\": \"<string>\",\n  \"stream\": true,\n  \"conversation_id\": \"<string>\",\n  \"skills\": [\n    \"<string>\"\n  ],\n  \"tools\": [\n    {\n      \"name\": \"<string>\",\n      \"description\": \"<string>\",\n      \"input_schema\": {}\n    }\n  ],\n  \"persona\": {},\n  \"temperature\": 123,\n  \"max_iterations\": 123,\n  \"model_overrides\": {},\n  \"format\": \"<string>\",\n  \"metadata\": {},\n  \"component_library\": \"<string>\"\n}"

response = http.request(request)
puts response.read_body

{
  "id": "<string>",
  "object": "<string>",
  "created": "<string>",
  "content": "<string>",
  "mode": "<string>",
  "resolved_mode": "<string>",
  "model": {
    "id": "<string>",
    "label": "<string>",
    "engine": "<string>"
  },
  "tools_used": [
    {
      "name": "<string>",
      "status": "<string>",
      "description": "<string>"
    }
  ],
  "artifacts": [
    {}
  ],
  "follow_ups": [
    {
      "label": "<string>",
      "prompt": "<string>"
    }
  ],
  "usage": {
    "cost_cents": 123,
    "prompt_tokens": 123,
    "completion_tokens": 123,
    "total_tokens": 123,
    "cached": true
  },
  "metadata": {},
  "conversation_id": {},
  "request_id": "<string>"
}

The core endpoint of the Theo API. Sends a prompt through the full orchestration pipeline and returns the complete response.

For real-time token delivery, set stream: true or see Streaming Completions.

Authentication

Requires a Bearer token. See Authentication.

Authorization: Bearer theo_sk_...

Request Body

prompt

string

required

The prompt text. Must be a non-empty string.

mode

string

default:"auto"

Execution mode. When set to auto, Theo classifies the prompt and selects the optimal engine automatically. Available modes:

auto — Classify prompt and route to best engine (default)
fast — Low-latency responses for simple queries
think — Deep reasoning for complex analysis
code — Code generation (Theo Code engine, extended output budget)
image — Image generation (Theo Create)
video — Async. Use POST /api/v1/video + job polling, not this endpoint.
research — Async. Use POST /api/v1/research + job polling, not this endpoint.
roast — Humorous, irreverent tone
genui — Generate interactive UI components (OpenUI Lang)

research and video are asynchronous and must not be sent to /completions (or stream()). They run as background jobs; invoking them here executes the work inline and the request hits the timeout before it finishes. Enqueue them via POST /api/v1/research / POST /api/v1/video and poll with Get Job Status. The @hitheo/sdk throws a TheoUsageError immediately if you pass these modes to complete() / stream(). See the Async Jobs guide.

stream

boolean

default:"false"

Enable SSE streaming. When true, returns a text/event-stream response instead of JSON. See Streaming.

conversation_id

string

Continue an existing conversation. Pass the conversation ID to maintain multi-turn context.

skills

string[]

Skill slugs to activate for this request. These are merged with the user’s installed skills. Each slug activates a skill’s prompt extension, tools, and model preferences for this completion. You can find slugs in the dashboard (copy icon on each skill card), via GET /api/v1/skills, or in the E.V.I. Canvas Input node. See Activating Skills via API for the full guide.

tools

object[]

Inline tool definitions the model can call during the agent loop.

Show Tool object

name

string

required

Tool name (e.g., check_inventory).

description

string

required

What the tool does — the model uses this to decide when to call it.

input_schema

object

JSON Schema describing the tool’s input parameters.

persona

string | object

default:"theo"

Override Theo’s personality for this request.

"theo" — Default Theo persona
"none" — No persona (raw model output)
{ "system_prompt": "You are..." } — Custom system prompt

temperature

number

Sampling temperature (0–2). Higher values produce more creative output.

max_iterations

integer

default:"8"

Maximum agent loop iterations (1–20). Each iteration is a think → act → observe cycle.

model_overrides

object

Override the engine used for specific modes. Keys are mode names (e.g., "code", "think"), values are Theo engine IDs (e.g., "theo-1-reason", "theo-1-flash"). See List Models for valid engine IDs.

format

string

default:"theo"

Response format. "theo" for the default format, "openai" for OpenAI-compatible format.

metadata

object

Arbitrary key-value metadata attached to the completion. Returned in the response and logged in the audit trail.

component_library

string

Component library identifier for GenUI mode. Used by E.V.I. callers for custom UI rendering.

Request Examples

curl -X POST https://www.hitheo.ai/api/v1/completions \
  -H "Authorization: Bearer $THEO_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Explain microservices architecture",
    "mode": "auto",
    "temperature": 0.7
  }'

import { Theo } from "@hitheo/sdk";

const theo = new Theo({ apiKey: process.env.THEO_API_KEY! });

const res = await theo.complete({
  prompt: "Explain microservices architecture",
  mode: "auto",
  temperature: 0.7,
});

console.log(res.content);
console.log(res.model.label);     // e.g. "Theo Flash"
console.log(res.usage.cost_cents); // e.g. 0.02

With Skills and Tools

curl -X POST https://www.hitheo.ai/api/v1/completions \
  -H "Authorization: Bearer $THEO_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Check current inventory levels for SKU-1234",
    "mode": "auto",
    "skills": ["inventory-check"],
    "tools": [
      {
        "name": "check_stock",
        "description": "Look up current stock levels by SKU",
        "input_schema": {
          "type": "object",
          "properties": {
            "sku": { "type": "string" },
            "warehouse": { "type": "string" }
          },
          "required": ["sku"]
        }
      }
    ],
    "persona": { "system_prompt": "You are Atlas, an operations assistant." },
    "max_iterations": 5
  }'

const res = await theo.complete({
  prompt: "Check current inventory levels for SKU-1234",
  skills: ["inventory-check"],
  tools: [
    {
      name: "check_stock",
      description: "Look up current stock levels by SKU",
      input_schema: {
        type: "object",
        properties: {
          sku: { type: "string" },
          warehouse: { type: "string" },
        },
        required: ["sku"],
      },
    },
  ],
  persona: { system_prompt: "You are Atlas, an operations assistant." },
  max_iterations: 5,
});

Response

id

string

Unique completion ID (prefixed cmpl_).

object

string

Always "completion".

created

string

ISO 8601 timestamp.

content

string

The generated text content.

mode

string

The mode you requested (e.g., "auto").

resolved_mode

string

The mode Theo actually used after intent classification (e.g., "fast", "think", "code").

model

object

The Theo engine that handled the request.

Show Model object

id

string

Theo-branded model ID (e.g., "theo-1-flash", "theo-1-reason").

label

string

Human-readable name (e.g., "Theo Flash", "Theo Reason").

engine

string

Engine subsystem (e.g., "theo-core", "theo-vision").

tools_used

object[]

Tools called during the agent loop.

Show Tool usage object

name

string

Tool name.

status

string

"success" or "error".

description

string

Tool description.

artifacts

object[]

Generated files (images, code, documents) produced during the completion.

follow_ups

object[]

Suggested next prompts.

Show Follow-up object

label

string

Short label for the suggestion.

prompt

string

Full prompt text.

usage

object

Token counts and cost.

Show Usage object

cost_cents

number

Cost in cents.

prompt_tokens

integer

Input tokens consumed. Always 0 for non-text modes (image, video, tts, stt).

completion_tokens

integer

Output tokens generated. Always 0 for non-text modes.

total_tokens

integer

Total tokens.

cached

boolean

Present and true when the response was served from the semantic cache.

For non-text modes (image, video, tts, stt), prompt_tokens and completion_tokens are always 0 — tokens are not a meaningful billing unit there. Use usage.cost_cents as the sole usage metric for those modes.

metadata

object | null

The metadata you passed in the request, echoed back.

conversation_id

string | null

The server-side conversation id this turn resolved against. null when no conversation was created or attached. Echoed unchanged when you passed conversation_id in the request.

request_id

string

Server-assigned request identifier (also returned as the X-Request-Id header). Include this in support tickets so we can look up the request in logs.

Example Response

{
  "id": "cmpl_abc123",
  "object": "completion",
  "created": "2026-04-10T12:00:00Z",
  "content": "Microservices architecture is a design pattern where an application is composed of small, independent services...",
  "mode": "auto",
  "resolved_mode": "fast",
  "model": {
    "id": "theo-1-flash",
    "label": "Theo Flash",
    "engine": "theo-core"
  },
  "tools_used": [],
  "artifacts": [],
  "follow_ups": [
    { "label": "Compare with monoliths", "prompt": "Compare microservices vs monolithic architecture" },
    { "label": "Service mesh", "prompt": "Explain service mesh in microservices" }
  ],
  "usage": {
    "cost_cents": 0.02,
    "prompt_tokens": 12,
    "completion_tokens": 156,
    "total_tokens": 168
  },
  "metadata": null,
  "conversation_id": null,
  "request_id": "req_9f2e1a"
}

OpenAI-Compatible Format

Pass format: "openai" to receive responses in OpenAI’s chat.completions format. This allows drop-in replacement in existing OpenAI-based applications.

curl -X POST https://www.hitheo.ai/api/v1/completions \
  -H "Authorization: Bearer $THEO_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Hello",
    "format": "openai"
  }'

The response follows the OpenAI chat.completion schema with choices, usage, and model fields.

Semantic Caching

Non-conversation completions (no conversation_id) are automatically cached. Identical requests return cached results instantly at zero cost. See Semantic Caching. Cached responses include "_cached": true in the response body.

Errors

Status	Code	Description
400	`validation_error`	Invalid request body (missing prompt, invalid mode, etc.)
401	`invalid_api_key`	Missing or invalid API key
402	`insufficient_credits`	Account has insufficient balance
404	`not_found`	Conversation ID not found
429	`rate_limit_exceeded`	Too many requests — check `Retry-After` header
500	`server_error`	Internal server error

Authentication

Streaming Completions

Create Completion

curl --request POST \
  --url https://api.example.com/api/v1/completions \
  --header 'Content-Type: application/json' \
  --data '
{
  "prompt": "<string>",
  "mode": "<string>",
  "stream": true,
  "conversation_id": "<string>",
  "skills": [
    "<string>"
  ],
  "tools": [
    {
      "name": "<string>",
      "description": "<string>",
      "input_schema": {}
    }
  ],
  "persona": {},
  "temperature": 123,
  "max_iterations": 123,
  "model_overrides": {},
  "format": "<string>",
  "metadata": {},
  "component_library": "<string>"
}
'

import requests

url = "https://api.example.com/api/v1/completions"

payload = {
    "prompt": "<string>",
    "mode": "<string>",
    "stream": True,
    "conversation_id": "<string>",
    "skills": ["<string>"],
    "tools": [
        {
            "name": "<string>",
            "description": "<string>",
            "input_schema": {}
        }
    ],
    "persona": {},
    "temperature": 123,
    "max_iterations": 123,
    "model_overrides": {},
    "format": "<string>",
    "metadata": {},
    "component_library": "<string>"
}
headers = {"Content-Type": "application/json"}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: JSON.stringify({
    prompt: '<string>',
    mode: '<string>',
    stream: true,
    conversation_id: '<string>',
    skills: ['<string>'],
    tools: [{name: '<string>', description: '<string>', input_schema: {}}],
    persona: {},
    temperature: 123,
    max_iterations: 123,
    model_overrides: {},
    format: '<string>',
    metadata: {},
    component_library: '<string>'
  })
};

fetch('https://api.example.com/api/v1/completions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.example.com/api/v1/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'prompt' => '<string>',
    'mode' => '<string>',
    'stream' => true,
    'conversation_id' => '<string>',
    'skills' => [
        '<string>'
    ],
    'tools' => [
        [
                'name' => '<string>',
                'description' => '<string>',
                'input_schema' => [
                                
                ]
        ]
    ],
    'persona' => [
        
    ],
    'temperature' => 123,
    'max_iterations' => 123,
    'model_overrides' => [
        
    ],
    'format' => '<string>',
    'metadata' => [
        
    ],
    'component_library' => '<string>'
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample Create Completion request and prints the raw
// response body to standard output.
//
// A failure while building the request, sending it, or reading the response
// is printed and ends the program early rather than being silently discarded.
func main() {

	url := "https://api.example.com/api/v1/completions"

	payload := strings.NewReader("{\n  \"prompt\": \"<string>\",\n  \"mode\": \"<string>\",\n  \"stream\": true,\n  \"conversation_id\": \"<string>\",\n  \"skills\": [\n    \"<string>\"\n  ],\n  \"tools\": [\n    {\n      \"name\": \"<string>\",\n      \"description\": \"<string>\",\n      \"input_schema\": {}\n    }\n  ],\n  \"persona\": {},\n  \"temperature\": 123,\n  \"max_iterations\": 123,\n  \"model_overrides\": {},\n  \"format\": \"<string>\",\n  \"metadata\": {},\n  \"component_library\": \"<string>\"\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so the body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.example.com/api/v1/completions")
  .header("Content-Type", "application/json")
  .body("{\n  \"prompt\": \"<string>\",\n  \"mode\": \"<string>\",\n  \"stream\": true,\n  \"conversation_id\": \"<string>\",\n  \"skills\": [\n    \"<string>\"\n  ],\n  \"tools\": [\n    {\n      \"name\": \"<string>\",\n      \"description\": \"<string>\",\n      \"input_schema\": {}\n    }\n  ],\n  \"persona\": {},\n  \"temperature\": 123,\n  \"max_iterations\": 123,\n  \"model_overrides\": {},\n  \"format\": \"<string>\",\n  \"metadata\": {},\n  \"component_library\": \"<string>\"\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.example.com/api/v1/completions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request.body = "{\n  \"prompt\": \"<string>\",\n  \"mode\": \"<string>\",\n  \"stream\": true,\n  \"conversation_id\": \"<string>\",\n  \"skills\": [\n    \"<string>\"\n  ],\n  \"tools\": [\n    {\n      \"name\": \"<string>\",\n      \"description\": \"<string>\",\n      \"input_schema\": {}\n    }\n  ],\n  \"persona\": {},\n  \"temperature\": 123,\n  \"max_iterations\": 123,\n  \"model_overrides\": {},\n  \"format\": \"<string>\",\n  \"metadata\": {},\n  \"component_library\": \"<string>\"\n}"

response = http.request(request)
puts response.read_body

{
  "id": "<string>",
  "object": "<string>",
  "created": "<string>",
  "content": "<string>",
  "mode": "<string>",
  "resolved_mode": "<string>",
  "model": {
    "id": "<string>",
    "label": "<string>",
    "engine": "<string>"
  },
  "tools_used": [
    {
      "name": "<string>",
      "status": "<string>",
      "description": "<string>"
    }
  ],
  "artifacts": [
    {}
  ],
  "follow_ups": [
    {
      "label": "<string>",
      "prompt": "<string>"
    }
  ],
  "usage": {
    "cost_cents": 123,
    "prompt_tokens": 123,
    "completion_tokens": 123,
    "total_tokens": 123,
    "cached": true
  },
  "metadata": {},
  "conversation_id": {},
  "request_id": "<string>"
}

Overview

Completions

Media Generation

Audio

Skills API

E.V.I. Canvas

Workflows

Hooks

Settings

Embed Widgets

Guardrails

Routing Studio

Theo Browser

Benchmarks

Webhooks

Billing

Resources

Authentication

Request Body

Request Examples

With Skills and Tools

Response

Example Response

OpenAI-Compatible Format

Semantic Caching

Errors

​Authentication

​Request Body

​Request Examples

​With Skills and Tools

​Response

​Example Response

​OpenAI-Compatible Format

​Semantic Caching

​Errors

Authentication

Request Body

Request Examples

With Skills and Tools

Response

Example Response

OpenAI-Compatible Format

Semantic Caching

Errors