TUTORIAL
Nov 13, 2025
10 min read
Integrating TOON in Your AI Application
Complete guide with code examples for Python, Node.js, and LangChain. From basic parsing to production RAG systems.
Quick Start: 5-Minute Integration
Step 1: Convert JSON to TOON
Use our online converter or implement the conversion logic:
Python
import json
def json_to_toon(data):
    """Convert a list of flat dicts (a JSON array) into a TOON string."""
    if not isinstance(data, list) or len(data) == 0:
        raise ValueError("Input must be non-empty array")
    # The first object's keys define the column order for every row.
    schema = list(data[0].keys())
    lines = ["TOON.schema: " + " | ".join(schema)]
    for record in data:
        # Missing keys become empty cells; everything is stringified.
        cells = (str(record.get(column, "")) for column in schema)
        lines.append(" | ".join(cells))
    # Every line, including the last row, is newline-terminated.
    return "\n".join(lines) + "\n"
# Example usage
users = [
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "email": "bob@example.com"},
]
toon_output = json_to_toon(users)
print(toon_output)

Output:
TOON.schema: id | name | email
1 | Alice | alice@example.com
2 | Bob | bob@example.com
Step 2: Send to LLM
Python
from openai import OpenAI

client = OpenAI(api_key="your-api-key")

# Get database records
users = fetch_users_from_db()  # Returns list of dicts

# Convert to TOON
toon_data = json_to_toon(users)

# Send to GPT — the TOON payload is embedded directly in the user prompt.
prompt = f"Here is user data in TOON format:\n{toon_data}\n\nWho are the users from California?"
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ],
)
print(response.choices[0].message.content)

Node.js Implementation
TOON Parser (TypeScript)
TypeScript
/** Parsed TOON payload: the header's column names plus string-valued data rows. */
export interface ToonData {
  schema: string[];
  rows: string[][];
}
/** Serialize an array of flat objects into a TOON string. */
export function jsonToToon(data: any[]): string {
  if (!Array.isArray(data) || data.length === 0) {
    throw new Error('Input must be non-empty array');
  }
  // Column order comes from the first object's keys.
  const schema = Object.keys(data[0]);
  const header = `TOON.schema: ${schema.join(' | ')}`;
  // Null/undefined values become empty cells; everything else is stringified.
  const rows = data.map(obj =>
    schema.map(key => String(obj[key] ?? '')).join(' | ')
  );
  return [header, ...rows].join('\n') + '\n';
}
/** Parse a TOON string back into its schema and rows (all cells as strings). */
export function parseToon(toonStr: string): ToonData {
  const [schemaLine, ...rowLines] = toonStr.trim().split('\n');
  // The first line must carry the schema header.
  if (!schemaLine.startsWith('TOON.schema:')) {
    throw new Error('Invalid TOON format');
  }
  const schema = schemaLine
    .slice('TOON.schema:'.length)
    .split('|')
    .map(cell => cell.trim());
  // Remaining lines are data rows, one pipe-separated record each.
  const rows = rowLines.map(line => line.split('|').map(cell => cell.trim()));
  return { schema, rows };
}
// Convert back to JSON
export function toonToJson(toonStr: string): any[] {
const { schema, rows } = parseToon(toonStr);
return rows.map(row => {
const obj: any = {};
schema.forEach((key, i) => {
obj[key] = row[i];
});
return obj;
});
}Usage Example
TypeScript
import OpenAI from 'openai';
import { jsonToToon, toonToJson } from './toon-parser';
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Convert `data` to TOON, embed it in a prompt alongside `query`, and
 * return GPT-4's text answer.
 */
async function queryWithToon(data: any[], query: string) {
  // Convert to TOON
  const toonData = jsonToToon(data);
  // Send to GPT
  const response = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: `Data (TOON format):\n${toonData}\n\nQuery: ${query}` }
    ]
  });
  // Return only the first choice's message text.
  return response.choices[0].message.content;
}
// Usage
const products = [
  { id: 1, name: 'Laptop', price: 999 },
  { id: 2, name: 'Mouse', price: 29 },
];
const answer = await queryWithToon(products, 'Which products are under $50?');
console.log(answer);

LangChain Integration
Custom Document Loader
Python
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
def create_toon_documents(data, chunk_size=50):
    """Split large dataset into TOON chunks"""
    documents = []
    for start in range(0, len(data), chunk_size):
        # Slice out at most chunk_size records and serialize them together.
        rows = data[start:start + chunk_size]
        documents.append(
            Document(
                page_content=json_to_toon(rows),
                metadata={"format": "toon", "rows": len(rows)},
            )
        )
    return documents
# Example: RAG with TOON
products = fetch_products_from_db()  # 10,000 products

# Convert to TOON documents (100 rows per Document)
toon_docs = create_toon_documents(products, chunk_size=100)

# Create vector store from the TOON chunks
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(toon_docs, embeddings)

# Create QA chain; retriever returns the 3 most similar chunks per query
llm = ChatOpenAI(model="gpt-4", temperature=0)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
)

# Query
result = qa_chain.run("What are the top-rated laptops under $1000?")
print(result)

Why this works: By chunking large datasets into 100-row TOON documents, we get better retrieval than JSON (fewer tokens = more relevant chunks fit in context).
Production Best Practices
1. Error Handling
Python
def safe_json_to_toon(data):
try:
if not isinstance(data, list):
raise ValueError("Input must be list")
if len(data) == 0:
return "TOON.schema: empty\n"
# Check schema consistency
schema = set(data[0].keys())
for obj in data[1:]:
if set(obj.keys()) != schema:
raise ValueError("Inconsistent schema")
return json_to_toon(data)
except Exception as e:
logger.error(f"TOON conversion failed: {e}")
# Fallback to JSON
return json.dumps(data)2. Handling Null Values
Python
def json_to_toon_with_nulls(data):
schema = list(data[0].keys())
toon = "TOON.schema: " + " | ".join(schema) + "\n"
for obj in data:
values = []
for key in schema:
value = obj.get(key)
# Represent null as empty string or special marker
values.append(str(value) if value is not None else "NULL")
toon += " | ".join(values) + "\n"
return toon3. Escaping Pipe Characters
If your data contains | characters, escape them:
Python
def escape_toon_value(value):
    """Backslash-escape literal pipe characters so cell values survive parsing."""
    text = str(value)
    return text.replace("|", "\\|")
def json_to_toon_safe(data):
schema = list(data[0].keys())
toon = "TOON.schema: " + " | ".join(schema) + "\n"
for obj in data:
values = [escape_toon_value(obj.get(key, "")) for key in schema]
toon += " | ".join(values) + "\n"
return toon4. Caching TOON Conversions
If you query the same dataset repeatedly, cache the TOON conversion:
Python
from functools import lru_cache
import hashlib
import json
@lru_cache(maxsize=100)
def cached_json_to_toon(data_hash):
    """Memoized TOON conversion, keyed by a content hash of the dataset."""
    # Look up original data by hash
    # NOTE(review): get_data_from_cache is defined elsewhere — presumably it
    # returns the original list stored under this hash; verify against caller.
    data = get_data_from_cache(data_hash)
    return json_to_toon(data)
def get_toon_cached(data):
# Hash the data
data_json = json.dumps(data, sort_keys=True)
data_hash = hashlib.md5(data_json.encode()).hexdigest()
return cached_json_to_toon(data_hash)Testing Your Integration
Unit Tests
Python (pytest)
import pytest
from toon_converter import json_to_toon, toon_to_json
def test_basic_conversion():
    # A minimal two-record dataset should yield a header plus both names.
    records = [
        {"id": 1, "name": "Alice"},
        {"id": 2, "name": "Bob"},
    ]
    toon = json_to_toon(records)
    assert "TOON.schema:" in toon
    assert "Alice" in toon
    assert "Bob" in toon
def test_roundtrip():
    # NOTE(review): this asserts exact equality, which only holds if
    # toon_to_json restores original value types (int 1, int 30). A parser
    # that returns every cell as a string would fail here — verify
    # toon_converter.toon_to_json's type handling.
    original = [{"id": 1, "name": "Alice", "age": 30}]
    toon = json_to_toon(original)
    reconstructed = toon_to_json(toon)
    assert reconstructed == original
def test_empty_array():
    # An empty dataset has no first record to derive a schema from,
    # so conversion must raise.
    with pytest.raises(ValueError):
        json_to_toon([])
def test_null_values():
data = [{"id": 1, "name": None}]
toon = json_to_toon(data)
assert "NULL" in toon or toon.endswith(" | \n")Performance Optimization
Benchmark: TOON vs JSON Token Count
Python
import tiktoken
def count_tokens(text, model="gpt-4"):
    """Return the number of tokens ``text`` encodes to for ``model``."""
    encoder = tiktoken.encoding_for_model(model)
    tokens = encoder.encode(text)
    return len(tokens)
# Compare formats: serialize the same dataset both ways and count tokens.
data = fetch_large_dataset()  # 1000 rows

json_text = json.dumps(data)
toon_text = json_to_toon(data)

json_tokens = count_tokens(json_text)
toon_tokens = count_tokens(toon_text)

print(f"JSON: {json_tokens} tokens")
print(f"TOON: {toon_tokens} tokens")
print(f"Savings: {((json_tokens - toon_tokens) / json_tokens * 100):.1f}%")

Expected output for 1000-row dataset:
JSON: 42,300 tokens
TOON: 18,900 tokens
Savings: 55.3%