Code Examples
This document provides practical examples for interacting with LLMKit's inference API, showing how to use the three supported prompt types: static, dynamic_system, and dynamic_both.
Request Structure for Different Prompt Types
Static Prompts
For static prompts, the system prompt is predefined and fixed (e.g., "You are a helpful assistant"). You only need to provide user messages in the API request.
Example:
{
"model": "STATIC-SYSTEM-CHAT",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
Dynamic System Prompts
For dynamic_system prompts, the system prompt contains variables (e.g., {{ personality }}, {{ topic }}). You must include a system message in the request with a JSON string providing values for these variables. The API renders the system prompt using these values, and you can add user messages as needed.
Example:
{
"model": "DYNAMIC-SYSTEM-CHAT",
"messages": [
{
"role": "system",
"content": "{\"personality\": \"friendly\", \"detail_level\": \"high\", \"tone\": \"casual\"}"
},
{
"role": "user",
"content": "Tell me about AI."
}
]
}
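Hand-escaping the JSON string is error-prone; it is usually safer to build it programmatically. A minimal Python sketch (the variable names mirror the example above):
import json
variables = {'personality': 'friendly', 'detail_level': 'high', 'tone': 'casual'}
system_message = {'role': 'system', 'content': json.dumps(variables)}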
Dynamic Both Prompts
For dynamic_both prompts, both the system and user prompts are templates with variables (e.g., {{ topic }}, {{ concept }}). Provide a system message with a JSON string containing values for all variables. The API renders both the system prompt and the user prompt using these values. For one-shot prompts, additional user messages may not be needed; for chat-based prompts, you can append extra user messages.
Example:
{
"model": "DYNAMIC-BOTH-ONESHOT",
"messages": [
{
"role": "system",
"content": "{\"topic\": \"quantum computing\", \"concept\": \"superposition\"}"
}
]
}
- Rendered System: "You are an expert in quantum computing."
- Rendered User: "Explain superposition in simple terms."
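For reference, the underlying templates behind this example would look something like the following. This is illustrative only; the actual templates live in your LLMKit prompt configuration:
System template: You are an expert in {{ topic }}.
User template: Explain {{ concept }} in simple terms.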
Authentication
All requests require an API key, included in the Authorization header:
Authorization: Bearer <YOUR_API_KEY>
Replace <YOUR_API_KEY> with your actual key from the LLMKit management interface (e.g., http://localhost:3000/settings). All examples assume LLMKit runs at http://localhost:8000.
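Avoid hardcoding the key in source files. A minimal sketch that reads it from an environment variable instead (the name LLMKIT_API_KEY is an arbitrary choice for this example, not something LLMKit mandates):
import os
# Set the variable in your shell first, e.g. export LLMKIT_API_KEY=<YOUR_API_KEY>
API_KEY = os.environ['LLMKIT_API_KEY']
headers = {'Authorization': f'Bearer {API_KEY}'}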
Table of Contents
Each section below provides examples for static, dynamic_system, and dynamic_both prompts, with non-streaming and streaming variants where applicable.
- cURL
- JavaScript (Fetch API)
- Python (requests)
- Python (OpenAI Library)
- Node.js (OpenAI Library)
cURL
Non-Streaming Chat Completion
Static Prompt
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR_API_KEY>" \
-d '{
"model": "STATIC-SYSTEM-CHAT",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}'
Dynamic System Prompt
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR_API_KEY>" \
-d '{
"model": "DYNAMIC-SYSTEM-CHAT",
"messages": [
{
"role": "system",
"content": "{\"personality\": \"friendly\", \"detail_level\": \"high\", \"tone\": \"casual\"}"
},
{
"role": "user",
"content": "Tell me about AI."
}
]
}'
Dynamic Both Prompt
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR_API_KEY>" \
-d '{
"model": "DYNAMIC-BOTH-ONESHOT",
"messages": [
{
"role": "system",
"content": "{\"topic\": \"quantum computing\", \"concept\": \"superposition\"}"
}
]
}'
Streaming Chat Completion
Static Prompt
curl -X POST http://localhost:8000/v1/chat/completions/stream \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR_API_KEY>" \
-d '{
"model": "STATIC-SYSTEM-CHAT",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
],
"stream": true
}'
Dynamic System Prompt
curl -X POST http://localhost:8000/v1/chat/completions/stream \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR_API_KEY>" \
-d '{
"model": "DYNAMIC-SYSTEM-CHAT",
"messages": [
{
"role": "system",
"content": "{\"personality\": \"friendly\", \"detail_level\": \"high\", \"tone\": \"casual\"}"
},
{
"role": "user",
"content": "Tell me about AI."
}
],
"stream": true
}'
Dynamic Both Prompt
curl -X POST http://localhost:8000/v1/chat/completions/stream \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR_API_KEY>" \
-d '{
"model": "DYNAMIC-BOTH-ONESHOT",
"messages": [
{
"role": "system",
"content": "{\"topic\": \"quantum computing\", \"concept\": \"superposition\"}"
}
],
"stream": true
}'
JavaScript (Fetch API)
Non-Streaming Chat Completion
Static Prompt
async function getChatCompletion() {
const response = await fetch('http://localhost:8000/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer <YOUR_API_KEY>'
},
body: JSON.stringify({
model: 'STATIC-SYSTEM-CHAT',
messages: [{ role: 'user', content: 'Hello, how are you?' }]
})
});
const data = await response.json();
console.log(data.choices[0].message.content);
}
getChatCompletion();
Dynamic System Prompt
async function getChatCompletion() {
const response = await fetch('http://localhost:8000/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer <YOUR_API_KEY>'
},
body: JSON.stringify({
model: 'DYNAMIC-SYSTEM-CHAT',
messages: [
{ role: 'system', content: JSON.stringify({ personality: 'friendly', detail_level: 'high', tone: 'casual' }) },
{ role: 'user', content: 'Tell me about AI.' }
]
})
});
const data = await response.json();
console.log(data.choices[0].message.content);
}
getChatCompletion();
Dynamic Both Prompt
async function getChatCompletion() {
const response = await fetch('http://localhost:8000/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer <YOUR_API_KEY>'
},
body: JSON.stringify({
model: 'DYNAMIC-BOTH-ONESHOT',
messages: [
{ role: 'system', content: JSON.stringify({ topic: 'quantum computing', concept: 'superposition' }) }
]
})
});
const data = await response.json();
console.log(data.choices[0].message.content);
}
getChatCompletion();
Streaming Chat Completion
Static Prompt
async function getStreamingChatCompletion() {
const response = await fetch('http://localhost:8000/v1/chat/completions/stream', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer <YOUR_API_KEY>'
},
body: JSON.stringify({
model: 'STATIC-SYSTEM-CHAT',
messages: [{ role: 'user', content: 'Hello, how are you?' }],
stream: true
})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
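// Note: a network chunk may split an SSE event across reads; production code
// should buffer partial lines before parsing. Also, process.stdout.write is
// Node.js-only; in a browser, append the text to the DOM instead.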
while (true) {
const { value, done } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ') && line !== 'data: [DONE]') {
const data = JSON.parse(line.slice(6));
if (data.choices[0].delta.content) process.stdout.write(data.choices[0].delta.content);
}
}
}
}
getStreamingChatCompletion();
Dynamic System Prompt
async function getStreamingChatCompletion() {
const response = await fetch('http://localhost:8000/v1/chat/completions/stream', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer <YOUR_API_KEY>'
},
body: JSON.stringify({
model: 'DYNAMIC-SYSTEM-CHAT',
messages: [
{ role: 'system', content: JSON.stringify({ personality: 'friendly', detail_level: 'high', tone: 'casual' }) },
{ role: 'user', content: 'Tell me about AI.' }
],
stream: true
})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
while (true) {
const { value, done } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ') && line !== 'data: [DONE]') {
const data = JSON.parse(line.slice(6));
if (data.choices[0].delta.content) process.stdout.write(data.choices[0].delta.content);
}
}
}
}
getStreamingChatCompletion();
Dynamic Both Prompt
async function getStreamingChatCompletion() {
const response = await fetch('http://localhost:8000/v1/chat/completions/stream', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer <YOUR_API_KEY>'
},
body: JSON.stringify({
model: 'DYNAMIC-BOTH-ONESHOT',
messages: [
{ role: 'system', content: JSON.stringify({ topic: 'quantum computing', concept: 'superposition' }) }
],
stream: true
})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
while (true) {
const { value, done } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ') && line !== 'data: [DONE]') {
const data = JSON.parse(line.slice(6));
if (data.choices[0].delta.content) process.stdout.write(data.choices[0].delta.content);
}
}
}
}
getStreamingChatCompletion();
Python (requests)
Non-Streaming Chat Completion
Static Prompt
import requests
response = requests.post(
    'http://localhost:8000/v1/chat/completions',
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'Bearer <YOUR_API_KEY>'
    },
    json={
        'model': 'STATIC-SYSTEM-CHAT',
        'messages': [{'role': 'user', 'content': 'Hello, how are you?'}]
    }
)
print(response.json()['choices'][0]['message']['content'])
Dynamic System Prompt
import requests
response = requests.post(
    'http://localhost:8000/v1/chat/completions',
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'Bearer <YOUR_API_KEY>'
    },
    json={
        'model': 'DYNAMIC-SYSTEM-CHAT',
        'messages': [
            {'role': 'system', 'content': '{"personality": "friendly", "detail_level": "high", "tone": "casual"}'},
            {'role': 'user', 'content': 'Tell me about AI.'}
        ]
    }
)
print(response.json()['choices'][0]['message']['content'])
Dynamic Both Prompt
import requests
response = requests.post(
    'http://localhost:8000/v1/chat/completions',
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'Bearer <YOUR_API_KEY>'
    },
    json={
        'model': 'DYNAMIC-BOTH-ONESHOT',
        'messages': [
            {'role': 'system', 'content': '{"topic": "quantum computing", "concept": "superposition"}'}
        ]
    }
)
print(response.json()['choices'][0]['message']['content'])
Streaming Chat Completion
Static Prompt
import json
import requests
response = requests.post(
    'http://localhost:8000/v1/chat/completions/stream',
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'Bearer <YOUR_API_KEY>'
    },
    json={
        'model': 'STATIC-SYSTEM-CHAT',
        'messages': [{'role': 'user', 'content': 'Hello, how are you?'}],
        'stream': True
    },
    stream=True
)
# Each SSE line looks like "data: {...}"; the stream ends with "data: [DONE]".
for line in response.iter_lines():
    if line:
        decoded = line.decode('utf-8')
        if decoded.startswith('data: ') and decoded != 'data: [DONE]':
            data = json.loads(decoded[6:])
            if data['choices'][0]['delta'].get('content'):
                print(data['choices'][0]['delta']['content'], end='', flush=True)
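To keep the complete reply as a single string, accumulate the deltas as they arrive. A small sketch that replaces the print loop above (same assumptions about the SSE format):
parts = []
for line in response.iter_lines():
    if line:
        decoded = line.decode('utf-8')
        if decoded.startswith('data: ') and decoded != 'data: [DONE]':
            delta = json.loads(decoded[6:])['choices'][0]['delta'].get('content')
            if delta:
                print(delta, end='', flush=True)
                parts.append(delta)
full_reply = ''.join(parts)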
Dynamic System Prompt
import json
import requests
response = requests.post(
    'http://localhost:8000/v1/chat/completions/stream',
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'Bearer <YOUR_API_KEY>'
    },
    json={
        'model': 'DYNAMIC-SYSTEM-CHAT',
        'messages': [
            {'role': 'system', 'content': '{"personality": "friendly", "detail_level": "high", "tone": "casual"}'},
            {'role': 'user', 'content': 'Tell me about AI.'}
        ],
        'stream': True
    },
    stream=True
)
for line in response.iter_lines():
    if line:
        decoded = line.decode('utf-8')
        if decoded.startswith('data: ') and decoded != 'data: [DONE]':
            data = json.loads(decoded[6:])
            if data['choices'][0]['delta'].get('content'):
                print(data['choices'][0]['delta']['content'], end='', flush=True)
Dynamic Both Prompt
import json
import requests
response = requests.post(
    'http://localhost:8000/v1/chat/completions/stream',
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'Bearer <YOUR_API_KEY>'
    },
    json={
        'model': 'DYNAMIC-BOTH-ONESHOT',
        'messages': [
            {'role': 'system', 'content': '{"topic": "quantum computing", "concept": "superposition"}'}
        ],
        'stream': True
    },
    stream=True
)
for line in response.iter_lines():
    if line:
        decoded = line.decode('utf-8')
        if decoded.startswith('data: ') and decoded != 'data: [DONE]':
            data = json.loads(decoded[6:])
            if data['choices'][0]['delta'].get('content'):
                print(data['choices'][0]['delta']['content'], end='', flush=True)
Python (OpenAI Library)
Non-Streaming Chat Completion
Static Prompt
from openai import OpenAI
client = OpenAI(api_key='<YOUR_API_KEY>', base_url='http://localhost:8000/v1')
response = client.chat.completions.create(
    model='STATIC-SYSTEM-CHAT',
    messages=[{'role': 'user', 'content': 'Hello, how are you?'}]
)
print(response.choices[0].message.content)
Dynamic System Prompt
from openai import OpenAI
client = OpenAI(api_key='<YOUR_API_KEY>', base_url='http://localhost:8000/v1')
response = client.chat.completions.create(
    model='DYNAMIC-SYSTEM-CHAT',
    messages=[
        {'role': 'system', 'content': '{"personality": "friendly", "detail_level": "high", "tone": "casual"}'},
        {'role': 'user', 'content': 'Tell me about AI.'}
    ]
)
print(response.choices[0].message.content)
Dynamic Both Prompt
from openai import OpenAI
client = OpenAI(api_key='<YOUR_API_KEY>', base_url='http://localhost:8000/v1')
response = client.chat.completions.create(
    model='DYNAMIC-BOTH-ONESHOT',
    messages=[
        {'role': 'system', 'content': '{"topic": "quantum computing", "concept": "superposition"}'}
    ]
)
print(response.choices[0].message.content)
Streaming Chat Completion
Static Prompt
from openai import OpenAI
client = OpenAI(api_key='<YOUR_API_KEY>', base_url='http://localhost:8000/v1')
stream = client.chat.completions.create(
    model='STATIC-SYSTEM-CHAT',
    messages=[{'role': 'user', 'content': 'Hello, how are you?'}],
    stream=True
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end='', flush=True)
Dynamic System Prompt
from openai import OpenAI
client = OpenAI(api_key='<YOUR_API_KEY>', base_url='http://localhost:8000/v1')
stream = client.chat.completions.create(
    model='DYNAMIC-SYSTEM-CHAT',
    messages=[
        {'role': 'system', 'content': '{"personality": "friendly", "detail_level": "high", "tone": "casual"}'},
        {'role': 'user', 'content': 'Tell me about AI.'}
    ],
    stream=True
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end='', flush=True)
Dynamic Both Prompt
from openai import OpenAI
client = OpenAI(api_key='<YOUR_API_KEY>', base_url='http://localhost:8000/v1')
stream = client.chat.completions.create(
    model='DYNAMIC-BOTH-ONESHOT',
    messages=[
        {'role': 'system', 'content': '{"topic": "quantum computing", "concept": "superposition"}'}
    ],
    stream=True
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end='', flush=True)
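The same accumulation pattern works with the OpenAI client if you need the full text once streaming finishes:
parts = []
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end='', flush=True)
        parts.append(delta)
full_reply = ''.join(parts)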
Node.js (OpenAI Library)
Non-Streaming Chat Completion
Static Prompt
const { OpenAI } = require('openai');
const client = new OpenAI({ apiKey: '<YOUR_API_KEY>', baseURL: 'http://localhost:8000/v1' });
async function getChatCompletion() {
const response = await client.chat.completions.create({
model: 'STATIC-SYSTEM-CHAT',
messages: [{ role: 'user', content: 'Hello, how are you?' }]
});
console.log(response.choices[0].message.content);
}
getChatCompletion();
Dynamic System Prompt
const { OpenAI } = require('openai');
const client = new OpenAI({ apiKey: '<YOUR_API_KEY>', baseURL: 'http://localhost:8000/v1' });
async function getChatCompletion() {
const response = await client.chat.completions.create({
model: 'DYNAMIC-SYSTEM-CHAT',
messages: [
{ role: 'system', content: JSON.stringify({ personality: 'friendly', detail_level: 'high', tone: 'casual' }) },
{ role: 'user', content: 'Tell me about AI.' }
]
});
console.log(response.choices[0].message.content);
}
getChatCompletion();
Dynamic Both Prompt
const { OpenAI } = require('openai');
const client = new OpenAI({ apiKey: '<YOUR_API_KEY>', baseURL: 'http://localhost:8000/v1' });
async function getChatCompletion() {
const response = await client.chat.completions.create({
model: 'DYNAMIC-BOTH-ONESHOT',
messages: [
{ role: 'system', content: JSON.stringify({ topic: 'quantum computing', concept: 'superposition' }) }
]
});
console.log(response.choices[0].message.content);
}
getChatCompletion();
Streaming Chat Completion
Static Prompt
const { OpenAI } = require('openai');
const client = new OpenAI({ apiKey: '<YOUR_API_KEY>', baseURL: 'http://localhost:8000/v1' });
async function getStreamingChatCompletion() {
const stream = await client.chat.completions.create({
model: 'STATIC-SYSTEM-CHAT',
messages: [{ role: 'user', content: 'Hello, how are you?' }],
stream: true
});
for await (const chunk of stream) {
if (chunk.choices[0].delta.content) process.stdout.write(chunk.choices[0].delta.content);
}
}
getStreamingChatCompletion();
Dynamic System Prompt
const { OpenAI } = require('openai');
const client = new OpenAI({ apiKey: '<YOUR_API_KEY>', baseURL: 'http://localhost:8000/v1' });
async function getStreamingChatCompletion() {
const stream = await client.chat.completions.create({
model: 'DYNAMIC-SYSTEM-CHAT',
messages: [
{ role: 'system', content: JSON.stringify({ personality: 'friendly', detail_level: 'high', tone: 'casual' }) },
{ role: 'user', content: 'Tell me about AI.' }
],
stream: true
});
for await (const chunk of stream) {
if (chunk.choices[0].delta.content) process.stdout.write(chunk.choices[0].delta.content);
}
}
getStreamingChatCompletion();
Dynamic Both Prompt
const { OpenAI } = require('openai');
const client = new OpenAI({ apiKey: '<YOUR_API_KEY>', baseURL: 'http://localhost:8000/v1' });
async function getStreamingChatCompletion() {
const stream = await client.chat.completions.create({
model: 'DYNAMIC-BOTH-ONESHOT',
messages: [
{ role: 'system', content: JSON.stringify({ topic: 'quantum computing', concept: 'superposition' }) }
],
stream: true
});
for await (const chunk of stream) {
if (chunk.choices[0].delta.content) process.stdout.write(chunk.choices[0].delta.content);
}
}
getStreamingChatCompletion();
Additional Notes
- Prompt Keys: The model field uses prompt keys (e.g., DYNAMIC-BOTH-ONESHOT) from the migration scripts, linking to specific prompt configurations.
- Dynamic Variables: Ensure the system message content is a valid JSON string for dynamic prompts.
- Error Handling: Add try-catch blocks or error checks in production code; see the sketch below.
- API Reference: See api-reference.md for more details.
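For the error-handling note above, a minimal sketch using requests (HTTP status and network errors only; adapt the idea to whichever client you use):
import requests
try:
    response = requests.post(
        'http://localhost:8000/v1/chat/completions',
        headers={'Authorization': 'Bearer <YOUR_API_KEY>'},
        json={
            'model': 'STATIC-SYSTEM-CHAT',
            'messages': [{'role': 'user', 'content': 'Hello, how are you?'}]
        },
        timeout=30
    )
    response.raise_for_status()  # raises requests.HTTPError on 4xx/5xx responses
    print(response.json()['choices'][0]['message']['content'])
except requests.RequestException as exc:  # covers HTTP, connection, and timeout errors
    print(f'Request failed: {exc}')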