Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
POST /v1/chat/completions model: "meta/llama-guard-3-8b"
{
"model": "meta/llama-guard-3-8b",
"messages": [
{ "role": "system", "content": "You are a helpful assistant." },
{ "role": "user", "content": "Hello!" }
],
"temperature": 0.7,
"top_p": 0.95,
"max_tokens": 1024,
"stream": false
}{
"id": "chatcmpl-abc123",
"object": "chat.completion",
"created": 1776947082,
"model": "meta/llama-guard-3-8b",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! How can I help you today?"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 24,
"completion_tokens": 12,
"total_tokens": 36
}
}"stream": true// 1. Role announcement (first chunk):
data: {"choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
// 2. Content chunks (final answer):
data: {"choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
data: {"choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}
// Finish chunk:
data: {"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
// Terminator:
data: [DONE]# pip install aigateway-py openai
# aigateway-py adds sub-accounts, evals, replays, jobs, webhook verify.
# openai SDK covers chat — drop-in per our SDK's own guidance.
from openai import OpenAI
client = OpenAI(
base_url="https://api.aigateway.sh/v1",
api_key="sk-aig-...",
)
stream = client.chat.completions.create(
model="meta/llama-guard-3-8b",
messages=[{"role": "user", "content": "Hello!"}],
stream=True,
)
for chunk in stream:
print(chunk.choices[0].delta.content or "", end="", flush=True)