Path parameters
- inference_id
string Required The inference ID
Query parameters
- timeout
string Specifies the amount of time to wait for the inference request to complete.
Values are `-1` or `0`.
Body Required
- messages
array[object] Required A list of objects representing the conversation. Requests should generally only add new messages from the user (role `user`). The other message roles (`assistant`, `system`, or `tool`) should generally only be copied from the response to a previous completion request, such that the messages array is built up throughout a conversation. An object representing part of the conversation.
- model
string The ID of the model to use.
- max_completion_tokens
number The upper bound limit for the number of tokens that can be generated for a completion request.
- stop
array[string] A sequence of strings to control when the model should stop generating additional tokens.
- temperature
number The sampling temperature to use.
- tools
array[object] A list of tools that the model can call. Example:
{ "tools": [ { "type": "function", "function": { "name": "get_price_of_item", "description": "Get the current price of an item", "parameters": { "type": "object", "properties": { "item": { "id": "12345" }, "unit": { "type": "currency" } } } } } ] }
A list of tools that the model can call.
- top_p
number Nucleus sampling, an alternative to sampling with temperature.
POST _inference/chat_completion/openai-completion/_stream
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is Elastic?"
}
]
}
resp = client.inference.chat_completion_unified(
inference_id="openai-completion",
chat_completion_request={
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is Elastic?"
}
]
},
)
const response = await client.inference.chatCompletionUnified({
inference_id: "openai-completion",
chat_completion_request: {
model: "gpt-4o",
messages: [
{
role: "user",
content: "What is Elastic?",
},
],
},
});
response = client.inference.chat_completion_unified(
inference_id: "openai-completion",
body: {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is Elastic?"
}
]
}
)
$resp = $client->inference()->chatCompletionUnified([
"inference_id" => "openai-completion",
"body" => [
"model" => "gpt-4o",
"messages" => array(
[
"role" => "user",
"content" => "What is Elastic?",
],
),
],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"model":"gpt-4o","messages":[{"role":"user","content":"What is Elastic?"}]}' "$ELASTICSEARCH_URL/_inference/chat_completion/openai-completion/_stream"
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is Elastic?"
}
]
}
{
"messages": [
{
"role": "assistant",
"content": "Let's find out what the weather is",
"tool_calls": [
{
"id": "call_KcAjWtAww20AihPHphUh46Gd",
"type": "function",
"function": {
"name": "get_current_weather",
"arguments": "{\"location\":\"Boston, MA\"}"
}
}
]
},
{
"role": "tool",
"content": "The weather is cold",
"tool_call_id": "call_KcAjWtAww20AihPHphUh46Gd"
}
]
}
{
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What's the price of a scarf?"
}
]
}
],
"tools": [
{
"type": "function",
"function": {
"name": "get_current_price",
"description": "Get the current price of a item",
"parameters": {
"type": "object",
"properties": {
"item": {
"id": "123"
}
}
}
}
}
],
"tool_choice": {
"type": "function",
"function": {
"name": "get_current_price"
}
}
}
event: message
data: {"chat_completion":{"id":"chatcmpl-Ae0TWsy2VPnSfBbv5UztnSdYUMFP3","choices":[{"delta":{"content":"","role":"assistant"},"index":0}],"model":"gpt-4o-2024-08-06","object":"chat.completion.chunk"}}
event: message
data: {"chat_completion":{"id":"chatcmpl-Ae0TWsy2VPnSfBbv5UztnSdYUMFP3","choices":[{"delta":{"content":"Elastic"},"index":0}],"model":"gpt-4o-2024-08-06","object":"chat.completion.chunk"}}
event: message
data: {"chat_completion":{"id":"chatcmpl-Ae0TWsy2VPnSfBbv5UztnSdYUMFP3","choices":[{"delta":{"content":" is"},"index":0}],"model":"gpt-4o-2024-08-06","object":"chat.completion.chunk"}}
(...)
event: message
data: {"chat_completion":{"id":"chatcmpl-Ae0TWsy2VPnSfBbv5UztnSdYUMFP3","choices":[],"model":"gpt-4o-2024-08-06","object":"chat.completion.chunk","usage":{"completion_tokens":28,"prompt_tokens":16,"total_tokens":44}}}
event: message
data: [DONE]