Create response (OpenAI Responses API)
POST /openai/v1/responses
Creates a response using the OpenAI Responses API format. Supports streaming via SSE.
Async inference: Send x-bf-async: true to submit the request as a background job and receive a job ID immediately. Poll with x-bf-async-id: <job-id> to retrieve the result. While the job is still processing, the response status will be a value other than completed. Once the job has completed, the full response, including output_text, is returned. See Async Inference for details.
Note: This endpoint also works without the /v1 prefix (e.g., /openai/responses).
Header Parameters
x-bf-async — Set to true to submit this request as an async job. Returns immediately with a job ID. Not compatible with streaming.
"true"
x-bf-async-id — Poll for results of a previously submitted async job by providing the job ID returned from the initial async request.
Override the default result TTL in seconds. Results expire after this duration from completion time.
3600
Request Body
application/json
TypeScript Definitions
Use the request body type in TypeScript.
Response Body
application/json
application/json
curl -X POST "http://localhost:8080/openai/v1/responses" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "input": "string"
  }'
{
"id": "string",
"background": true,
"conversation": {},
"created_at": 0,
"error": {
"code": "string",
"message": "string"
},
"include": [
"string"
],
"incomplete_details": {
"reason": "string"
},
"instructions": {},
"max_output_tokens": 0,
"max_tool_calls": 0,
"metadata": {},
"model": "string",
"output": [
{
"id": "string",
"type": "message",
"status": "in_progress",
"role": "assistant",
"content": "string",
"call_id": "string",
"name": "string",
"arguments": "string",
"output": {},
"action": {},
"error": "string",
"queries": [
"string"
],
"results": [
{}
],
"summary": [
{
"type": "summary_text",
"text": "string"
}
],
"encrypted_content": "string"
}
],
"parallel_tool_calls": true,
"previous_response_id": "string",
"prompt": {},
"prompt_cache_key": "string",
"reasoning": {
"effort": "none",
"generate_summary": "string",
"summary": "auto",
"max_tokens": 0
},
"safety_identifier": "string",
"service_tier": "string",
"status": "completed",
"stop_reason": "string",
"store": true,
"temperature": 0,
"text": {
"format": {
"type": "text",
"name": "string",
"schema": {},
"strict": true
},
"verbosity": "low"
},
"top_logprobs": 0,
"top_p": 0,
"tool_choice": "none",
"tools": [
{
"type": "function",
"name": "string",
"description": "string",
"cache_control": {
"type": "ephemeral",
"ttl": "string"
},
"parameters": {
"type": "string",
"description": "string",
"required": [
"string"
],
"properties": {},
"enum": [
"string"
],
"additionalProperties": true
},
"strict": true,
"vector_store_ids": [
"string"
],
"filters": {},
"max_num_results": 0,
"ranking_options": {},
"display_height": 0,
"display_width": 0,
"environment": "string",
"enable_zoom": true,
"search_context_size": "string",
"user_location": {},
"server_label": "string",
"server_url": "string",
"allowed_tools": {},
"authorization": "string",
"connector_id": "string",
"headers": {
"property1": "string",
"property2": "string"
},
"require_approval": {},
"server_description": "string",
"container": {},
"background": "string",
"input_fidelity": "string",
"input_image_mask": {},
"moderation": "string",
"output_compression": 0,
"output_format": "string",
"partial_images": 0,
"quality": "string",
"size": "string",
"format": {}
}
],
"truncation": "string",
"usage": {
"input_tokens": 0,
"input_tokens_details": {
"text_tokens": 0,
"audio_tokens": 0,
"image_tokens": 0,
"cached_read_tokens": 0,
"cached_write_tokens": 0
},
"output_tokens": 0,
"output_tokens_details": {
"text_tokens": 0,
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0,
"citation_tokens": 0,
"num_search_queries": 0
},
"total_tokens": 0,
"cost": {
"input_tokens_cost": 0,
"output_tokens_cost": 0,
"reasoning_tokens_cost": 0,
"citation_tokens_cost": 0,
"search_queries_cost": 0,
"request_cost": 0,
"total_cost": 0
}
},
"extra_fields": {
"request_type": "string",
"provider": "openai",
"model_requested": "string",
"model_deployment": "string",
"latency": 0,
"chunk_index": 0,
"raw_request": {},
"raw_response": {},
"cache_debug": {
"cache_hit": true,
"cache_id": "string",
"hit_type": "string",
"requested_provider": "string",
"requested_model": "string",
"provider_used": "string",
"model_used": "string",
"input_tokens": 0,
"threshold": 0,
"similarity": 0
}
},
"search_results": [
{
"title": "string",
"url": "string",
"date": "string",
"last_updated": "string",
"snippet": "string",
"source": "string"
}
],
"videos": [
{
"url": "string",
"thumbnail_url": "string",
"thumbnail_width": 0,
"thumbnail_height": 0,
"duration": 0
}
],
"citations": [
"string"
]
}
{
"event_id": "string",
"type": "string",
"is_bifrost_error": true,
"status_code": 0,
"error": {
"type": "string",
"code": "string",
"message": "string",
"param": "string",
"event_id": "string"
},
"extra_fields": {
"provider": "openai",
"model_requested": "string",
"request_type": "string"
}
}
{
"event_id": "string",
"type": "string",
"is_bifrost_error": true,
"status_code": 0,
"error": {
"type": "string",
"code": "string",
"message": "string",
"param": "string",
"event_id": "string"
},
"extra_fields": {
"provider": "openai",
"model_requested": "string",
"request_type": "string"
}
}
Create image POST
Generates images from text prompts using OpenAI-compatible format. Note: Azure OpenAI deployments are also supported via the Azure integration endpoint. Note: This endpoint also works without the /v1 prefix (e.g., /openai/images/generations).
Create speech (OpenAI TTS) POST
Generates audio from text using OpenAI TTS. Supports streaming via SSE when stream_format is set to 'sse'. Note: This endpoint also works without the /v1 prefix (e.g., /openai/audio/speech).