id | str | "llama-4-scout-17b-16e-instruct" | The ID of the Cerebras model to use |
name | str | "Cerebras" | The name of the model |
provider | str | "Cerebras" | The provider of the model |
parallel_tool_calls | Optional[bool] | None | Whether to run tool calls in parallel (automatically set to False for llama-4-scout) |
max_completion_tokens | Optional[int] | None | Maximum number of completion tokens to generate |
repetition_penalty | Optional[float] | None | Penalty for repeating tokens (higher values reduce repetition) |
temperature | Optional[float] | None | Controls randomness in the model’s output (0.0 to 2.0) |
top_p | Optional[float] | None | Controls diversity via nucleus sampling (0.0 to 1.0) |
top_k | Optional[int] | None | Controls diversity via top-k sampling |
extra_headers | Optional[Any] | None | Additional headers to include in requests |
extra_query | Optional[Any] | None | Additional query parameters to include in requests |
extra_body | Optional[Any] | None | Additional body parameters to include in requests |
request_params | Optional[Dict[str, Any]] | None | Additional parameters to include in the request |
api_key | Optional[str] | None | The API key for authenticating with Cerebras (defaults to the `CEREBRAS_API_KEY` environment variable) |
base_url | Optional[Union[str, httpx.URL]] | None | The base URL for the Cerebras API |
timeout | Optional[float] | None | Request timeout in seconds |
max_retries | Optional[int] | None | Maximum number of retries for failed requests |
default_headers | Optional[Any] | None | Default headers to include in all requests |
default_query | Optional[Any] | None | Default query parameters to include in all requests |
http_client | Optional[httpx.Client] | None | HTTP client instance for making requests |
client_params | Optional[Dict[str, Any]] | None | Additional parameters for client configuration |
client | Optional[CerebrasClient] | None | A pre-configured instance of the Cerebras client |
async_client | Optional[AsyncCerebrasClient] | None | A pre-configured instance of the async Cerebras client |