From a39323db70fa3e5724c305d7219132169fa49ba7 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 5 Jun 2026 12:39:17 -0400 Subject: [PATCH] =?UTF-8?q?litellm:=20fix=20direct=5F*=20model=20IDs=20?= =?UTF-8?q?=E2=80=94=20gemini=202.5,=20drop=20o3-mini?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified the direct_* providers end-to-end after billing was enabled. - OpenAI direct_gpt-4o / direct_gpt-4o-mini: working. - Gemini: gemini-2.0-flash 404s (LiteLLM 1.55.10 rewrites it to a retired experimental name) and gemini-1.5-pro is retired -> switch to the current GA gemini-2.5-flash / gemini-2.5-pro (both verified). - Drop direct_o3-mini: o-series needs max_completion_tokens, which 1.55.10 won't translate from the max_tokens clients (Open WebUI) send -> 400. Re-add after a LiteLLM bump. Co-Authored-By: Claude Opus 4.8 (1M context) --- vars/main.yml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/vars/main.yml b/vars/main.yml index 6470285..ee5bbe5 100644 --- a/vars/main.yml +++ b/vars/main.yml @@ -108,25 +108,28 @@ litellm_models: backend: anthropic/claude-opus-4-7 api_base: "http://127.0.0.1:{{ meridian_port }}" # ---- direct_* → public provider APIs, METERED, keys from Infisical /meridian ---- - # SCAFFOLDED 2026-06-05 with NO real keys: these 401 on call until - # OPENAI_API_KEY / GEMINI_API_KEY land in Infisical /meridian and deploy.sh - # pulls them (litellm.env carries a placeholder so the proxy still boots). + # LIVE 2026-06-05 (OpenAI + Gemini keys verified end-to-end). Keys come from + # Infisical /meridian via deploy.sh; litellm.env carries a placeholder so the + # proxy still boots if a key is absent (that provider's models then 401). # No api_base → LiteLLM uses each provider's default endpoint; routing is by # the backend's provider prefix (openai/, gemini/). + # + # Gemini must be CURRENT model IDs: LiteLLM 1.55.10 rewrites gemini-2.0-flash + # to a retired experimental name (404) and gemini-1.5-pro is itself retired. + # 2.5-flash / 2.5-pro pass through clean. o-series (o3-mini) is intentionally + # absent: it needs max_completion_tokens, which 1.55.10 won't translate from + # the max_tokens that clients (Open WebUI) send → 400. Re-add after a bump. - name: direct_gpt-4o backend: openai/gpt-4o api_key: os.environ/OPENAI_API_KEY - name: direct_gpt-4o-mini backend: openai/gpt-4o-mini api_key: os.environ/OPENAI_API_KEY - - name: direct_o3-mini - backend: openai/o3-mini - api_key: os.environ/OPENAI_API_KEY - - name: direct_gemini-2.0-flash - backend: gemini/gemini-2.0-flash + - name: direct_gemini-2.5-flash + backend: gemini/gemini-2.5-flash api_key: os.environ/GEMINI_API_KEY - - name: direct_gemini-1.5-pro - backend: gemini/gemini-1.5-pro + - name: direct_gemini-2.5-pro + backend: gemini/gemini-2.5-pro api_key: os.environ/GEMINI_API_KEY # Master key is required by LiteLLM. Pulled at deploy time from Infisical # /meridian/vault_litellm_master_key and passed via -e on the playbook