diff --git a/vars/main.yml b/vars/main.yml index 6470285..ee5bbe5 100644 --- a/vars/main.yml +++ b/vars/main.yml @@ -108,25 +108,28 @@ litellm_models: backend: anthropic/claude-opus-4-7 api_base: "http://127.0.0.1:{{ meridian_port }}" # ---- direct_* → public provider APIs, METERED, keys from Infisical /meridian ---- - # SCAFFOLDED 2026-06-05 with NO real keys: these 401 on call until - # OPENAI_API_KEY / GEMINI_API_KEY land in Infisical /meridian and deploy.sh - # pulls them (litellm.env carries a placeholder so the proxy still boots). + # LIVE 2026-06-05 (OpenAI + Gemini keys verified end-to-end). Keys come from + # Infisical /meridian via deploy.sh; litellm.env carries a placeholder so the + # proxy still boots if a key is absent (that provider's models then 401). # No api_base → LiteLLM uses each provider's default endpoint; routing is by # the backend's provider prefix (openai/, gemini/). + # + # Gemini must be CURRENT model IDs: LiteLLM 1.55.10 rewrites gemini-2.0-flash + # to a retired experimental name (404) and gemini-1.5-pro is itself retired. + # 2.5-flash / 2.5-pro pass through clean. o-series (o3-mini) is intentionally + # absent: it needs max_completion_tokens, which 1.55.10 won't translate from + # the max_tokens that clients (Open WebUI) send → 400. Re-add after a bump. - name: direct_gpt-4o backend: openai/gpt-4o api_key: os.environ/OPENAI_API_KEY - name: direct_gpt-4o-mini backend: openai/gpt-4o-mini api_key: os.environ/OPENAI_API_KEY - - name: direct_o3-mini - backend: openai/o3-mini - api_key: os.environ/OPENAI_API_KEY - - name: direct_gemini-2.0-flash - backend: gemini/gemini-2.0-flash + - name: direct_gemini-2.5-flash + backend: gemini/gemini-2.5-flash api_key: os.environ/GEMINI_API_KEY - - name: direct_gemini-1.5-pro - backend: gemini/gemini-1.5-pro + - name: direct_gemini-2.5-pro + backend: gemini/gemini-2.5-pro api_key: os.environ/GEMINI_API_KEY # Master key is required by LiteLLM. Pulled at deploy time from Infisical # /meridian/vault_litellm_master_key and passed via -e on the playbook