From b64a95a71b798a031103ae3d58b53fb21f6aaa93 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 29 May 2026 20:07:49 -0400
Subject: [PATCH] litellm: drop claude-*/gpt-* shadow aliases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Honest model names only — local picks up real Ollama names (qwen2.5-72b,
llama-3.3-70b, llama-3.1-8b, nomic-embed-text), Claude via *-max only.

The shadows were briefly useful (paperless-ai wizard probe quirk) and
then briefly used to make the ALL-LOCAL cutover transparent to clients,
but having "claude-sonnet-4-6" silently route to llama3.3:70b in the
Open WebUI picker was a constant foot-gun.

Pulse re-pointed to a clean alias in its UI prior to this push;
paperless-ai was already on qwen2.5-72b.

Trade-off captured in [[litellm-openai-alias-shadowing]].

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 vars/main.yml | 41 +++++++++--------------------------------
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/vars/main.yml b/vars/main.yml
index 429f49f..571d318 100644
--- a/vars/main.yml
+++ b/vars/main.yml
@@ -57,42 +57,19 @@ litellm_package_spec: "litellm[proxy]==1.55.10"
 # the inference path off the resolver. Ollama has no auth (placeholder api_key).
 anvil_ollama_base: "http://192.168.1.150:11434"
 
-# ALL homelab LLM load routes LOCAL as of 2026-05-28. Every standard alias that
-# clients already use (claude-*, gpt-*) now resolves to Anvil/Ollama — no client
-# reconfig needed. Meridian still runs, but Claude/Max is reachable ONLY via the
-# explicit *-max escape-hatch aliases below (use them for vision or hard
-# reasoning — llama3.x is text-only and weaker on complex tasks).
+# Model list — honest names only. Shadow aliases (claude-*/gpt-*) were removed
+# 2026-05-29 because the dual meaning (was-Claude, now-local) was a constant
+# foot-gun in the Open WebUI picker. Local models keep their real names; Claude
+# is reached only via the explicit *-max aliases.
+#
+# Trade-off: any client that hard-codes a claude-*/gpt-* model name in a probe
+# (paperless-ai wizard hits gpt-4o-mini; see [[litellm-openai-alias-shadowing]])
+# will 400 with `Invalid model name` until that alias is re-added.
 #
-# Size split: mini/haiku-class → llama3.1:8b; everything else → llama3.3:70b.
 # Single GPU, OLLAMA_NUM_PARALLEL=1 — concurrent/mixed requests queue and the
 # 70B+8B can't both stay resident in the ~62 GB budget (expect model swaps).
 litellm_models:
-  # ---- Default aliases → LOCAL (Anvil/Ollama) ----
-  - name: claude-haiku-4-5
-    backend: ollama_chat/llama3.1:8b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: gpt-4o-mini
-    backend: ollama_chat/llama3.1:8b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: claude-sonnet-4-6
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: claude-opus-4-7
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: gpt-4o
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: gpt-4-turbo
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  # Direct local model names (explicit)
+  # ---- Local (Anvil/Ollama) ----
   - name: qwen2.5-72b
     backend: ollama_chat/qwen2.5:72b
     api_base: "{{ anvil_ollama_base }}"