From b64a95a71b798a031103ae3d58b53fb21f6aaa93 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 20:07:49 -0400
Subject: [PATCH] litellm: drop claude-*/gpt-* shadow aliases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Honest model names only: local models are exposed under their real
Ollama names (qwen2.5-72b, llama-3.3-70b, llama-3.1-8b,
nomic-embed-text), and Claude is reachable only via the explicit *-max
aliases.

The shadow aliases were briefly useful (for the paperless-ai wizard
probe quirk) and were then used to make the ALL-LOCAL cutover
transparent to clients. But having "claude-sonnet-4-6" silently route to
llama3.3:70b in the Open WebUI picker was a constant foot-gun. Pulse was
re-pointed to a clean alias in its UI prior to this push; paperless-ai
was already on qwen2.5-72b. The trade-off (any client that hard-codes a
claude-*/gpt-* model name will be rejected until that alias is re-added)
is captured in [[litellm-openai-alias-shadowing]].

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 vars/main.yml | 41 +++++++++--------------------------------
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/vars/main.yml b/vars/main.yml
index 429f49f..571d318 100644
--- a/vars/main.yml
+++ b/vars/main.yml
@@ -57,42 +57,19 @@ litellm_package_spec: "litellm[proxy]==1.55.10"
 # the inference path off the resolver. Ollama has no auth (placeholder api_key).
 anvil_ollama_base: "http://192.168.1.150:11434"
 
-# ALL homelab LLM load routes LOCAL as of 2026-05-28. Every standard alias that
-# clients already use (claude-*, gpt-*) now resolves to Anvil/Ollama — no client
-# reconfig needed. Meridian still runs, but Claude/Max is reachable ONLY via the
-# explicit *-max escape-hatch aliases below (use them for vision or hard
-# reasoning — llama3.x is text-only and weaker on complex tasks).
+# Model list — honest names only. Shadow aliases (claude-*/gpt-*) were removed
+# 2026-05-29 because the dual meaning (was-Claude, now-local) was a constant
+# foot-gun in the Open WebUI picker. Local models keep their real names; Claude
+# is reached only via the explicit *-max aliases.
+#
+# Trade-off: any client that hard-codes a claude-*/gpt-* model name in a probe
+# (paperless-ai wizard hits gpt-4o-mini; see [[litellm-openai-alias-shadowing]])
+# will 400 with `Invalid model name` until that alias is re-added.
 #
-# Size split: mini/haiku-class → llama3.1:8b; everything else → llama3.3:70b.
 # Single GPU, OLLAMA_NUM_PARALLEL=1 — concurrent/mixed requests queue and the
 # 70B+8B can't both stay resident in the ~62 GB budget (expect model swaps).
 litellm_models:
-  # ---- Default aliases → LOCAL (Anvil/Ollama) ----
-  - name: claude-haiku-4-5
-    backend: ollama_chat/llama3.1:8b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: gpt-4o-mini
-    backend: ollama_chat/llama3.1:8b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: claude-sonnet-4-6
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: claude-opus-4-7
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: gpt-4o
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  - name: gpt-4-turbo
-    backend: ollama_chat/llama3.3:70b
-    api_base: "{{ anvil_ollama_base }}"
-    api_key: ollama-no-auth
-  # Direct local model names (explicit)
+  # ---- Local (Anvil/Ollama) ----
   - name: qwen2.5-72b
     backend: ollama_chat/qwen2.5:72b
     api_base: "{{ anvil_ollama_base }}"