litellm: route all homelab LLM load to Anvil/Ollama by default
Per-model api_base/api_key overrides in the template (default stays Meridian's local port). All standard aliases (claude-*, gpt-*) now point at Anvil's Ollama (mini/haiku-class -> llama3.1:8b, rest -> llama3.3:70b). Claude/Max reachable only via new *-max escape-hatch aliases. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,18 @@
|
|||||||
# {{ ansible_managed }}
|
# {{ ansible_managed }}
|
||||||
#
|
#
|
||||||
# LiteLLM proxy config. Routes OpenAI-shaped requests to Meridian's
|
# LiteLLM proxy config. Routes OpenAI-shaped requests to backends:
|
||||||
# /v1/messages (Anthropic format). Meridian (same host, :3456) ignores the
|
# - anthropic/ backends (the *-max aliases) → Meridian's /v1/messages (same host, :3456), which
|
||||||
# upstream API key, so we pass a placeholder.
|
# ignores the upstream API key (placeholder passed below).
|
||||||
|
# - Local models → Anvil's Ollama (ollama_chat/ and ollama/ providers;
|
||||||
|
# endpoint at http://192.168.1.150:11434). Set per-model api_base in vars.
|
||||||
|
|
||||||
model_list:
|
model_list:
|
||||||
{% for m in litellm_models %}
|
{% for m in litellm_models %}
|
||||||
- model_name: {{ m.name }}
|
- model_name: {{ m.name }}
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: {{ m.backend }}
|
model: {{ m.backend }}
|
||||||
api_base: http://127.0.0.1:{{ meridian_port }}
|
api_base: {{ m.api_base | default('http://127.0.0.1:' ~ meridian_port) }}
|
||||||
api_key: placeholder-meridian-ignores-this
|
api_key: {{ m.api_key | default('placeholder-meridian-ignores-this') }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
|
|||||||
+55
-15
@@ -53,27 +53,67 @@ litellm_venv: /opt/litellm/venv
|
|||||||
litellm_port: 4000
|
litellm_port: 4000
|
||||||
litellm_host: "0.0.0.0"
|
litellm_host: "0.0.0.0"
|
||||||
litellm_package_spec: "litellm[proxy]==1.55.10"
|
litellm_package_spec: "litellm[proxy]==1.55.10"
|
||||||
# Models map onto Meridian's pinned Anthropic-shape backend on 127.0.0.1:3456.
|
# Anvil — local Ollama backend (Strix Halo iGPU, gfx1151). IP not DNS keeps
|
||||||
# Native Claude aliases AND OpenAI-named aliases — some clients (paperless-ai's
|
# the inference path off the resolver. Ollama has no auth (placeholder api_key).
|
||||||
# setup wizard) hardcode `model=gpt-4o-mini` for validation regardless of
|
anvil_ollama_base: "http://192.168.1.150:11434"
|
||||||
# what you set as the default model, so we shadow the common OpenAI names too.
|
|
||||||
|
# ALL homelab LLM load routes LOCAL as of 2026-05-28. Every standard alias that
|
||||||
|
# clients already use (claude-*, gpt-*) now resolves to Anvil/Ollama — no client
|
||||||
|
# reconfig needed. Meridian still runs, but Claude/Max is reachable ONLY via the
|
||||||
|
# explicit *-max escape-hatch aliases below (use them for vision or hard
|
||||||
|
# reasoning — llama3.x is text-only and weaker on complex tasks).
|
||||||
|
#
|
||||||
|
# Size split: mini/haiku-class → llama3.1:8b; everything else → llama3.3:70b.
|
||||||
|
# Single GPU, OLLAMA_NUM_PARALLEL=1 — concurrent/mixed requests queue and the
|
||||||
|
# 70B+8B can't both stay resident in the ~62 GB budget (expect model swaps).
|
||||||
litellm_models:
|
litellm_models:
|
||||||
# Native Claude aliases (preferred for new clients)
|
# ---- Default aliases → LOCAL (Anvil/Ollama) ----
|
||||||
- name: claude-haiku-4-5
|
- name: claude-haiku-4-5
|
||||||
backend: anthropic/claude-haiku-4-5
|
backend: ollama_chat/llama3.1:8b
|
||||||
- name: claude-sonnet-4-6
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
backend: anthropic/claude-sonnet-4-6
|
api_key: ollama-no-auth
|
||||||
- name: claude-opus-4-7
|
|
||||||
backend: anthropic/claude-opus-4-7
|
|
||||||
# OpenAI-name shadows — for clients that probe gpt-* names regardless of
|
|
||||||
# config (paperless-ai wizard, Open WebUI defaults, etc.). All actually
|
|
||||||
# backed by Claude on the Max sub.
|
|
||||||
- name: gpt-4o-mini
|
- name: gpt-4o-mini
|
||||||
backend: anthropic/claude-haiku-4-5
|
backend: ollama_chat/llama3.1:8b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
|
- name: claude-sonnet-4-6
|
||||||
|
backend: ollama_chat/llama3.3:70b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
|
- name: claude-opus-4-7
|
||||||
|
backend: ollama_chat/llama3.3:70b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
- name: gpt-4o
|
- name: gpt-4o
|
||||||
backend: anthropic/claude-sonnet-4-6
|
backend: ollama_chat/llama3.3:70b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
- name: gpt-4-turbo
|
- name: gpt-4-turbo
|
||||||
|
backend: ollama_chat/llama3.3:70b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
|
# Direct local model names (explicit)
|
||||||
|
- name: llama-3.3-70b
|
||||||
|
backend: ollama_chat/llama3.3:70b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
|
- name: llama-3.1-8b
|
||||||
|
backend: ollama_chat/llama3.1:8b
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
|
- name: nomic-embed-text
|
||||||
|
backend: ollama/nomic-embed-text
|
||||||
|
api_base: "{{ anvil_ollama_base }}"
|
||||||
|
api_key: ollama-no-auth
|
||||||
|
# ---- Escape hatches → Claude via Meridian/Max ----
|
||||||
|
# No api_base → template default (127.0.0.1:meridian_port). Reach Claude by
|
||||||
|
# name when local can't do the job (vision, hard reasoning).
|
||||||
|
- name: claude-haiku-4-5-max
|
||||||
|
backend: anthropic/claude-haiku-4-5
|
||||||
|
- name: claude-sonnet-4-6-max
|
||||||
backend: anthropic/claude-sonnet-4-6
|
backend: anthropic/claude-sonnet-4-6
|
||||||
|
- name: claude-opus-4-7-max
|
||||||
|
backend: anthropic/claude-opus-4-7
|
||||||
# Master key is required by LiteLLM. Pulled at deploy time from Infisical
|
# Master key is required by LiteLLM. Pulled at deploy time from Infisical
|
||||||
# /meridian/vault_litellm_master_key and passed via -e on the playbook
|
# /meridian/vault_litellm_master_key and passed via -e on the playbook
|
||||||
# (see deploy.sh).
|
# (see deploy.sh).
|
||||||
|
|||||||
Reference in New Issue
Block a user