e866d0c89f
Replaces the short-lived mistral-large alias. Backed by ollama_chat/qwen2.5:72b on Anvil. Consumers (paperless-ai, RAG chat, HA, morning-report) target this. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
125 lines
4.2 KiB
YAML
125 lines
4.2 KiB
YAML
---
# Timezone for this host, given as an IANA zone name.
timezone: 'America/Toronto'

# Package names for this host, kept in alphabetical order.
# NOTE(review): presumably installed via apt by a base role — confirm consumer.
packages:
  - apt-utils
  - bash-completion
  - ca-certificates
  - curl
  - git
  - gnupg
  - htop
  - net-tools
  - openssh-server
  - python3
  - python3-pip
  - python3-venv
  - sudo
  - vim
  - wget

# Accounts defined for this host: one entry per user, with its supplementary
# group(s) and login shell.
users:
  - name: cbalders
    groups: sudo
    shell: /bin/bash

# Public SSH keys; the trailing word(s) of each entry are the key comment.
# NOTE(review): written as a top-level key because a blank line separates it
# from the preceding users entry — confirm it is not meant to be nested there.
ssh_authorized_keys:
  - 'ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINALaic1jpoP6t1urbZqJLI1eU5NeTVD9k8AAMAvOvvk OfficeMini'
  - 'ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGzTHdCiQjhIHsGB8oMpyKtr9TZXrXeIRKwcwe698zMW Generated By Termius'

# Alloy ships journald to Loki on observe.lan.balders.ca. There is no Docker
# on this LXC, so Alloy runs as a bare-metal systemd service installed from
# the Grafana apt repo.
alloy_host_label: meridian
alloy_loki_url: 'http://observe.lan.balders.ca:3100/loki/api/v1/push'

# Prometheus job name for Alloy's embedded exporter. During the Track A canary
# a `_canary` suffix on this job lets the embedded exporter run in parallel
# with the existing node_exporter scrape; after parity is verified the job
# becomes plain `node_lxc` and node_exporter is decommissioned.
# The value below is already the final, un-suffixed name.
# NOTE(review): confirm node_exporter has actually been decommissioned on this
# host. If the canary is still in progress, this should carry the `_canary`
# suffix instead, otherwise both scrapes report under the same job.
alloy_prom_job: node_lxc
alloy_prom_group: lxc
alloy_prom_hostname: meridian

# Meridian service settings: service account, install directory, and the
# address/port it listens on.
meridian_user: meridian
meridian_home: /opt/meridian
meridian_port: 3456
meridian_host: '0.0.0.0'
meridian_idle_timeout_seconds: 300
# NOTE(review): presumably the Node.js major version to install — confirm.
meridian_node_major: 22

# LiteLLM: the OpenAI-compatible proxy that sits in front of Meridian.
# Service account, install/venv paths, listen address, and the pinned
# pip requirement (quoted so the extras bracket stays a plain string).
litellm_user: litellm
litellm_home: /opt/litellm
litellm_venv: /opt/litellm/venv
litellm_port: 4000
litellm_host: '0.0.0.0'
litellm_package_spec: 'litellm[proxy]==1.55.10'
# Anvil is the local Ollama backend (Strix Halo iGPU, gfx1151). It is addressed
# by IP rather than DNS so the inference path does not depend on the resolver.
# Ollama has no authentication, so model entries use a placeholder api_key.
anvil_ollama_base: 'http://192.168.1.150:11434'

# Routing policy (as of 2026-05-28): ALL homelab LLM load goes LOCAL. Every
# standard alias that clients already use (claude-*, gpt-*) resolves to
# Anvil/Ollama, so no client reconfiguration is needed. Meridian still runs,
# but Claude/Max is reachable ONLY through the explicit *-max escape-hatch
# aliases at the end of this list (use them for vision or hard reasoning;
# llama3.x is text-only and weaker on complex tasks).
#
# Size split: mini/haiku-class → llama3.1:8b; everything else → llama3.3:70b.
# Anvil has a single GPU with OLLAMA_NUM_PARALLEL=1, so concurrent or mixed
# requests queue, and the 70B and 8B models cannot both stay resident in the
# ~62 GB budget (expect model swaps).
litellm_models:
  # ---- Default aliases → LOCAL (Anvil/Ollama) ----
  # Small tier → llama3.1:8b
  - name: claude-haiku-4-5
    backend: ollama_chat/llama3.1:8b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  - name: gpt-4o-mini
    backend: ollama_chat/llama3.1:8b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  # Large tier → llama3.3:70b
  - name: claude-sonnet-4-6
    backend: ollama_chat/llama3.3:70b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  - name: claude-opus-4-7
    backend: ollama_chat/llama3.3:70b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  - name: gpt-4o
    backend: ollama_chat/llama3.3:70b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  - name: gpt-4-turbo
    backend: ollama_chat/llama3.3:70b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  # Direct local model names (explicit, no aliasing)
  - name: qwen2.5-72b
    backend: ollama_chat/qwen2.5:72b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  - name: llama-3.3-70b
    backend: ollama_chat/llama3.3:70b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  - name: llama-3.1-8b
    backend: ollama_chat/llama3.1:8b
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  # Embedding model: the only entry on the ollama/ prefix, not ollama_chat/.
  - name: nomic-embed-text
    backend: ollama/nomic-embed-text
    api_base: '{{ anvil_ollama_base }}'
    api_key: ollama-no-auth
  # ---- Escape hatches → Claude via Meridian/Max ----
  # These entries set no api_base, so the template default applies
  # (127.0.0.1:meridian_port). Request Claude by these names when the local
  # models can't do the job (vision, hard reasoning).
  - name: claude-haiku-4-5-max
    backend: anthropic/claude-haiku-4-5
  - name: claude-sonnet-4-6-max
    backend: anthropic/claude-sonnet-4-6
  - name: claude-opus-4-7-max
    backend: anthropic/claude-opus-4-7
# LiteLLM requires a master key. At deploy time it is pulled from Infisical
# (/meridian/vault_litellm_master_key) and passed with -e on the playbook run
# (see deploy.sh). When no -e value is supplied, the expression below reads the
# LITELLM_MASTER_KEY environment variable and, if that is unset or empty,
# falls back to the literal placeholder CHANGE_ME.
# NOTE(review): the fallback is silent and LiteLLM listens on 0.0.0.0, so a
# run outside deploy.sh would expose the proxy with a well-known key. Consider
# failing the play when the resolved value is still CHANGE_ME.
litellm_master_key: "{{ lookup('env', 'LITELLM_MASTER_KEY') | default('CHANGE_ME', true) }}"