---
timezone: America/Toronto

packages:
  - apt-utils
  - bash-completion
  - ca-certificates
  - curl
  - git
  - gnupg
  - htop
  - net-tools
  - openssh-server
  - python3
  - python3-pip
  - python3-venv
  - sudo
  - vim
  - wget

# Login accounts and the SSH public keys authorized for each.
users:
  - name: cbalders
    groups: sudo
    shell: /bin/bash
    ssh_authorized_keys:
      - 'ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINALaic1jpoP6t1urbZqJLI1eU5NeTVD9k8AAMAvOvvk OfficeMini'
      - 'ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGzTHdCiQjhIHsGB8oMpyKtr9TZXrXeIRKwcwe698zMW Generated By Termius'

# Alloy ships journald to Loki on observe.lan.balders.ca. There is no Docker
# on this LXC, so Alloy runs as a bare-metal systemd service installed from
# the Grafana apt repo.
alloy_host_label: meridian
alloy_loki_url: 'http://observe.lan.balders.ca:3100/loki/api/v1/push'

# Track A canary history: a `_canary` suffix on the job name let the embedded
# exporter run in parallel with the existing node_exporter scrape; the plan
# was to flip the job to `node_lxc` once parity was verified and then
# decommission node_exporter.
# NOTE(review): the value below is already `node_lxc`, so the flip appears to
# have been made — confirm node_exporter is decommissioned, then trim this
# history.
alloy_prom_job: node_lxc
alloy_prom_group: lxc
alloy_prom_hostname: meridian

# Meridian
meridian_user: meridian
meridian_home: /opt/meridian
meridian_port: 3456
meridian_host: '0.0.0.0'
meridian_idle_timeout_seconds: 300
meridian_node_major: 22

# LiteLLM — the OpenAI-compatible proxy that sits in front of Meridian.
litellm_user: litellm
litellm_home: /opt/litellm
litellm_venv: /opt/litellm/venv
litellm_port: 4000
litellm_host: '0.0.0.0'
# Pinned exactly; quoted because the spec contains `[` and `=`.
litellm_package_spec: 'litellm[proxy]==1.55.10'

# Anvil — the local Ollama backend (Strix Halo iGPU, gfx1151). It is addressed
# by IP rather than DNS to keep the inference path off the resolver. Ollama
# has no auth, so the api_key used for it is only a placeholder.
anvil_ollama_base: 'http://192.168.1.150:11434'

# ALL homelab LLM load routes LOCAL as of 2026-05-28. Every standard alias
# that clients already use (claude-*, gpt-*) now resolves to Anvil/Ollama, so
# no client reconfiguration is needed. Meridian still runs, but Claude/Max is
# reachable ONLY via the explicit *-max escape-hatch aliases below (use them
# for vision or hard reasoning — llama3.x is text-only and weaker on complex
# tasks).
#
# Size split: mini/haiku-class aliases → llama3.1:8b;
# everything else → llama3.3:70b.
# Single GPU, OLLAMA_NUM_PARALLEL=1 — concurrent/mixed requests queue, and the
# 70B and 8B models can't both stay resident in the ~62 GB budget (expect
# model swaps).
litellm_models:
  # ---- Default aliases → LOCAL (Anvil/Ollama) ----
  # 8B tier
  - name: claude-haiku-4-5
    backend: 'ollama_chat/llama3.1:8b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: gpt-4o-mini
    backend: 'ollama_chat/llama3.1:8b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  # 70B tier
  - name: claude-sonnet-4-6
    backend: 'ollama_chat/llama3.3:70b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: claude-opus-4-7
    backend: 'ollama_chat/llama3.3:70b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: gpt-4o
    backend: 'ollama_chat/llama3.3:70b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: gpt-4-turbo
    backend: 'ollama_chat/llama3.3:70b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth

  # Direct local model names (explicit)
  - name: llama-3.3-70b
    backend: 'ollama_chat/llama3.3:70b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: llama-3.1-8b
    backend: 'ollama_chat/llama3.1:8b'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: nomic-embed-text
    backend: 'ollama/nomic-embed-text'
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth

  # ---- Escape hatches → Claude via Meridian/Max ----
  # These entries set no api_base, so the template default
  # (127.0.0.1:meridian_port) applies. Reach Claude by these names when the
  # local models can't do the job (vision, hard reasoning).
  - name: claude-haiku-4-5-max
    backend: 'anthropic/claude-haiku-4-5'
  - name: claude-sonnet-4-6-max
    backend: 'anthropic/claude-sonnet-4-6'
  - name: claude-opus-4-7-max
    backend: 'anthropic/claude-opus-4-7'

# LiteLLM requires a master key. It is pulled at deploy time from Infisical
# (/meridian/vault_litellm_master_key) and passed to the playbook via -e
# (see deploy.sh).
# This expression reads LITELLM_MASTER_KEY from the environment;
# default(..., true) substitutes the placeholder when the variable is unset
# or empty.
# NOTE(review): CHANGE_ME is a guessable key and litellm_host binds 0.0.0.0 —
# confirm something downstream rejects the placeholder, or fail fast here.
litellm_master_key: >-
  {{ lookup('env', 'LITELLM_MASTER_KEY') | default('CHANGE_ME', true) }}