# homelab-ansible-lxc-meridian/vars/main.yml

---
# IANA tz database zone name. Presumably applied to the container's system
# clock by the role's tasks — TODO confirm against the consuming task.
timezone: America/Toronto

# Base package set for this LXC, kept in alphabetical order. How the list is
# consumed is not visible here — presumably one package-install task in the
# role; confirm there before relying on ordering.
packages:
  - apt-utils
  - bash-completion
  - ca-certificates
  - curl
  - git
  - gnupg
  - htop
  - net-tools
  - openssh-server
  - python3
  - python3-pip
  - python3-venv
  - sudo
  - vim
  - wget

# Accounts to manage. Each entry carries the account name, its group
# membership and its login shell. `groups` is a single group name here rather
# than a list — presumably passed straight to the user module; TODO confirm.
users:
  - name: cbalders
    groups: sudo
    shell: /bin/bash

# OpenSSH public keys (ed25519). The text after the base64 blob is the key's
# free-form comment field. Which account(s) these keys are authorized for is
# decided by the task that consumes this list — TODO confirm.
ssh_authorized_keys:
  - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINALaic1jpoP6t1urbZqJLI1eU5NeTVD9k8AAMAvOvvk OfficeMini"
  - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGzTHdCiQjhIHsGB8oMpyKtr9TZXrXeIRKwcwe698zMW Generated By Termius"

# Alloy ships journald to Loki on observe.lan.balders.ca. No docker on
# this LXC — bare-metal systemd Alloy via Grafana apt repo.
alloy_host_label: meridian
alloy_loki_url: http://observe.lan.balders.ca:3100/loki/api/v1/push
# Track A canary: a _canary suffix on the job name lets the embedded exporter
# run in parallel with the existing node_exporter scrape; once parity is
# verified, the job is flipped to node_lxc and node_exporter decommissioned.
# NOTE(review): the value below is already node_lxc (no _canary suffix), so
# the flip appears to have been made — confirm node_exporter is gone and trim
# this comment, or restore the suffix if the canary is still running.
alloy_prom_job: node_lxc
alloy_prom_group: lxc
alloy_prom_hostname: meridian

# Meridian — the service LiteLLM sits in front of; the litellm_models comments
# describe it as the Max-OAuth bridge to Claude.
meridian_user: meridian
meridian_home: /opt/meridian
# The proxy_* entries in litellm_models build their api_base from this port.
meridian_port: 3456
# Presumably the listen address; quoted so it stays a string. 0.0.0.0 is the
# all-interfaces wildcard.
# NOTE(review): litellm_models reaches Meridian over 127.0.0.1 — if no other
# host talks to Meridian directly, 127.0.0.1 here would narrow exposure; confirm.
meridian_host: "0.0.0.0"
# Presumably an idle timeout in seconds, per the name — TODO confirm what it
# governs in the unit/template that consumes it.
meridian_idle_timeout_seconds: 300
# Presumably the Node.js major version installed for Meridian — TODO confirm.
meridian_node_major: 22

# LiteLLM — OpenAI-compatible proxy in front of Meridian
litellm_user: litellm
litellm_home: /opt/litellm
# Derived from litellm_home so relocating the install needs only one edit;
# resolves to /opt/litellm/venv with the value above.
litellm_venv: "{{ litellm_home }}/venv"
litellm_port: 4000
# Quoted so the address stays a string; 0.0.0.0 is the all-interfaces wildcard.
litellm_host: "0.0.0.0"
# Exact version pin. The embeddings caveat on local_nomic-embed-text in
# litellm_models names this version — revisit it when bumping the pin.
litellm_package_spec: "litellm[proxy]==1.55.10"

# Anvil — local Ollama backend (Strix Halo iGPU, gfx1151). IP not DNS keeps
# the inference path off the resolver. Ollama has no auth (placeholder api_key).
anvil_ollama_base: "http://192.168.1.150:11434"

# Model list — a prefix states each model's backend up front, so the Open WebUI
# picker is self-documenting (re-indexed 2026-06-05):
#   local_*  → Anvil/Ollama, local GPU, no metered cost
#   proxy_*  → Claude via Meridian's Max-OAuth bridge, no per-token cost
#   direct_* → a real provider API (OpenAI/Gemini), METERED billing, key from
#              Infisical /meridian
#
# This replaces the bare-name scheme. Shadow aliases (bare claude-*/gpt-*) were
# already removed 2026-05-29 because the dual meaning (was-Claude, now-local) was
# a foot-gun; the prefix makes that impossible — a name can no longer lie about
# where it routes. See [[litellm-openai-alias-shadowing]].
#
# Trade-off: clients that hard-code a bare model name in a probe (the
# paperless-ai wizard hits gpt-4o-mini) get a 400 until they are pointed at a
# prefixed name — or, to reach OpenAI itself, at direct_gpt-4o-mini once a key
# is in Infisical.
#
# Single GPU, OLLAMA_NUM_PARALLEL=1 — concurrent/mixed requests queue and the
# 70B+8B can't both stay resident in the ~62 GB budget (expect model swaps).
# Entry schema (how entries are rendered into LiteLLM's config is not visible
# here — presumably a template in this role; confirm there):
#   name     → alias shown to clients, carrying the local_/proxy_/direct_ prefix
#   backend  → "<provider>/<model>" string; the provider prefix selects routing
#   api_base → endpoint override; omitted where the provider default is wanted
#   api_key  → literal placeholder, or os.environ/<VAR> for a key taken from
#              the proxy's environment; absent on the proxy_* entries
litellm_models:
  # ---- local_* → Anvil/Ollama, local GPU, no cost ----
  - name: local_qwen2.5-72b
    backend: ollama_chat/qwen2.5:72b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: local_llama-3.3-70b
    backend: ollama_chat/llama3.3:70b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: local_llama-3.1-8b
    backend: ollama_chat/llama3.1:8b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  # Embeddings don't actually route through LiteLLM 1.55.10 (broken async ollama
  # handler → 500); consumers hit Anvil direct. Kept as a catalog entry only.
  - name: local_nomic-embed-text
    backend: ollama/nomic-embed-text
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  # ---- proxy_* → Claude via Meridian/Max, no per-token cost ----
  # api_base pins Meridian's local Anthropic endpoint (the OAuth Max sub pays).
  # Reach Claude by name when local can't do the job (vision, hard reasoning).
  # NOTE(review): no api_key on these entries — presumably Meridian ignores it
  # or the proxy's environment supplies one; confirm.
  - name: proxy_claude-haiku-4-5
    backend: anthropic/claude-haiku-4-5
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  - name: proxy_claude-sonnet-4-6
    backend: anthropic/claude-sonnet-4-6
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  - name: proxy_claude-opus-4-7
    backend: anthropic/claude-opus-4-7
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  # ---- direct_* → public provider APIs, METERED, keys from Infisical /meridian ----
  # SCAFFOLDED 2026-06-05 with NO real keys: these 401 on call until
  # OPENAI_API_KEY / GEMINI_API_KEY land in Infisical /meridian and deploy.sh
  # pulls them (litellm.env carries a placeholder so the proxy still boots).
  # No api_base → LiteLLM uses each provider's default endpoint; routing is by
  # the backend's provider prefix (openai/, gemini/).
  - name: direct_gpt-4o
    backend: openai/gpt-4o
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_gpt-4o-mini
    backend: openai/gpt-4o-mini
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_o3-mini
    backend: openai/o3-mini
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_gemini-2.0-flash
    backend: gemini/gemini-2.0-flash
    api_key: os.environ/GEMINI_API_KEY
  - name: direct_gemini-1.5-pro
    backend: gemini/gemini-1.5-pro
    api_key: os.environ/GEMINI_API_KEY
# Master key is required by LiteLLM. Pulled at deploy time from Infisical
# /meridian/vault_litellm_master_key and passed via -e on the playbook
# (see deploy.sh). If deploy.sh supplies litellm_master_key itself as an
# extra-var, that value takes precedence and the expression below is never
# evaluated. Otherwise the key is read from the LITELLM_MASTER_KEY environment
# variable; an unset or empty variable now aborts the play with the hint below
# instead of falling back to a well-known placeholder key on a proxy that
# listens on litellm_host.
litellm_master_key: >-
  {{ lookup('env', 'LITELLM_MASTER_KEY')
  | default(undef(hint='LITELLM_MASTER_KEY is not set (see deploy.sh)'), true) }}

# Provider keys for direct_* models. Optional — deploy.sh pulls them from
# Infisical /meridian if present, else they stay empty and litellm.env writes a
# placeholder so the proxy still boots (direct_* models just 401 until a real
# key lands). Drop OPENAI_API_KEY / GEMINI_API_KEY into Infisical /meridian to
# activate them.
# default('', true) collapses both an unset and an empty environment variable
# to the empty string. The variable names match the os.environ/… references on
# the direct_* entries in litellm_models.
litellm_openai_api_key: "{{ lookup('env', 'OPENAI_API_KEY') | default('', true) }}"
litellm_gemini_api_key: "{{ lookup('env', 'GEMINI_API_KEY') | default('', true) }}"