---
timezone: America/Toronto

packages:
  - apt-utils
  - bash-completion
  - ca-certificates
  - curl
  - git
  - gnupg
  - htop
  - net-tools
  - openssh-server
  - python3
  - python3-pip
  - python3-venv
  - sudo
  - vim
  - wget

users:
  - name: cbalders
    groups: sudo
    shell: /bin/bash
    ssh_authorized_keys:
      - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINALaic1jpoP6t1urbZqJLI1eU5NeTVD9k8AAMAvOvvk OfficeMini"
      - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGzTHdCiQjhIHsGB8oMpyKtr9TZXrXeIRKwcwe698zMW Generated By Termius"

# Alloy — ships journald to Loki on observe.lan.balders.ca. This LXC has no
# docker, so Alloy runs as a bare-metal systemd service installed from the
# Grafana apt repo.
alloy_host_label: meridian
alloy_loki_url: "http://observe.lan.balders.ca:3100/loki/api/v1/push"

# Track A canary: a `_canary` suffix on the job name lets the embedded exporter
# run in parallel with the existing node_exporter scrape. Once parity is
# verified, the job is flipped to node_lxc and node_exporter is decommissioned.
# NOTE(review): the value below is already node_lxc (no suffix), so the flip
# appears to have happened — confirm, then retire this canary note.
alloy_prom_job: node_lxc
alloy_prom_group: lxc
alloy_prom_hostname: meridian

# Meridian
meridian_user: meridian
meridian_home: /opt/meridian
meridian_port: 3456
meridian_host: "0.0.0.0"
meridian_idle_timeout_seconds: 300
meridian_node_major: 22

# LiteLLM — OpenAI-compatible proxy in front of Meridian.
litellm_user: litellm
litellm_home: /opt/litellm
litellm_venv: /opt/litellm/venv
litellm_port: 4000
litellm_host: "0.0.0.0"
litellm_package_spec: "litellm[proxy]==1.55.10"

# Anvil — local Ollama backend (Strix Halo iGPU, gfx1151). Addressing it by IP
# rather than DNS keeps the inference path off the resolver. Ollama has no
# auth, so the model entries below carry a placeholder api_key.
anvil_ollama_base: "http://192.168.1.150:11434"

# Model list. Every name carries a prefix that states its backend up front, so
# the Open WebUI picker is self-documenting (re-indexed 2026-06-05):
#   local_*  → Anvil/Ollama, local GPU, no metered cost
#   proxy_*  → Claude via Meridian's Max-OAuth bridge, no per-token cost
#   direct_* → a real provider API (OpenAI/Gemini), METERED billing, key from
#              Infisical /meridian
#
# The prefixes replace the bare-name scheme. Shadow aliases (bare
# claude-*/gpt-*) were already removed 2026-05-29 because their dual meaning
# (was-Claude, now-local) was a foot-gun; with a prefix, a name can no longer
# lie about where it routes. See [[litellm-openai-alias-shadowing]].
#
# Trade-off: clients that hard-code a bare model name in a probe (the
# paperless-ai wizard hits gpt-4o-mini) get a 400 until they are pointed at the
# prefixed name — or, for OpenAI specifically, at direct_gpt-4o-mini once a key
# is in Infisical.
#
# Single GPU with OLLAMA_NUM_PARALLEL=1: concurrent/mixed requests queue, and
# the 70B+8B can't both stay resident in the ~62 GB budget (expect model swaps).
# NOTE(review): a 72B model is also listed below — presumably the same
# residency limit applies to it; confirm.
litellm_models:
  # ---- local_* → Anvil/Ollama, local GPU, no cost ----
  - name: local_qwen2.5-72b
    backend: ollama_chat/qwen2.5:72b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: local_llama-3.3-70b
    backend: ollama_chat/llama3.3:70b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: local_llama-3.1-8b
    backend: ollama_chat/llama3.1:8b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  # Catalog entry only: embeddings don't actually route through LiteLLM
  # 1.55.10 (broken async ollama handler → 500), so consumers hit Anvil direct.
  - name: local_nomic-embed-text
    backend: ollama/nomic-embed-text
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth

  # ---- proxy_* → Claude via Meridian/Max, no per-token cost ----
  # api_base pins Meridian's local Anthropic endpoint (the OAuth Max sub pays).
  # Reach Claude by name when local can't do the job (vision, hard reasoning).
  - name: proxy_claude-haiku-4-5
    backend: anthropic/claude-haiku-4-5
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  - name: proxy_claude-sonnet-4-6
    backend: anthropic/claude-sonnet-4-6
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  - name: proxy_claude-opus-4-7
    backend: anthropic/claude-opus-4-7
    api_base: "http://127.0.0.1:{{ meridian_port }}"

  # ---- direct_* → public provider APIs, METERED, keys from Infisical /meridian ----
  # LIVE 2026-06-05 (OpenAI + Gemini keys verified end-to-end). Keys come from
  # Infisical /meridian via deploy.sh; litellm.env carries a placeholder so the
  # proxy still boots if a key is absent (that provider's models then 401).
  # No api_base is set, so LiteLLM uses each provider's default endpoint;
  # routing is by the backend's provider prefix (openai/, gemini/).
  #
  # Gemini entries must use CURRENT model IDs: LiteLLM 1.55.10 rewrites
  # gemini-2.0-flash to a retired experimental name (404), and gemini-1.5-pro
  # is itself retired. 2.5-flash / 2.5-pro pass through clean.
  #
  # o-series (o3-mini) is intentionally absent: it needs max_completion_tokens,
  # which 1.55.10 won't translate from the max_tokens that clients (Open WebUI)
  # send → 400. Re-add after a version bump.
  - name: direct_gpt-4o
    backend: openai/gpt-4o
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_gpt-4o-mini
    backend: openai/gpt-4o-mini
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_gemini-2.5-flash
    backend: gemini/gemini-2.5-flash
    api_key: os.environ/GEMINI_API_KEY
  - name: direct_gemini-2.5-pro
    backend: gemini/gemini-2.5-pro
    api_key: os.environ/GEMINI_API_KEY

# Infisical (secrets source). The pre_tasks in site.yml log into this shared
# machine identity, read /meridian, and set_fact the keys below — so BOTH
# Semaphore and local deploys get real secrets with no per-runner env wiring.
# The client secret lives in vars/vault.yml (ansible-vault, shared across LXC
# repos).
infisical_url: "https://secrets.balders.ca"
infisical_project_id: "50062d7c-06ff-4d5c-8ca3-6c0cdba9f270"
infisical_client_id: "828d2cc8-eb25-4b1e-a711-c9a4b1580106"
infisical_client_secret: "{{ vault_infisical_client_secret }}"

# These three are OVERRIDDEN by site.yml set_fact from the Infisical read of
# /meridian (vault_litellm_master_key / vault_openai_api_key /
# vault_gemini_api_key). The env-lookup defaults here are only a manual
# fallback for `-e`/ad-hoc runs; the normal path is the in-playbook Infisical
# pull. litellm.env writes a placeholder when a provider key is empty so the
# proxy still boots (that provider's direct_* models then 401 until a real key
# lands).
# NOTE(review): the master key falls back to the literal CHANGE_ME while
# litellm_host is 0.0.0.0 — confirm no deploy path can ship that placeholder.
litellm_master_key: "{{ lookup('env', 'LITELLM_MASTER_KEY') | default('CHANGE_ME', true) }}"
litellm_openai_api_key: "{{ lookup('env', 'OPENAI_API_KEY') | default('', true) }}"
litellm_gemini_api_key: "{{ lookup('env', 'GEMINI_API_KEY') | default('', true) }}"