02c2f4ee2d
Replaces the deploy.sh env-var hand-off (which only worked locally and would have made Semaphore write placeholder keys, regressing direct_*) with the standard in-playbook Infisical pull used by dawarich/mcp/cloudflared: - site.yml pre_tasks: login via the shared 828d2cc8 machine identity, read /meridian as_dict, set_fact litellm_master_key + the openai/gemini keys. - vars/vault.yml: shared ansible-vault client secret (copied from sibling repo). - requirements.yml: + infisical.vault. - deploy.sh: drop the infisical-CLI pulls; add --ask-vault-pass. Same secret path for Semaphore and local — no per-template env wiring. Deploy prereqs: attach the ansible-vault password to Semaphore template 27, and ensure the 828d2cc8 identity can read /meridian (env prod). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
152 lines
6.5 KiB
YAML
152 lines
6.5 KiB
YAML
---
# Host timezone, given as an IANA tz database name.
timezone: America/Toronto

# Baseline packages for this host, kept in alphabetical order. The task that
# installs them lives outside this file.
packages:
  - apt-utils
  - bash-completion
  - ca-certificates
  - curl
  - git
  - gnupg
  - htop
  - net-tools
  - openssh-server
  - python3
  - python3-pip
  - python3-venv
  - sudo
  - vim
  - wget

# Accounts for this host; one mapping per user. The role that consumes this
# list lives outside this file.
users:
  - name: cbalders
    groups: sudo
    shell: /bin/bash

# Public keys allowed to log in over SSH; the trailing word(s) of each entry
# are the key's comment (where it was generated).
# NOTE(review): treated as a top-level key because a blank line separates it
# from `users` — confirm it is not meant to be nested under the user entry.
ssh_authorized_keys:
  - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINALaic1jpoP6t1urbZqJLI1eU5NeTVD9k8AAMAvOvvk OfficeMini"
  - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGzTHdCiQjhIHsGB8oMpyKtr9TZXrXeIRKwcwe698zMW Generated By Termius"

# Alloy ships journald to Loki on observe.lan.balders.ca. No Docker on this
# LXC — Alloy runs as a bare-metal systemd service installed from the Grafana
# apt repo.
alloy_host_label: meridian
alloy_loki_url: http://observe.lan.balders.ca:3100/loki/api/v1/push
# Track A canary: a `_canary`-suffixed job name let the embedded exporter run
# in parallel with the existing node_exporter scrape until parity was
# verified. The value below is already the final, post-canary name (node_lxc).
# TODO(review): confirm node_exporter has been decommissioned; otherwise both
# scrapes report under the same job.
alloy_prom_job: node_lxc
alloy_prom_group: lxc
alloy_prom_hostname: meridian

# Meridian — service account, install location and listener settings.
meridian_user: meridian
meridian_home: /opt/meridian
meridian_port: 3456
# Presumably the listen address; 0.0.0.0 is the all-interfaces wildcard.
# NOTE(review): LiteLLM on this host reaches Meridian via 127.0.0.1 (see the
# proxy_* entries in litellm_models). If no other client calls Meridian
# directly, a loopback bind would be tighter — confirm before changing.
meridian_host: "0.0.0.0"
meridian_idle_timeout_seconds: 300
# Presumably the Node.js major version Meridian runs on — confirm in the role.
meridian_node_major: 22

# LiteLLM — OpenAI-compatible proxy in front of Meridian.
litellm_user: litellm
litellm_home: /opt/litellm
litellm_venv: /opt/litellm/venv
litellm_port: 4000
litellm_host: "0.0.0.0"
# Exact version pin. Several workarounds noted in the litellm_models comments
# are specific to 1.55.10, so re-check them whenever this is bumped.
litellm_package_spec: "litellm[proxy]==1.55.10"
# Anvil — local Ollama backend (Strix Halo iGPU, gfx1151). An IP rather than a
# DNS name keeps the inference path off the resolver. Ollama has no auth, so
# the models below pass a placeholder api_key.
anvil_ollama_base: "http://192.168.1.150:11434"

# Model list — a prefix states each model's backend up front, so the Open WebUI
# picker is self-documenting (re-indexed 2026-06-05):
#   local_*  → Anvil/Ollama, local GPU, no metered cost
#   proxy_*  → Claude via Meridian's Max-OAuth bridge, no per-token cost
#   direct_* → a real provider API (OpenAI/Gemini), METERED billing, key from
#              Infisical /meridian
#
# This replaces the bare-name scheme. Shadow aliases (bare claude-*/gpt-*) were
# already removed 2026-05-29 because the dual meaning (was-Claude, now-local) was
# a foot-gun; the prefix makes that impossible — a name can no longer lie about
# where it routes. See [[litellm-openai-alias-shadowing]].
#
# Trade-off: clients that hard-code a bare model name in a probe (the
# paperless-ai wizard hits gpt-4o-mini) get a 400 until they are pointed at a
# prefixed name — for OpenAI models that is direct_gpt-4o-mini, which needs the
# OpenAI key to be present in Infisical.
#
# Single GPU, OLLAMA_NUM_PARALLEL=1 — concurrent/mixed requests queue and the
# 70B+8B can't both stay resident in the ~62 GB budget (expect model swaps).
litellm_models:
  # ---- local_* → Anvil/Ollama, local GPU, no cost ----
  - name: local_qwen2.5-72b
    backend: ollama_chat/qwen2.5:72b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: local_llama-3.3-70b
    backend: ollama_chat/llama3.3:70b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  - name: local_llama-3.1-8b
    backend: ollama_chat/llama3.1:8b
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  # Embeddings don't actually route through LiteLLM 1.55.10 (broken async ollama
  # handler → 500); consumers hit Anvil direct. Kept as a catalog entry only.
  - name: local_nomic-embed-text
    backend: ollama/nomic-embed-text
    api_base: "{{ anvil_ollama_base }}"
    api_key: ollama-no-auth
  # ---- proxy_* → Claude via Meridian/Max, no per-token cost ----
  # api_base pins Meridian's local Anthropic endpoint (the OAuth Max sub pays).
  # Reach Claude by name when local can't do the job (vision, hard reasoning).
  - name: proxy_claude-haiku-4-5
    backend: anthropic/claude-haiku-4-5
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  - name: proxy_claude-sonnet-4-6
    backend: anthropic/claude-sonnet-4-6
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  - name: proxy_claude-opus-4-7
    backend: anthropic/claude-opus-4-7
    api_base: "http://127.0.0.1:{{ meridian_port }}"
  # ---- direct_* → public provider APIs, METERED, keys from Infisical /meridian ----
  # LIVE 2026-06-05 (OpenAI + Gemini keys verified end-to-end). Keys come from
  # Infisical /meridian via site.yml's pre_tasks (the in-playbook pull, not
  # deploy.sh); litellm.env carries a placeholder so the proxy still boots if a
  # key is absent (that provider's models then 401).
  # No api_base → LiteLLM uses each provider's default endpoint; routing is by
  # the backend's provider prefix (openai/, gemini/).
  #
  # Gemini must be CURRENT model IDs: LiteLLM 1.55.10 rewrites gemini-2.0-flash
  # to a retired experimental name (404) and gemini-1.5-pro is itself retired.
  # 2.5-flash / 2.5-pro pass through clean. o-series (o3-mini) is intentionally
  # absent: it needs max_completion_tokens, which 1.55.10 won't translate from
  # the max_tokens that clients (Open WebUI) send → 400. Re-add after a bump.
  - name: direct_gpt-4o
    backend: openai/gpt-4o
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_gpt-4o-mini
    backend: openai/gpt-4o-mini
    api_key: os.environ/OPENAI_API_KEY
  - name: direct_gemini-2.5-flash
    backend: gemini/gemini-2.5-flash
    api_key: os.environ/GEMINI_API_KEY
  - name: direct_gemini-2.5-pro
    backend: gemini/gemini-2.5-pro
    api_key: os.environ/GEMINI_API_KEY

# Infisical (secrets source). The pre_tasks in site.yml log in with this shared
# machine identity, read /meridian, and set_fact the LiteLLM keys defined at
# the end of this file — so BOTH Semaphore and local deploys get real secrets
# with no per-runner env wiring. The URL and the two IDs are identifiers, not
# secrets; the client secret itself comes from vars/vault.yml (ansible-vault,
# shared across LXC repos).
infisical_url: "https://secrets.balders.ca"
infisical_project_id: "50062d7c-06ff-4d5c-8ca3-6c0cdba9f270"
infisical_client_id: "828d2cc8-eb25-4b1e-a711-c9a4b1580106"
infisical_client_secret: "{{ vault_infisical_client_secret }}"

# These three are OVERRIDDEN by site.yml set_fact from the Infisical read of
# /meridian (vault_litellm_master_key / vault_openai_api_key /
# vault_gemini_api_key). The env lookups below are only a manual fallback for
# `-e`/ad-hoc runs; the normal path is the in-playbook Infisical pull.
# litellm.env writes a placeholder when a provider key is empty so the proxy
# still boots (that provider's direct_* models then 401 until a real key lands).
#
# default(..., true) also replaces an EMPTY env value, not just an unset one.
# NOTE(review): on the fallback path a missing LITELLM_MASTER_KEY yields the
# well-known 'CHANGE_ME' while litellm_host is 0.0.0.0 — consider failing the
# run instead; first confirm nothing outside this file keys off that sentinel.
litellm_master_key: >-
  {{ lookup('env', 'LITELLM_MASTER_KEY') | default('CHANGE_ME', true) }}
litellm_openai_api_key: >-
  {{ lookup('env', 'OPENAI_API_KEY') | default('', true) }}
litellm_gemini_api_key: >-
  {{ lookup('env', 'GEMINI_API_KEY') | default('', true) }}