From a6b26c500ff3a291a6b784046e4a62955fab8aec Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 19 May 2026 11:23:52 -0400 Subject: [PATCH] =?UTF-8?q?litellm:=20add=20OpenAI=E2=86=92Meridian=20shim?= =?UTF-8?q?=20role=20(venv=20+=20systemd,=20port=204000)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LiteLLM sits in front of Meridian for clients that can't talk Anthropic's /v1/messages format (Pulse OpenAI provider, paperless-ai, etc.). Routes OpenAI-shaped requests to localhost:3456 (Meridian) which forwards to the Max sub. - New roles/litellm/ — Python venv, pip install litellm[proxy], systemd - vars/main.yml — model map (haiku/sonnet/opus) + LITELLM_MASTER_KEY env lookup - site.yml — adds litellm role + sanity-check assert - deploy.sh — pulls LITELLM_MASTER_KEY from Infisical (/meridian/) on the controller and exports it for the playbook - New Infisical secret /meridian/vault_litellm_master_key Smoke: Pulse → LiteLLM /v1/chat/completions → Meridian /v1/messages → Max sub returns "pong" through both the LiteLLM master key auth and the Claude Code SDK OAuth. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy.sh | 33 ++++- roles/litellm/handlers/main.yml | 9 ++ roles/litellm/tasks/main.yml | 114 ++++++++++++++++++ .../litellm/templates/litellm-config.yaml.j2 | 21 ++++ roles/litellm/templates/litellm.env.j2 | 2 + roles/litellm/templates/litellm.service.j2 | 21 ++++ site.yml | 28 +++-- vars/main.yml | 23 ++++ 8 files changed, 238 insertions(+), 13 deletions(-) create mode 100644 roles/litellm/handlers/main.yml create mode 100644 roles/litellm/tasks/main.yml create mode 100644 roles/litellm/templates/litellm-config.yaml.j2 create mode 100644 roles/litellm/templates/litellm.env.j2 create mode 100644 roles/litellm/templates/litellm.service.j2 diff --git a/deploy.sh b/deploy.sh index 45ba8ac..887da31 100755 --- a/deploy.sh +++ b/deploy.sh @@ -5,13 +5,31 @@ # Usage: # ./deploy.sh # full deploy # ./deploy.sh --tags meridian # meridian role only +# ./deploy.sh --tags litellm # litellm role only # ./deploy.sh -v # verbose output +# +# Secrets: +# LITELLM_MASTER_KEY is pulled from Infisical (/meridian/vault_litellm_master_key) +# on the controller and exported into the env for the playbook to read. +# For Semaphore deploys, set LITELLM_MASTER_KEY as an env var on the template. # ============================================================================== set -euo pipefail HOST_IP="$(grep -E '^[0-9]' inventory.ini | head -1 | awk '{print $1}')" HOST_USER="$(grep -o 'ansible_user=[^ ]*' inventory.ini | head -1 | cut -d= -f2)" +if [[ -z "${LITELLM_MASTER_KEY:-}" ]]; then + echo "==> Pulling LITELLM_MASTER_KEY from Infisical ..." + LITELLM_MASTER_KEY="$(infisical secrets get vault_litellm_master_key \ + --projectId 50062d7c-06ff-4d5c-8ca3-6c0cdba9f270 \ + --env prod --path /meridian --plain 2>/dev/null || true)" # '|| true': with set -e a failed fetch would exit here, before the error below + if [[ -z "$LITELLM_MASTER_KEY" ]]; then + echo " ERROR: couldn't fetch LITELLM_MASTER_KEY. Is the infisical CLI installed and logged in?" 
>&2 + exit 1 + fi + export LITELLM_MASTER_KEY +fi + echo "==> Checking connectivity to ${HOST_USER}@${HOST_IP} ..." if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "${HOST_USER}@${HOST_IP}" true 2>/dev/null; then echo " Cannot SSH to ${HOST_IP} — refreshing host key ..." @@ -29,12 +47,15 @@ echo "==> Verifying ..." ssh "${HOST_USER}@${HOST_IP}" bash -s <<'VERIFY' echo "Node: $(node --version 2>/dev/null || echo missing)" echo "Meridian binary: $(which meridian 2>/dev/null || echo missing)" -echo "Service:" -systemctl is-enabled meridian 2>&1 -systemctl is-active meridian 2>&1 -if systemctl is-active --quiet meridian; then - curl -sf --max-time 3 http://127.0.0.1:3456/v1/messages -X POST -H 'Content-Type: application/json' -d '{}' >/dev/null 2>&1 && echo "API reachable on :3456" || echo "API on :3456 not responding (expected if OAuth creds missing)" -fi +echo "Services:" +for svc in meridian litellm; do + printf " %-10s enabled=%s active=%s\n" "$svc" "$(systemctl is-enabled "$svc" 2>/dev/null)" "$(systemctl is-active "$svc" 2>/dev/null)" +done +echo "Endpoints:" +curl -sf --max-time 3 http://127.0.0.1:3456/v1/messages -X POST -H 'Content-Type: application/json' -d '{}' >/dev/null 2>&1 \ + && echo " meridian :3456 reachable" || echo " meridian :3456 not responding" +curl -sf --max-time 3 http://127.0.0.1:4000/health/liveliness >/dev/null 2>&1 \ + && echo " litellm :4000 healthy" || echo " litellm :4000 not responding" VERIFY echo "==> Done." 
diff --git a/roles/litellm/handlers/main.yml b/roles/litellm/handlers/main.yml new file mode 100644 index 0000000..2528769 --- /dev/null +++ b/roles/litellm/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: reload systemd + systemd: + daemon_reload: true + +- name: restart litellm + systemd: + name: litellm + state: restarted diff --git a/roles/litellm/tasks/main.yml b/roles/litellm/tasks/main.yml new file mode 100644 index 0000000..62c8ae8 --- /dev/null +++ b/roles/litellm/tasks/main.yml @@ -0,0 +1,114 @@ +--- +# LiteLLM — OpenAI-compatible proxy that fronts Meridian's Anthropic +# /v1/messages endpoint. Lets clients that can't speak the Anthropic format (Pulse's +# OpenAI provider, paperless-ai, etc.) reach the Max subscription via Meridian. + +- name: Ensure python3-venv is installed (LiteLLM runs in a venv) + apt: + name: + - python3-venv + - python3-pip + state: present + +- name: Ensure litellm system user + user: + name: "{{ litellm_user }}" + system: true + home: "{{ litellm_home }}" + shell: /usr/sbin/nologin + create_home: true + state: present + +- name: Ensure litellm home perms + file: + path: "{{ litellm_home }}" + state: directory + owner: "{{ litellm_user }}" + group: "{{ litellm_user }}" + mode: '0755' + +# Pip install runs as root (LXC filesystem doesn't support the ACL flip +# Ansible uses for become_user). Venv contents end up root-owned, which is +# fine — systemd runs the proxy as the litellm user and only needs read+exec. 
+ +- name: Create litellm venv + command: python3 -m venv {{ litellm_venv }} + args: + creates: "{{ litellm_venv }}/bin/python" + +- name: Upgrade pip + wheel + setuptools in venv + pip: + name: + - pip + - wheel + - setuptools + state: latest + virtualenv: "{{ litellm_venv }}" + virtualenv_command: python3 -m venv + +- name: Install LiteLLM into venv + pip: + name: "{{ litellm_package_spec }}" + virtualenv: "{{ litellm_venv }}" + virtualenv_command: python3 -m venv + notify: restart litellm + +- name: Resolve litellm binary + stat: + path: "{{ litellm_venv }}/bin/litellm" + register: litellm_bin + +- name: Fail if litellm binary missing + fail: + msg: "litellm not installed at {{ litellm_venv }}/bin/litellm" + when: not litellm_bin.stat.exists + +- name: Drop LiteLLM config + template: + src: litellm-config.yaml.j2 + dest: "{{ litellm_home }}/config.yaml" + owner: "{{ litellm_user }}" + group: "{{ litellm_user }}" + mode: '0640' + notify: restart litellm + +- name: Drop systemd environment file (master key) + template: + src: litellm.env.j2 + dest: "{{ litellm_home }}/litellm.env" + owner: "{{ litellm_user }}" + group: "{{ litellm_user }}" + mode: '0600' + notify: restart litellm + no_log: true + +- name: Deploy litellm systemd unit + template: + src: litellm.service.j2 + dest: /etc/systemd/system/litellm.service + owner: root + group: root + mode: '0644' + notify: + - reload systemd + - restart litellm + +- name: Flush handlers (reload systemd before enable) + meta: flush_handlers + +- name: Enable + start litellm + systemd: + name: litellm + enabled: true + state: started + daemon_reload: true + +- name: Wait for LiteLLM /health + uri: + url: "http://127.0.0.1:{{ litellm_port }}/health/liveliness" + status_code: 200 + register: litellm_health + until: litellm_health.status is defined and litellm_health.status == 200 + retries: 20 + delay: 3 + failed_when: false # NOTE(review): confirm intent — should a proxy that never turns healthy fail the play? diff --git a/roles/litellm/templates/litellm-config.yaml.j2 
b/roles/litellm/templates/litellm-config.yaml.j2 new file mode 100644 index 0000000..91e3e60 --- /dev/null +++ b/roles/litellm/templates/litellm-config.yaml.j2 @@ -0,0 +1,21 @@ +# {{ ansible_managed }} +# +# LiteLLM proxy config. Routes OpenAI-shaped requests to Meridian's +# /v1/messages (Anthropic format). Meridian (same host, port meridian_port) ignores the +# upstream API key, so api_key below is only a placeholder. + +model_list: +{% for m in litellm_models %} + - model_name: {{ m.name }} + litellm_params: + model: {{ m.backend }} + api_base: http://127.0.0.1:{{ meridian_port }} + api_key: placeholder-meridian-ignores-this +{% endfor %} + +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY + +litellm_settings: + drop_params: true # tolerate clients sending unsupported params + set_verbose: false diff --git a/roles/litellm/templates/litellm.env.j2 b/roles/litellm/templates/litellm.env.j2 new file mode 100644 index 0000000..b18456e --- /dev/null +++ b/roles/litellm/templates/litellm.env.j2 @@ -0,0 +1,2 @@ +# {{ ansible_managed }} +LITELLM_MASTER_KEY={{ litellm_master_key }} diff --git a/roles/litellm/templates/litellm.service.j2 b/roles/litellm/templates/litellm.service.j2 new file mode 100644 index 0000000..3e08f40 --- /dev/null +++ b/roles/litellm/templates/litellm.service.j2 @@ -0,0 +1,21 @@ +[Unit] +Description=LiteLLM (OpenAI → Meridian shim) +Documentation=https://docs.litellm.ai/ +After=network-online.target meridian.service +Wants=network-online.target +Requires=meridian.service + +[Service] +Type=simple +User={{ litellm_user }} +Group={{ litellm_user }} +WorkingDirectory={{ litellm_home }} +EnvironmentFile={{ litellm_home }}/litellm.env +ExecStart={{ litellm_venv }}/bin/litellm --config {{ litellm_home }}/config.yaml --host {{ litellm_host }} --port {{ litellm_port }} --num_workers 1 +Restart=on-failure +RestartSec=10 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/site.yml b/site.yml index 208b959..737666c 100644 
--- a/site.yml +++ b/site.yml @@ -7,14 +7,19 @@ # integration (and any Anthropic-compatible client) can use the Max subscription # instead of paid API tokens. # -# Security: Meridian has no auth layer of its own. LAN-only reachability is -# the entire security model — no Caddy public vhost, no Cloudflare tunnel. -# OAuth bootstrap is manual: `claude login` on Chuck's Mac, scp ~/.claude/ to -# /opt/meridian/.claude/ on the LXC, then `systemctl restart meridian`. +# Security: +# - Meridian itself has no auth layer; LAN-only reachability is the security model. +# - LiteLLM sits in front for clients that speak OpenAI (e.g. Pulse). It does +# require a master key (Infisical /meridian/vault_litellm_master_key). +# +# OAuth bootstrap is one-time, paste-code flow run directly on the LXC +# (see homelab-docs services/meridian.md). Don't scp ~/.claude/ from Mac — +# Mac stores the refresh token in Keychain, scp can't see it. # # Usage: -# ./deploy.sh # full deploy +# ./deploy.sh # full deploy (pulls LITELLM_MASTER_KEY from Infisical) # ./deploy.sh --tags meridian # meridian role only +# ./deploy.sh --tags litellm # litellm role only # ============================================================================== - name: Deploy Meridian LXC @@ -28,6 +33,16 @@ debug: msg: "===== {{ ansible_play_name }} → {{ inventory_hostname }} ({{ ansible_host | default(inventory_hostname) }}) =====" + - name: Sanity-check LITELLM_MASTER_KEY is set + tags: ['litellm'] # untagged tasks are skipped under --tags; tag this so --tags litellm runs cannot bypass the check + assert: + that: litellm_master_key is defined and litellm_master_key != 'CHANGE_ME' and (litellm_master_key | length) >= 24 + fail_msg: | + LITELLM_MASTER_KEY is unset, still CHANGE_ME, or shorter than 24 characters. 
+ Run via ./deploy.sh (which pulls it from Infisical), or pass: + -e litellm_master_key="$(infisical secrets get vault_litellm_master_key --env prod --path /meridian --plain)" + roles: - - meridian - - node_exporter + - { role: meridian, tags: ['meridian'] } + - { role: litellm, tags: ['litellm'] } + - { role: node_exporter, tags: ['node_exporter'] } diff --git a/vars/main.yml b/vars/main.yml index 297945d..f981a34 100644 --- a/vars/main.yml +++ b/vars/main.yml @@ -13,6 +13,7 @@ packages: - openssh-server - python3 - python3-pip + - python3-venv - sudo - vim - wget @@ -33,3 +34,25 @@ meridian_port: 3456 meridian_host: "0.0.0.0" meridian_idle_timeout_seconds: 300 meridian_node_major: 22 + +# LiteLLM — OpenAI-compatible proxy in front of Meridian +litellm_user: litellm +litellm_home: /opt/litellm +litellm_venv: /opt/litellm/venv +litellm_port: 4000 +litellm_host: "0.0.0.0" +litellm_package_spec: "litellm[proxy]==1.55.10" +# Models map onto Meridian's pinned Anthropic-shape backend on 127.0.0.1:3456. +# Add Sonnet/Opus aliases too so clients can pick cost vs quality without +# touching this config. +litellm_models: + - name: claude-haiku-4-5 + backend: anthropic/claude-haiku-4-5 + - name: claude-sonnet-4-6 + backend: anthropic/claude-sonnet-4-6 + - name: claude-opus-4-7 + backend: anthropic/claude-opus-4-7 +# Master key is required by LiteLLM. deploy.sh pulls it from Infisical +# (/meridian/vault_litellm_master_key) and exports LITELLM_MASTER_KEY, which the +# lookup below reads. CHANGE_ME is only a placeholder; the assert in site.yml rejects it. +litellm_master_key: "{{ lookup('env', 'LITELLM_MASTER_KEY') | default('CHANGE_ME', true) }}"