litellm: add OpenAI→Meridian shim role (venv + systemd, port 4000)

LiteLLM sits in front of Meridian for clients that can't talk Anthropic's /v1/messages format (Pulse OpenAI provider, paperless-ai, etc.). It routes OpenAI-shaped requests to localhost:3456 (Meridian), which forwards them to the Max sub.

- New roles/litellm/ — Python venv, pip install litellm[proxy], systemd
- vars/main.yml — model map (haiku/sonnet/opus) + LITELLM_MASTER_KEY env lookup
- site.yml — adds litellm role + sanity-check assert
- deploy.sh — pulls LITELLM_MASTER_KEY from Infisical (/meridian/) on the controller and exports it for the playbook
- New Infisical secret /meridian/vault_litellm_master_key

Smoke: Pulse → LiteLLM /v1/chat/completions → Meridian /v1/messages → Max sub returns "pong" through both the LiteLLM master key auth and the Claude Code SDK OAuth.

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
2026-05-19 11:23:52 -04:00
parent 4ab85f0227
commit a6b26c500f
8 changed files with 238 additions and 13 deletions
@@ -5,13 +5,31 @@
 # Usage:
 #   ./deploy.sh                  # full deploy
 #   ./deploy.sh --tags meridian  # meridian role only
+#   ./deploy.sh --tags litellm   # litellm role only
 #   ./deploy.sh -v               # verbose output
+#
+# Secrets:
+#   LITELLM_MASTER_KEY is pulled from Infisical (/meridian/vault_litellm_master_key)
+#   on the controller and exported into the env for the playbook to read.
+#   For Semaphore deploys, set LITELLM_MASTER_KEY as an env var on the template.
 # ==============================================================================
 set -euo pipefail

 HOST_IP="$(grep -E '^[0-9]' inventory.ini | head -1 | awk '{print $1}')"
 HOST_USER="$(grep -o 'ansible_user=[^ ]*' inventory.ini | head -1 | cut -d= -f2)"

+if [[ -z "${LITELLM_MASTER_KEY:-}" ]]; then
+    echo "==> Pulling LITELLM_MASTER_KEY from Infisical ..."
+    LITELLM_MASTER_KEY="$(infisical secrets get vault_litellm_master_key \
+        --projectId 50062d7c-06ff-4d5c-8ca3-6c0cdba9f270 \
+        --env prod --path /meridian --plain 2>/dev/null)"
+    if [[ -z "$LITELLM_MASTER_KEY" ]]; then
+        echo "    ERROR: couldn't fetch LITELLM_MASTER_KEY. Is the infisical CLI logged in?" >&2
+        exit 1
+    fi
+    export LITELLM_MASTER_KEY
+fi
+
 echo "==> Checking connectivity to ${HOST_USER}@${HOST_IP} ..."
 if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "${HOST_USER}@${HOST_IP}" true 2>/dev/null; then
    echo "    Cannot SSH to ${HOST_IP} — refreshing host key ..."
@@ -29,12 +47,15 @@ echo "==> Verifying ..."
 ssh "${HOST_USER}@${HOST_IP}" bash -s <<'VERIFY'
 echo "Node: $(node --version 2>/dev/null || echo missing)"
 echo "Meridian binary: $(which meridian 2>/dev/null || echo missing)"
-echo "Service:"
-systemctl is-enabled meridian 2>&1
-systemctl is-active meridian 2>&1
-if systemctl is-active --quiet meridian; then
-  curl -sf --max-time 3 http://127.0.0.1:3456/v1/messages -X POST -H 'Content-Type: application/json' -d '{}' >/dev/null 2>&1 && echo "API reachable on :3456" || echo "API on :3456 not responding (expected if OAuth creds missing)"
-fi
+# Report systemd state for both services, then probe each HTTP endpoint.
+echo "Services:"
+for svc in meridian litellm; do
+  printf "  %-10s enabled=%s active=%s\n" "$svc" "$(systemctl is-enabled "$svc" 2>/dev/null)" "$(systemctl is-active "$svc" 2>/dev/null)"
+done
+echo "Endpoints:"
+# No -f on the Meridian probe: this is a reachability check, and -f makes curl
+# exit non-zero on any HTTP 4xx/5xx, which would report a server that answers
+# the empty '{}' request with an error status as "not responding".
+curl -s --max-time 3 http://127.0.0.1:3456/v1/messages -X POST -H 'Content-Type: application/json' -d '{}' >/dev/null 2>&1 \
+  && echo "  meridian :3456 reachable" || echo "  meridian :3456 not responding"
+# The LiteLLM health probe keeps -f: only a successful status counts as healthy.
+curl -sf --max-time 3 http://127.0.0.1:4000/health/liveliness >/dev/null 2>&1 \
+  && echo "  litellm  :4000 healthy" || echo "  litellm  :4000 not responding"
 VERIFY

 echo "==> Done."
@@ -0,0 +1,9 @@
+---
+# Handlers for the litellm role. "reload systemd" is listed first so that,
+# when both are notified, the unit file is re-read before the restart.
+- name: reload systemd
+  ansible.builtin.systemd:
+    daemon_reload: true
+
+- name: restart litellm
+  ansible.builtin.systemd:
+    state: restarted
+    name: litellm
@@ -0,0 +1,114 @@
+---
+# LiteLLM role: installs an OpenAI-compatible proxy in front of Meridian's
+# Anthropic /v1/messages endpoint, so clients that only speak the OpenAI
+# format (Pulse's OpenAI provider, paperless-ai, etc.) can reach the Max sub
+# via Meridian.
+
+# OS packages needed to build the virtualenv.
+- name: Ensure python3-venv is installed (LiteLLM runs in a venv)
+  ansible.builtin.apt:
+    state: present
+    name:
+      - python3-venv
+      - python3-pip
+
+# Dedicated system account with no login shell; its home holds the venv,
+# config and environment file.
+- name: Ensure litellm system user
+  ansible.builtin.user:
+    name: "{{ litellm_user }}"
+    state: present
+    system: true
+    shell: /usr/sbin/nologin
+    home: "{{ litellm_home }}"
+    create_home: true
+
+- name: Ensure litellm home perms
+  ansible.builtin.file:
+    state: directory
+    path: "{{ litellm_home }}"
+    mode: '0755'
+    owner: "{{ litellm_user }}"
+    group: "{{ litellm_user }}"
+
+# The venv is built and populated as root because the LXC filesystem doesn't
+# support the ACL flip Ansible uses for become_user. Root-owned venv contents
+# are fine: systemd runs the proxy as the litellm user, which only needs
+# read+exec.
+
+# `creates` makes this a no-op once the venv's python exists.
+- name: Create litellm venv
+  ansible.builtin.command:
+    cmd: python3 -m venv {{ litellm_venv }}
+    creates: "{{ litellm_venv }}/bin/python"
+
+- name: Upgrade pip + wheel + setuptools in venv
+  ansible.builtin.pip:
+    virtualenv: "{{ litellm_venv }}"
+    virtualenv_command: python3 -m venv
+    state: latest
+    name:
+      - pip
+      - wheel
+      - setuptools
+
+# The package spec (including any version pin) comes from role variables.
+- name: Install LiteLLM into venv
+  ansible.builtin.pip:
+    virtualenv: "{{ litellm_venv }}"
+    virtualenv_command: python3 -m venv
+    name: "{{ litellm_package_spec }}"
+  notify: restart litellm
+
+# Guard: stop the play early if the install did not produce the CLI entry point.
+- name: Resolve litellm binary
+  ansible.builtin.stat:
+    path: "{{ litellm_venv }}/bin/litellm"
+  register: litellm_bin
+
+- name: Fail if litellm binary missing
+  when: not litellm_bin.stat.exists
+  ansible.builtin.fail:
+    msg: "litellm not installed at {{ litellm_venv }}/bin/litellm"
+
+# Proxy configuration (model list + settings), readable by the service user.
+- name: Drop LiteLLM config
+  ansible.builtin.template:
+    src: litellm-config.yaml.j2
+    dest: "{{ litellm_home }}/config.yaml"
+    mode: '0640'
+    owner: "{{ litellm_user }}"
+    group: "{{ litellm_user }}"
+  notify: restart litellm
+
+# Contains the master key: owner-only permissions, and no_log keeps the
+# rendered secret out of task output.
+- name: Drop systemd environment file (master key)
+  no_log: true
+  ansible.builtin.template:
+    src: litellm.env.j2
+    dest: "{{ litellm_home }}/litellm.env"
+    mode: '0600'
+    owner: "{{ litellm_user }}"
+    group: "{{ litellm_user }}"
+  notify: restart litellm
+
+# A changed unit file needs both a daemon reload and a service restart.
+- name: Deploy litellm systemd unit
+  ansible.builtin.template:
+    src: litellm.service.j2
+    dest: /etc/systemd/system/litellm.service
+    mode: '0644'
+    owner: root
+    group: root
+  notify:
+    - reload systemd
+    - restart litellm
+
+# Run pending handlers now so systemd has re-read the unit file before the
+# service is enabled below.
+- name: Flush handlers (reload systemd before enable)
+  ansible.builtin.meta: flush_handlers
+
+- name: Enable + start litellm
+  ansible.builtin.systemd:
+    name: litellm
+    daemon_reload: true
+    enabled: true
+    state: started
+
+# Polls the liveliness endpoint up to 20 times, 3 seconds apart.
+# NOTE(review): `failed_when: false` suggests this is meant to be best-effort;
+# confirm how it interacts with `until` once the retries are exhausted.
+- name: Wait for LiteLLM /health
+  ansible.builtin.uri:
+    url: "http://127.0.0.1:{{ litellm_port }}/health/liveliness"
+    status_code: 200
+  register: litellm_health
+  retries: 20
+  delay: 3
+  until: litellm_health.status is defined and litellm_health.status == 200
+  failed_when: false
@@ -0,0 +1,21 @@
+# {{ ansible_managed }}
+#
+# LiteLLM proxy config. Routes OpenAI-shaped requests to Meridian's
+# /v1/messages (Anthropic format). Meridian (same host) ignores the upstream
+# API key, so we pass a placeholder.
+#
+# Templated values are double-quoted so the rendered YAML always yields
+# strings, whatever the model names or backends look like.
+
+model_list:
+{% for m in litellm_models %}
+  - model_name: "{{ m.name }}"
+    litellm_params:
+      model: "{{ m.backend }}"
+      api_base: "http://127.0.0.1:{{ meridian_port }}"
+      api_key: placeholder-meridian-ignores-this
+{% endfor %}
+
+general_settings:
+  # Resolved by LiteLLM from the process environment at start-up.
+  master_key: os.environ/LITELLM_MASTER_KEY
+
+litellm_settings:
+  drop_params: true        # tolerate clients sending unsupported params
+  set_verbose: false
@@ -0,0 +1,2 @@
+# {{ ansible_managed }}
+#
+# Environment file for the litellm systemd unit (loaded via EnvironmentFile=).
+# Carries the master key that the LiteLLM config references as
+# os.environ/LITELLM_MASTER_KEY.
+LITELLM_MASTER_KEY={{ litellm_master_key }}
@@ -0,0 +1,21 @@
+[Unit]
+Description=LiteLLM (OpenAI → Meridian shim)
+Documentation=https://docs.litellm.ai/
+# Hard dependency on Meridian, ordered after it.
+Requires=meridian.service
+After=meridian.service
+# Soft dependency on the network being up, ordered after it.
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=simple
+# Run unprivileged as the dedicated service account.
+User={{ litellm_user }}
+Group={{ litellm_user }}
+WorkingDirectory={{ litellm_home }}
+# Supplies LITELLM_MASTER_KEY to the proxy process.
+EnvironmentFile={{ litellm_home }}/litellm.env
+ExecStart={{ litellm_venv }}/bin/litellm --config {{ litellm_home }}/config.yaml --host {{ litellm_host }} --port {{ litellm_port }} --num_workers 1
+# Restart only after a failure, waiting 10 seconds between attempts.
+Restart=on-failure
+RestartSec=10
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target
@@ -7,14 +7,19 @@
 # integration (and any Anthropic-compatible client) can use the Max subscription
 # instead of paid API tokens.
 #
-# Security: Meridian has no auth layer of its own. LAN-only reachability is
-# the entire security model — no Caddy public vhost, no Cloudflare tunnel.
-# OAuth bootstrap is manual: `claude login` on Chuck's Mac, scp ~/.claude/ to
-# /opt/meridian/.claude/ on the LXC, then `systemctl restart meridian`.
+# Security:
+# - Meridian itself has no auth layer; LAN-only reachability is the security model.
+# - LiteLLM sits in front for clients that speak OpenAI (e.g. Pulse). It does
+#   require a master key (Infisical /meridian/vault_litellm_master_key).
+#
+# OAuth bootstrap is a one-time, paste-code flow run directly on the LXC
+# (see homelab-docs services/meridian.md). Don't scp ~/.claude/ from the Mac —
+# the Mac stores the refresh token in Keychain, so scp can't see it.
 #
 # Usage:
-#   ./deploy.sh                  # full deploy
+#   ./deploy.sh                  # full deploy (pulls LITELLM_MASTER_KEY from Infisical)
 #   ./deploy.sh --tags meridian  # meridian role only
+#   ./deploy.sh --tags litellm   # litellm role only
 # ==============================================================================

 - name: Deploy Meridian LXC
@@ -28,6 +33,15 @@
      debug:
        msg: "===== {{ ansible_play_name }} → {{ inventory_hostname }} ({{ ansible_host | default(inventory_hostname) }}) ====="

+    # Refuse to deploy with a missing, placeholder or too-short master key.
+    # Tagged `litellm` because untagged play-level tasks are skipped when the
+    # playbook runs with `--tags litellm`, which is exactly when this check
+    # matters most; full (untagged) runs still execute it.
+    - name: Sanity-check LITELLM_MASTER_KEY is set
+      assert:
+        that:
+          - litellm_master_key is defined
+          - litellm_master_key != 'CHANGE_ME'
+          - (litellm_master_key | length) >= 24
+        fail_msg: |
+          LITELLM_MASTER_KEY env var not set on the controller, or shorter than 24 characters.
+          Run via ./deploy.sh (which pulls it from Infisical), or pass:
+            -e litellm_master_key="$(infisical secrets get vault_litellm_master_key --env prod --path /meridian --plain)"
+      tags: ['litellm']
+
  roles:
-    - meridian
-    - node_exporter
+    # Each role carries its own tag so it can be deployed alone via --tags.
+    - role: meridian
+      tags: ['meridian']
+    - role: litellm
+      tags: ['litellm']
+    - role: node_exporter
+      tags: ['node_exporter']
@@ -13,6 +13,7 @@ packages:
  - openssh-server
  - python3
  - python3-pip
+  - python3-venv
  - sudo
  - vim
  - wget
@@ -33,3 +34,25 @@ meridian_port: 3456
 meridian_host: "0.0.0.0"
 meridian_idle_timeout_seconds: 300
 meridian_node_major: 22
+
+# LiteLLM — OpenAI-compatible proxy that fronts Meridian.
+# Service account, install locations and listen address.
+litellm_user: litellm
+litellm_home: /opt/litellm
+litellm_venv: /opt/litellm/venv
+litellm_port: 4000
+litellm_host: '0.0.0.0'
+litellm_package_spec: 'litellm[proxy]==1.55.10'
+# Each entry maps a client-facing model name onto an Anthropic-shape backend
+# served by Meridian on 127.0.0.1:3456. Haiku, Sonnet and Opus are all listed
+# so clients can trade cost against quality without touching this config.
+litellm_models:
+  - name: claude-haiku-4-5
+    backend: anthropic/claude-haiku-4-5
+  - name: claude-sonnet-4-6
+    backend: anthropic/claude-sonnet-4-6
+  - name: claude-opus-4-7
+    backend: anthropic/claude-opus-4-7
+# Master key is required by LiteLLM. deploy.sh pulls it from Infisical
+# (/meridian/vault_litellm_master_key) and exports LITELLM_MASTER_KEY; the
+# value is read here from the controller's environment. When the variable is
+# unset or empty it falls back to the 'CHANGE_ME' sentinel, which the
+# sanity-check assert in site.yml rejects.
+litellm_master_key: "{{ lookup('env', 'LITELLM_MASTER_KEY') | default('CHANGE_ME', true) }}"