
Commit 85f83fd

Add model cache pvc and the "serve" field for ramalama (#31)

* add pvc and serve field for ramalama
* small fixes
* fix ollama and ramalama notebooks

1 parent 78a03d7 commit 85f83fd

File tree

6 files changed: +74 -23 lines

ai/ai-starter-kit/Makefile

Lines changed: 4 additions & 4 deletions
@@ -26,12 +26,12 @@ install: check_hf_token
 	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml
 
 start:
-	mkdir -p /tmp/models-cache
-	minikube start --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:/tmp/models-cache"
+	mkdir -p "${HOME}/models-cache"
+	minikube start --cpus 4 --memory 15000 --mount --mount-string="${HOME}/models-cache:/tmp/models-cache"
 
 start_gpu:
-	mkdir -p $HOME/models-cache
-	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="$HOME/models-cache:$HOME/models-cache"
+	mkdir -p "${HOME}/models-cache"
+	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="${HOME}/models-cache:/tmp/models-cache"
 
 uninstall:
 	helm uninstall ai-starter-kit
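
Both start targets now mount the host's ~/models-cache into the VM at /tmp/models-cache, so pulled models survive cluster restarts, and quoting "${HOME}" keeps the recipes working when the home path contains spaces. A minimal sanity check, assuming minikube is installed and the chart's models-cache PV binds a hostPath under /tmp/models-cache (the PV definition is not part of this diff):

make start
# The host directory should now be visible inside the minikube VM:
minikube ssh -- ls -la /tmp/models-cache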

ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb

Lines changed: 10 additions & 1 deletion
@@ -131,6 +131,8 @@
 "import os, subprocess, time, json, requests, threading\n",
 "from pathlib import Path\n",
 "\n",
+"import psutil\n",
+"\n",
 "api_wrapper_code = '''\n",
 "import os, time, uuid, requests, json\n",
 "from fastapi import FastAPI, Request\n",
@@ -235,7 +237,14 @@
 "print(\" Wrapper script created\")\n",
 "\n",
 "print(\"\\nKilling existing wrapper processes...\")\n",
-"!pkill -f ollama_wrapper.py 2>/dev/null || true\n",
+"proc_iter = psutil.process_iter(attrs=[\"pid\", \"name\", \"cmdline\"])\n",
+"for p in proc_iter:\n",
+"    for arg in p.info[\"cmdline\"]:\n",
+"        if \"ollama_wrapper.py\" in arg:\n",
+"            p.kill()\n",
+"            print(f\"...Process {p.pid} has been killed\")\n",
+"            break\n",
+"\n",
 "time.sleep(2)\n",
 "\n",
 "log_file = '/tmp/wrapper.log'\n",

ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb

Lines changed: 10 additions & 1 deletion
@@ -133,6 +133,8 @@
 "import os, subprocess, time, json, requests\n",
 "from pathlib import Path\n",
 "\n",
+"import psutil\n",
+"\n",
 "api_wrapper_code = '''\n",
 "import os, time, uuid, requests, json\n",
 "from fastapi import FastAPI, Request\n",
@@ -232,7 +234,14 @@
 "print(\"Wrapper script created\")\n",
 "\n",
 "print(\"Killing existing wrapper processes...\")\n",
-"!pkill -f ramalama_wrapper.py 2>/dev/null || true\n",
+"proc_iter = psutil.process_iter(attrs=[\"pid\", \"name\", \"cmdline\"])\n",
+"for p in proc_iter:\n",
+"    for arg in p.info[\"cmdline\"]:\n",
+"        if \"ramalama_wrapper.py\" in arg:\n",
+"            p.kill()\n",
+"            print(f\"...Process {p.pid} has been killed\")\n",
+"            break\n",
+"\n",
 "time.sleep(2)\n",
 "\n",
 "log_file = '/tmp/ramalama_wrapper.log'\n",
Lines changed: 9 additions & 8 deletions
@@ -1,10 +1,11 @@
-transformers
-torch
-tensorflow
-huggingface_hub
-numpy
-ipywidgets
+transformers==4.57.1
+torch==2.9.0
+tensorflow==2.20.0
+huggingface_hub==0.36.0
+numpy==2.3.4
+ipywidgets==8.1.8
 mlflow==2.19.0
-ollama
-panel
+ollama==0.6.0
+panel==1.8.2
 ray==2.41.0
+psutil==7.1.3
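
Every previously unpinned dependency is now pinned, and psutil is added to support the notebook changes above. A reproducibility sketch, assuming this is the notebooks' requirements.txt (its path is cut off in this view):

python -m venv .venv && . .venv/bin/activate
pip install -r requirements.txt
python -c "import psutil; print(psutil.__version__)"   # expect 7.1.3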

ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml

Lines changed: 34 additions & 3 deletions
@@ -26,9 +26,19 @@ spec:
       - name: ramalama
         image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}"
         imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }}
-        {{- if .Values.ramalama.command }}
         command:
-          {{- toYaml .Values.ramalama.command | nindent 10 }}
+        - ramalama
+        - --store
+        - "/mnt/ramalama-store"
+        - serve
+        - {{ .Values.ramalama.models.serve }}
+        - "--port"
+        - "8080"
+        {{- if .Values.ramalama.persistentVolume.enabled }}
+        volumeMounts:
+        - name: store
+          mountPath: "/mnt/ramalama-store"
+          subPath: {{ .Values.ramalama.persistentVolume.subPath }}
         {{- end }}
         ports:
         - containerPort: 8080
@@ -37,6 +47,27 @@ spec:
         resources:
         {{- toYaml .Values.ramalama.resources | nindent 10 }}
       {{- end }}
+      initContainers:
+      - name: init
+        image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}"
+        imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }}
+        command:
+        - "bash"
+        - "-c"
+        - |
+          ramalama --store /mnt/ramalama-store pull {{ .Values.ramalama.models.serve }}
+        {{- if .Values.ramalama.persistentVolume.enabled }}
+        volumeMounts:
+        - name: store
+          mountPath: "/mnt/ramalama-store"
+          subPath: {{ .Values.ramalama.persistentVolume.subPath }}
+        {{- end }}
+      {{- if .Values.ramalama.persistentVolume.enabled }}
+      volumes:
+      - name: store
+        persistentVolumeClaim:
+          claimName: {{ .Values.ramalama.persistentVolume.existingClaim }}
+      {{- end }}
 ---
 apiVersion: v1
 kind: Service
@@ -55,4 +86,4 @@ spec:
   selector:
     {{- include "ai-starter-kit.selectorLabels" . | nindent 4 }}
     app.kubernetes.io/component: ramalama
-{{- end }}
+{{- end }}
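
The deployment no longer depends on a free-form .Values.ramalama.command: the main container always runs ramalama serve against a PVC-backed store at /mnt/ramalama-store, and the new init container pre-pulls the configured model into that same store (same claim, same subPath), so serving starts without a cold download. To inspect the rendered manifest locally without installing, Helm 3 can render just this template (chart path matches the repo layout above):

helm template ai-starter-kit helm-chart/ai-starter-kit \
  --show-only templates/ramalama-deployment.yaml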

ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml

Lines changed: 7 additions & 6 deletions
@@ -183,17 +183,18 @@ ollama:
     existingClaim: "ai-starter-kit-models-cache-pvc"
     subPath: "ollama"
 
-
 ramalama:
   enabled: true
-  command:
-    - /bin/sh
-    - -c
-    - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080
   image:
     repository: "quay.io/ramalama/ramalama"
     tag: "latest"
     pullPolicy: IfNotPresent
+  persistentVolume:
+    enabled: true
+    existingClaim: "ai-starter-kit-models-cache-pvc"
+    subPath: "ramalama"
+  models:
+    serve: qwen2.5:1.5b
 
 genericDevicePlugin:
-  enabled: false
+  enabled: false
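
With the command block gone from values.yaml, the served model is a single structured field, and the ramalama persistentVolume settings now mirror the ollama block above (same PVC, different subPath). Swapping models becomes a one-flag upgrade; the model tag below is only an example, and other values the Makefile normally passes (such as the Hugging Face token) are omitted for brevity:

helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit \
  --set ramalama.models.serve=qwen2.5:0.5b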
