
Commit 85f83fd

Add model cache pvc and the "serve" field for ramalama (#31)

* add pvc and serve field for ramalama
* small fixes
* fix ollama and ramalama notebooks

1 parent 78a03d7 commit 85f83fd

File tree

6 files changed: +74 -23 lines

ai/ai-starter-kit/Makefile

Lines changed: 4 additions & 4 deletions
@@ -26,12 +26,12 @@ install: check_hf_token
 	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml
 
 start:
-	mkdir -p /tmp/models-cache
-	minikube start --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:/tmp/models-cache"
+	mkdir -p "${HOME}/models-cache"
+	minikube start --cpus 4 --memory 15000 --mount --mount-string="${HOME}/models-cache:/tmp/models-cache"
 
 start_gpu:
-	mkdir -p $HOME/models-cache
-	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="$HOME/models-cache:$HOME/models-cache"
+	mkdir -p "${HOME}/models-cache"
+	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="${HOME}/models-cache:/tmp/models-cache"
 
 uninstall:
 	helm uninstall ai-starter-kit
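
Both start targets now mount the host's ~/models-cache into the VM at /tmp/models-cache, so pulled models survive cluster restarts, and quoting "${HOME}" keeps the recipes working when the home path contains spaces. A minimal sanity check, assuming minikube is installed and the chart's models-cache PV binds a hostPath under /tmp/models-cache (the PV definition is not part of this diff):

make start
# The host directory should now be visible inside the minikube VM:
minikube ssh -- ls -la /tmp/models-cache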

ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb

Lines changed: 10 additions & 1 deletion
@@ -131,6 +131,8 @@
 "import os, subprocess, time, json, requests, threading\n",
 "from pathlib import Path\n",
 "\n",
+"import psutil\n",
+"\n",
 "api_wrapper_code = '''\n",
 "import os, time, uuid, requests, json\n",
 "from fastapi import FastAPI, Request\n",
@@ -235,7 +237,14 @@
 "print(\" Wrapper script created\")\n",
 "\n",
 "print(\"\\nKilling existing wrapper processes...\")\n",
-"!pkill -f ollama_wrapper.py 2>/dev/null || true\n",
+"proc_iter = psutil.process_iter(attrs=[\"pid\", \"name\", \"cmdline\"])\n",
+"for p in proc_iter:\n",
+"    for arg in p.info[\"cmdline\"]:\n",
+"        if \"ollama_wrapper.py\" in arg:\n",
+"            p.kill()\n",
+"            print(f\"...Process {p.pid} has been killed\")\n",
+"            break\n",
+"\n",
 "time.sleep(2)\n",
 "\n",
 "log_file = '/tmp/wrapper.log'\n",

ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb

Lines changed: 10 additions & 1 deletion
@@ -133,6 +133,8 @@
 "import os, subprocess, time, json, requests\n",
 "from pathlib import Path\n",
 "\n",
+"import psutil\n",
+"\n",
 "api_wrapper_code = '''\n",
 "import os, time, uuid, requests, json\n",
 "from fastapi import FastAPI, Request\n",
@@ -232,7 +234,14 @@
 "print(\"Wrapper script created\")\n",
 "\n",
 "print(\"Killing existing wrapper processes...\")\n",
-"!pkill -f ramalama_wrapper.py 2>/dev/null || true\n",
+"proc_iter = psutil.process_iter(attrs=[\"pid\", \"name\", \"cmdline\"])\n",
+"for p in proc_iter:\n",
+"    for arg in p.info[\"cmdline\"]:\n",
+"        if \"ramalama_wrapper.py\" in arg:\n",
+"            p.kill()\n",
+"            print(f\"...Process {p.pid} has been killed\")\n",
+"            break\n",
+"\n",
 "time.sleep(2)\n",
 "\n",
 "log_file = '/tmp/ramalama_wrapper.log'\n",
Lines changed: 9 additions & 8 deletions
@@ -1,10 +1,11 @@
-transformers
-torch
-tensorflow
-huggingface_hub
-numpy
-ipywidgets
+transformers==4.57.1
+torch==2.9.0
+tensorflow==2.20.0
+huggingface_hub==0.36.0
+numpy==2.3.4
+ipywidgets==8.1.8
 mlflow==2.19.0
-ollama
-panel
+ollama==0.6.0
+panel==1.8.2
 ray==2.41.0
+psutil==7.1.3
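
Every previously unpinned dependency is now pinned, and psutil is added to support the notebook changes above. A reproducibility sketch, assuming this is the notebooks' requirements.txt (its path is cut off in this view):

python -m venv .venv && . .venv/bin/activate
pip install -r requirements.txt
python -c "import psutil; print(psutil.__version__)"   # expect 7.1.3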

ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml

Lines changed: 34 additions & 3 deletions
@@ -26,9 +26,19 @@ spec:
       - name: ramalama
         image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}"
         imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }}
-        {{- if .Values.ramalama.command }}
         command:
-          {{- toYaml .Values.ramalama.command | nindent 10 }}
+        - ramalama
+        - --store
+        - "/mnt/ramalama-store"
+        - serve
+        - {{ .Values.ramalama.models.serve }}
+        - "--port"
+        - "8080"
+        {{- if .Values.ramalama.persistentVolume.enabled }}
+        volumeMounts:
+        - name: store
+          mountPath: "/mnt/ramalama-store"
+          subPath: {{ .Values.ramalama.persistentVolume.subPath }}
         {{- end }}
         ports:
         - containerPort: 8080
@@ -37,6 +47,27 @@ spec:
         resources:
         {{- toYaml .Values.ramalama.resources | nindent 10 }}
       {{- end }}
+      initContainers:
+      - name: init
+        image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}"
+        imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }}
+        command:
+        - "bash"
+        - "-c"
+        - |
+          ramalama --store /mnt/ramalama-store pull {{ .Values.ramalama.models.serve }}
+        {{- if .Values.ramalama.persistentVolume.enabled }}
+        volumeMounts:
+        - name: store
+          mountPath: "/mnt/ramalama-store"
+          subPath: {{ .Values.ramalama.persistentVolume.subPath }}
+        {{- end }}
+      {{- if .Values.ramalama.persistentVolume.enabled }}
+      volumes:
+      - name: store
+        persistentVolumeClaim:
+          claimName: {{ .Values.ramalama.persistentVolume.existingClaim }}
+      {{- end }}
 ---
 apiVersion: v1
 kind: Service
@@ -55,4 +86,4 @@ spec:
   selector:
     {{- include "ai-starter-kit.selectorLabels" . | nindent 4 }}
     app.kubernetes.io/component: ramalama
-{{- end }}
+{{- end }}
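
The deployment no longer depends on a free-form .Values.ramalama.command: the main container always runs ramalama serve against a PVC-backed store at /mnt/ramalama-store, and the new init container pre-pulls the configured model into that same store (same claim, same subPath), so serving starts without a cold download. To inspect the rendered manifest locally without installing, Helm 3 can render just this template (chart path matches the repo layout above):

helm template ai-starter-kit helm-chart/ai-starter-kit \
  --show-only templates/ramalama-deployment.yaml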

ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml

Lines changed: 7 additions & 6 deletions
@@ -183,17 +183,18 @@ ollama:
     existingClaim: "ai-starter-kit-models-cache-pvc"
     subPath: "ollama"
 
-
 ramalama:
   enabled: true
-  command:
-    - /bin/sh
-    - -c
-    - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080
   image:
     repository: "quay.io/ramalama/ramalama"
     tag: "latest"
     pullPolicy: IfNotPresent
+  persistentVolume:
+    enabled: true
+    existingClaim: "ai-starter-kit-models-cache-pvc"
+    subPath: "ramalama"
+  models:
+    serve: qwen2.5:1.5b
 
 genericDevicePlugin:
-  enabled: false
+  enabled: false
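
With the command block gone from values.yaml, the served model is a single structured field, and the ramalama persistentVolume settings now mirror the ollama block above (same PVC, different subPath). Swapping models becomes a one-flag upgrade; the model tag below is only an example, and other values the Makefile normally passes (such as the Hugging Face token) are omitted for brevity:

helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit \
  --set ramalama.models.serve=qwen2.5:0.5b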
