Ollama安装部署
Linux 下安装
安装方式:https://ollama.com/download/linux
安装后使用 systemd 管理进程,unit 文件:/etc/systemd/system/ollama.service
更换服务端监听的地址(设置 Environment="OLLAMA_HOST=0.0.0.0:11434")
更换存储模型的位置(设置 Environment="OLLAMA_MODELS=/app/models/ollama",且一定要 chown -R ollama:ollama /app/models/ollama)
设置代理地址(不设置代理拉取模型也很快)
Environment="HTTP_PROXY=http://192.168.248.11:18899"
Environment="HTTPS_PROXY=http://192.168.248.11:18899"
Environment="ALL_PROXY=socks5://192.168.248.11:18899"
Environment="NO_PROXY=127.0.0.1,localhost,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16"
K8s 部署
---
# Helm values for the ollama chart.
# NOTE(review): indentation below restored from a flattened paste — structure
# follows standard Helm/Kubernetes conventions; verify against the chart schema.

# Tolerate brief node not-ready/unreachable conditions (evict after 60s)
# and allow scheduling onto control-plane nodes.
customTolerations: &customTolerations
  - key: node.kubernetes.io/not-ready
    operator: Exists
    effect: NoExecute
    tolerationSeconds: 60
  - key: node.kubernetes.io/unreachable
    operator: Exists
    effect: NoExecute
    tolerationSeconds: 60
  - key: node-role.kubernetes.io/control-plane
    operator: Exists

# Require nodes carrying the node-role.kubernetes.io/worker label with an
# empty value (the kubeadm-style worker role label).
customNodeAffinity: &customNodeAffinity
  requiredDuringSchedulingIgnoredDuringExecution:
    nodeSelectorTerms:
      - matchExpressions:
          - key: node-role.kubernetes.io/worker
            operator: In
            values:
              - ""

customNodeSelector: &customNodeSelector
  kubernetes.io/arch: amd64
  kubernetes.io/os: linux

replicaCount: 1

image:
  # Harbor pull-through mirror of docker.io/ollama/ollama.
  repository: harbor.alpha-quant.tech/3rd/docker.io/ollama/ollama
  tag: "0.5.11"

imagePullSecrets:
  - name: platform-oci-image-pull-secrets

ollama:
  gpu:
    enabled: true
    type: "nvidia"
    nvidiaResource: "nvidia.com/gpu"
    number: 4
  models:
    # Models pulled and started at boot; quoted because the tag separator
    # is a colon.
    run:
      - "deepseek-r1:70b"
      - "qwen2.5-coder:32b"

ingress:
  enabled: true
  className: "nginx"
  hosts:
    - host: ollama.alpha-quant.tech
      paths:
        - path: /
          pathType: Prefix
  tls:
    - secretName: https-self-hosted-certs
      hosts:
        - ollama.alpha-quant.tech

# requests == limits => Guaranteed QoS class.
resources:
  requests:
    memory: 300Gi
    cpu: "48"
  limits:
    memory: 300Gi
    cpu: "48"

nodeSelector: *customNodeSelector
tolerations: *customTolerations
affinity:
  nodeAffinity: *customNodeAffinity

extraEnv:
  # Pin the pod to four specific GPUs by UUID.
  - name: NVIDIA_VISIBLE_DEVICES
    value: GPU-08453bc4-c2c3-4b97-42a3-b1047533264d,GPU-324badda-57ba-468e-2e93-ab1f0ae06be1,GPU-c24351e3-a9ac-2437-7ad6-1fb903a621d9,GPU-d91f98df-5f46-4103-ff1f-55b9422ead34
  # Spread model layers across all visible GPUs; quoted so it stays a string.
  - name: OLLAMA_SCHED_SPREAD
    value: "1"

persistentVolume:
  enabled: true
  storageClass: "hostpath-local-data"