State-of-the-art, high-efficiency LLM excelling in reasoning, math, and coding.
Follow the steps below to download and run the NVIDIA NIM inference microservice for this model on your infrastructure of choice.
Follow the steps below to download and run the NVIDIA NIM inference microservice for this model with the NVIDIA NIM Operator on your infrastructure of choice.
# Register the NGC Helm repository under the alias "nvidia"; the index refresh
# runs only if the repository was added successfully.
helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
# Install the k8s-nim-operator chart as release "nim-operator" into the
# nim-operator namespace, creating that namespace if it does not exist yet.
helm install nim-operator nvidia/k8s-nim-operator \
  --namespace nim-operator \
  --create-namespace
# The secrets below live in the nim-service namespace, which no earlier step
# creates (the Helm install only creates nim-operator). Create it first; the
# dry-run/apply form is idempotent, so re-running this step does not fail.
kubectl create namespace nim-service --dry-run=client -o yaml | kubectl apply -f -

# Image pull secret for nvcr.io. The username is the literal string $oauthtoken,
# so it must stay single-quoted to keep the shell from expanding it.
# Replace <PASTE_API_KEY_HERE> with your NGC API key before running.
kubectl create secret -n nim-service docker-registry ngc-secret \
  --docker-server=nvcr.io \
  --docker-username='$oauthtoken' \
  --docker-password=<PASTE_API_KEY_HERE>
# Generic secret in the nim-service namespace holding the NGC API key under the
# key NGC_API_KEY. Replace <PASTE_API_KEY_HERE> with your key before running.
kubectl create secret generic ngc-api-secret \
  --namespace nim-service \
  --from-literal=NGC_API_KEY=<PASTE_API_KEY_HERE>
---
# NIMCache: pulls the model from NGC into a PersistentVolumeClaim so that a
# NIMService can reference the cache by name.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: deepseek-ai-deepseek-r1
  # Same namespace as the ngc-secret / ngc-api-secret secrets referenced below.
  namespace: nim-service
spec:
  source:
    ngc:
      # Pinned to the same version the NIMService runs (1.8.3) instead of the
      # mutable "latest" tag, so the cache and the server stay in step.
      modelPuller: nvcr.io/nim/deepseek-ai/deepseek-r1:1.8.3
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: tensorrt_llm
        # NOTE(review): tensorParallelism "1" and the 50Gi size below look like
        # generic template defaults; confirm they fit this model and hardware.
        tensorParallelism: "1"
  storage:
    pvc:
      create: true
      # Replace with a storage class available in your cluster.
      storageClass: <storage-class-name>
      size: "50Gi"
      volumeAccessMode: ReadWriteMany
  resources: {}
---
# NIMService: runs the NIM inference container backed by the NIMCache of the
# same name and exposes it inside the cluster on port 8000.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMService
metadata:
  name: deepseek-ai-deepseek-r1
  # The secrets referenced below and the test request URL
  # (deepseek-ai-deepseek-r1.nim-service:8000) both assume this namespace.
  namespace: nim-service
spec:
  image:
    repository: nvcr.io/nim/deepseek-ai/deepseek-r1
    # Quoted so the version is always read as a string.
    tag: "1.8.3"
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api-secret
  storage:
    nimCache:
      # Must match metadata.name of the NIMCache resource.
      name: deepseek-ai-deepseek-r1
      profile: ''
  replicas: 1
  resources:
    limits:
      # NOTE(review): a single GPU looks like a generic template default;
      # confirm it is sufficient for this model.
      nvidia.com/gpu: 1
  expose:
    service:
      type: ClusterIP
      port: 8000
# Start a throwaway interactive pod named "curl" in the default namespace and
# open an ash shell in it; --rm removes the pod when the session ends.
kubectl run curl \
  --namespace default \
  --image=curlimages/curl:latest \
  --rm -it \
  -- ash
# Send a single non-streaming chat completion request to the
# deepseek-ai-deepseek-r1 service in the nim-service namespace on port 8000.
# The JSON payload is passed verbatim inside single quotes.
curl --request POST \
  --header 'Accept: application/json' \
  --header 'Content-Type: application/json' \
  --data '{
"model": "deepseek-ai/deepseek-r1",
"messages": [
{
"content":"What should I do for a 4 day vacation at Cape Hatteras National Seashore?",
"role": "user"
}],
"top_p": 1,
"n": 1,
"max_tokens": 1024,
"stream": false,
"frequency_penalty": 0.0,
"stop": ["STOP"]
}' \
  'http://deepseek-ai-deepseek-r1.nim-service:8000/v1/chat/completions'
For more details on getting started with this NIM, visit the NVIDIA NIM Operator Docs.