import base64
import os

import requests
# Hosted chat-completions endpoint that the request below is POSTed to.
invoke_url = "https://integrate.api.nvidia.com/v1/chat/completions"
# When True, the response is requested as an event stream and printed line by
# line; when False, a single JSON body is requested and printed.
stream = False
def read_b64(path) -> str:
    """Return the contents of the file at ``path`` as a base64 string.

    Args:
        path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode()
headers = {
"Authorization": "Bearer $NVIDIA_API_KEY",
"Accept": "text/event-stream" if stream else "application/json"
}
payload = {
"model": "moonshotai/kimi-k2.6",
"messages": [{"role":"user","content":""}],
"max_tokens": 16384,
"temperature": 1.00,
"top_p": 1.00,
"stream": stream,
}
response = requests.post(invoke_url, headers=headers, json=payload, stream=stream)
if stream:
for line in response.iter_lines():
if line:
print(line.decode("utf-8"))
else:
print(response.json())Follow the steps below to download and run the NVIDIA NIM inference microservice for this model on your infrastructure of choice.
$ docker login nvcr.io
Username: $oauthtoken
Password: <PASTE_API_KEY_HERE>
Pull and run the NVIDIA NIM with the command below. This will download the optimized model for your infrastructure.
# API key exported so that `-e NGC_API_KEY` below can forward it into the container.
export NGC_API_KEY=<PASTE_API_KEY_HERE>
# Host directory that is mounted at /opt/nim/.cache inside the container
# (presumably where the downloaded model files are cached; confirm in the NIM docs).
export LOCAL_NIM_CACHE=~/.cache/nim
mkdir -p "$LOCAL_NIM_CACHE"
# Make the cache directory and everything under it writable by all users.
chmod -R a+w "$LOCAL_NIM_CACHE"
# Run the NIM image interactively with all GPUs, removing the container on
# exit and publishing container port 8000 on host port 8000.
docker run -it --rm \
--gpus all \
--ipc host \
--shm-size=32GB \
-e NGC_API_KEY \
-v "$LOCAL_NIM_CACHE:/opt/nim/.cache" \
-p 8000:8000 \
nvcr.io/nim/moonshotai/kimi-k2.6:latest
You can now make a local API call using this curl command:
# POST a chat-completion request to the server on port 8000: one user message
# made of a text part and an image_url part, with max_tokens set to 1024.
# NOTE(review): 0.0.0.0 is used as the destination address here; if the
# connection fails on your platform, try localhost or 127.0.0.1 instead.
curl -X 'POST' \
'http://0.0.0.0:8000/v1/chat/completions' \
-H 'Accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"model": "moonshotai/kimi-k2.6",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What is in this image?"
},
{
"type": "image_url",
"image_url":
{
"url": "https://assets.ngc.nvidia.com/products/api-catalog/phi-3-5-vision/example1b.jpg"
}
}
]
}
],
"max_tokens": 1024
}'
For more details on getting started with this NIM, visit the NVIDIA NIM Docs.