import os

from openai import OpenAI
# Read the API key from the environment instead of hard-coding it. Python does
# not expand "$NVIDIA_API_KEY" inside a string literal, so that placeholder
# text would otherwise be sent verbatim as the credential.
api_key = os.environ.get("NVIDIA_API_KEY")
if not api_key:
    raise SystemExit("NVIDIA_API_KEY environment variable is not set")

# OpenAI-compatible client pointed at the NVIDIA endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

completion = client.chat.completions.create(
    model="meta/llama-3.1-70b-instruct",
    # NOTE(review): the user prompt is an empty string; fill it in before use.
    messages=[{"role": "user", "content": ""}],
    temperature=0.2,
    top_p=0.7,
    max_tokens=1024,
    stream=False,
)

# Non-streaming response: print the reply text when there is any. The request
# above passes no tools, so only text content is handled here.
message = completion.choices[0].message
if message.content is not None:
    print(message.content)