Skip to content
Snippets Groups Projects
Commit 4dde5434 authored by AmeerHamza Shakur's avatar AmeerHamza Shakur
Browse files

small update

parent d3ab0d15
Branches
No related merge requests found
promptflow @ 82a6dbdd
Subproject commit 82a6dbdd1d23ee7c2e5179952f9005b927d4877c
#!/bin/bash
# SLURM batch script: serve Mixtral-8x7B-Instruct via vLLM's
# OpenAI-compatible API server on a 4x H100 node.
#SBATCH --job-name=openai_vllm
#SBATCH --nodes=1
#SBATCH --cpus-per-task=128
#SBATCH --gres=gpu:4
#SBATCH --partition=GPU4h100
#SBATCH --time=72:00:00

# Echo each command as it runs so the SLURM log is easy to debug.
set -x

module load cuda121
conda activate vllm-env

# Expose all four allocated GPUs to the vLLM process.
export CUDA_VISIBLE_DEVICES=0,1,2,3

# FIX: the job allocates 4 GPUs (--gres=gpu:4) but the original launched with
# --tensor-parallel-size 1. Mixtral-8x7B in float16 is ~90 GB of weights and
# does not fit on a single 80 GB H100, so shard the model across all four
# GPUs with tensor parallelism instead of leaving three of them idle.
srun --cpus-per-task=128 python -m vllm.entrypoints.openai.api_server \
    --model /archive/shared/sim_center/shared/mixtral/data/Mixtral-8x7B-Instruct-v0.1 \
    --chat-template /archive/shared/sim_center/shared/mixtral/vllm/template_mistral.jinja \
    --trust-remote-code \
    --dtype float16 \
    --tensor-parallel-size 4 \
    --max-model-len 8192
"""Query a locally hosted vLLM OpenAI-compatible server for a text completion.

Reconstructed from a corrupted side-by-side diff render: each original line
contained both the old and new revision fused together. This is the NEW
revision of the script, cleaned up into valid Python.
"""
from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
# vLLM ignores the key, but the client library requires a non-empty value.
openai_api_key = "EMPTY"
openai_api_base = "http://172.18.227.71:8000/v1"

# Model path must match the --model argument the vLLM server was launched with.
model = "/archive/shared/sim_center/shared/mixtral/data/Mixtral-8x7B-Instruct-v0.1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

prompt_text = "The coolest town near Dallas is "
completion = client.completions.create(
    model=model,
    prompt=prompt_text,
    max_tokens=50,
    temperature=0.5,
)
print("Completion result:", completion)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment