25 lines
706 B
Python
25 lines
706 B
Python
import os

from openai import OpenAI
|
|
|
|
# Shared client for the OpenAI-compatible endpoint used by this module.
# The key and URL can be overridden with the QWEN_API_KEY / QWEN_BASE_URL
# environment variables; the literals are the previously hard-coded values,
# kept as defaults so behaviour is unchanged when the variables are unset.
# NOTE(review): a credential committed to source should be rotated and the
# literal default dropped once every deployment sets the variable.
client = OpenAI(
    api_key=os.environ.get("QWEN_API_KEY", "unis123"),
    base_url=os.environ.get("QWEN_BASE_URL", "http://10.100.53.199:9527/v1"),
)
|
|
|
|
def get_qwen_response(model_name, system_prompt, user_prompt, max_token=64000):
    """Stream a chat completion and yield it piece by piece.

    Sends one system message and one user message to ``model_name`` through
    the module-level ``client`` with ``temperature=0.7`` and ``stream=True``,
    then iterates over the returned chunks.

    Args:
        model_name: Model identifier forwarded as ``model``.
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        max_token: Forwarded as ``max_tokens``.

    Yields:
        ``("content", text)`` for each chunk whose delta carries content,
        otherwise ``("reasoning", text)`` when the delta carries reasoning
        text. Chunks that carry neither are skipped.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=max_token,
        stream=True,
    )

    for chunk in response:
        # A chunk may arrive with an empty ``choices`` list; indexing [0]
        # would raise IndexError, so skip it.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        if delta.content is not None:
            yield "content", delta.content
            continue

        # The reasoning field is not guaranteed to exist on every delta, so
        # read it defensively instead of risking AttributeError.
        reasoning = getattr(delta, "reasoning", None)
        if reasoning is None:
            # NOTE(review): some OpenAI-compatible servers appear to name this
            # field ``reasoning_content`` - confirm against the deployed server.
            reasoning = getattr(delta, "reasoning_content", None)

        # Role-only and finish chunks have neither field; do not emit None.
        if reasoning is not None:
            yield "reasoning", reasoning
|