You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

70 lines
2.0 KiB

import requests
import time
import sys
import base64
# Adjust if your server is remote
VLLM_URL = "http://localhost:8010/v1/chat/completions"

# Path to your image file
IMAGE_PATH = "page_063.png"

# Read the image once at import time and encode it as base64 so it can
# be embedded in the request as a data URL.
with open(IMAGE_PATH, "rb") as img_file:
    image_data = img_file.read()
image_base64 = base64.b64encode(image_data).decode("utf-8")

# Chat messages: a system prompt, a plain-text user turn, and a
# multimodal user turn that carries the image itself.
_image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/png;base64,{image_base64}",
        "detail": "high",
    },
}
_text_part = {"type": "text", "text": "Please analyze the following image."}

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Describe the content of this image."},
    {"role": "user", "content": [_text_part, _image_part]},
]

# Number of test iterations
N = 10
def test_vllm():
    """Send N identical image-chat requests to the vLLM server and time each.

    Reads the module-level ``VLLM_URL``, ``messages`` and ``N``.  Exits the
    process with status 1 as soon as a connection/timeout error is seen,
    since that usually indicates the server crashed or the GPU hung.
    """
    print(f"Starting vLLM image input test on {VLLM_URL}")

    # The payload is identical on every iteration, so build it once
    # instead of reconstructing the dict inside the loop.
    payload = {
        "model": "openbmb/MiniCPM-V-4_5-AWQ",
        "messages": messages,
        "max_tokens": 256,
        "temperature": 0.7,
    }

    failures = 0
    for i in range(1, N + 1):
        t0 = time.time()
        try:
            resp = requests.post(VLLM_URL, json=payload, timeout=120)
            resp.raise_for_status()
            data = resp.json()
            output = data.get("choices", [{}])[0].get("message", {}).get("content", "")
            # Stop the clock before printing: writing a long response to the
            # terminal should not be counted as server latency.
            elapsed = time.time() - t0
            print(output)
            print(f"[{i}/{N}] ✅ Response OK in {elapsed:.2f}s, output length={len(output)}")
        except Exception as e:
            failures += 1
            print(f"[{i}/{N}] ❌ Error: {e}")
            # Connection/timeout errors are treated as fatal — the server is
            # presumably gone, so further iterations would just pile up errors.
            if "connection" in str(e).lower() or "timeout" in str(e).lower():
                print("⚠ Possible GPU hang or server crash. Stopping test.")
                sys.exit(1)
        time.sleep(1)  # small delay between requests

    # Bug fix: the original printed "completed successfully" unconditionally,
    # even when some (non-connection) iterations had failed.
    if failures:
        print(f"⚠ Test finished with {failures}/{N} failed requests.")
    else:
        print("✅ Test completed successfully.")


if __name__ == "__main__":
    test_vllm()