"""Smoke-test a vLLM server's image-input (vision) endpoint.

Sends the same base64-encoded PNG to the OpenAI-compatible chat-completions
API ``N`` times, printing each response, its latency, and output length.
Exits with status 1 if errors suggest the server hung or crashed
(connection/timeout failures).
"""

import base64
import sys
import time

# Adjust if your server is remote.
VLLM_URL = "http://localhost:8010/v1/chat/completions"
# Path to your image file.
IMAGE_PATH = "page_063.png"
# Number of test iterations.
N = 10


def load_image_base64(path):
    """Read the file at *path* and return its bytes as a base64 ASCII string.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be read.
    """
    with open(path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode("utf-8")


def build_messages(image_base64):
    """Build the OpenAI-style chat message list embedding the image.

    The image is passed inline as a ``data:image/png;base64,...`` URL with
    ``detail: high``, alongside a system prompt and two user messages.
    """
    return [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Describe the content of this image."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Please analyze the following image."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{image_base64}",
                        "detail": "high",
                    },
                },
            ],
        },
    ]


def test_vllm():
    """Run N sequential image-input requests against the vLLM server.

    Prints per-request latency and output length; exits with status 1 on
    connection/timeout errors (possible GPU hang or server crash).
    """
    # Imported lazily so the module (and its pure helpers) can be imported
    # without requiring the third-party `requests` package.
    import requests

    messages = build_messages(load_image_base64(IMAGE_PATH))
    print(f"Starting vLLM image input test on {VLLM_URL}")
    for i in range(1, N + 1):
        payload = {
            "model": "openbmb/MiniCPM-V-4_5-AWQ",
            "messages": messages,
            "max_tokens": 256,
            "temperature": 0.7,
        }
        t0 = time.time()
        try:
            resp = requests.post(VLLM_URL, json=payload, timeout=120)
            resp.raise_for_status()
            data = resp.json()
            # `or [{}]` also covers an *empty* "choices" list, which the
            # .get() default alone would not (it only covers a missing key).
            choices = data.get("choices") or [{}]
            output = choices[0].get("message", {}).get("content", "")
            print(output)
            elapsed = time.time() - t0
            print(f"[{i}/{N}] ✅ Response OK in {elapsed:.2f}s, output length={len(output)}")
        except Exception as e:
            print(f"[{i}/{N}] ❌ Error: {e}")
            if "connection" in str(e).lower() or "timeout" in str(e).lower():
                print("⚠️ Possible GPU hang or server crash. Stopping test.")
                sys.exit(1)
        time.sleep(1)  # small delay between requests
    print("✅ Test completed successfully.")


# This is a script entry point, not a unit test; keep pytest from
# auto-collecting it because of the `test_` name prefix.
test_vllm.__test__ = False


if __name__ == "__main__":
    test_vllm()