# ------------------- TESTS ---------------------------------------------------------

if __name__ == "__main__":
    import asyncio
    import json
    from typing import List

    from pydantic import BaseModel

    from _llm._llm.tool_registry import register_tool, get_tools, execute_tool
    from _llm import LLM, ChatCompletionMessage

    # Define structured output models
    class CalculationStep(BaseModel):
        step_number: int
        description: str
        calculation: str
        result: float

    class NameResponse(BaseModel):
        name: str
        age: int
        occupation: str
        hobbies: List[str]

    class MathSolution(BaseModel):
        steps: List[CalculationStep]
        final_answer: float
        explanation: str
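
    # Minimal local sanity check of the schemas above (plain Pydantic, no LLM
    # call): constructing the models directly shows the shape the structured
    # output tests below expect. This is only a sketch of the expected data.
    _example = MathSolution(
        steps=[CalculationStep(step_number=1, description="Multiply 12 by 8",
                               calculation="12 * 8", result=96.0)],
        final_answer=111.0,
        explanation="12 * 8 = 96, then 96 + 15 = 111",
    )
    assert _example.final_answer == 111.0
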
    # Define a tool for calculations
    @register_tool
    def calculate_tool(number: int, multiply_factor: int) -> int:
        '''Multiply a number by a factor.

        Args:
            number (int): The number to be multiplied
            multiply_factor (int): The factor to multiply by

        Returns:
            int: The result of the multiplication
        '''
        return number * multiply_factor
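
    # Registry smoke test (a sketch, left commented out: it assumes that
    # execute_tool takes the registered tool name plus keyword arguments,
    # which may not match the registry's real signature):
    # result = execute_tool("calculate_tool", number=6, multiply_factor=7)
    # assert result == 42
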
    async def run_tests():
        print("🧪 Testing LLM class with vLLM model")
        print("=" * 50)

        # Use a fresh LLM instance for each test (chat=False: no persisted history)
        def get_fresh_llm():
            return LLM(silent=False, chat=False)

        # Test 1: Basic vLLM generation
        print("\n1️⃣ Basic vLLM Generation Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = llm.generate(
                query="Hello! Can you introduce yourself briefly?",
                model='vllm'
            )
            print(f"✅ Basic response: {response.content[:100]}...")
        except Exception as e:
            print(f"❌ Basic test failed: {e}")

        # Test 2: Tools usage
        print("\n2️⃣ Tools Usage Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()
            # Only calculate_tool is registered above, so the weather half of
            # this query probes how the model behaves without a matching tool.
            response = llm.generate(
                query="What's the weather like in San Francisco? Also calculate 15 * 7 for me.",
                model='vllm',
                tools=tools
            )
            print(f"✅ Tools response: {response.content[:100]}...")

            # Enhanced tool call detection
            tool_calls_found = False
            if hasattr(response, 'tool_calls') and response.tool_calls:
                print(f"🔧 OpenAI-style tool calls detected: {len(response.tool_calls)} calls")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f"  Tool {i+1}: {tool_call.function.name}")
                    print(f"  Arguments: {tool_call.function.arguments}")
                tool_calls_found = True

            # Check whether the response contains JSON that might be tool-like
            if not tool_calls_found:
                try:
                    # Try to parse the content as JSON
                    content_json = json.loads(response.content)
                    if isinstance(content_json, dict):
                        print("🔧 JSON-formatted response detected (not OpenAI tool calls)")
                        print(f"  Keys: {list(content_json.keys())}")

                        # Check if it looks like a tool call
                        if any(key in content_json for key in ['location', 'expression', 'function', 'name']):
                            print("  ℹ️ This appears to be tool-like output in JSON format")
                except json.JSONDecodeError:
                    print("ℹ️ No structured tool calls or JSON found")

        except Exception as e:
            print(f"❌ Tools test failed: {e}")

        # Test 3: Thinking mode (use the vLLM model, since no dedicated reasoning model exists)
        print("\n3️⃣ Thinking Mode Test (using vllm)")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response: ChatCompletionMessage = llm.generate(
                query="Solve this step by step: If I have 20 apples, eat 3, give away 5, then buy 8 more, how many do I have?",
                model='vllm',  # Use vllm instead of a reasoning model
                think=True
            )
            print(f"✅ Thinking response: {response.content[:100]}...")

            if hasattr(response, 'reasoning_content') and response.reasoning_content:
                print(f"🧠 Thinking content: {response.reasoning_content[:100]}...")
            else:
                print("ℹ️ No explicit thinking content found")

        except Exception as e:
            print(f"❌ Thinking test failed: {e}")

        # Test 4: Streaming (simplified test)
        print("\n4️⃣ Streaming Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            print("Streaming response: ", end="")

            stream = llm.generate(
                query="Explain photosynthesis in 2 sentences",
                model='vllm',
                stream=True
            )

            content_parts = []
            try:
                # The stream yields (chunk_type, chunk_content) tuples
                for chunk_type, chunk_content in stream:
                    if chunk_type == "content":
                        content_parts.append(chunk_content)
                        print(chunk_content, end="")
                    elif chunk_type == "thinking":
                        print(f"\033[94m{chunk_content}\033[0m", end="")  # Blue for thinking

                print(f"\n✅ Streaming completed - Content: {len(content_parts)} chunks")
            except Exception as stream_error:
                print(f"\n❌ Stream processing failed: {stream_error}")

        except Exception as e:
            print(f"❌ Streaming test failed: {e}")
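
        # Sketch of a reusable consumer for the (chunk_type, chunk_content)
        # tuple protocol exercised above; defined here for illustration only
        # and not called by the tests.
        def collect_stream(s):
            thinking_parts, content_parts = [], []
            for kind, text in s:
                (thinking_parts if kind == "thinking" else content_parts).append(text)
            return "".join(thinking_parts), "".join(content_parts)
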
        # Test 5: Structured output (JSON mode)
        print("\n5️⃣ Structured Output Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            # Note: the inline example schema differs from MathSolution (plain
            # string steps vs CalculationStep objects); format= is what should
            # actually enforce the MathSolution structure.
            response = llm.generate(
                query="""Create a simple math problem solution in JSON format with this structure:
{
    "problem": "the math problem",
    "steps": ["step 1", "step 2", "step 3"],
    "answer": "final answer"
}

Problem: What is 12 * 8 + 15?""",
                model='vllm',
                format=MathSolution
            )
            print(f"✅ Structured response: {response.content[:150]}...")

            # Try to parse as JSON to verify structure
            try:
                parsed = json.loads(response.content)
                print(f"🎯 Valid JSON with keys: {list(parsed.keys())}")
            except json.JSONDecodeError:
                print("⚠️ Response is not valid JSON")

        except Exception as e:
            print(f"❌ Structured output test failed: {e}")

        # Test 6: Async generation
        print("\n6️⃣ Async Generation Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = await llm.async_generate(
                query="What's the capital of France? Answer briefly.",
                model='vllm'
            )
            print(f"✅ Async response: {response.content[:100]}...")
        except Exception as e:
            print(f"❌ Async test failed: {e}")

        # Test 7: Multiple tools with vllm (enhanced debugging)
        print("\n7️⃣ Complex Integration Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()  # Get all registered tools
            response = llm.generate(
                query="I need to multiply 12 by 11",
                model='vllm',
                tools=tools,
                think=True
            )
            print(f"✅ Complex response: {response.content[:100]}...")

            # Enhanced checking for both thinking and tool usage
            has_thinking = hasattr(response, 'reasoning_content') and response.reasoning_content
            has_tool_calls = hasattr(response, 'tool_calls') and response.tool_calls

            print(f"🧠 Has thinking: {has_thinking}")
            if has_thinking:
                print(f"  Thinking content: {response.reasoning_content[:50]}...")

            print(f"🔧 Has OpenAI tool calls: {has_tool_calls}")
            if has_tool_calls:
                print(f"  Tool calls count: {len(response.tool_calls)}")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f"  Tool {i+1}: {tool_call.function.name}")

            # Check for JSON-style tool responses
            try:
                content_json = json.loads(response.content)
                if isinstance(content_json, dict) and any(key in content_json for key in ['expression', 'calculation', 'result']):
                    print("🔧 JSON-style tool response detected:")
                    print(f"  Content: {content_json}")
            except json.JSONDecodeError:
                pass

        except Exception as e:
            print(f"❌ Complex test failed: {e}")

        # Test 8: Tool call format analysis
        print("\n8️⃣ Tool Call Format Analysis")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()

            # Test with an explicit tool instruction
            response = llm.generate(
                query="Use the calculate tool to compute 25 * 4. Make sure to call the function.",
                model='vllm',
                tools=tools
            )

            print(f"Response content: {response.content}")
            print(f"Response type: {type(response)}")
            print(f"Has tool_calls attribute: {hasattr(response, 'tool_calls')}")

            if hasattr(response, 'tool_calls') and response.tool_calls:
                print(f"Tool calls count: {len(response.tool_calls)}")
                print(f"Tool calls type: {type(response.tool_calls)}")

                for i, tool_call in enumerate(response.tool_calls):
                    print(f"Tool {i+1}:")
                    print(f"  ID: {tool_call.id}")
                    print(f"  Type: {tool_call.type}")
                    print(f"  Function name: {tool_call.function.name}")
                    print(f"  Function arguments: {tool_call.function.arguments}")
            else:
                print("No tool calls found")

        except Exception as e:
            print(f"❌ Tool format analysis failed: {e}")

        # Test 9: vLLM tool response conversion
        print("\n9️⃣ vLLM Tool Response Conversion Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()  # Get all registered tools

            # Test multiple tool scenarios (only calculate_tool is registered,
            # so the weather queries probe behavior without a matching tool)
            test_cases = [
                "Calculate 15 * 7 using the calculate tool",
                "Get weather for New York using the weather tool",
                "Use both tools: calculate 20 + 5 and get weather for London"
            ]

            for i, test_query in enumerate(test_cases, 1):
                print(f"\n  Test {i}: {test_query}")
                response = llm.generate(
                    query=test_query,
                    model='vllm',
                    tools=tools
                )

                print(f"  Response: {response.content[:60]}...")

                if hasattr(response, 'tool_calls') and response.tool_calls:
                    print(f"  ✅ Converted to {len(response.tool_calls)} tool call(s)")
                    for j, tool_call in enumerate(response.tool_calls):
                        print(f"    Tool {j+1}: {tool_call.function.name}")
                else:
                    print("  ⚠️ No tool calls detected")

        except Exception as e:
            print(f"❌ vLLM conversion test failed: {e}")

        print("\n" + "=" * 50)
        print("🏁 Test suite completed!")

    # Helper function for non-async testing
    def translate_to_spanish(text):
        llm = LLM()
        prompt = f"Translate the following text to Spanish:\n\n{text}"
        response = llm.generate(query=prompt, model='vllm')
        return response.content

    # Run the test suite
    print("Starting comprehensive test suite...")
    asyncio.run(run_tests())

    # Quick translation test
    print("\n🌍 Translation Test:")
    spanish_text = translate_to_spanish("Hello, how are you today?")
    print(f"Spanish translation: {spanish_text}")