This is now vLLM only

main
Lasse Server, 2 months ago
parent 238a5146f8 · commit 6c6be0798e
Changed files:
1. __init__.py (4 lines changed)
2. _llm/llm.py (1795 lines changed)
3. _llm/tests_for_llm.py (492 lines changed)
4. _llm/tool_registry.py (90 lines changed)

@@ -2,7 +2,7 @@
 llm_client: A Python package for interacting with LLM models through Ollama.
 """
-from _llm._llm.llm import LLM
+from _llm._llm.llm import LLM, ChatCompletionMessage
 from _llm._llm.tool_registry import register_tool, get_tools
-__all__ = ["LLM", "register_tool", "get_tools"]
+__all__ = ["LLM", "register_tool", "get_tools", "ChatCompletionMessage"]

File diff suppressed because it is too large.

@@ -0,0 +1,492 @@
# ------------------- TESTS ---------------------------------------------------------
if __name__ == "__main__":
    import asyncio
    from typing import List

    from pydantic import BaseModel

    from _llm._llm.tool_registry import register_tool, get_tools, execute_tool
    from _llm import LLM, ChatCompletionMessage

    # Define structured output models
    class CalculationStep(BaseModel):
        step_number: int
        description: str
        calculation: str
        result: float

    class NameResponse(BaseModel):
        name: str
        age: int
        occupation: str
        hobbies: List[str]

    class MathSolution(BaseModel):
        steps: List[CalculationStep]
        final_answer: float
        explanation: str
    llm = LLM()
    response = llm.generate(
        query="""Create a simple math problem solution in JSON format with this structure:
        {
            "problem": "the math problem",
            "steps": ["step 1", "step 2", "step 3"],
            "answer": "final answer"
        }
        Problem: What is 12 * 8 + 15?""",
        model='vllm',
        format=MathSolution
    )
    print(response.content.steps)
    exit()
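    # NOTE: nothing below this point runs while the exit() above is in place;
    # the ad-hoc checks above short-circuit the whole suite.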
    llm = LLM(silent=False, chat=False)  # Don't persist chat history
    response = llm.generate("Hello! Can you introduce yourself briefly?", model='vllm', format=NameResponse)
    print(response.__dict__)
    response = llm.generate("What's the weather like in San Francisco? Also calculate 15 * 7 for me.", model='vllm')
    print(response.__dict__)

    # Define a tool for calculations
    @register_tool
    def calculate_tool(number: int, multiply_factor: int) -> int:
        '''Multiply a number by a factor

        Args:
            number (int): The number to be multiplied
            multiply_factor (int): The factor to multiply by

        Returns:
            int: The result of the multiplication
        '''
        return number * multiply_factor
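    # register_tool (see the tool_registry diff below) derives the OpenAI-style
    # function schema for this tool from its signature and the Google-style
    # docstring above, which is what get_tools() hands to the model.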
    async def run_tests():
        print("🧪 Testing LLM class with vLLM model")
        print("=" * 50)

        # Initialize LLM instance - use a fresh instance for each test
        def get_fresh_llm():
            return LLM(silent=False, chat=False)  # Don't persist chat history

        # Test 1: Basic vLLM generation
        print("\n1 Basic vLLM Generation Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = llm.generate(
                query="Hello! Can you introduce yourself briefly?",
                model='vllm'
            )
            print(f"✅ Basic response: {response.content[:100]}...")
        except Exception as e:
            print(f"❌ Basic test failed: {e}")

        # Test 2: Tools usage
        print("\n2 Tools Usage Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()
            response = llm.generate(
                query="What's the weather like in San Francisco? Also calculate 15 * 7 for me.",
                model='vllm',
                tools=tools
            )
            print(f"✅ Tools response: {response.content[:100]}...")

            # Enhanced tool call detection
            tool_calls_found = False
            if hasattr(response, 'tool_calls') and response.tool_calls:
                print(f"🔧 OpenAI-style tool calls detected: {len(response.tool_calls)} calls")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f" Tool {i+1}: {tool_call.function.name}")
                    print(f" Arguments: {tool_call.function.arguments}")
                tool_calls_found = True

            # Check whether the response contains JSON that might be tool-like
            if not tool_calls_found:
                try:
                    import json
                    # Try to parse the content as JSON
                    content_json = json.loads(response.content)
                    if isinstance(content_json, dict):
                        print("🔧 JSON-formatted response detected (not OpenAI tool calls)")
                        print(f" Keys: {list(content_json.keys())}")
                        # Check if it looks like a tool call
                        if any(key in content_json for key in ['location', 'expression', 'function', 'name']):
                            print(" This appears to be tool-like output in JSON format")
                except json.JSONDecodeError:
                    print(" No structured tool calls or JSON found")
        except Exception as e:
            print(f"❌ Tools test failed: {e}")

        # Test 3: Thinking mode (use the vLLM model since a dedicated reasoning model doesn't exist)
        print("\n3 Thinking Mode Test (using vllm)")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response: ChatCompletionMessage = llm.generate(
                query="Solve this step by step: If I have 20 apples, eat 3, give away 5, then buy 8 more, how many do I have?",
                model='vllm',  # Use vllm instead of reasoning
                think=True
            )
            print(f"✅ Thinking response: {response.content[:100]}...")
            if hasattr(response, 'reasoning_content') and response.reasoning_content:
                print(f"🧠 Thinking content: {response.reasoning_content[:100]}...")
            else:
                print(" No explicit thinking content found")
        except Exception as e:
            print(f"❌ Thinking test failed: {e}")

        # Test 4: Streaming (simplified test)
        print("\n4 Streaming Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            print("Streaming response: ", end="")
            stream = llm.generate(
                query="Explain photosynthesis in 2 sentences",
                model='vllm',
                stream=True
            )
            content_parts = []
            try:
                for chunk_type, chunk_content in stream:
                    if chunk_type == "content":
                        content_parts.append(chunk_content)
                        print(chunk_content, end="")
                    elif chunk_type == "thinking":
                        print(f"\033[94m{chunk_content}\033[0m", end="")  # Blue for thinking
                print(f"\n✅ Streaming completed - Content: {len(content_parts)} chunks")
            except Exception as stream_error:
                print(f"\n❌ Stream processing failed: {stream_error}")
        except Exception as e:
            print(f"❌ Streaming test failed: {e}")

        # Test 5: Structured output (JSON mode)
        print("\n5 Structured Output Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = llm.generate(
                query="""Create a simple math problem solution in JSON format with this structure:
                {
                    "problem": "the math problem",
                    "steps": ["step 1", "step 2", "step 3"],
                    "answer": "final answer"
                }
                Problem: What is 12 * 8 + 15?""",
                model='vllm',
                format=MathSolution
            )
            print(f"✅ Structured response: {response.content[:150]}...")

            # Try to parse as JSON to verify the structure
            try:
                import json
                parsed = json.loads(response.content)
                print(f"🎯 Valid JSON with keys: {list(parsed.keys())}")
            except json.JSONDecodeError:
                print(" Response is not valid JSON")
        except Exception as e:
            print(f"❌ Structured output test failed: {e}")

        # Test 6: Async generation
        print("\n6 Async Generation Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = await llm.async_generate(
                query="What's the capital of France? Answer briefly.",
                model='vllm'
            )
            print(f"✅ Async response: {response.content[:100]}...")
        except Exception as e:
            print(f"❌ Async test failed: {e}")

        # Test 7: Multiple tools with vllm (enhanced debugging)
        print("\n7 Complex Integration Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()  # Get all registered tools
            response = llm.generate(
                query="I need to multiply 12 by 11",
                model='vllm',
                tools=tools,
                think=True
            )
            print(f"✅ Complex response: {response.content[:100]}...")

            # Enhanced checking for both thinking and tool usage
            has_thinking = hasattr(response, 'reasoning_content') and response.reasoning_content
            has_tool_calls = hasattr(response, 'tool_calls') and response.tool_calls
            print(f"🧠 Has thinking: {has_thinking}")
            if has_thinking:
                print(f" Thinking content: {response.reasoning_content[:50]}...")
            print(f"🔧 Has OpenAI tool calls: {has_tool_calls}")
            if has_tool_calls:
                print(f" Tool calls count: {len(response.tool_calls)}")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f" Tool {i+1}: {tool_call.function.name}")

            # Check for JSON-style tool responses
            try:
                import json
                content_json = json.loads(response.content)
                if isinstance(content_json, dict) and any(key in content_json for key in ['expression', 'calculation', 'result']):
                    print("🔧 JSON-style tool response detected:")
                    print(f" Content: {content_json}")
            except json.JSONDecodeError:
                pass
        except Exception as e:
            print(f"❌ Complex test failed: {e}")

        # New Test 8: Tool Call Format Analysis
        print("\n8 Tool Call Format Analysis")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()

            # Test with an explicit tool instruction
            response = llm.generate(
                query="Use the calculate tool to compute 25 * 4. Make sure to call the function.",
                model='vllm',
                tools=tools
            )
            print(f"Response content: {response.content}")
            print(f"Response type: {type(response)}")
            print(f"Has tool_calls attribute: {hasattr(response, 'tool_calls')}")
            if hasattr(response, 'tool_calls') and response.tool_calls:
                print(f"Tool calls count: {len(response.tool_calls)}")
                print(f"Tool calls type: {type(response.tool_calls)}")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f"Tool {i+1}:")
                    print(f" ID: {tool_call.id}")
                    print(f" Type: {tool_call.type}")
                    print(f" Function name: {tool_call.function.name}")
                    print(f" Function arguments: {tool_call.function.arguments}")
            else:
                print("No tool calls found")
        except Exception as e:
            print(f"❌ Tool format analysis failed: {e}")

        # New Test 9: vLLM Tool Response Conversion Test
        print("\n9 vLLM Tool Response Conversion Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()  # Get all registered tools

            # Test multiple tool scenarios
            test_cases = [
                "Calculate 15 * 7 using the calculate tool",
                "Get weather for New York using the weather tool",
                "Use both tools: calculate 20 + 5 and get weather for London"
            ]
            for i, test_query in enumerate(test_cases, 1):
                print(f"\n Test {i}: {test_query}")
                response = llm.generate(
                    query=test_query,
                    model='vllm',
                    tools=tools
                )
                print(f" Response: {response.content[:60]}...")
                if hasattr(response, 'tool_calls') and response.tool_calls:
                    print(f" ✅ Converted to {len(response.tool_calls)} tool call(s)")
                    for j, tool_call in enumerate(response.tool_calls):
                        print(f" Tool {j+1}: {tool_call.function.name}")
                else:
                    print(" No tool calls detected")
        except Exception as e:
            print(f"❌ vLLM conversion test failed: {e}")

        print("\n" + "=" * 50)
        print("🏁 Test suite completed!")

    # Helper function for non-async testing
    def translate_to_spanish(text):
        llm = LLM()
        prompt = f"Translate the following text to Spanish:\n\n{text}"
        response = llm.generate(query=prompt, model='vllm')
        return response.content

    # Run the test suite
    print("Starting comprehensive test suite...")
    asyncio.run(run_tests())

    # Quick translation test
    print("\n🌍 Translation Test:")
    spanish_text = translate_to_spanish("Hello, how are you today?")
    print(f"Spanish translation: {spanish_text}")

@@ -24,39 +24,82 @@ def _pytype_to_jsonschema(t):
     }
     return mapping.get(t, {"type": "string"})

-# --- docstring parser (Google style) ---
+# --- docstring parser (Google style) - FIXED VERSION ---
 def _parse_google_docstring(docstring: str):
     if not docstring:
         return {"description": "", "params": {}}
     lines = [ln.rstrip() for ln in docstring.splitlines()]
+
+    # Find where Args/Arguments section starts
+    args_start = None
+    for i, line in enumerate(lines):
+        if line.strip().lower() in ("args:", "arguments:"):
+            args_start = i
+            break
+
+    # Find where Args section ends (Returns:, Raises:, or another section)
+    args_end = len(lines)
+    if args_start is not None:
+        for i in range(args_start + 1, len(lines)):
+            line = lines[i].strip().lower()
+            if line.endswith(':') and line.rstrip(':') in ('returns', 'return', 'raises', 'raise', 'yields', 'yield', 'examples', 'example', 'notes', 'note'):
+                args_end = i
+                break
+
+    # Build description from everything EXCEPT the Args section content
     desc_lines = []
-    i = 0
-    while i < len(lines) and not lines[i].lower().startswith(("args:", "arguments:")):
-        if lines[i].strip():
-            desc_lines.append(lines[i].strip())
-        i += 1
+    # Before Args
+    if args_start is not None:
+        for i in range(args_start):
+            if lines[i].strip():
+                desc_lines.append(lines[i].strip())
+    else:
+        # No Args section, include everything
+        for line in lines:
+            if line.strip():
+                desc_lines.append(line.strip())
+
+    # After Args section (Returns, examples, etc.)
+    if args_start is not None and args_end < len(lines):
+        for i in range(args_end, len(lines)):
+            if lines[i].strip():
+                desc_lines.append(lines[i].strip())
+
     description = " ".join(desc_lines).strip()

+    # Parse parameters from Args section
     params = {}
-    if i < len(lines):
-        i += 1
-        while i < len(lines):
+    if args_start is not None:
+        i = args_start + 1
+        while i < args_end:
             line = lines[i].strip()
             if not line:
                 i += 1
                 continue
+            # Match parameter line: "param_name (type): description" or "param_name: description"
             m = re.match(r'^(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.*)$', line)
             if m:
                 name = m.group(1)
                 desc = m.group(3)
+                # Collect continuation lines for this parameter
                 j = i + 1
-                while j < len(lines) and not re.match(r'^\w+\s*(?:\([^)]+\))?\s*:', lines[j].strip()):
-                    if lines[j].strip():
-                        desc += " " + lines[j].strip()
+                while j < args_end:
+                    next_line = lines[j].strip()
+                    # Check if it's a new parameter or empty
+                    if not next_line or re.match(r'^\w+\s*(?:\([^)]+\))?\s*:', next_line):
+                        break
+                    desc += " " + next_line
                     j += 1
                 params[name] = {"description": desc.strip(), "type": m.group(2)}
                 i = j
                 continue
             i += 1
     return {"description": description, "params": params}
 # --- helper: make OpenAI-style function spec ---

@@ -109,9 +152,26 @@ def register_tool(func: Callable = None, *, name: str = None, description: str =
     return _register(func)

 # --- what to send to model ---
-def get_tools() -> List[dict]:
+def get_tools(specific_tools: list[str] = False, exclude_tools: list[str] = False) -> List[dict]:
     """Return OpenAI-compatible functions list with proper 'function' wrapper."""
-    return [entry["schema"] for entry in TOOL_REGISTRY.values()]
+    assert not (specific_tools and exclude_tools), "Cannot specify both specific_tools and exclude_tools"
+    if isinstance(specific_tools, str):
+        specific_tools = [specific_tools]
+    if specific_tools:
+        # Return named tools only
+        result = []
+        for t in specific_tools:
+            entry = TOOL_REGISTRY.get(t)
+            if entry:
+                result.append(entry["schema"])
+    elif exclude_tools:
+        all_tools = [entry["schema"] for entry in TOOL_REGISTRY.values()]
+        result = [t for t in all_tools if t["function"]["name"] not in exclude_tools]
+    else:
+        # Return all registered tools
+        result = [entry["schema"] for entry in TOOL_REGISTRY.values()]
+    return result
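The new filters can be exercised like this (calculate_tool is the tool registered in the tests; passing both arguments trips the assertion above):

    get_tools()                                  # every registered schema
    get_tools(specific_tools="calculate_tool")   # a bare string is wrapped into a list
    get_tools(exclude_tools=["calculate_tool"])  # everything except the named tools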
 # --- robust parser for arguments ---
 def parse_function_call_arguments(raw) -> dict:
@@ -173,4 +233,4 @@ def execute_tool(name: str, args: dict):
         else:
             kwargs[pname] = val
     result = fn(**kwargs)
-    return result
+    return result
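End to end, and assuming the registry keys tools by function name by default (the name= override on register_tool suggests the function's own name is the fallback), the round trip for the test tool would look like:

    from _llm._llm.tool_registry import execute_tool

    # calculate_tool as registered in tests_for_llm.py above;
    # execute_tool builds kwargs from args and calls the function.
    execute_tool("calculate_tool", {"number": 12, "multiply_factor": 11})  # expected: 132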