Compare commits


5 Commits
legacy ... main

Files changed (8; counts are changed lines):
  __init__.py (5)
  _llm/__init__.py (7)
  _llm/llm.py (1491)
  _llm/notes.md (2)
  _llm/ollama-cloud-test.py (13)
  _llm/tests_for_llm.py (492)
  _llm/tool_registry.py (236)
  llm_client.py (4)

__init__.py
@@ -2,6 +2,7 @@
 llm_client: A Python package for interacting with LLM models through Ollama.
 """
-from _llm._llm.llm import LLM
+from _llm._llm.llm import LLM, ChatCompletionMessage
+from _llm._llm.tool_registry import register_tool, get_tools
-__all__ = ["LLM"]
+__all__ = ["LLM", "register_tool", "get_tools", "ChatCompletionMessage"]

_llm/__init__.py
@@ -1,7 +1,6 @@
 # ...existing code...
 # Export the LLM class so "from _llm import LLM" works.
 from .llm import LLM  # re-export the class from the module
+from .tool_registry import register_tool, get_tools
 # Define public API
-__all__ = ["LLM"]
+__all__ = ["LLM", "register_tool", "get_tools"]
 # ...existing code...

_llm/llm.py: file diff suppressed because it is too large.

_llm/notes.md
@@ -0,0 +1,2 @@
Implement this for selecting tools? https://vllm-semantic-router.com
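The note above gestures at semantic routing for tool selection. As a rough illustration of the idea only (this is not the vllm-semantic-router API, and a real router would score the query against tool descriptions with an embedding model), a toy keyword-overlap selector over the tools registered in this repo might look like this; select_tools and top_k are illustrative names, not existing repo API:

# Hypothetical sketch: pick the registered tools most relevant to a query by
# crude word overlap with each tool's name/description. Placeholder for a real
# semantic router, not the vllm-semantic-router implementation.
from _llm._llm.tool_registry import get_tools

def select_tools(query: str, top_k: int = 3) -> list[dict]:
    query_words = set(query.lower().split())
    scored = []
    for spec in get_tools():
        fn = spec["function"]
        tool_words = set((fn["name"] + " " + fn["description"]).lower().split())
        scored.append((len(query_words & tool_words), spec))
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [spec for score, spec in scored[:top_k] if score > 0]

# Example: pass only the matching subset to the model instead of every registered tool.
# tools = select_tools("multiply 12 by 11")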

_llm/ollama-cloud-test.py
@@ -0,0 +1,13 @@
from ollama import Client

client = Client()

messages = [
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
]

for part in client.chat('gpt-oss:120b-cloud', messages=messages, stream=True):
    print(part['message']['content'], end='', flush=True)
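For comparison, the same request without streaming returns the whole reply in one response object; a minimal sketch assuming the same dict-style access the streaming loop above uses, with the same model name:

from ollama import Client

client = Client()
# Non-streaming: the complete message is returned at once instead of chunk by chunk.
response = client.chat('gpt-oss:120b-cloud', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
print(response['message']['content'])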

_llm/tests_for_llm.py
@@ -0,0 +1,492 @@
# ------------------- TESTS ---------------------------------------------------------
if __name__ == "__main__":
    import asyncio
    from pydantic import BaseModel
    from typing import List

    from _llm._llm.tool_registry import register_tool, get_tools, execute_tool
    from _llm import LLM, ChatCompletionMessage

    # Define structured output models
    class CalculationStep(BaseModel):
        step_number: int
        description: str
        calculation: str
        result: float

    class NameResponse(BaseModel):
        name: str
        age: int
        occupation: str
        hobbies: List[str]

    class MathSolution(BaseModel):
        steps: List[CalculationStep]
        final_answer: float
        explanation: str

    llm = LLM()
    response = llm.generate(
        query="""Create a simple math problem solution in JSON format with this structure:
        {
            "problem": "the math problem",
            "steps": ["step 1", "step 2", "step 3"],
            "answer": "final answer"
        }
        Problem: What is 12 * 8 + 15?""",
        model='vllm',
        format=MathSolution
    )
    print(response.content.steps)
    exit()  # NOTE: this early exit skips everything below, including the full test suite

    llm = LLM(silent=False, chat=False)  # Don't persist chat history
    response = llm.generate("Hello! Can you introduce yourself briefly?", model='vllm', format=NameResponse)
    print(response.__dict__)

    response = llm.generate("What's the weather like in San Francisco? Also calculate 15 * 7 for me.", model='vllm')
    print(response.__dict__)
    # Define a tool for calculations
    @register_tool
    def calculate_tool(number: int, multiply_factor: int) -> int:
        '''Multiply a number by a factor
        Args:
            number (int): The number to be multiplied
            multiply_factor (int): The factor to multiply by
        Returns:
            int: The result of the multiplication
        '''
        return number * multiply_factor
    async def run_tests():
        print("🧪 Testing LLM class with vLLM model")
        print("=" * 50)

        # Initialize LLM instance - use fresh instance for each test
        def get_fresh_llm():
            return LLM(silent=False, chat=False)  # Don't persist chat history

        # Test 1: Basic vLLM generation
        print("\n1 Basic vLLM Generation Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = llm.generate(
                query="Hello! Can you introduce yourself briefly?",
                model='vllm'
            )
            print(f"✅ Basic response: {response.content[:100]}...")
        except Exception as e:
            print(f"❌ Basic test failed: {e}")

        # Test 2: Tools usage
        print("\n2 Tools Usage Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()
            response = llm.generate(
                query="What's the weather like in San Francisco? Also calculate 15 * 7 for me.",
                model='vllm',
                tools=tools
            )
            print(f"✅ Tools response: {response.content[:100]}...")
            # Enhanced tool call detection
            tool_calls_found = False
            if hasattr(response, 'tool_calls') and response.tool_calls:
                print(f"🔧 OpenAI-style tool calls detected: {len(response.tool_calls)} calls")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f" Tool {i+1}: {tool_call.function.name}")
                    print(f" Arguments: {tool_call.function.arguments}")
                tool_calls_found = True
            # Check if response contains JSON that might be tool-like
            if not tool_calls_found:
                try:
                    import json
                    # Try to parse the content as JSON
                    content_json = json.loads(response.content)
                    if isinstance(content_json, dict):
                        print("🔧 JSON-formatted response detected (not OpenAI tool calls)")
                        print(f" Keys: {list(content_json.keys())}")
                        # Check if it looks like a tool call
                        if any(key in content_json for key in ['location', 'expression', 'function', 'name']):
                            print(" This appears to be tool-like output in JSON format")
                except json.JSONDecodeError:
                    print(" No structured tool calls or JSON found")
        except Exception as e:
            print(f"❌ Tools test failed: {e}")

        # Test 3: Thinking mode (use vLLM model since reasoning model doesn't exist)
        print("\n3 Thinking Mode Test (using vllm)")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response: ChatCompletionMessage = llm.generate(
                query="Solve this step by step: If I have 20 apples, eat 3, give away 5, then buy 8 more, how many do I have?",
                model='vllm',  # Use vllm instead of reasoning
                think=True
            )
            print(f"✅ Thinking response: {response.content[:100]}...")
            if hasattr(response, 'reasoning_content') and response.reasoning_content:
                print(f"🧠 Thinking content: {response.reasoning_content[:100]}...")
            else:
                print(" No explicit thinking content found")
        except Exception as e:
            print(f"❌ Thinking test failed: {e}")
        # Test 4: Streaming (simplified test)
        print("\n4 Streaming Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            print("Streaming response: ", end="")
            stream = llm.generate(
                query="Explain photosynthesis in 2 sentences",
                model='vllm',
                stream=True
            )
            content_parts = []
            try:
                for chunk_type, chunk_content in stream:
                    if chunk_type == "content":
                        content_parts.append(chunk_content)
                        print(chunk_content, end="")
                    elif chunk_type == "thinking":
                        print(f"\033[94m{chunk_content}\033[0m", end="")  # Blue for thinking
                print(f"\n✅ Streaming completed - Content: {len(content_parts)} chunks")
            except Exception as stream_error:
                print(f"\n❌ Stream processing failed: {stream_error}")
        except Exception as e:
            print(f"❌ Streaming test failed: {e}")

        # Test 5: Structured output (JSON mode)
        print("\n5 Structured Output Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = llm.generate(
                query="""Create a simple math problem solution in JSON format with this structure:
                {
                    "problem": "the math problem",
                    "steps": ["step 1", "step 2", "step 3"],
                    "answer": "final answer"
                }
                Problem: What is 12 * 8 + 15?""",
                model='vllm',
                format=MathSolution
            )
            print(f"✅ Structured response: {response.content[:150]}...")
            # Try to parse as JSON to verify structure
            try:
                import json
                parsed = json.loads(response.content)
                print(f"🎯 Valid JSON with keys: {list(parsed.keys())}")
            except json.JSONDecodeError:
                print(" Response is not valid JSON")
        except Exception as e:
            print(f"❌ Structured output test failed: {e}")
        # Test 6: Async generation
        print("\n6 Async Generation Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            response = await llm.async_generate(
                query="What's the capital of France? Answer briefly.",
                model='vllm'
            )
            print(f"✅ Async response: {response.content[:100]}...")
        except Exception as e:
            print(f"❌ Async test failed: {e}")

        # Test 7: Multiple tools with vllm (enhanced debugging)
        print("\n7 Complex Integration Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()  # Get all registered tools
            response = llm.generate(
                query="I need to multiply 12 by 11",
                model='vllm',
                tools=tools,
                think=True
            )
            print(f"✅ Complex response: {response.content[:100]}...")
            # Enhanced checking for both thinking and tool usage
            has_thinking = hasattr(response, 'reasoning_content') and response.reasoning_content
            has_tool_calls = hasattr(response, 'tool_calls') and response.tool_calls
            print(f"🧠 Has thinking: {has_thinking}")
            if has_thinking:
                print(f" Thinking content: {response.reasoning_content[:50]}...")
            print(f"🔧 Has OpenAI tool calls: {has_tool_calls}")
            if has_tool_calls:
                print(f" Tool calls count: {len(response.tool_calls)}")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f" Tool {i+1}: {tool_call.function.name}")
            # Check for JSON-style tool responses
            try:
                import json
                content_json = json.loads(response.content)
                if isinstance(content_json, dict) and any(key in content_json for key in ['expression', 'calculation', 'result']):
                    print("🔧 JSON-style tool response detected:")
                    print(f" Content: {content_json}")
            except json.JSONDecodeError:
                pass
        except Exception as e:
            print(f"❌ Complex test failed: {e}")
        # New Test 8: Tool Call Format Analysis
        print("\n8 Tool Call Format Analysis")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()
            # Test with explicit tool instruction
            response = llm.generate(
                query="Use the calculate tool to compute 25 * 4. Make sure to call the function.",
                model='vllm',
                tools=tools
            )
            print(f"Response content: {response.content}")
            print(f"Response type: {type(response)}")
            print(f"Has tool_calls attribute: {hasattr(response, 'tool_calls')}")
            if hasattr(response, 'tool_calls') and response.tool_calls:
                print(f"Tool calls count: {len(response.tool_calls)}")
                print(f"Tool calls type: {type(response.tool_calls)}")
                for i, tool_call in enumerate(response.tool_calls):
                    print(f"Tool {i+1}:")
                    print(f" ID: {tool_call.id}")
                    print(f" Type: {tool_call.type}")
                    print(f" Function name: {tool_call.function.name}")
                    print(f" Function arguments: {tool_call.function.arguments}")
            else:
                print("No tool calls found")
        except Exception as e:
            print(f"❌ Tool format analysis failed: {e}")

        # New Test 9: vLLM Tool Response Conversion Test
        print("\n9 vLLM Tool Response Conversion Test")
        print("-" * 30)
        try:
            llm = get_fresh_llm()
            tools = get_tools()  # Get all registered tools
            # Test multiple tool scenarios
            test_cases = [
                "Calculate 15 * 7 using the calculate tool",
                "Get weather for New York using the weather tool",
                "Use both tools: calculate 20 + 5 and get weather for London"
            ]
            for i, test_query in enumerate(test_cases, 1):
                print(f"\n Test {i}: {test_query}")
                response = llm.generate(
                    query=test_query,
                    model='vllm',
                    tools=tools
                )
                print(f" Response: {response.content[:60]}...")
                if hasattr(response, 'tool_calls') and response.tool_calls:
                    print(f" ✅ Converted to {len(response.tool_calls)} tool call(s)")
                    for j, tool_call in enumerate(response.tool_calls):
                        print(f" Tool {j+1}: {tool_call.function.name}")
                else:
                    print(" No tool calls detected")
        except Exception as e:
            print(f"❌ vLLM conversion test failed: {e}")

        print("\n" + "=" * 50)
        print("🏁 Test suite completed!")
    # Helper function for non-async testing
    def translate_to_spanish(text):
        llm = LLM()
        prompt = f"Translate the following text to Spanish:\n\n{text}"
        response = llm.generate(query=prompt, model='vllm')
        return response.content

    # Run the test suite
    print("Starting comprehensive test suite...")
    asyncio.run(run_tests())

    # Quick translation test
    print("\n🌍 Translation Test:")
    spanish_text = translate_to_spanish("Hello, how are you today?")
    print(f"Spanish translation: {spanish_text}")
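The suite above imports execute_tool but never calls it. A sketch of closing the loop, assuming LLM.generate accepts tools=... and returns an object with .tool_calls exactly as the tests above do (the 'vllm' model name is also taken from those tests, not guaranteed here):

# Sketch only: feed detected tool calls back through the registry's executor.
from _llm import LLM, get_tools
from _llm._llm.tool_registry import execute_tool, parse_function_call_arguments

llm = LLM(silent=False, chat=False)
response = llm.generate(
    query="Use the calculate tool to multiply 12 by 11.",
    model='vllm',
    tools=get_tools()
)
for call in (getattr(response, 'tool_calls', None) or []):
    args = parse_function_call_arguments(call.function.arguments)
    print(f"{call.function.name}({args}) -> {execute_tool(call.function.name, args)}")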

_llm/tool_registry.py
@@ -0,0 +1,236 @@
import inspect, json, re, ast
from typing import Callable, Dict, Any, List, get_origin, get_args
from pydantic import BaseModel

TOOL_REGISTRY: Dict[str, Dict[str, Any]] = {}

# --- type mapping ---
def _pytype_to_jsonschema(t):
    origin = get_origin(t)
    if origin is list or origin is List:
        args = get_args(t)
        item_type = args[0] if args else str
        return {"type": "array", "items": _pytype_to_jsonschema(item_type)}
    if inspect.isclass(t) and issubclass(t, BaseModel):
        sch = t.schema()
        return {"type": "object", **sch}
    mapping = {
        str: {"type": "string"},
        int: {"type": "integer"},
        float: {"type": "number"},
        bool: {"type": "boolean"},
        dict: {"type": "object"},
        list: {"type": "array", "items": {"type": "string"}},
    }
    return mapping.get(t, {"type": "string"})
# --- docstring parser (Google style) - FIXED VERSION ---
def _parse_google_docstring(docstring: str):
    if not docstring:
        return {"description": "", "params": {}}
    lines = [ln.rstrip() for ln in docstring.splitlines()]
    # Find where Args/Arguments section starts
    args_start = None
    for i, line in enumerate(lines):
        if line.strip().lower() in ("args:", "arguments:"):
            args_start = i
            break
    # Find where Args section ends (Returns:, Raises:, or another section)
    args_end = len(lines)
    if args_start is not None:
        for i in range(args_start + 1, len(lines)):
            line = lines[i].strip().lower()
            if line.endswith(':') and line.rstrip(':') in ('returns', 'return', 'raises', 'raise', 'yields', 'yield', 'examples', 'example', 'notes', 'note'):
                args_end = i
                break
    # Build description from everything EXCEPT the Args section content
    desc_lines = []
    # Before Args
    if args_start is not None:
        for i in range(args_start):
            if lines[i].strip():
                desc_lines.append(lines[i].strip())
    else:
        # No Args section, include everything
        for line in lines:
            if line.strip():
                desc_lines.append(line.strip())
    # After Args section (Returns, examples, etc.)
    if args_start is not None and args_end < len(lines):
        for i in range(args_end, len(lines)):
            if lines[i].strip():
                desc_lines.append(lines[i].strip())
    description = " ".join(desc_lines).strip()
    # Parse parameters from Args section
    params = {}
    if args_start is not None:
        i = args_start + 1
        while i < args_end:
            line = lines[i].strip()
            if not line:
                i += 1
                continue
            # Match parameter line: "param_name (type): description" or "param_name: description"
            m = re.match(r'^(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.*)$', line)
            if m:
                name = m.group(1)
                desc = m.group(3)
                # Collect continuation lines for this parameter
                j = i + 1
                while j < args_end:
                    next_line = lines[j].strip()
                    # Check if it's a new parameter or empty
                    if not next_line or re.match(r'^\w+\s*(?:\([^)]+\))?\s*:', next_line):
                        break
                    desc += " " + next_line
                    j += 1
                params[name] = {"description": desc.strip(), "type": m.group(2)}
                i = j
                continue
            i += 1
    return {"description": description, "params": params}
# --- helper: make OpenAI-style function spec ---
def _wrap_openai_function_schema(name: str, description: str, parameters: dict):
    """Create OpenAI function calling format with 'function' wrapper"""
    params = parameters.copy()
    if params.get("type") != "object":
        params = {"type": "object", "properties": params.get("properties", params), "required": params.get("required", [])}
    params.setdefault("additionalProperties", False)
    # Return in OpenAI function calling format with 'function' wrapper
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": params
        }
    }
# --- decorator to register tools ---
def register_tool(func: Callable = None, *, name: str = None, description: str = None, schema: dict = None):
    def _register(f):
        fname = name or f.__name__
        doc = _parse_google_docstring(f.__doc__)
        func_description = description or doc["description"] or ""
        if schema is not None:
            func_schema = schema
        else:
            sig = inspect.signature(f)
            props = {}
            required = []
            for param_name, param in sig.parameters.items():
                ann = param.annotation if param.annotation is not inspect._empty else str
                prop_schema = _pytype_to_jsonschema(ann)
                if param_name in doc["params"]:
                    prop_schema["description"] = doc["params"][param_name]["description"]
                props[param_name] = prop_schema
                if param.default is inspect._empty:
                    required.append(param_name)
            func_schema = {"type": "object", "properties": props, "required": required, "additionalProperties": False}
        TOOL_REGISTRY[fname] = {
            "callable": f,
            "schema": _wrap_openai_function_schema(fname, func_description, func_schema)
        }
        return f
    if func is None:
        return _register
    else:
        return _register(func)
# --- what to send to model ---
def get_tools(specific_tools: list[str] | str | None = None, exclude_tools: list[str] | None = None) -> List[dict]:
    """Return OpenAI-compatible functions list with proper 'function' wrapper."""
    assert not (specific_tools and exclude_tools), "Cannot specify both specific_tools and exclude_tools"
    if isinstance(specific_tools, str):
        specific_tools = [specific_tools]
    if specific_tools:
        # Return named tools only
        result = []
        for t in specific_tools:
            entry = TOOL_REGISTRY.get(t)
            if entry:
                result.append(entry["schema"])
    elif exclude_tools:
        all_tools = [entry["schema"] for entry in TOOL_REGISTRY.values()]
        result = [t for t in all_tools if t["function"]["name"] not in exclude_tools]
    else:
        # Return all registered tools
        result = [entry["schema"] for entry in TOOL_REGISTRY.values()]
    return result
# --- robust parser for arguments ---
def parse_function_call_arguments(raw) -> dict:
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        return {"_raw_unexpected": str(type(raw)), "value": raw}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass
    try:
        return ast.literal_eval(raw)
    except Exception:
        pass
    stripped = raw.strip()
    if re.match(r'^(SELECT|WITH)\b', stripped, flags=re.IGNORECASE):
        return {"sql_query": stripped}
    m = re.search(r'\{.*\}', raw, flags=re.DOTALL)
    if m:
        candidate = m.group(0)
        try:
            return json.loads(candidate)
        except Exception:
            try:
                return ast.literal_eval(candidate)
            except Exception:
                pass
    return {"_raw": raw}
# --- safe executor ---
def execute_tool(name: str, args: dict):
    """
    Execute registered callable with args (basic validation).
    Returns Python object (dict/list/str).
    """
    entry = TOOL_REGISTRY.get(name)
    if not entry:
        raise RuntimeError(f"Function {name} not registered")
    fn = entry["callable"]
    # simple SQL safety example: if function expects sql_query ensure SELECT
    if "sql_query" in args:
        q = args["sql_query"].strip()
        if not re.match(r'^(SELECT|WITH)\b', q, flags=re.IGNORECASE):
            raise ValueError("Only SELECT/WITH queries allowed in sql_query")
        if q.endswith(";"):
            args["sql_query"] = q[:-1]
    # Prepare kwargs with minimal type coercion
    sig = inspect.signature(fn)
    kwargs = {}
    for pname, param in sig.parameters.items():
        if pname not in args:
            continue
        val = args[pname]
        ann = param.annotation if param.annotation is not inspect._empty else None
        origin = get_origin(ann)
        if origin in (list, List) and isinstance(val, str):
            kwargs[pname] = [x.strip() for x in val.split(",") if x.strip() != ""]
        else:
            kwargs[pname] = val
    result = fn(**kwargs)
    return result
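As a quick sanity check of the registry itself, independent of any model, one might register a small function and inspect the generated spec and executor directly; add_numbers below is purely illustrative and not part of the diff:

# Illustrative usage of the registry: register a function, look at the generated
# OpenAI-style spec, then run a model-produced argument string through the executor.
from _llm._llm.tool_registry import register_tool, get_tools, execute_tool, parse_function_call_arguments

@register_tool
def add_numbers(a: int, b: int) -> int:
    '''Add two integers
    Args:
        a (int): First term
        b (int): Second term
    Returns:
        int: The sum
    '''
    return a + b

spec = get_tools(specific_tools="add_numbers")[0]
print(spec["function"]["name"], spec["function"]["parameters"]["required"])  # add_numbers ['a', 'b']

# parse_function_call_arguments accepts JSON or Python-literal strings from the model.
args = parse_function_call_arguments('{"a": 2, "b": 3}')
print(execute_tool("add_numbers", args))  # 5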

llm_client.py
@@ -2,6 +2,6 @@
 llm_client package entry point to simplify imports
 """
-from _llm import LLM
+from _llm import LLM, register_tool, get_tools
-__all__ = ["LLM"]
+__all__ = ["LLM", "register_tool", "get_tools"]