From 4d71275f124a2bb38836e4a3637e2c72a463b248 Mon Sep 17 00:00:00 2001
From: 0x01FE
Date: Tue, 30 Sep 2025 12:14:23 -0500
Subject: [PATCH] working on tool output

---
 README.md        |  7 +++++++
 requirements.txt |  2 ++
 textgen/llm.py   | 40 +++++++++++++++++++++++++++++++++++++++---
 3 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fccf79c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+
+# LLM Install
+
+## NVIDIA
+
+`CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir`
+
diff --git a/requirements.txt b/requirements.txt
index dc09bdb..3d2a460 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,5 @@ requests-cache
 retry-requests
 numpy
 pandas
+
+llama-cpp-python
diff --git a/textgen/llm.py b/textgen/llm.py
index 291f6f7..43dbd48 100644
--- a/textgen/llm.py
+++ b/textgen/llm.py
@@ -2,12 +2,18 @@ import llama_cpp
 import json
 
-tools = json.loads(open('tools.json', 'r').read())['tools']
+from . import tool_funcs
+
+tools: list[dict] = json.loads(open('tools.json', 'r').read())['tools']
 
 
 class TextGen:
 
     llm: llama_cpp.Llama
 
+    messages: list[dict] = [
+        {"role": "system", "content": "You are a helpful assistant that can use tools. When a function is called, return the results to the user."}
+    ]
+
     def __init__(self, model_path: str, n_ctx: int, n_gpu_layers: int):
         # 1. Instantiate the Llama model
         # Provide the path to your downloaded .gguf file
@@ -17,7 +23,8 @@ class TextGen:
-            model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf", # Path to your GGUF model
+            model_path=model_path, # Use the GGUF path passed in by the caller
             n_ctx=n_ctx, # Context window size
             n_gpu_layers=n_gpu_layers, # Offload all layers to GPU. Set to 0 if no GPU.
-            verbose=False # Suppress verbose output
+            verbose=False, # Suppress verbose output
+            chat_format='chatml-function-calling' # Chat handler that understands the tools argument
         )
 
     def generate(self, prompt: str) -> str:
@@ -38,3 +45,30 @@ class TextGen:
         print(text)
 
         return text
+
+    def chat_completion(self, user_message: str) -> str:
+        # Record the user's turn in the running conversation.
+        self.messages.append({
+            "role": "user",
+            "content": user_message
+        })
+
+        # Let the model answer directly or request a tool call.
+        response = self.llm.create_chat_completion(
+            messages=self.messages,
+            tools=tools,
+            tool_choice='auto'
+        )
+
+        # No tool requested: return the plain text reply.
+        tool_call = response['choices'][0]['message'].get('tool_calls')
+        if not tool_call:
+            return response['choices'][0]['message']['content']
+
+        call_info = tool_call[0]['function']
+        function_name = call_info['name']
+
+        print(f'Assistant decided to call {function_name}')
+
+        tool_output = tool_funcs.get_high_low()
+
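
Note: as committed, `chat_completion` stops right after `tool_funcs.get_high_low()` runs; the tool output is never returned and never shown to the model, so the method falls through and returns `None`. The sketch below is one way the missing tail could look, not the author's plan. It assumes the OpenAI-style response schema that llama-cpp-python emits for function-calling chat handlers (where `function.arguments` is a JSON string and each call carries an `id`), that every tool named in tools.json exists as a same-named function in `tool_funcs`, and that the handler accepts an OpenAI-style `"tool"` role message for results; `finish_tool_call` itself is a hypothetical helper name.

```python
import json

import llama_cpp

from . import tool_funcs  # same module the patch imports


def finish_tool_call(llm: llama_cpp.Llama, messages: list[dict], response: dict) -> str:
    """Run the requested tool, then let the model phrase the final answer.

    `response` is the dict returned by llm.create_chat_completion(...).
    """
    message = response['choices'][0]['message']
    tool_calls = message.get('tool_calls')
    if not tool_calls:
        # The model answered directly; no tool involved.
        return message['content']

    call = tool_calls[0]
    function_name = call['function']['name']
    # In the OpenAI-style schema, 'arguments' arrives as a JSON string.
    arguments = json.loads(call['function']['arguments'] or '{}')

    # Dispatch by name rather than hard-coding get_high_low(); assumes a
    # matching function exists in tool_funcs for every tool in tools.json.
    tool_output = getattr(tool_funcs, function_name)(**arguments)

    # Feed the assistant's call and the tool result back into the history,
    # then ask the model for a user-facing reply.
    messages.append(message)
    messages.append({
        "role": "tool",
        "tool_call_id": call['id'],
        "content": json.dumps(tool_output),
    })
    final = llm.create_chat_completion(messages=messages)
    return final['choices'][0]['message']['content']
```

Two smaller points the patch leaves open. `messages` is a class attribute, so every `TextGen` instance shares, and keeps appending to, the same history; initialising it inside `__init__` gives each instance its own list. Also, llama-cpp-python implements `tools`/`tool_choice` only for function-calling chat handlers such as `chatml-function-calling` and the functionary formats; a plain `chatml` handler will not produce `tool_calls`.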