From 4d71275f124a2bb38836e4a3637e2c72a463b248 Mon Sep 17 00:00:00 2001
From: 0x01FE
Date: Tue, 30 Sep 2025 12:14:23 -0500
Subject: [PATCH] working on tool output

---
 README.md        |  7 +++++++
 requirements.txt |  2 ++
 textgen/llm.py   | 40 +++++++++++++++++++++++++++++++++++++++---
 3 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fccf79c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+
+# LLM Install
+
+## NVIDIA
+
+`CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir`
+
diff --git a/requirements.txt b/requirements.txt
index dc09bdb..3d2a460 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,5 @@ requests-cache
 retry-requests
 numpy
 pandas
+
+llama-cpp-python
diff --git a/textgen/llm.py b/textgen/llm.py
index 291f6f7..43dbd48 100644
--- a/textgen/llm.py
+++ b/textgen/llm.py
@@ -2,12 +2,18 @@ import llama_cpp
 import json
 
-tools = json.loads(open('tools.json', 'r').read())['tools']
+from . import tool_funcs
+
+tools: list[dict] = json.loads(open('tools.json', 'r').read())['tools']
 
 
 class TextGen:
 
     llm: llama_cpp.Llama
 
+    messages: list[dict] = [
+        {"role": "system", "content": "You are a helpful assistant that can use tools. When a function is called, return the results to the user."}
+    ]
+
     def __init__(self, model_path: str, n_ctx: int, n_gpu_layers: int):
         # 1. Instantiate the Llama model
         # Provide the path to your downloaded .gguf file
@@ -17,7 +23,8 @@ class TextGen:
-            model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf", # Path to your GGUF model
+            model_path=model_path, # Use the GGUF path passed in by the caller
             n_ctx=n_ctx, # Context window size
             n_gpu_layers=n_gpu_layers, # Offload all layers to GPU. Set to 0 if no GPU.
-            verbose=False # Suppress verbose output
+            verbose=False, # Suppress verbose output
+            chat_format='chatml-function-calling' # Chat handler that understands the tools argument
         )
 
     def generate(self, prompt: str) -> str:
@@ -38,3 +45,30 @@ class TextGen:
         print(text)
 
         return text
+
+    def chat_completion(self, user_message: str) -> str:
+        # Record the user's turn in the running conversation.
+        self.messages.append({
+            "role": "user",
+            "content": user_message
+        })
+
+        # Let the model answer directly or request a tool call.
+        response = self.llm.create_chat_completion(
+            messages=self.messages,
+            tools=tools,
+            tool_choice='auto'
+        )
+
+        # No tool requested: return the plain text reply.
+        tool_call = response['choices'][0]['message'].get('tool_calls')
+        if not tool_call:
+            return response['choices'][0]['message']['content']
+
+        call_info = tool_call[0]['function']
+        function_name = call_info['name']
+
+        print(f'Assistant decided to call {function_name}')
+
+        tool_output = tool_funcs.get_high_low()
+
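
Note: as committed, `chat_completion` stops right after `tool_funcs.get_high_low()` runs; the tool output is never returned and never shown to the model, so the method falls through and returns `None`. The sketch below is one way the missing tail could look, not the author's plan. It assumes the OpenAI-style response schema that llama-cpp-python emits for function-calling chat handlers (where `function.arguments` is a JSON string and each call carries an `id`), that every tool named in tools.json exists as a same-named function in `tool_funcs`, and that the handler accepts an OpenAI-style `"tool"` role message for results; `finish_tool_call` itself is a hypothetical helper name.

```python
import json

import llama_cpp

from . import tool_funcs  # same module the patch imports


def finish_tool_call(llm: llama_cpp.Llama, messages: list[dict], response: dict) -> str:
    """Run the requested tool, then let the model phrase the final answer.

    `response` is the dict returned by llm.create_chat_completion(...).
    """
    message = response['choices'][0]['message']
    tool_calls = message.get('tool_calls')
    if not tool_calls:
        # The model answered directly; no tool involved.
        return message['content']

    call = tool_calls[0]
    function_name = call['function']['name']
    # In the OpenAI-style schema, 'arguments' arrives as a JSON string.
    arguments = json.loads(call['function']['arguments'] or '{}')

    # Dispatch by name rather than hard-coding get_high_low(); assumes a
    # matching function exists in tool_funcs for every tool in tools.json.
    tool_output = getattr(tool_funcs, function_name)(**arguments)

    # Feed the assistant's call and the tool result back into the history,
    # then ask the model for a user-facing reply.
    messages.append(message)
    messages.append({
        "role": "tool",
        "tool_call_id": call['id'],
        "content": json.dumps(tool_output),
    })
    final = llm.create_chat_completion(messages=messages)
    return final['choices'][0]['message']['content']
```

Two smaller points the patch leaves open. `messages` is a class attribute, so every `TextGen` instance shares, and keeps appending to, the same history; initialising it inside `__init__` gives each instance its own list. Also, llama-cpp-python implements `tools`/`tool_choice` only for function-calling chat handlers such as `chatml-function-calling` and the functionary formats; a plain `chatml` handler will not produce `tool_calls`.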