working on tool output
commit 4d71275f12
parent 4d449f5333
README.md (new file, +7 lines)
@@ -0,0 +1,7 @@
+# LLM Install
+
+## NVIDIA
+
+`CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir`
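The `LLAMA_CUBLAS` CMake flag selects the CUDA backend; recent llama-cpp-python releases renamed it to `-DGGML_CUDA=on`, so the older flag may need swapping depending on the version being installed. One way to confirm the GPU build took effect is to load a model with `verbose=True` and watch for the CUDA offload lines in the startup log, as in this sketch (the model path simply mirrors the one used later in this commit):

```python
from llama_cpp import Llama

# Minimal smoke test: if the wheel was built with CUDA, the startup log
# printed by verbose=True reports layers being offloaded to the GPU.
llm = Llama(
    model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # same GGUF file referenced below
    n_gpu_layers=-1,   # -1 asks llama.cpp to offload every layer
    verbose=True,      # keep the build/offload log visible
)
print(llm("Q: 2+2? A:", max_tokens=8)["choices"][0]["text"])
```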
@@ -7,3 +7,5 @@ requests-cache
 retry-requests
 numpy
 pandas
+
+llama-cpp-python
@@ -2,12 +2,18 @@
 import llama_cpp
 import json
 
-tools = json.loads(open('tools.json', 'r').read())['tools']
+from . import tool_funcs
+
+tools: list[dict] = json.loads(open('tools.json', 'r').read())['tools']
 
 
 class TextGen:
     llm: llama_cpp.Llama
+    messages: list[dict] = [
+        {"role": "system", "content": "You are a helpful assistant that can use tools. When a function is called, return the results to the user."}
+    ]
 
     def __init__(self, model_path: str, n_ctx: int, n_gpu_layers: int):
         # 1. Instantiate the Llama model
         # Provide the path to your downloaded .gguf file
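`tools.json` itself is not part of this diff. Since `create_chat_completion` takes OpenAI-style function schemas, the file presumably looks something like the sketch below; only the top-level `tools` key and the `get_high_low` name are implied by the commit, while the description and the empty `parameters` block are assumptions.

```json
{
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_high_low",
        "description": "Return today's high and low temperatures.",
        "parameters": {
          "type": "object",
          "properties": {},
          "required": []
        }
      }
    }
  ]
}
```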
@@ -17,7 +23,8 @@ class TextGen:
             model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf", # Path to your GGUF model
             n_ctx=n_ctx, # Context window size
             n_gpu_layers=n_gpu_layers, # Offload all layers to GPU. Set to 0 if no GPU.
-            verbose=False # Suppress verbose output
+            verbose=False, # Suppress verbose output
+            chat_format='chatml'
         )
 
     def generate(self, prompt: str) -> str:
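For reference, a hypothetical call site (the module name `textgen` is an assumption, it is not shown in this diff). Two caveats: the constructor currently ignores its `model_path` argument in favour of the hardcoded Mistral path, and tool calling in llama-cpp-python generally needs a chat handler that understands the `tools` argument (for example the `chatml-function-calling` chat format), so plain `chatml` may not emit `tool_calls` on every version.

```python
from textgen import TextGen  # assumed module name, not shown in this commit

gen = TextGen(
    model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # currently overridden by the hardcoded path
    n_ctx=4096,
    n_gpu_layers=-1,  # 0 keeps everything on the CPU
)
print(gen.generate("Write one sentence about GGUF quantization."))
```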
@@ -38,3 +45,28 @@ class TextGen:
         print(text)
 
         return text
+
+    def chat_completion(self, user_message: str) -> str:
+        self.messages.append({
+            "role": "user",
+            "content": user_message
+        })
+
+        response = self.llm.create_chat_completion(
+            messages=self.messages,
+            tools=tools,
+            tool_choice='auto'
+        )
+
+        tool_call = response['choices'][0]['message'].get('tool_calls')
+        if not tool_call:
+            return response['choices'][0]['message']['content']
+
+        call_info = tool_call[0]['function']
+        function_name = call_info['name']
+
+        print(f'Assistant decided to call {function_name}')
+
+        tool_output = tool_funcs.get_high_low()
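The new method stops right after fetching the tool output, which matches the commit message; nothing yet feeds the result back to the model or returns it to the caller. A possible way to close the loop with OpenAI-style `tool` messages is sketched below. This is not part of the commit, and the exact message shape (including the `tool_call_id` field) is an assumption about what the configured chat handler will accept.

```python
        # (sketch only: continuation of chat_completion)
        # Record the assistant's tool call, attach the tool's result as a
        # "tool" message, then let the model phrase the final answer.
        self.messages.append(response['choices'][0]['message'])
        self.messages.append({
            "role": "tool",
            "tool_call_id": tool_call[0].get('id'),
            "content": json.dumps(tool_output),
        })

        final = self.llm.create_chat_completion(messages=self.messages)
        return final['choices'][0]['message']['content']
```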