working on tool output
commit 4d71275f12
parent 4d449f5333
README.md (new file, +7 lines)
@@ -0,0 +1,7 @@
+# LLM Install
+
+## NVIDIA
+
+`CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir`
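The `LLAMA_CUBLAS` CMake flag selects the CUDA backend; recent llama-cpp-python releases renamed it to `-DGGML_CUDA=on`, so the older flag may need swapping depending on the version being installed. One way to confirm the GPU build took effect is to load a model with `verbose=True` and watch for the CUDA offload lines in the startup log, as in this sketch (the model path simply mirrors the one used later in this commit):

```python
from llama_cpp import Llama

# Minimal smoke test: if the wheel was built with CUDA, the startup log
# printed by verbose=True reports layers being offloaded to the GPU.
llm = Llama(
    model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # same GGUF file referenced below
    n_gpu_layers=-1,   # -1 asks llama.cpp to offload every layer
    verbose=True,      # keep the build/offload log visible
)
print(llm("Q: 2+2? A:", max_tokens=8)["choices"][0]["text"])
```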
@@ -7,3 +7,5 @@ requests-cache
 retry-requests
 numpy
 pandas
+
+llama-cpp-python
@@ -2,12 +2,18 @@
 import llama_cpp
 import json
 
-tools = json.loads(open('tools.json', 'r').read())['tools']
+from . import tool_funcs
+
+tools: list[dict] = json.loads(open('tools.json', 'r').read())['tools']
 
 
 class TextGen:
     llm: llama_cpp.Llama
+    messages: list[dict] = [
+        {"role": "system", "content": "You are a helpful assistant that can use tools. When a function is called, return the results to the user."}
+    ]
 
     def __init__(self, model_path: str, n_ctx: int, n_gpu_layers: int):
         # 1. Instantiate the Llama model
         # Provide the path to your downloaded .gguf file
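`tools.json` itself is not part of this diff. Since `create_chat_completion` takes OpenAI-style function schemas, the file presumably looks something like the sketch below; only the top-level `tools` key and the `get_high_low` name are implied by the commit, while the description and the empty `parameters` block are assumptions.

```json
{
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_high_low",
        "description": "Return today's high and low temperatures.",
        "parameters": {
          "type": "object",
          "properties": {},
          "required": []
        }
      }
    }
  ]
}
```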
@@ -17,7 +23,8 @@ class TextGen:
             model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf", # Path to your GGUF model
             n_ctx=n_ctx, # Context window size
             n_gpu_layers=n_gpu_layers, # Offload all layers to GPU. Set to 0 if no GPU.
-            verbose=False # Suppress verbose output
+            verbose=False, # Suppress verbose output
+            chat_format='chatml'
         )
 
     def generate(self, prompt: str) -> str:
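For reference, a hypothetical call site (the module name `textgen` is an assumption, it is not shown in this diff). Two caveats: the constructor currently ignores its `model_path` argument in favour of the hardcoded Mistral path, and tool calling in llama-cpp-python generally needs a chat handler that understands the `tools` argument (for example the `chatml-function-calling` chat format), so plain `chatml` may not emit `tool_calls` on every version.

```python
from textgen import TextGen  # assumed module name, not shown in this commit

gen = TextGen(
    model_path="./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",  # currently overridden by the hardcoded path
    n_ctx=4096,
    n_gpu_layers=-1,  # 0 keeps everything on the CPU
)
print(gen.generate("Write one sentence about GGUF quantization."))
```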
@@ -38,3 +45,28 @@ class TextGen:
         print(text)
 
         return text
+
+    def chat_completion(self, user_message: str) -> str:
+        self.messages.append({
+            "role": "user",
+            "content": user_message
+        })
+
+        response = self.llm.create_chat_completion(
+            messages=self.messages,
+            tools=tools,
+            tool_choice='auto'
+        )
+
+        tool_call = response['choices'][0]['message'].get('tool_calls')
+        if not tool_call:
+            return response['choices'][0]['message']['content']
+
+        call_info = tool_call[0]['function']
+        function_name = call_info['name']
+
+        print(f'Assistant decided to call {function_name}')
+
+        tool_output = tool_funcs.get_high_low()
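The new method stops right after fetching the tool output, which matches the commit message; nothing yet feeds the result back to the model or returns it to the caller. A possible way to close the loop with OpenAI-style `tool` messages is sketched below. This is not part of the commit, and the exact message shape (including the `tool_call_id` field) is an assumption about what the configured chat handler will accept.

```python
        # (sketch only: continuation of chat_completion)
        # Record the assistant's tool call, attach the tool's result as a
        # "tool" message, then let the model phrase the final answer.
        self.messages.append(response['choices'][0]['message'])
        self.messages.append({
            "role": "tool",
            "tool_call_id": tool_call[0].get('id'),
            "content": json.dumps(tool_output),
        })

        final = self.llm.create_chat_completion(messages=self.messages)
        return final['choices'][0]['message']['content']
```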