Source code for BRAD.coder

"""
Code Caller
-----------

This module facilitates the discovery, selection, and execution of Python scripts
based on user prompts and predefined configuration settings. (MATLAB support was
removed on 2024-10-01.)

Key Features
~~~~~~~~~~~~

1. Python scripts must reside in the directories specified within the configuration settings.

2. Script execution requires the first argument to specify the output directory, where any 
   resulting files will be saved.

3. Each script must include clear and structured documentation, consisting of:

   - A concise one-line summary at the beginning of the docstring (used by the LLM for script selection).
   
   - Comprehensive descriptions detailing the script’s arguments, inputs, purpose, and usage 
     examples (utilized by the LLM for accurate execution).

Available Methods
~~~~~~~~~~~~~~~~~

This module has the following methods:

"""


import os
import time
from langchain import PromptTemplate, LLMChain
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.callbacks import get_openai_callback

from BRAD.pythonCaller import find_py_files, get_py_description, read_python_docstrings, pythonPromptTemplate, extract_python_code, execute_python_code
from BRAD.promptTemplates import scriptSelectorTemplate, pythonPromptTemplateWithFiles, summarize_code_template
from BRAD import log
from BRAD import utils
from BRAD import justchat

# History:
#  2024-10-01: This file was modified to remove support for running MATLAB codes
#  2024-11-27: Final stage of the pipeline added to have the LLM generate a response
#              at the end of the pipeline

def code_caller(state):
    """
    Executes a Python script based on the user's prompt and chat status settings.

    This function performs the following steps:

    1. Searches the directories listed in ``state['config']['CODE']['path']`` for
       available Python scripts.
    2. Collects a one-line description of each script.
    3. Uses the LLM (first call) to select the most appropriate script, then
       (second call) to format the command with the correct inputs.
    4. Executes the selected script and asks the LLM to summarize the result.

    :param state: The chat status. This function reads ``prompt``, ``llm``,
        ``config`` and ``output-directory`` and appends one log entry per LLM call
        to ``state['process']['steps']``.
    :type state: dict

    :raises KeyError: If the script named by the LLM is not one of the discovered scripts.
    :raises FileNotFoundError: If the selected script cannot be located in any
        configured directory.

    :return: The updated chat status.
    :rtype: dict
    """
    # Auth: Joshua Pickard
    #       jpic@umich.edu
    # Date: June 22, 2024
    log.debugLog("CODER", state=state)
    prompt = state['prompt']                             # Get the user prompt
    llm = state['llm']                                   # Get the llm
    memory = ConversationBufferMemory(ai_prefix="BRAD")  # fresh memory, not state['memory']

    # Get the available python scripts in every configured directory
    path = state['config']['CODE']['path']
    scripts = {fdir: {'python': find_py_files(fdir)} for fdir in path}

    # Get the one-line description of every script
    scriptPurpose = {}
    for fdir in path:
        for script in scripts[fdir]['python']:
            scriptPurpose[script] = {
                'description': get_py_description(os.path.join(fdir, script + '.py')),
                'type': 'python',
            }
    script_list = "".join(
        "Script Name: " + script + ", \t Description: " + info['description'] + '\n'
        for script, info in scriptPurpose.items()
    )

    # Determine which code needs to be executed (first llm call)
    template = scriptSelectorTemplate().format(script_list=script_list)
    PROMPT = PromptTemplate(input_variables=["user_query"], template=template)
    log.debugLog(PROMPT, state=state)
    chain = PROMPT | llm  # LCEL chain creation
    log.debugLog("FIRST LLM CALL", state=state)

    # Call LLM
    start_time = time.time()
    with get_openai_callback() as cb:
        res = chain.invoke(prompt)
    responseOriginal = {'original': res.copy()}
    responseOriginal['time'] = time.time() - start_time
    responseOriginal['call back'] = _token_usage(cb)
    log.debugLog(res.content, state=state)

    # Parse and validate the selection before touching the file system, so a
    # malformed or hallucinated answer fails with a message that names the problem.
    scriptName = _parse_script_name(res.content)
    if scriptName not in scriptPurpose:
        log.debugLog(f'scripts={scripts}', state=state)
        log.debugLog(f'scriptName={scriptName}', state=state)
        raise KeyError(
            f"The LLM selected the script {scriptName!r}, which is not among the "
            f"scripts found in {path}"
        )
    scriptType = scriptPurpose[scriptName]['type']

    # The first configured directory that contains the script wins
    scriptPath = next(
        (fdir for fdir in path if scriptName in scripts[fdir][scriptType]),
        None,
    )
    if scriptPath is None:
        log.debugLog('the scriptPath was not found', state=state)
        log.debugLog(f'scripts={scripts}', state=state)
        log.debugLog(f'scriptName={scriptName}', state=state)
        log.debugLog(f'scriptType={scriptType}', state=state)
        raise FileNotFoundError(
            f"The script {scriptName!r} was not found in any of the directories {path}"
        )
    log.debugLog('scriptPath=' + str(scriptPath), state=state)

    # From here on scriptName is the full path of the script, including '.py'.
    # (Only python scripts are discovered above, so no other suffix can occur.)
    scriptName = os.path.join(scriptPath, scriptName) + '.py'

    state['process']['steps'].append(
        log.llmCallLog(
            llm=llm,
            prompt=PROMPT,
            input=prompt,
            output=responseOriginal,
            parsedOutput={
                'scriptName': scriptName,
                'scriptType': scriptType,
                'scriptPath': scriptPath,
            },
            purpose='Select which code to run',
        )
    )

    # Format code to execute: read the doc strings, format function call
    # (second llm call), parse the llm output
    log.debugLog("ALL SCRIPTS FOUND. BUILDING TEMPLATE", state=state)
    # scriptName already contains scriptPath; joining the two again would
    # duplicate the directory whenever the configured path is relative.
    docstrings = read_python_docstrings(scriptName)
    createdFiles = "\n".join(utils.outputFiles(state))  # A string of previously created files
    filled_template = pythonPromptTemplateWithFiles().format(
        scriptName=scriptName,
        scriptDocumentation=docstrings,
        output_path=state['output-directory'],
        files=createdFiles,
    )

    # Create the prompt template
    PROMPT = PromptTemplate(input_variables=["history", "input"], template=filled_template)
    log.debugLog(PROMPT, state=state)

    # NOTE(review): the LCEL attempt below is kept exactly as it was. Item
    # assignment on the chain result (and, depending on the installed langchain
    # version, `memory.abuffer()`) looks like it raises, which would send every
    # request through the ConversationChain fallback after a wasted LLM call.
    # Confirm before changing: the fallback is also what records the exchange in
    # `memory`, which is then handed to `extract_python_code`.
    try:
        # LCEL chain creation: prompt | llm
        chain = PROMPT | llm

        # Execute the chain with input prompt
        start_time = time.time()
        with get_openai_callback() as cb:
            response = chain.invoke({"history": memory.abuffer(), "input": state['prompt']})
        responseOriginal = response
        responseOriginal['time'] = time.time() - start_time
        responseOriginal['call back'] = _token_usage(cb)
        response = response.content
    # this catches the initial implementation
    except Exception as lcel_error:
        # `except Exception` (not a bare except) so Ctrl-C and SystemExit propagate
        log.debugLog(
            f'LCEL call failed ({lcel_error!r}); falling back to ConversationChain',
            state=state,
        )
        conversation = ConversationChain(
            prompt=PROMPT,
            llm=llm,
            verbose=state['config']['debug'],
            memory=memory,
        )
        start_time = time.time()
        with get_openai_callback() as cb:
            response = conversation.predict(input=state['prompt'])
        responseOriginal = {
            'content': response,
            'time': time.time() - start_time,
            'call back': _token_usage(cb),
        }

    code2execute = extract_python_code(response, scriptPath, state, memory=memory)

    state['process']['steps'].append(
        log.llmCallLog(
            llm=llm,
            prompt=PROMPT,
            input=state['prompt'],
            output=responseOriginal,
            parsedOutput={
                'code': code2execute,
            },
            purpose='Format function call',
        )
    )

    # Check if it requires previous inputs
    code2execute = utils.add_output_file_path_to_string(code2execute, state)

    # Execute code
    executeCode(state, code2execute, scriptType)

    # LLM generated response
    state = summarize_code_execution_results(state)
    return state


def _parse_script_name(llm_text):
    """Return the script name found on the first line of the script-selector reply.

    The name is the text between the first and second colon of that line. When the
    line contains no colon at all, the whole line is used rather than failing with
    an IndexError.
    """
    first_line = llm_text.strip().split('\n')[0]
    fields = first_line.split(':')
    chosen = fields[1] if len(fields) > 1 else fields[0]
    return chosen.strip()


def _token_usage(cb):
    """Return the token/cost counters of an OpenAI callback as a log-friendly dict.

    A counter that the callback object does not provide is reported as ``None``.
    """
    return {
        "Total Tokens": getattr(cb, 'total_tokens', None),
        "Prompt Tokens": getattr(cb, 'prompt_tokens', None),
        "Completion Tokens": getattr(cb, 'completion_tokens', None),
        "Total Cost (USD)": getattr(cb, 'total_cost', None),
    }
def summarize_code_execution_results(state):
    """
    Ask the LLM to summarize the output produced by the executed code.

    The current ``state['output']`` and ``state['prompt']`` are inserted into the
    template returned by ``summarize_code_template``. The filled template replaces
    ``state['prompt']`` and the state is passed to ``justchat.llm_only``. Afterwards
    ``state['output']`` holds the original software output, a ``---`` separator, and
    the output left in the state by ``justchat.llm_only``.

    :param state: The chat status; ``prompt`` and ``output`` are read and overwritten.
    :type state: dict

    :return: The state returned by ``justchat.llm_only`` with the combined output.
    :rtype: dict
    """
    # Keep the raw software output: 'output' is overwritten by the chat call below
    raw_output = state['output']

    summary_request = summarize_code_template().format(
        user_query=state['prompt'],
        system_output=raw_output,
    )

    # The filled template becomes the prompt that justchat sends to the LLM
    state['prompt'] = summary_request
    state = justchat.llm_only(state)

    # Software output first, then the LLM generated summary
    state['output'] = '\n\n --- \n\n '.join((raw_output, state['output']))
    return state
def executeCode(state, code2execute, scriptType):
    """
    Executes the provided code based on the specified script type.

    Only Python is supported: the code is handed to ``execute_python_code``
    together with the chat status. Any other script type is rejected up front
    instead of failing later with "'NoneType' object is not callable".

    :param state: A dictionary containing the chat status, including configuration settings and other relevant data.
    :type state: dict
    :param code2execute: The code to be executed.
    :type code2execute: str
    :param scriptType: The type of the script to be executed. Must be 'python'.
    :type scriptType: str

    :raises ValueError: If ``scriptType`` is not 'python'.
    """
    # Auth: Joshua Pickard
    #       jpic@umich.edu
    # Date: June 22, 2024
    if scriptType != 'python':
        raise ValueError(
            f"Unsupported script type {scriptType!r}; only 'python' scripts can be executed."
        )
    execute_python_code(code2execute, state)