Source code for BRAD.coder

"""
Code Caller
-----------

This module facilitates the discovery, selection, and execution of Python scripts
based on user prompts and predefined configuration settings. MATLAB support was removed
on 2024-10-01, so only Python scripts are handled.

Key Features
~~~~~~~~~~~~

1. Python scripts must reside in the directories specified within the configuration settings.

2. Script execution requires the first argument to specify the output directory, where any 
   resulting files will be saved.

3. Each script must include clear and structured documentation, consisting of:

   - A concise one-line summary at the beginning of the docstring (used by the LLM for script selection).
   
   - Comprehensive descriptions detailing the script’s arguments, inputs, purpose, and usage 
     examples (utilized by the LLM for accurate execution).

Available Methods
~~~~~~~~~~~~~~~~~

This module has the following methods:

"""


import os
import time
from langchain import PromptTemplate, LLMChain
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.callbacks import get_openai_callback

from BRAD.pythonCaller import find_py_files, get_py_description, read_python_docstrings, pythonPromptTemplate, extract_python_code, execute_python_code
from BRAD.promptTemplates import scriptSelectorTemplate, pythonPromptTemplateWithFiles, summarize_code_template
from BRAD import log
from BRAD import utils
from BRAD import justchat

# History:
#  2024-10-01: This file was modified to remove support for running MATLAB codes
#  2024-11-27: Final stage of the pipeline added to have the LLM generate a response
#              at the end of the pipeline


[docs]
def code_caller(state):
    """
    Executes a Python script based on the user's prompt and chat status settings.

    This function performs the following steps:

        1. Searches the specified directories for available Python scripts.

        2. Extracts and analyzes docstrings from each script to identify their purpose.

        3. Uses a large language model (LLM) to select the most appropriate script and format the command with the correct inputs.

        4. Executes the selected script and updates the chat status accordingly.

        5. Asks the LLM to summarize the execution results for the user.

    :param state: The chat status. This function reads ``state['prompt']``, ``state['llm']``,
        ``state['config']['CODE']['path']``, ``state['config']['debug']`` and
        ``state['output-directory']``, and appends two LLM call logs to ``state['process']['steps']``.
    :type state: dict
    :raises ValueError: If no script name can be parsed from the LLM's selection, or the
        selected name is not one of the discovered scripts.
    :raises FileNotFoundError: If the selected script cannot be located in any configured directory.
    :return: The updated chat status returned by ``summarize_code_execution_results``.
    :rtype: dict
    """
    # Auth: Joshua Pickard
    #       jpic@umich.edu
    # Date: June 22, 2024

    log.debugLog("CODER", state=state)
    prompt = state['prompt']                                        # Get the user prompt
    llm = state['llm']                                              # Get the llm
    # A fresh, empty memory is used for this call rather than state['memory']
    memory = ConversationBufferMemory(ai_prefix="BRAD")

    # Get available python scripts, grouped by the directory they were found in
    path = state['config']['CODE']['path']
    scripts = {fdir: {'python': find_py_files(fdir)} for fdir in path}

    # Get the one-line description of every script
    scriptPurpose = {}
    for fdir in path:
        for script in scripts[fdir]['python']:
            scriptPurpose[script] = {
                'description': get_py_description(os.path.join(fdir, script + '.py')),
                'type': 'python'
            }
    script_list = "".join(
        "Script Name: " + script + ", \t Description: " + info['description'] + '\n'
        for script, info in scriptPurpose.items()
    )

    # Determine which code needs to be executed (first llm call)
    template = scriptSelectorTemplate()
    template = template.format(script_list=script_list)
    PROMPT = PromptTemplate(input_variables=["user_query"], template=template)
    log.debugLog(PROMPT, state=state)
    chain = PROMPT | llm # LCEL chain creation
    log.debugLog("FIRST LLM CALL", state=state)

    # Call LLM
    start_time = time.time()
    with get_openai_callback() as cb:
        res = chain.invoke(prompt)
    responseOriginal = {'original': res.copy()}
    responseOriginal['time'] = time.time() - start_time
    responseOriginal['call back'] = _callback_usage(cb)

    # The selected name is the text between the first and second ':' on the
    # first line of the reply.
    log.debugLog(res.content, state=state)
    firstLine = res.content.strip().split('\n')[0]
    fields = firstLine.split(':')
    if len(fields) < 2:
        raise ValueError(
            f"Could not parse a script name from the LLM response: {firstLine!r}"
        )
    scriptName = fields[1].strip()
    if scriptName not in scriptPurpose:
        log.debugLog(f'scriptName={scriptName} is not an available script', state=state)
        raise ValueError(
            f"The LLM selected an unknown script {scriptName!r}; "
            f"available scripts: {sorted(scriptPurpose)}"
        )
    scriptType = scriptPurpose[scriptName]['type']

    # Locate the first configured directory that contains the selected script
    scriptPath = next(
        (fdir for fdir in path if scriptName in scripts[fdir][scriptType]),
        None
    )
    if scriptPath is None:
        log.debugLog('the scriptPath was not found', state=state)
        log.debugLog(f'scripts={scripts}', state=state)
        log.debugLog(f'scriptName={scriptName}', state=state)
        log.debugLog(f'scriptType={scriptType}', state=state)
        raise FileNotFoundError(
            f"Script {scriptName!r} was not found in any configured code directory"
        )

    # From here on scriptName is the full path of the script, including '.py'.
    # Only python scripts are discovered above, so no other suffix can occur.
    log.debugLog('scriptPath=' + str(scriptPath), state=state)
    scriptName = os.path.join(scriptPath, scriptName) + '.py'

    state['process']['steps'].append(
        log.llmCallLog(
            llm             = llm,
            prompt          = PROMPT,
            input           = prompt,
            output          = responseOriginal,
            parsedOutput    = {
                'scriptName': scriptName,
                'scriptType': scriptType,
                'scriptPath': scriptPath
            },
            purpose         = 'Select which code to run'
        )
    )

    # Format code to execute: read the doc strings, format function call (second llm call), parse the llm output
    log.debugLog("ALL SCRIPTS FOUND. BUILDING TEMPLATE", state=state)

    # scriptName already contains scriptPath; joining the directory a second
    # time would yield a wrong path whenever the directory is relative.
    docstrings = read_python_docstrings(scriptName)
    createdFiles = "\n".join(utils.outputFiles(state)) # A string of previously created files
    filled_template = pythonPromptTemplateWithFiles().format(
        scriptName=scriptName,
        scriptDocumentation=docstrings,
        output_path=state['output-directory'],
        files=createdFiles
    )

    # Create the prompt template
    PROMPT = PromptTemplate(input_variables=["history", "input"], template=filled_template)
    log.debugLog(PROMPT, state=state)
    # this will allow better logging of the response from the query API
    try:
        # LCEL chain creation: prompt | llm
        chain = PROMPT | llm

        # Execute the chain with input prompt. `memory.buffer` is the
        # synchronous history string; `abuffer()` would hand the prompt an
        # un-awaited coroutine.
        start_time = time.time()
        with get_openai_callback() as cb:
            message = chain.invoke({"history": memory.buffer, "input": state['prompt']})

        # Build the log record as a new dict. The message object itself does
        # not support item assignment, and failing here would trigger the
        # fallback below and query the LLM a second time.
        response = message.content
        responseOriginal = {
            'content' : response,
            'time' : time.time() - start_time,
            'call back': _callback_usage(cb)
        }

    # this catches the initial implementation
    except Exception as err:
        log.debugLog(f'LCEL chain failed ({err!r}); falling back to ConversationChain', state=state)
        conversation = ConversationChain(
            prompt  = PROMPT,
            llm     =    llm,
            verbose = state['config']['debug'],
            memory  = memory,
        )
        start_time = time.time()
        with get_openai_callback() as cb:
            response = conversation.predict(input=state['prompt'])

        responseOriginal = {
            'content' : response,
            'time' : time.time() - start_time,
            'call back': _callback_usage(cb)
        }
    else:
        # ConversationChain records the exchange in `memory` itself; do the
        # same on the LCEL path so the parser below sees the same history
        # whichever path produced the response.
        memory.save_context({"input": state['prompt']}, {"output": response})

    code2execute = extract_python_code(response, scriptPath, state, memory=memory)

    state['process']['steps'].append(
        log.llmCallLog(
            llm             = llm,
            prompt          = PROMPT,
            input           = state['prompt'],
            output          = responseOriginal,
            parsedOutput    = {
                'code': code2execute
            },
            purpose         = 'Format function call'
        )
    )

    # Check if it requires previous inputs
    code2execute = utils.add_output_file_path_to_string(code2execute, state)

    # Execute code
    executeCode(state, code2execute, scriptType)

    # LLM generated response
    state = summarize_code_execution_results(state)

    return state


def _callback_usage(cb):
    """
    Collects the token and cost counters of an LLM callback handler into a plain dictionary.

    Counters that the handler does not expose are reported as ``None`` so that logging
    never fails because of missing usage information.

    :param cb: The callback handler yielded by ``get_openai_callback``.
    :return: A dictionary with the total/prompt/completion token counts and the total cost.
    :rtype: dict
    """
    return {
        "Total Tokens": getattr(cb, 'total_tokens', None),
        "Prompt Tokens": getattr(cb, 'prompt_tokens', None),
        "Completion Tokens": getattr(cb, 'completion_tokens', None),
        "Total Cost (USD)": getattr(cb, 'total_cost', None)
    }



[docs]
def summarize_code_execution_results(state):
    """
    Asks the LLM to summarize the output produced by the executed code.

    The current ``state['output']`` and ``state['prompt']`` are inserted into the
    code-summary template, and the filled template replaces ``state['prompt']`` before the
    state is handed to ``justchat.llm_only``. Afterwards ``state['output']`` holds the
    original software output, a ``---`` separator, and the output left by the LLM call.

    :param state: The chat status; must contain the ``'prompt'`` and ``'output'`` keys.
    :type state: dict
    :return: The state returned by ``justchat.llm_only`` with the combined output.
    :rtype: dict
    """
    # Keep the raw software output: the LLM call below overwrites state['output']
    raw_output = state['output']

    # The filled template becomes the prompt that the LLM answers
    state['prompt'] = summarize_code_template().format(
        user_query=state['prompt'],
        system_output=raw_output
    )

    state = justchat.llm_only(state)

    # Software output first, then the LLM's answer
    state['output'] = "".join([raw_output, '\n\n --- \n\n ', state['output']])
    return state



[docs]
def executeCode(state, code2execute, scriptType):
    """
    Executes the provided code based on the specified script type.

    Only Python is currently supported: the code is handed to ``execute_python_code``
    together with the chat status. Any other script type is rejected with an explicit
    error instead of failing on a missing executor.

    :param state: A dictionary containing the chat status, including configuration settings and other relevant data.
    :type state: dict
    :param code2execute: The code to be executed.
    :type code2execute: str
    :param scriptType: The type of the script to be executed. Must be 'python'.
    :type scriptType: str
    :raises ValueError: If ``scriptType`` is not a supported script type.
    """
    # Auth: Joshua Pickard
    #       jpic@umich.edu
    # Date: June 22, 2024
    if scriptType != 'python':
        raise ValueError(
            f"Unsupported script type {scriptType!r}; only 'python' scripts can be executed"
        )
    execute_python_code(code2execute, state)