Source code for BRAD.agent

"""
The `brad` module serves as the main interface for user interactions, whether through a graphical user interface (GUI), command line, or programmatically.

The `Agent` class creates a single chatbot instance that can be queried in various ways. 

The `AgentFactory` class is a session factory method, used to fetch and maintain Agent sessions from outside the module.  

The `brad.chat` method allows users to initiate a command line chat session without needing to create an `Agent` instance.

Main Methods
-------------

Main Methods:
    
1. `Agent.chat`
    This method creates a chat session where a user and `Agent` can have a conversation with back-and-forth inputs.
2. `Agent.invoke`
    This method responds to an individual user query with a single tool.
3. `AgentFactory.get_agent`
    Method to instantiate a new `Agent` session or fetch an existing one.

.. _state-schema-section:

State Schema
------------

The `Agent` state is managed within a dictionary called `Agent.state`. This dictionary tracks the agent's inputs, outputs, memory, configurations, and more.
To pass this information between the `Agent` and each tool the `state` dictionary is passed as the single input to each tool module. The `state` is structured as:

>>> Agent.state = {
... 'config'            : {
...     <configuration variables>
... },
... 'prompt'            : <user input>,
... 'output'            : <streaming output of Agent>,
... 'memory'            : <agent memory>,
... 'process'           : {
...     'MODULE'        : <Tool module used to respond to user input>,
...     <module specific information>: {
...         ...
...     }
... },
... 'queue'             : [<list of instructions to follow>],
... 'queue pointer'     : <instruction pointer to the queue>,
... 'llm-api-calls'     : <number of LLM calls used by Agent>,
... 'recursion_depth'   : <amount of recursion the Agent is using>
>>> }
    
Class Methods
-------------

The `Agent` class is organized as follows:


"""

# Standard
import pandas as pd
import os
import shutil
from datetime import datetime as dt
import warnings
import json
import logging
import time
from typing import Optional, List
import pickle
import atexit

# Router
from semantic_router.layer import RouteLayer

# RAG
import chromadb

# LangChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain.memory import ConversationBufferMemory

# LangChain Core
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

# Library
from BRAD.planner import *
from BRAD.enrichr import *
from BRAD.scraper import *
from BRAD.router import *
from BRAD.rag import queryDocs, remove_repeats
from BRAD.gene_ontology import *
from BRAD.pythonCaller import *
from BRAD.llms import *
from BRAD.geneDatabaseCaller import geneDBRetriever
from BRAD.planner import planner
from BRAD.coder import code_caller
from BRAD.writer import summarizeSteps, chatReport
from BRAD import log
from BRAD.bradllm import BradLLM
from BRAD.constants import TOOL_MODULES


[docs]
class Agent():
    """
    This class organizes the agentic capabilities of BRAD. It facilitates interactions 
    with external LLMs, tools, core modules, literature, and other databases while
    managing the chat state and history.
    
    Key functions include:
    
    1. **invoke(user_input)**: Responds to a single user input.
    2. **chat()**: Initiates an interactive session between the user and the BRAD agent.
    
    To address user queries, the agent employs semantic routing to select the appropriate tool module, generates responses using code from the chosen module, and tracks its state throughout the interaction.

    :param model_path: The path to the Llama model file, defaults to '/nfs/turbo/umms-indikar/shared/projects/RAG/models/llama-2-7b-chat.Q8_0.gguf'.
    :type model_path: str, optional
    :param persist_directory: The directory where the literature database is stored, defaults to "/nfs/turbo/umms-indikar/shared/projects/RAG/databases/DigitalLibrary-10-June-2024/".
    :type persist_directory: str, optional
    :param llm: The language model to be used. If None, it will be loaded within the function.
    :type llm: PreTrainedModel, optional
    :param ragvectordb: The RAG vector database to be used. If None, it will prompt the user to load it.
    :type ragvectordb: Chroma, optional
    :param embeddings_model: The embeddings model to be used. If None, it will be loaded within the function.
    :type embeddings_model: HuggingFaceEmbeddings, optional
    :param max_api_calls: The maximum number of api / llm calls BRAD can make
    :type max_api_calls: int, optional
    :param tools: The set of available tool modules. If None, all modules are available for use
    :type tools: list, optional
    :param gui: Indicates if the Agent is used in the GUI
    :type gui: boolean, optional

    :raises FileNotFoundError: If the specified model or database directories do not exist.
    :raises json.JSONDecodeError: If the configuration file contains invalid JSON.
    :raises KeyError: If required keys are missing from the configuration or chat status.
    """

    

[docs]
    def __init__(self,
        model_path = '/nfs/turbo/umms-indikar/shared/projects/RAG/models/llama-2-7b-chat.Q8_0.gguf',
        persist_directory = "/nfs/turbo/umms-indikar/shared/projects/RAG/databases/DigitalLibrary-10-June-2024/",
        llm=None,
        ragvectordb=None,
        embeddings_model=None,
        restart=None,
        start_path=None,
        tools=None,
        name='BRAD',
        max_api_calls=None, # This prevents BRAD from finding infinite loops and using all your API credits,
        interactive=True,   # This indicates if BRAD is in interactive more or not
        config=None,        # This parameter lets a user specify an additional configuration file that will
                            # overwrite configurations with the same key
        gui=False
    ):
        """
        Initialize the chat session and the `state` dictionary of the agent.

        A fresh session creates a new log directory (named `start_path`, or a
        timestamp when `start_path` is not given) below the configured
        `log_path` in the user's home directory. When `restart` points to a
        directory that contains a `log.json`, the chat log, the pickled state
        (`.agent-state.pkl`) and the saved `config.json` found there are
        loaded instead.

        :param model_path: Not used within this method.
        :type model_path: str, optional
        :param persist_directory: Directory handed to `load_literature_db` when the user opts to load a database.
        :type persist_directory: str, optional
        :param llm: The language model to use. If None, `load_openai()` is called.
        :type llm: optional
        :param ragvectordb: The RAG vector database. If None and the agent is interactive, the user is asked whether to load one.
        :type ragvectordb: optional
        :param embeddings_model: Only assigned here; not used further within this method.
        :type embeddings_model: optional
        :param restart: Directory of a previous session to resume. Ignored when it holds no `log.json`.
        :type restart: str, optional
        :param start_path: Directory name for a new session's log directory.
        :type start_path: str, optional
        :param tools: Passed to `getRouter(available=tools)`.
        :type tools: list, optional
        :param name: Display name of the agent (surrounding whitespace is stripped).
        :type name: str, optional
        :param max_api_calls: Upper bound on LLM calls; defaults to 1000 when None.
        :type max_api_calls: int, optional
        :param interactive: Stored in `state['interactive']`.
        :type interactive: bool, optional
        :param config: Path of an additional configuration file passed to `load_state`.
        :type config: str, optional
        :param gui: Stored in `state['gui']`.
        :type gui: bool, optional
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        # Dev. Comments:
        # -------------------
        # This function initializes a chat session and the state variable
        #
        # History:
        # - 2024-06-04: wrote 1st draft of this code in the brad.chat() method
        # - 2024-07-10: refactored brad.py file to a class and converted the code
        #               used to initialize the chat session to initialize this class
        # - 2024-07-23: added interactive and max_api_call arguments
        # - 2024-07-29: added config (optional) argument to overwrite the defaults
        # - 2024-10-06: .chatstatus was renamed .state
        # - 2024-10-16: if the restart location doesn't have a log, then a new agent
        #               is created
        # - 2024-11-18: configurations are automatically read/written from created sessions
        #               when turning the Agent back on
        #
        # Issues:
        # - We should change the structure of the classes/modules. In this 1st iteration
        #   state was packed as a class variable and used similar to before, but it
        #   is likely reasonable to split this variable up into separate class variables.
        # - Current configurations only allow either a specific configuration file or a
        #   configuration file from a previous chat session (dominate) - JP

        self.state = self.load_state(config=config)
        self.name       = name.strip()
        self.state['interactive'] = interactive # Taken from the constructor argument (True by default)
        self.state['gui'] = gui
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    
        base_dir = os.path.expanduser('~')
        log_dir = os.path.join(base_dir, self.state['config']['log_path'])
        os.makedirs(log_dir, exist_ok=True)

        # Check if it will be possible to restart the old session
        if restart is not None:
            log_path = os.path.join(restart, 'log.json')
            if not os.path.exists(log_path):
                restart = None

        if restart is None:
            # making this more human readable
            new_dir_name = start_path if start_path else dt.now().strftime("%B %d, %Y at %I:%M:%S %p")
            
            new_log_dir = os.path.join(log_dir, new_dir_name)
            try:
                os.makedirs(new_log_dir, exist_ok=True)
            except Exception as e:
                print(f"Failed to create directory '{new_log_dir}'. Error: {e}")
                # NOTE(review): this fallback is a hard-coded, user-specific Windows path.
                fallback_dir = "C:\\Users\\jpic\\Documents\\BRAD-logs"  # Replace this with a directory that must exist
                print(f"Using fallback directory: {fallback_dir}")
                new_log_dir = fallback_dir
                os.makedirs(new_log_dir, exist_ok=True)  # This should always succeed since the fallback exists
            self.chatname = os.path.join(new_log_dir, 'log.json')
            self.chatlog  = {}
        else:
            new_log_dir = restart
            self.chatname = os.path.join(restart, 'log.json')
            # Use a context manager so the log file handle is closed after reading
            with open(self.chatname) as f:
                self.chatlog = json.load(f)

            state_file = os.path.join(new_log_dir, '.agent-state.pkl')
            
            if os.path.exists(state_file):
                try:
                    # SECURITY: pickle.load executes arbitrary code from the file;
                    # only restart from session directories you trust.
                    # NOTE(review): the loaded state replaces the 'interactive' and 'gui'
                    # values set from the constructor arguments above - confirm intended.
                    with open(state_file, 'rb') as f:
                        self.state = pickle.load(f)
                    logging.info(f"Loaded agent state from {state_file}")
                except Exception as e:
                    logging.error(f"Failed to load agent state from {state_file}: {e}")
            else:
                logging.info(f"No existing state file found in {new_log_dir}, starting fresh.")

            # Update from the old configurations
            config_file = os.path.join(new_log_dir, 'config.json')
            if os.path.exists(config_file):
                try:
                    saved_configs = self.load_config(config_file)
                    self.state['config'] = saved_configs
                    logging.info(f"Loaded configuration from {config_file}")
                except Exception as e:
                    logging.error(f"Failed to load configuration from {config_file}: {e}")
            else:
                logging.info(f"No configuration file found at {config_file}.")

        if max_api_calls is None:
            max_api_calls = 1000
        self.max_api_calls = max_api_calls
    
        # Initialize the dictionaries of tables and databases accessible to BRAD
        databases = {} # a dictionary to look up databases
        # tables = {}    # a dictionary to look up tables
    
        # Initialize the RAG database
        if llm is None:

            # By default we use OpenAI
            llm = load_openai()

        if ragvectordb is None:
            if self.state['interactive']:
                self.state = log.userOutput('\nWould you like to use a database with ' + self.name + ' [Y/N]?', state=self.state)
                loadDB = input().strip().upper()
                if loadDB == 'Y':
                    ragvectordb, embeddings_model = self.load_literature_db(persist_directory) # load the literature database
                else:
                    ragvectordb, embeddings_model = None, None
            else:
                ragvectordb, embeddings_model = None, None
        
        databases['RAG'] = ragvectordb
        memory = ConversationBufferMemory(ai_prefix="BRAD")
    
        # Initialize the routers from router.py
        self.router = getRouter(available=tools)
    
        # Add other information to state
        # Assign values only if the key does not exist or is None/empty
        if 'llm' not in self.state or not self.state['llm']:
            self.state['llm'] = llm

        if 'memory' not in self.state or not self.state['memory']:
            self.state['memory'] = memory

        if 'databases' not in self.state or not self.state['databases']:
            self.state['databases'] = databases
        elif self.state['databases'].get('RAG') is None:
            # save_state() pickles the state with 'RAG' set to None, so a restored
            # state holds a non-empty dict; re-attach the database available now.
            self.state['databases']['RAG'] = ragvectordb

        if 'output-directory' not in self.state or not self.state['output-directory']:
            self.state['output-directory'] = new_log_dir
    
        # Initialize all modules
        self.module_functions = self.getModules()
    
        # Start loop
        # only log if chat bot is fresh
        if restart is None:
            self.state = log.userOutput('Welcome to BRAD! The output from this conversation will be saved to ' + self.chatname + '. How can I help?', state=self.state)

            # Write an empty chat log
            self.chatlog, self.state = log.logger(self.chatlog, self.state, self.chatname, elapsed_time=0)


        # Ensure that the save_state function is registered to run at program exit
        # atexit.register(self.save_state)



[docs]
    def save_state(self):
        """
        Saves the agent state to a file named '.agent-state.pkl' in the output directory.
        This method is registered with atexit to ensure it is called when the program exits.
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: October 15, 2024

        output_directory = self.state['output-directory']
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        state_file = os.path.join(output_directory, '.agent-state.pkl')

        try:
            # Save the log to the json log file
            self.state['prompt'] = None
            self.state['output'] = None
            self.state['process'] = {
                'module' : 'SLEEP'
            }

            # Set the databases to none
            self.state['databases']['RAG'] = None
            self.state['llm'] = None

            # Save the state to a pickle file
            with open(state_file, 'wb') as f:
                pickle.dump(self.state, f)
            logging.info(f"Agent state saved to {state_file}")

            self.chatlog, self.state = log.logger(self.chatlog, self.state, self.chatname, elapsed_time=0)
            logging.info(f"Agent log written for power off")

        except Exception as e:
            logging.error(f"Failed to save agent state: {e}")




[docs]
    def invoke(self, query):
        """
        Executes a single query using the chatbot, similar to invoking a language model.
    
        This method processes the user input, determines the appropriate routing 
        based on explicit commands or the content of the query, and generates a 
        response using the selected module. It also manages the state of the 
        chatbot throughout the execution.
    
        :param query: The user input to be processed by the chatbot.
        :type query: str
    
        :return: The output generated by the chatbot in response to the input query.
        :rtype: str
    
        :raises Exception: If an error occurs during the execution of the selected module.
    
        :example:
            >>> response = agent.invoke("What's the weather today?")
        
        :note: 
            Special commands recognized include:
            - "exit", "quit", "q", "bye": Ends the session.
            - "help": Displays help information.
            - "/set": Configures settings.
            - "/force": Forces the use of a specified routing function.
    
        This method logs the process and clears memory based on configuration settings.
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        # Dev. Comments:
        # -------------------
        # This function executes a single user prompt with BRAD
        #
        # History:
        # - 2024-06-04: wrote 1st draft of this code in the brad.chat() method
        # - 2024-07-10: refactored brad.py file to a class and converted the code
        #               used to execute a single prompt into the invoke method

        # start the clock
        start_time = time.time()
        
        # This line packs a query into the state variable we were using previously
        self.state['prompt'] = query
        
        # Handle explicit commands and routing
        if self.state['prompt'].lower() in ['exit', 'quit', 'q', 'bye']:         # check to exit
            return False
        elif self.state['prompt'].lower() == 'help':              # print man to the screen
            self.chatbotHelp()
            return True
            # continue
        # Routing
        elif self.state['prompt'].startswith('/set'):             # set a configuration variable
            self.state = self.reconfig()
            return True
            # continue
        # Continue previous module
        elif self.state.get('continue-module') is not None:
            route = self.state['continue-module'][0]

        # Use router to select correct module
        elif '/force' not in self.state['prompt'].split(' '):     # use the router
            route = self.router(self.state['prompt']).name
            if route is None:
                route = 'RAG'
        # Forced routes
        else:
            route = self.state['prompt'].split(' ')[1]            # use the forced router
            if self.state['config']['ROUTER']['build router db']:
                buildRoutes(self.state['prompt'])
            self.state['prompt'] = " ".join(self.state['prompt'].split(' ')[2:]).strip()

        # Outputs
        self.state = log.userOutput('==================================================', state=self.state)
        self.state = log.userOutput(self.name + ' >> ' + str(len(self.chatlog)) + ': ', state=self.state)

        # select appropriate routeing function
        if route.upper() in self.module_functions.keys():
            routeName = route.upper()
        else:
            routeName = 'RAG'

        log.debugLog(routeName, state=self.state)

        # get the specific module to use
        module = self.module_functions[routeName]

        # Standardize state/logging schema for all modules
        #     see: https://github.com/Jpickard1/RAG-DEV/wiki/Logging
        self.state['process'] = {'module' : routeName,
                                 'steps'  : []
                            }
        self.state['output'] = ""

        # get current output files
        output_files = utils.outputFiles(self.state)
        IP = self.state['queue pointer']
        # if not self.state['interactive']:
        #     self.state['queue'] = [[]] # don't let it us an empty queue if it is not interactive
        
        # Query module
        try:
            self.state = module(self.state)
        except:
            log.debugLog('An error occurred while using a tool.', state=self.state)

        # Remove the item that was executed. We need must do it after running it for the current file naming system.
        log.debugLog('\n\n\nroute\n\n\n', state=self.state)
        log.debugLog(route, state=self.state)
        if len(self.state['queue']) != self.state['queue pointer'] and route != 'PLANNER':
            # log.debugLog(self.state['queue'], state=self.state)
            new_output_files = utils.outputFiles(self.state)
            new_output_files = list(set(new_output_files).difference(set(output_files)))
            if 'output' not in self.state['queue'][IP].keys():
                self.state['queue'][IP]['outputs'] = new_output_files
            self.state = utils.makeNamesConsistent(self.state, new_output_files)
            if self.state['process']['module'] != 'ROUTER':
                self.state['queue pointer'] += 1

        # Clear memory
        if self.state['config']['forgetful']:
            self.state['memory'].clear()

        # stop the clock
        end_time = time.time()
        elapsed_time = end_time - start_time
        
        # Log and reset these values
        self.chatlog, self.state = log.logger(
            self.chatlog,
            self.state,
            self.chatname,
            elapsed_time=elapsed_time
        )
        return self.state['output']



[docs]
    def chat(self):
        """
        Opens an interactive chat session where users can execute a series of prompts.
    
        This method allows users to engage in a back-and-forth dialogue with the chatbot. 
        Users can input queries, which the chatbot processes and responds to until 
        the session is terminated. It supports both direct user input and queued prompts.
    
        The chat session tracks the number of LLM calls made and stops once
        `max_api_calls` is exceeded.
    
        :example:
            >>> agent.chat()
        
        :note:
            The session continues until `invoke` returns `False`, which happens
            for the commands "exit", "quit", "q" and "bye".
    
            A queued prompt is used instead of user input only when the queue is
            non-empty, the queue pointer is non-zero and inside the queue, and
            `state['interactive']` is False.
    
            For queued RAG stages that list `inputs`, the prompt is extended with
            a summary of the output files of those earlier stages.
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        # Dev. Comments:
        # -------------------
        # This function executes a series of prompts in an interactive mannor with BRAD
        #
        # History:
        # - 2024-06-04: wrote 1st draft of this code in the brad.chat() method
        # - 2024-07-10: refactored brad.py file to a class and converted the code
        #               used to execute consecutive prompts into this function
        # - 2024-07-21: added llm-api-calls to chat status to prevent the rerouting/
        #               planner modules from executing unnecessarily long loops.
        self.state['interactive'] = True
        while True:
            # Begin processing a new prompt
            print('==================================================')

            # Get the prompt from the user or from the queue
            queue = self.state['queue']
            pointer = self.state['queue pointer']
            useQueue = (
                len(queue) != 0
                and pointer < len(queue)
                and pointer != 0
                and not self.state['interactive']
            )
            if useQueue:
                stage = queue[pointer]
                query = stage['prompt']

                # update memory to use previous points of the pipeline
                if 'inputs' in stage:
                    print(query)
                    print(stage['module'])
                    if stage['module'] == 'RAG':
                        print('RAG MODULE')
                        # the queue is a list, so report its length (it has no .keys())
                        print(f"len(self.state['queue'])={len(queue)}")
                        inputStages = stage['inputs']
                        print(inputStages)
                        query += self._previousWorkSummary(inputStages)
            else:
                query = input('Input >> ')  # get query from user
            
            output = self.invoke(query)

            # invoke returns False only for an exit command; an empty-string
            # output (e.g. after a failed tool) must not end the session
            if output is False:
                break

            # update llm-api-calls ('steps' may be absent after "help" or "/set")
            steps = self.state.get('process', {}).get('steps', [])
            newCalls = self.getLLMcalls(steps)
            self.state['llm-api-calls'] += newCalls
            log.debugLog(f'current llm calls={self.state["llm-api-calls"]}', state=self.state)
            if self.state['llm-api-calls'] > self.max_api_calls:
                log.debugLog('The maximum number of llm calls has been exceeded', state=self.state)
                break
        self.state['interactive'] = False
        self.state = log.userOutput("Thanks for chatting today! I hope to talk soon, and don't forget that a record of this conversation is available at: " + self.chatname, state=self.state)

    def _previousWorkSummary(self, inputStages):
        """
        Build the text appended to a queued prompt describing the outputs of earlier stages.

        :param inputStages: Indices into `state['queue']` of the stages whose output files should be summarized.
        :type inputStages: list

        :return: A "Previous Work" header followed by excerpts of the readable output files.
        :rtype: str
        """
        summary = "**Previous Work**\nYou have generated the following data previously in the pipeline:\n\n"
        for ISIP in inputStages:
            fileNames = self.state['queue'][ISIP]['outputs']
            print(fileNames)
            for file in fileNames:
                file = os.path.join(self.state['output-directory'], file)
                if not os.path.exists(file):
                    continue
                try:
                    print(file)
                    df = pd.read_csv(file)
                    if 'state' in df.columns:
                        summary += "Top Ranked Biomarkers:\n"
                        summary += str(df['state'].values[:100])[1:-1]
                    if 'p_val' in df.columns:
                        summary += "Relevant Biological Processes from Enrichr:"
                        summary += str(df[['path_name']].values[:10])
                except Exception as e:
                    # Best effort: a file that cannot be parsed or lacks the
                    # expected columns is skipped rather than ending the chat.
                    print('this part of the code doesnt work so great, yet!')
                    print(e)
        return summary



[docs]
    def get_display(self):
        """
        Collect the prompt/output history stored in the chat log.

        Each logged interaction contributes two tuples: `(prompt, None)`
        followed by `(output, <full log entry>)`. Entries whose process module
        (under the key 'module' or 'MODULE') is 'SLEEP' are left out.

        :returns: a list of 2-tuples in chat log order
        :rtype: list
        """
        log.debugLog("Get Display Method", display=True)
        history = []
        log.debugLog("display = []", display=True)
        for entry in self.chatlog.values():
            process = entry['process']
            # 'SLEEP' entries are skipped; both key spellings are checked
            asleep = any(process.get(key) == 'SLEEP' for key in ('module', 'MODULE'))
            if asleep:
                continue
            history.append((entry['prompt'], None))
            history.append((entry['output'], entry))
        log.debugLog(f"display={history!r}", display=True)
        return history


    @property
    def llm(self):
        """Get the current LLM."""
        return self.state['llm']


[docs]
    def set_llm(self, llm):
        """Set the LLM and handle any related logic."""
        if not llm:
            raise ValueError("LLM cannot be None")
        
        self.state['llm'] = llm



[docs]
    def updateMemory(self):
        """
        This function lets BRAD reset his memory to focus on specific previous interactions. This is useful
        when BRAD is executing a pipeline and wants to manage how input flows from one section to the next.

        A new `ConversationBufferMemory` is built from the last input/output pair
        of every stage listed as an input of the current queue item. The current
        memory is kept under `state['main memory']` and the new memory replaces
        `state['memory']`.

        .. warning::
            This function may be removed in the near future.

        :return: None
        :rtype: None
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: July 28, 2024

        log.debugLog('updateMemory is starting', state=self.state)
        
        # get previous stages that must occur in the input
        # NOTE(review): chat() reads the key 'inputs' from queue items while this reads 'input' - confirm which is intended
        inputs = self.state['queue'][self.state['queue pointer']]['input']

        # build a new memory object based upon the previous inputs and outputs of each stage
        memory = ConversationBufferMemory()
        for stage in inputs:
            stageHistory = self.state['queue'][stage]['history']
            # messages are added through the memory's chat history object
            memory.chat_memory.add_user_message(stageHistory['input'][-1])
            memory.chat_memory.add_ai_message(stageHistory['output'][-1])

        # Update the state main memory
        self.state['main memory'] = self.state['memory']
        self.state['memory'] = memory
        return

                


[docs]
    def resetMemory(self):
        """
        Resets the state of the memory by restoring the main memory from the recent messages.
    
        This function undoes the effects of the `updateMemory` method by taking
        the most recent messages from the stage memory and adding them back 
        into the main memory. It is designed to help maintain a consistent 
        memory state throughout the agent's execution.
    
        .. warning::
            This function may be removed in the near future.
    
        :return: None
        :rtype: None
    
        :example:
            >>> agent.resetMemory()
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: July 28, 2024

        # Update the state main memory
        mainMemory = state['main memory']
        stageMemory = state['main memory']
        
        recent_messages = stageMemory.buffer[-2:]  # Adjust -2 index to control # of recent messages
        for msg in recent_messages:
            mainMemory.add_message(msg['role'], msg['content'])

        state['memory'] = mainMemory
        return

    

[docs]
    def getLLMcalls(self, steps):
        """
        Count the steps that look like LLM call records.

        A step is counted when it contains every key of the template returned
        by `log.llmCallLog()`.
        
        :param steps: A list of steps from the agent's log that have been executed
        :type steps: list
        
        :return: The number of steps that contain all keys of the LLM call template.
        :rtype: int
        
        :example:
            >>> num_calls = agent.getLLMcalls(steps)
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: July 2024
        templateKeys = log.llmCallLog().keys()
        return sum(
            1
            for step in steps
            if all(key in step.keys() for key in templateKeys)
        )

    

[docs]
    def getModules(self):
        """
        Build the lookup table from route names to tool module entry points.

        The keys are the upper-case route names ('DATABASE', 'SCRAPE', 'RAG',
        'PYTHON', 'PLANNER', 'CODE', 'WRITE', 'ROUTER') and the values are the
        functions invoked for each route.
    
        :return: A dictionary mapping module names to function handles.
        :rtype: dict
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024
        
        # this + imports should be the only code someone needs to write to add a new module
        registry = (
            ('DATABASE', geneDBRetriever),   # gget
            ('SCRAPE',   webScraping),       # webscrapping
            ('RAG',      queryDocs),         # standard rag
            ('PYTHON',   callPython),
            ('PLANNER',  planner),
            ('CODE',     code_caller),
            ('WRITE',    chatReport),        # summarizeSteps,
            ('ROUTER',   reroute),
        )
        return dict(registry)



[docs]
    def load_config(self, configfile=None):
        """
        Loads the `Agent` configuration settings from JSON.

        The package defaults are read from ``config/config.json`` located in
        the same directory as this source file. When `configfile` is given,
        its contents are merged recursively on top of the defaults: nested
        dictionaries are merged key by key, any other value replaces the
        default outright.

        :param configfile: Path to a JSON file whose values override the
            defaults. If None (or empty), only the defaults are returned.
        :type configfile: str, optional

        :raises FileNotFoundError: If the default or the override file is not found.
        :raises json.JSONDecodeError: If either file contains invalid JSON.

        :return: A dictionary containing the merged configuration settings.
        :rtype: dict
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        # History:
        # - 2024-06-04: wrote 1st draft of this code
        # - 2024-07-29: added a new argument to allow additional configuration
        #               files to overwrite the defaults

        def deep_update(original, updates):
            """
            Recursively update the original dictionary with the updates dictionary.
            """
            for key, value in updates.items():
                current = original.get(key)
                # Only recurse when BOTH sides are dictionaries. If the default
                # is a scalar (or missing) the override simply replaces it;
                # recursing into a non-dict would fail on item assignment.
                if isinstance(value, dict) and isinstance(current, dict):
                    deep_update(current, value)
                else:
                    original[key] = value
            return original

        # Read the default configurations shipped next to this module
        current_script_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(current_script_dir, 'config', 'config.json')
        with open(file_path, 'r', encoding='utf-8') as f:
            defaultConfigs = json.load(f)

        # Overlay the caller-specified configurations, if any
        if configfile:
            with open(configfile, 'r', encoding='utf-8') as f:
                newConfigs = json.load(f)
            defaultConfigs = deep_update(defaultConfigs, newConfigs)

        return defaultConfigs

    

[docs]
    def save_config(self):
        """
        Write the agent's current configuration (``self.state['config']``) to
        a ``config.json`` file.

        The destination directory is ``self.chatname`` with its last eight
        characters removed, and the resulting path is printed before writing.

        :raises FileNotFoundError: If the destination directory does not exist.
        :raises TypeError: If the configuration contains values that cannot be serialized to JSON.
        """
        # Dev. Comments
        # History:
        # - 2024-11-18: This method modifies the objects configurations but does not change
        #               the configurations of the package. (JP)
        #
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        # NOTE(review): the 8-character slice presumably strips a trailing
        # 'log.json' file name from chatname -- confirm against where
        # chatname is set.
        session_dir = self.chatname[:-8]
        file_path = os.path.join(session_dir, 'config.json')
        print(f"{file_path=}")
        current_config = self.state['config']
        with open(file_path, 'w') as config_file:
            json.dump(current_config, config_file, indent=4)



[docs]
    def reconfig(self):
        """
        Updates one configuration variable from the current prompt and saves
        the updated configuration.

        The prompt in ``self.state['prompt']`` is split on whitespace into
        three parts: a command token (ignored here), the configuration key,
        and the new value. The value for the ``debug`` key is interpreted as a
        boolean (``true``, case-insensitive). Any other value is converted to
        ``int`` if possible, otherwise ``float``, otherwise kept as a string.
        If the key exists in ``self.state['config']`` it is overwritten and
        the configuration is written with `save_config`; otherwise the user
        is told the key was not found.

        :raises ValueError: If the prompt does not contain a command, a key and a value.

        :return: The value returned by ``log.userOutput`` (the state after the
            user message has been emitted).
        :rtype: dict
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        prompt = self.state['prompt']
        parts = prompt.split(maxsplit=2)
        if len(parts) != 3:
            raise ValueError(
                "Expected '<command> <key> <value>' but received: " + repr(prompt)
            )
        _, key, value = parts

        if key == 'debug':
            # Keep this a real boolean: running it through int() below would
            # silently turn True/False into 1/0.
            value = (value.strip().lower() == 'true')
        else:
            try:
                value = int(value)
            except ValueError:
                try:
                    value = float(value)
                except ValueError:
                    value = str(value)

        if key in self.state['config']:
            # Write through self.state; a bare local `state` is not bound yet here.
            self.state['config'][key] = value
            self.save_config()
            state = log.userOutput("Configuration " + str(key) + " updated to " + str(value), state=self.state)
        else:
            state = log.userOutput("Configuration " + str(key) + " not found", state=self.state)
        return state



[docs]
    def load_state(self, config=None):
        """
        Create a fresh agent state dictionary filled with default values.

        The ``'config'`` entry is obtained from `load_config`; every other
        entry starts out empty, ``None``, zero or ``False``.

        :param config: Forwarded to `load_config` as ``configfile``.
        :type config: str, optional

        :raises FileNotFoundError: If a configuration file is not found.
        :raises json.JSONDecodeError: If a configuration file contains invalid JSON.

        :return: The newly built state dictionary.
        :rtype: dict
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        loaded_config = self.load_config(configfile=config)
        return {
            # configuration and the current exchange
            'config': loaded_config,
            'prompt': None,
            'output': None,
            'process': {},
            # working data
            'current table': {'key': None, 'tab': None},
            'current documents': None,
            'tables': {},
            'documents': {},
            'plottingParams': {},
            'matlabEng': None,
            'experiment': False,
            # instruction queue and bookkeeping counters
            'queue': [],
            'queue pointer': 0,
            'llm-api-calls': 0,
            'search': {'used terms': []},
            'recursion_depth': 0,
            # None (if not continuing in a module) or tuple (with [0] being the module name)
            'continue-module': None,
            # boolean to indicate if the agent is for the gui
            'gui': None,
            'interactive': False,
        }



[docs]
    def load_literature_db(
        self,
        persist_directory = "/home/acicalo/BRAD/data/RAG_Database",
        db_name = "DB_cosine_cSize_700_cOver_200"
    ):
        """
        Open a persisted Chroma literature database together with its embedding model.

        A ``BAAI/bge-base-en-v1.5`` HuggingFace embedding model is created, a
        persistent chromadb client is opened at ``persist_directory/db_name``,
        and a `Chroma` vector store is built on top of it using ``db_name`` as
        the collection name.

        :param persist_directory: Directory that contains the database folder, defaults to "/home/acicalo/BRAD/data/RAG_Database"
        :type persist_directory: str, optional
        :param db_name: Name of the database folder inside `persist_directory`; also used as the collection name, defaults to "DB_cosine_cSize_700_cOver_200"
        :type db_name: str, optional

        :raises Warning: A warning is issued (and the same text printed) if the loaded database contains no articles.

        :return: A tuple containing the vector database and the embeddings model.
        :rtype: tuple

        The `persist_directory` should point to a directory that has this structure:

        >>> [Oct 16 19:28]  persist_directory
        ... └── [Oct 16 19:28]  DB_cosine_cSize_700_cOver_200
        ...     ├── [Oct 16 19:28]  aaa2c989-0e39-4be8-82b4-139ae2784c00
        ...     │   ├── [Oct 16 19:28]  data_level0.bin
        ...     │   ├── [Oct 16 19:28]  header.bin
        ...     │   ├── [Oct 16 19:28]  length.bin
        ...     │   └── [Oct 16 19:28]  link_lists.bin
        >>>     └── [Oct 16 19:28]  chroma.sqlite3

        """
        # Dev. Comments
        # History:
        # - 2024-06-04: 1st version of this was written
        # - 2024-10-17: changes to the pathing were made

        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: June 4, 2024

        # Embedding model used by the vector store
        embeddings_model = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5')

        # The on-disk client lives in the db_name sub-folder of persist_directory
        db_path = os.path.join(persist_directory, db_name)
        chroma_client = chromadb.PersistentClient(path=db_path)
        vectordb = Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings_model,
            client=chroma_client,
            collection_name=db_name,
        )

        # An empty database is reported both on stdout and as a warning
        stored_ids = vectordb.get()['ids']
        if len(stored_ids) == 0:
            empty_msg = f'The loaded database contains no articles. See the database: {persist_directory!s} for details'
            print(empty_msg)
            warnings.warn(empty_msg)
        return vectordb, embeddings_model



[docs]
    def chatbotHelp(self):
        """
        Send the built-in help text to the user.

        The text lists the agent's capabilities and the special commands, and
        is emitted through ``log.userOutput``; the value it returns replaces
        ``self.state``.
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: May 19, 2024
        help_text = """
        Welcome to our RAG chatbot Help!
        
        You can chat with the llama-2 llm and several scientifici databases including:
        - retrieval augmented generation
        - web scraping
        - search enrichr and gene ontology databases
        - run codes
        - and more!
    
        Special commands:
        /set   - Allows the user to change configuration variables.
        /force - Allows the user to specify which database to use.
    
        For example:
        /set config_variable_name new_value
        --force option_name
    
        Enjoy chatting with the chatbot!
        """
        updated_state = log.userOutput(help_text, state=self.state)
        self.state = updated_state



[docs]
    def to_langchain(self):
        """
        Wrap this agent in a `BradLLM` so it can be used where LangChain
        expects an llm.

        :return: a `BradLLM` instance constructed with ``bot=self``
        :rtype: `BradLLM`
        """
        # Auth: Joshua Pickard
        #       jpic@umich.edu
        # Date: July 10, 2024
        return BradLLM(bot=self)





[docs]
class AgentFactory():
    """
    The AgentFactory mechanism allows us to instantiate, terminate and maintain
    bot sessions from within the module. Removes the need to have global agents.
    The factory generates a default agent with default parameters if no input is
    given. Based on the session input given, it instantiates a new agent with that
    particular session restored.

    Provides decoupling of objects used from execution logic. If a new agent class
    is implemented the get_agent function needs to be updated appropriately.

    Functions:
    1. **get_agent**: instantiates the actual agent and returns the particular agent based on initialization


    :param tool_modules: The set of available tool modules, defaults to ``TOOL_MODULES``
    :type tool_modules: list, optional
    :param session_path: The path to where the bot session is stored. A trailing ``/log.json`` is removed. If None, no session is restored
    :type session_path: str, optional
    :param start_path: The path to where a new bot session can be started. Only used when `session_path` is not given
    :type start_path: str, optional
    :param interactive: Sets BRAD's mode to interactive or non-interactive. Default mode is non-interactive
    :type interactive: bool, optional
    :param db_name: Name of the literature database to load; only used together with `persist_directory`
    :type db_name: str, optional
    :param persist_directory: Directory containing the literature database; only used together with `db_name`
    :type persist_directory: str, optional
    :param llm_choice: If given, passed to ``llm_switcher`` and the resulting llm is set on the agent
    :type llm_choice: optional
    :param gui: Forwarded to the `Agent` constructor
    :type gui: optional
    :param temperature: Temperature passed to ``llm_switcher`` when `llm_choice` is given, defaults to 0
    :type temperature: optional
    """

    def __init__(self, tool_modules=TOOL_MODULES, session_path=None, start_path=None, interactive=False, db_name=None, persist_directory=None, llm_choice=None, gui=None, temperature=0):
        """
        Store the settings that `get_agent` uses to build an `Agent`.
        """
        self.interactive = interactive

        # Accept either the session directory or the path of its log file.
        suffix = '/log.json'
        if session_path and session_path.endswith(suffix):
            session_path = session_path[: -len(suffix)]
        self.session_path = session_path

        self.tool_modules = tool_modules
        self.start_path = start_path
        self.gui = gui

        # Empty / falsy values are normalized to None.
        self.db_name = db_name or None
        self.persist_directory = persist_directory or None

        self.llm_choice = llm_choice
        self.temperature = temperature

    def get_agent(self):
        """
        Instantiate a new agent or restore an existing one, then apply the
        optional literature database and llm settings.

        ``session_path`` takes precedence over ``start_path``. When both
        ``db_name`` and ``persist_directory`` are set the literature database
        is loaded and stored under ``agent.state['databases']['RAG']``; when
        neither is set that entry is set to None; when only one of them is set
        the agent's database entry is left untouched.

        :return: The configured agent.
        :rtype: Agent
        """
        agent_kwargs = {
            'interactive': self.interactive,
            'tools': self.tool_modules,
            'gui': self.gui,
        }
        if self.session_path:
            agent_kwargs['restart'] = self.session_path
        elif self.start_path:
            agent_kwargs['start_path'] = self.start_path
        agent = Agent(**agent_kwargs)

        # Agent.load_state() does not create a 'databases' entry, so create it
        # on demand instead of assuming it is already present.
        if self.db_name and self.persist_directory:
            db, _ = agent.load_literature_db(persist_directory=self.persist_directory, db_name=self.db_name)
            agent.state.setdefault('databases', {})['RAG'] = db
        elif self.db_name is None and self.persist_directory is None:
            agent.state.setdefault('databases', {})['RAG'] = None

        if self.llm_choice:
            llm = llm_switcher(
                self.llm_choice,
                temperature=self.temperature,
            )
            agent.set_llm(llm)
        return agent