
Reference

main.py

Entry point that ensures the necessary config and scripts are set up before handing off to app.py

main()

Create directories and embed scripts if needed, otherwise run options.read() and app.launch()

Source code in src/__main__.py, lines 17-52
def main():
    """Create directories and embed scripts if needed, otw run options.read() and app.launch()"""
    if not os.path.exists(os.path.expanduser(CONFIG_FILE)):
        if not os.path.exists(os.path.expanduser(CONFIG_DIR)):
            os.makedirs(os.path.expanduser(CONFIG_DIR))
            options.create()
        else:
            options.create()
        if os.path.exists(os.path.expanduser(SCRIPTS_DIR)):
            options.read()
            app.launch()
    elif not os.path.exists(os.path.expanduser(SCRIPTS_DIR)):
        os.makedirs(os.path.expanduser(SCRIPTS_DIR))
        print("\nCreated SCRIPTS_DIR at: " + SCRIPTS_DIR)
        user_embed = None
        while not user_embed:
            user_embed = str(
                input(
                    """Would you like to embed the scripts now (if yes, then add your 
                scripts to ~/.chat-script/scripts before submitting)? y/n: """
                )
            )
            if user_embed:
                if user_embed[0] == "y" or user_embed[0] == "Y":
                    options.read()
                    embeddings.generate()
                    app.launch()
                elif user_embed[0] == "n" or user_embed[0] == "N":
                    options.read()
                    app.launch()
                else:
                    print("Input must be one of: y/n\n")
                    user_embed = None
    else:
        options.read()
        app.launch()

app.py

Gradio UI leveraging the generate function in response.py

launch()

Launch app's Gradio UI

Source code in src/app.py, lines 15-37
def launch():
    """Launch app's Gradio UI"""
    chain.create()
    app = gr.ChatInterface(
        response.generate,
        chatbot=gr.Chatbot(
            show_copy_button=True,
            bubble_full_width=False,
            scale=1,
            type="tuples",
        ),
        fill_height=True,
        title="chat-script",
        theme="gradio/monochrome",
        analytics_enabled=False,
        additional_inputs=[],
    ).queue()
    app.launch(
        share=opt("share"),
        server_name=opt("server_name"),
        server_port=opt("server_port"),
        inbrowser=opt("inbrowser"),
    )

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/app.py, lines 10-12
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["app"][option_name]
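
For example, once options.read() has populated options.OPTIONS, opt() resolves keys from the "app" section with their parsed types. A minimal sketch, assuming the default configuration:

options.read()
opt("server_port")  # -> 7860 (int)
opt("inbrowser")    # -> True (bool)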

chain.py

Set up the language models and multi-query retriever, and define the moderation and RAG chains

create()

Set ChromaDB vectorstore (w/ opt('collection_name')) as a retriever and create rag_chain

Source code in src/chain.py, lines 83-117
def create():
    """Set ChromaDB vectorstore (w/ opt('collection_name')) as a retriever and create rag_chain"""
    models = prepare_models()
    qa_prompt, contextualize_q_prompt = prepare_prompts()

    vectorstore = Chroma(
        collection_name=opt("collection_name"),
        embedding_function=models[0],
        persist_directory=os.path.expanduser(EMBED_DIR),
    )

    if opt("rag_fusion"):
        MultiQueryRetriever = multi_retriever.prepare(opt("num_queries"))
        retriever_fusion = MultiQueryRetriever.from_llm(
            retriever=vectorstore.as_retriever(
                search_kwargs={"k": opt("top_n_results_fusion")}
            ),
            llm=models[1],
            include_original=True,
        )
        retriever = create_history_aware_retriever(
            models[1],
            retriever_fusion,
            contextualize_q_prompt,
        )
    else:
        retriever = create_history_aware_retriever(
            models[1],
            vectorstore.as_retriever(search_kwargs={"k": opt("top_n_results")}),
            contextualize_q_prompt,
        )

    global rag_chain
    question_answer_chain = create_stuff_documents_chain(models[1], qa_prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)
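
Once create() has run, the module-level rag_chain can be invoked directly (response.py wraps it in RunnableWithMessageHistory instead). A minimal sketch of the retrieval-chain interface, with an illustrative question and the module imported as chain, as response.py does:

import chain

chain.create()
result = chain.rag_chain.invoke(
    {"input": "What does backup.sh do?", "chat_history": []}
)
result["answer"]   # generated answer text
result["context"]  # list of retrieved Documents used as context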

create_moderation()

Set Moderation LLM to local Ollama model, construct and return chain

Source code in src/chain.py, lines 120-128
def create_moderation():
    """Set Moderation LLM to local Ollama model, construct and return chain"""
    moderation = ChatOllama(
        model=opt("moderation_model"),
        keep_alive=opt("keep_alive"),
        base_url=opt("moderation_url"),
    )
    moderation_chain = moderation | StrOutputParser()
    return moderation_chain
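
The returned chain maps a question string to the moderation model's verdict; check_question() in response.py treats any verdict starting with "safe" as allowed. A minimal sketch with an illustrative question:

moderation_chain = create_moderation()
verdict = moderation_chain.invoke("What does backup.sh do?")
allow_response = verdict[0:4] == "safe"  # mirrors check_question() in response.py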

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/chain.py, lines 21-23
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["chain"][option_name]

prepare_models()

Prepare the embeddings and chat models, setting num_gpu based on whether opt('embeddings_gpu') is True or False

Source code in src/chain.py, lines 26-51
def prepare_models():
    """Set num_gpu depending on whether opt('embeddings_gpu') is True or False"""
    if opt("embeddings_gpu"):
        num_gpu = None
    else:
        num_gpu = 0

    # Set Embedding LLM to local Ollama model
    embeddings = OllamaEmbeddings(
        model=opt("embeddings_model"),
        base_url=opt("embeddings_url"),
        show_progress=opt("show_progress"),
        num_gpu=num_gpu,
    )

    # Set LLM to local Ollama model
    model = ChatOllama(
        model=opt("chat_model"),
        keep_alive=opt("keep_alive"),
        base_url=opt("chat_url"),
        temperature=opt("temperature"),
        top_k=opt("top_k"),
        top_p=opt("top_p"),
    )

    return [embeddings, model]

prepare_prompts()

Define the contextualization prompt (for reformulating questions w/ chat history) and the question-answering prompt

Source code in src/chain.py, lines 54-80
def prepare_prompts():
    """Define the contextualization prompt for summarizing chat history"""
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Define the question_answer_chain
    system_prompt = "Answer the question using the following context: " "{context}"
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    return qa_prompt, contextualize_q_prompt
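
Both prompts expect an input question plus a chat_history list of messages; qa_prompt additionally takes {context}, which create_stuff_documents_chain fills with the retrieved documents. A minimal formatting sketch (all values illustrative):

qa_prompt, contextualize_q_prompt = prepare_prompts()
messages = qa_prompt.format_messages(
    context="backup.sh: rsync -a ~/docs /mnt/backup",  # illustrative stuffed context
    chat_history=[],
    input="What does backup.sh do?",
)
# messages[0] is the system message with the context appended;
# messages[-1] is the human message containing the question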

embeddings.py

Refreshes/generates embeddings based on the scripts in SCRIPTS_DIR

create_batches(all_splits, batch_size)

Breaks all_splits into batches of size <= batch_size

Source code in src/embeddings.py, lines 36-39
def create_batches(all_splits, batch_size):
    """Breaks all_splits into batches of size <= batch_size"""
    for i in range(0, len(all_splits), batch_size):
        yield all_splits[i : i + batch_size]
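
Because this is a generator, split() can pass the batches to generate() lazily. For example:

list(create_batches([1, 2, 3, 4, 5], batch_size=2))
# -> [[1, 2], [3, 4], [5]]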

generate()

Embed and store text documents

Source code in src/embeddings.py, lines 63-81
def generate():
    """Embed and store text documents"""
    docs = load()
    all_splits = split(docs)
    embeddings = prepare_model()

    # Remove Vector Store if it exists
    if os.path.exists(os.path.expanduser(EMBED_DIR)):
        shutil.rmtree(os.path.expanduser(EMBED_DIR))

    # Save to persistent ChromaDB Vector Store
    for batch in all_splits:
        # pylint: disable=unused-variable
        vectorstore = Chroma.from_documents(
            documents=batch,
            collection_name=opt("collection_name"),
            embedding=embeddings,
            persist_directory=os.path.expanduser(EMBED_DIR),
        )

load()

Loads documents in SCRIPTS_DIR

Source code in src/embeddings.py, lines 24-33
def load():
    """Loads documents in SCRIPTS_DIR"""
    loader = DirectoryLoader(
        path=os.path.expanduser(SCRIPTS_DIR),
        loader_cls=TextLoader,
        show_progress=opt("show_progress"),
        use_multithreading=opt("use_multithreading"),
    )
    docs = loader.load()
    return docs

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/embeddings.py, lines 19-21
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["embeddings"][option_name]

prepare_model()

Set and return Ollama embeddings model

Source code in src/embeddings.py, lines 53-60
def prepare_model():
    """Set and return Ollama embeddings model"""
    embeddings = OllamaEmbeddings(
        model=opt("embeddings_model"),
        base_url=opt("embeddings_url"),
        show_progress=opt("show_progress"),
    )
    return embeddings

split(docs)

Split documents, then divide into batches to avoid ChromaDB/SQLite batch size limitations

Source code in src/embeddings.py, lines 42-50
def split(docs):
    """Split documents, then divide into batches to avoid ChromaDB/SQLite batch size limitations"""
    text_splitter = TokenTextSplitter(
        chunk_size=opt("chunk_size"),
        chunk_overlap=opt("chunk_overlap"),
    )
    all_splits = text_splitter.split_documents(docs)
    all_splits = create_batches(all_splits, opt("batch_size"))
    return all_splits

multi_retriever.py

Define and return the rag-fusion retriever and output parser

prepare(num_queries)

Define output parser and MultiQueryRetriever

Source code in src/multi_retriever.py, lines 18-98
def prepare(num_queries):
    """Define output parser and MultiQueryRetriever"""

    # Define the output parser for rag-fusion. Adapted from multi_query.py
    class LineListOutputParser(BaseOutputParser[List[str]]):
        """Output parser for a list of lines."""

        def parse(self, text: str) -> List[str]:
            lines = text.strip().split("\n")
            return lines

    # Set the rag-fusion prompt, enabling customization
    # of number of queries. Adapted from multi_query.py
    default_query_prompt = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is 
        to generate """
        + str(num_queries - 1)
        + """ different versions of the given user 
        question to retrieve relevant documents from a vector  database. 
        By generating multiple perspectives on the user question, 
        your goal is to help the user overcome some of the limitations 
        of distance-based similarity search. Provide these alternative 
        questions separated by newlines. Original question: {question}""",
    )

    # Define the retriever for rag-fusion. Adapted from multi_query.py
    class MultiQueryRetriever(BaseRetriever):
        """Given a query, use an LLM to write several and retrieve unique docs."""

        retriever: BaseRetriever
        llm_chain: Runnable
        verbose: bool = True
        parser_key: str = "lines"
        include_original: bool = False

        @classmethod
        def from_llm(
            cls,
            retriever: BaseRetriever,
            llm: BaseLanguageModel,
            prompt: BasePromptTemplate = default_query_prompt,
            include_original: bool = False,
        ) -> "MultiQueryRetriever":
            """Initialize from llm using default template."""
            output_parser = LineListOutputParser()
            llm_chain = prompt | llm | output_parser
            return cls(
                retriever=retriever,
                llm_chain=llm_chain,
                include_original=include_original,
            )

        def _get_relevant_documents(
            self, query: str, *, run_manager: CallbackManagerForRetrieverRun
        ) -> List[Document]:
            """Get relevant docs from multiple derived queries"""
            # Generate queries
            response = self.llm_chain.invoke(
                {"question": query}, config={"callbacks": run_manager.get_child()}
            )
            if isinstance(self.llm_chain, LLMChain):
                lines = response["text"]
            else:
                lines = response
            queries = lines[: max(num_queries - 1, 0)]
            if self.include_original:
                queries.append(query)

            # Retrieve and combine documents for each query
            documents = []
            for query in queries:
                docs = self.retriever.invoke(
                    query, config={"callbacks": run_manager.get_child()}
                )
                documents.extend(docs)

            # Return unique union of retrieved documents
            return [doc for i, doc in enumerate(documents) if doc not in documents[:i]]

    return MultiQueryRetriever
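
chain.create() consumes the returned class roughly as follows when rag_fusion is enabled; this sketch mirrors that call site, with vectorstore and chat_model standing in for the objects built in chain.prepare_models() and chain.create():

MultiQueryRetriever = prepare(num_queries=3)
fusion_retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    llm=chat_model,
    include_original=True,
)
# 2 derived queries + the original question, de-duplicated union of results
docs = fusion_retriever.invoke("How do I rotate my logs?")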

options.py

Creates and reads options at ~/.config/chat-script/chat-script.ini

create()

Create options file at ~/.config/chat-script/chat-script.ini with defaults

Source code in src/options.py, lines 13-62
def create():
    """Create options file at ~/.config/chat-script/chat-script.ini with defaults"""
    configuration = ConfigParser()
    configuration["APP"] = {
        "share": "False",
        "server_name": "127.0.0.1",
        "server_port": "7860",
        "inbrowser": "True",
    }
    configuration["CHAIN"] = {
        "embeddings_model": "mxbai-embed-large",
        "chat_model": "mistral",
        "moderation_model": "llama-guard3:1b",
        "embeddings_url": "https://localhost:11434",
        "chat_url": "http://localhost:11434",
        "moderation_url": "http://localhost:11434",
        "show_progress": "False",
        "keep_alive": "5m",
        "temperature": "0.6",
        "top_k": "30",
        "top_p": "0.7",
        "collection_name": "rag-chroma",
        "top_n_results": "3",
        "rag_fusion": "True",
        "num_queries": "2",
        "top_n_results_fusion": "2",
        "embeddings_gpu": "True",
    }
    configuration["EMBEDDINGS"] = {
        "embeddings_model": "mxbai-embed-large",
        "embeddings_url": "https://localhost:11434",
        "show_progress": "True",
        "collection_name": "rag-chroma",
        "use_multithreading": "True",
        "chunk_size": "250",
        "chunk_overlap": "50",
        "batch_size": "41666",
    }
    configuration["RESPONSE"] = {
        "context_stream_delay": "0.075",
        "max_history": "2",
        "print_state": "True",
        "moderate": "False",
        "moderate_alert": "False",
    }
    with open(os.path.expanduser(CONFIG_FILE), "w", encoding="UTF-8") as configfile:
        configuration.write(configfile)
    print(
        f"\nCreated CONFIG_FILE at: {CONFIG_FILE} and populated it with default settings"
    )
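
The generated file is a standard ConfigParser INI, so options can be changed by hand or programmatically before the next launch. A minimal sketch that disables rag_fusion (section and option names as defined above):

from configparser import ConfigParser
import os

path = os.path.expanduser("~/.config/chat-script/chat-script.ini")
configuration = ConfigParser()
configuration.read(path)
configuration["CHAIN"]["rag_fusion"] = "False"
with open(path, "w", encoding="UTF-8") as configfile:
    configuration.write(configfile)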

read()

Read options from ~/.config/chat-script/chat-script.ini and save them in the global dict OPTIONS

Source code in src/options.py, lines 65-249
def read():
    """Read options from ~/.config/chat-script/chat-script.ini and save in global dict: options"""
    configuration = ConfigParser()
    configuration.read(os.path.expanduser(CONFIG_FILE))
    global OPTIONS
    OPTIONS = {
        "app": {
            "share": configuration.getboolean(
                "APP",
                "share",
                fallback=False,
            ),
            "server_name": configuration.get(
                "APP",
                "server_name",
                fallback="127.0.0.1",
            ),
            "server_port": configuration.getint(
                "APP",
                "server_port",
                fallback=7860,
            ),
            "inbrowser": configuration.getboolean(
                "APP",
                "inbrowser",
                fallback=True,
            ),
        },
        "chain": {
            "embeddings_model": configuration.get(
                "CHAIN",
                "embeddings_model",
                fallback="mxbai-embed-large",
            ),
            "chat_model": configuration.get(
                "CHAIN",
                "chat_model",
                fallback="mistral",
            ),
            "moderation_model": configuration.get(
                "CHAIN",
                "moderation_model",
                fallback="llama-guard3:1b",
            ),
            "embeddings_url": configuration.get(
                "CHAIN",
                "embeddings_url",
                fallback="http://localhost:11434",
            ),
            "chat_url": configuration.get(
                "CHAIN",
                "chat_url",
                fallback="http://localhost:11434",
            ),
            "moderation_url": configuration.get(
                "CHAIN",
                "moderation_url",
                fallback="http://localhost:11434",
            ),
            "show_progress": configuration.getboolean(
                "CHAIN",
                "show_progress",
                fallback=False,
            ),
            "keep_alive": configuration.get(
                "CHAIN",
                "keep_alive",
                fallback="5m",
            ),
            "temperature": configuration.getfloat(
                "CHAIN",
                "temperature",
                fallback=0.6,
            ),
            "top_k": configuration.getint(
                "CHAIN",
                "top_k",
                fallback=30,
            ),
            "top_p": configuration.getfloat(
                "CHAIN",
                "top_p",
                fallback=0.7,
            ),
            "collection_name": configuration.get(
                "CHAIN",
                "collection_name",
                fallback="rag-chroma",
            ),
            "top_n_results": configuration.getint(
                "CHAIN",
                "top_n_results",
                fallback=3,
            ),
            "rag_fusion": configuration.getboolean(
                "CHAIN",
                "rag_fusion",
                fallback=True,
            ),
            "num_queries": configuration.getint(
                "CHAIN",
                "num_queries",
                fallback=2,
            ),
            "top_n_results_fusion": configuration.getint(
                "CHAIN",
                "top_n_results_fusion",
                fallback=2,
            ),
            "embeddings_gpu": configuration.getboolean(
                "CHAIN",
                "embeddings_gpu",
                fallback=True,
            ),
        },
        "embeddings": {
            "embeddings_model": configuration.get(
                "EMBEDDINGS",
                "embeddings_model",
                fallback="mxbai-embed-large",
            ),
            "embeddings_url": configuration.get(
                "EMBEDDINGS",
                "embeddings_url",
                fallback="http://localhost:11434",
            ),
            "show_progress": configuration.getboolean(
                "EMBEDDINGS",
                "show_progress",
                fallback=True,
            ),
            "collection_name": configuration.get(
                "EMBEDDINGS",
                "collection_name",
                fallback="rag-chroma",
            ),
            "use_multithreading": configuration.getboolean(
                "EMBEDDINGS",
                "use_multithreading",
                fallback=True,
            ),
            "chunk_size": configuration.getint(
                "EMBEDDINGS",
                "chunk_size",
                fallback=250,
            ),
            "chunk_overlap": configuration.getint(
                "EMBEDDINGS",
                "chunk_overlap",
                fallback=50,
            ),
            "batch_size": configuration.getint(
                "EMBEDDINGS",
                "batch_size",
                fallback=41666,
            ),
        },
        "response": {
            "context_stream_delay": configuration.getfloat(
                "RESPONSE",
                "context_stream_delay",
                fallback=0.075,
            ),
            "max_history": configuration.getint(
                "RESPONSE",
                "max_history",
                fallback=2,
            ),
            "print_state": configuration.getboolean(
                "RESPONSE",
                "print_state",
                fallback=True,
            ),
            "moderate": configuration.getboolean(
                "RESPONSE",
                "moderate",
                fallback=False,
            ),
            "moderate_alert": configuration.getboolean(
                "RESPONSE",
                "moderate_alert",
                fallback=False,
            ),
        },
    }
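
After read(), each module accesses its own section through its local opt() helper, and values are already typed by the getboolean/getint/getfloat calls above. For example, with the defaults in place:

read()
OPTIONS["chain"]["chat_model"]      # "mistral"
OPTIONS["response"]["max_history"]  # 2 (int)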

response.py

Returns a response w/ citations from the RAG-enabled LLM, based on the user question passed from the app UI

check_question(question, request)

Determines whether a response may be generated based on config and user input

Source code in src/response.py, lines 34-43
def check_question(question, request):
    """Determines whether a response may be generated based on config and user input"""
    if request and opt("print_state"):
        print("\nIP address of user: ", request.client.host, sep="")
    allow_response = True
    if opt("moderate"):
        moderation_chain = chain.create_moderation()
        moderation_result = moderation_chain.invoke(question)
        allow_response = moderation_result[0:4] == "safe"
    return allow_response

convert_session_history(history)

Workaround for converting Gradio history to Langchain-compatible chat_history.

Source code in src/response.py, lines 46-62
def convert_session_history(history):
    """Workaround for converting Gradio history to Langchain-compatible chat_history."""
    global session_history
    session_history = ChatMessageHistory()

    # Remove unsafe messages from history if applicable
    if opt("moderate"):
        for msgs in history:
            if msgs[1] == f"{UNSAFE_RESPONSE} ":
                history.remove(msgs)

    # Trim history before converting to langchain format
    if len(history) > opt("max_history"):
        history = history[-int(opt("max_history")) :]
    for msgs in history:
        session_history.add_user_message(msgs[0])
        session_history.add_ai_message(msgs[1].split("\n\nRelevant Sources")[0])
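
Gradio passes history as [user, assistant] pairs (the chatbot uses type="tuples" in app.py), and any appended "Relevant Sources" block is stripped before the pair is stored. A minimal sketch with illustrative messages:

history = [
    ["What does backup.sh do?", "It syncs ~/docs to a backup drive.\n\nRelevant Sources: [1] backup.sh"],
]
convert_session_history(history)
session_history.messages
# a HumanMessage and an AIMessage, with the sources block removed from the AI reply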

format_context(context)

Formats and yields context passed to LLM in human-readable format

Source code in src/response.py, lines 97-113
def format_context(context):
    """Formats and yields context passed to LLM in human-readable format"""
    if opt("print_state"):
        print("Context: ", context, sep="")
    formatted_context = "Relevant Sources: "
    yield "\n\n"
    for index, chunk in enumerate(context):
        formatted_context += (
            f"[{str(index+1)}] {chunk.metadata['source'][SCRIPTS_DIR_LEN:]}"
        )
        for fmt_chunks in formatted_context.split():
            yield f"{fmt_chunks} "
            if (index == 0) and (fmt_chunks == "Sources:"):
                yield "\n"
            time.sleep(opt("context_stream_delay"))
        yield "\n"
        formatted_context = ""

generate(question, history, request)

Creates RAG + history chain w/ local LLM and streams chain's text response

Source code in src/response.py, lines 137-168
def generate(question, history, request: Request):
    """Creates RAG + history chain w/ local LLM and streams chain's text response"""
    if check_question(question, request):
        convert_session_history(history)
        rag_history_chain = prepare_rag_history()

        # Yield response and formatted context (if applicable) as a text stream
        result = rag_history_chain.stream(
            {"input": question}, config={"configurable": {"session_id": "unused"}}
        )
        response_stream = ""
        context = None
        for chunks in result:
            answer_chunks = chunks.get("answer")
            get_context = chunks.get("context")
            if answer_chunks:
                response_stream += answer_chunks
            if get_context:
                context = get_context
            yield response_stream
        if context:
            formatted_context = format_context(context)
            for context_chunks in formatted_context:
                response_stream += context_chunks
                yield response_stream
    else:
        reject(question, request)
        rejection = rejection_message()
        response_stream = ""
        for reject_chunks in rejection:
            response_stream += reject_chunks
            yield response_stream
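
Gradio's ChatInterface consumes generate() as a generator, re-rendering the growing response_stream on each yield. It can be driven the same way outside the UI, provided options.read() and chain.create() have already run (request may be None, per the guard in check_question()):

for partial in generate("What does backup.sh do?", history=[], request=None):
    last = partial
print(last)  # final answer text followed by the formatted "Relevant Sources" block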

get_session_history()

Manage chat history

Source code in src/response.py, lines 65-67
def get_session_history() -> BaseChatMessageHistory:
    """Manage chat history"""
    return session_history

inspect(state)

Print state between runnables and pass it on (includes: input, chat_history)

Source code in src/response.py, lines 70-74
def inspect(state):
    """Print state between runnables and pass it on (includes: input, chat_history)"""
    if opt("print_state"):
        print("State: ", state, sep="")
    return state

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/response.py, lines 29-31
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["response"][option_name]

prepare_rag_history()

Define retrieval chain w/ history

Source code in src/response.py, lines 77-94
def prepare_rag_history() -> RunnableWithMessageHistory:
    """Define retrieval chain w/ history"""
    rag_history_chain = RunnableWithMessageHistory(
        RunnableLambda(inspect) | chain.rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )
    # Old approach for including context in state -
    # investigate this further to prevent separate context printing
    # retrieve_docs = (lambda x: x["input"]) | retriever
    # chain = RunnablePassthrough.assign(
    #   context=retrieve_docs
    # ).assign(
    #   answer=rag_chain_from_docs
    # )
    return rag_history_chain

reject(question, request)

Display log, alert based on config

Source code in src/response.py, lines 116-124
def reject(question, request):
    """Display log, alert based on config"""
    if opt("moderate_alert") and platform.system() == "Linux":
        notify2.init("chat-script")
        alert = notify2.Notification("Unsafe question received")
        alert.show()
    if request and not opt("print_state"):
        print("\nIP address of user: ", request.client.host, sep="")
    print("Unsafe question: '", question, "'", sep="")

rejection_message()

Yield unsafe response info to user

Source code in src/response.py, lines 127-134
def rejection_message():
    """Yield unsafe response info to user"""
    response_stream = ""
    for chunks in UNSAFE_RESPONSE.split():
        response_stream += f"{chunks} "
        yield response_stream
        response_stream = ""
        time.sleep(opt("context_stream_delay"))