
Reference

main.py

Entry point that ensures the necessary config and scripts are set up before handing off to app.py

main()

Create directories and embed scripts if needed, otherwise run options.read() and app.launch()

Source code in src/__main__.py, lines 17-52
def main():
    """Create directories and embed scripts if needed, otw run options.read() and app.launch()"""
    if not os.path.exists(os.path.expanduser(CONFIG_FILE)):
        if not os.path.exists(os.path.expanduser(CONFIG_DIR)):
            os.makedirs(os.path.expanduser(CONFIG_DIR))
            options.create()
        else:
            options.create()
        if os.path.exists(os.path.expanduser(SCRIPTS_DIR)):
            options.read()
            app.launch()
    elif not os.path.exists(os.path.expanduser(SCRIPTS_DIR)):
        os.makedirs(os.path.expanduser(SCRIPTS_DIR))
        print("\nCreated SCRIPTS_DIR at: " + SCRIPTS_DIR)
        user_embed = None
        while not user_embed:
            user_embed = str(
                input(
                    """Would you like to embed the scripts now (if yes, then add your 
                scripts to ~/.chat-script/scripts before submitting)? y/n: """
                )
            )
            if user_embed:
                if user_embed[0] == "y" or user_embed[0] == "Y":
                    options.read()
                    embeddings.generate()
                    app.launch()
                elif user_embed[0] == "n" or user_embed[0] == "N":
                    options.read()
                    app.launch()
                else:
                    print("Input must be one of: y/n\n")
                    user_embed = None
    else:
        options.read()
        app.launch()

app.py

Gradio UI leveraging the generate function in response.py

launch()

Launch app's Gradio UI

Source code in src/app.py, lines 15-37
def launch():
    """Launch app's Gradio UI"""
    chain.create()
    app = gr.ChatInterface(
        response.generate,
        chatbot=gr.Chatbot(
            show_copy_button=True,
            bubble_full_width=False,
            scale=1,
            type="tuples",
        ),
        fill_height=True,
        title="chat-script",
        theme="gradio/monochrome",
        analytics_enabled=False,
        additional_inputs=[],
    ).queue()
    app.launch(
        share=opt("share"),
        server_name=opt("server_name"),
        server_port=opt("server_port"),
        inbrowser=opt("inbrowser"),
    )

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/app.py, lines 10-12
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["app"][option_name]
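
For example, once options.read() has populated options.OPTIONS, opt() resolves keys from the "app" section with their parsed types. A minimal sketch, assuming the default configuration:

options.read()
opt("server_port")  # -> 7860 (int)
opt("inbrowser")    # -> True (bool)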

chain.py

Set up the language models and multi-query retriever, and define the moderation and RAG chains

create()

Set ChromaDB vectorstore (w/ opt('collection_name')) as a retriever and create rag_chain

Source code in src/chain.py, lines 83-117
def create():
    """Set ChromaDB vectorstore (w/ opt('collection_name')) as a retriever and create rag_chain"""
    models = prepare_models()
    qa_prompt, contextualize_q_prompt = prepare_prompts()

    vectorstore = Chroma(
        collection_name=opt("collection_name"),
        embedding_function=models[0],
        persist_directory=os.path.expanduser(EMBED_DIR),
    )

    if opt("rag_fusion"):
        MultiQueryRetriever = multi_retriever.prepare(opt("num_queries"))
        retriever_fusion = MultiQueryRetriever.from_llm(
            retriever=vectorstore.as_retriever(
                search_kwargs={"k": opt("top_n_results_fusion")}
            ),
            llm=models[1],
            include_original=True,
        )
        retriever = create_history_aware_retriever(
            models[1],
            retriever_fusion,
            contextualize_q_prompt,
        )
    else:
        retriever = create_history_aware_retriever(
            models[1],
            vectorstore.as_retriever(search_kwargs={"k": opt("top_n_results")}),
            contextualize_q_prompt,
        )

    global rag_chain
    question_answer_chain = create_stuff_documents_chain(models[1], qa_prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)
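
Once create() has run, the module-level rag_chain can be invoked directly (response.py wraps it in RunnableWithMessageHistory instead). A minimal sketch of the retrieval-chain interface, with an illustrative question and the module imported as chain, as response.py does:

import chain

chain.create()
result = chain.rag_chain.invoke(
    {"input": "What does backup.sh do?", "chat_history": []}
)
result["answer"]   # generated answer text
result["context"]  # list of retrieved Documents used as context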

create_moderation()

Set Moderation LLM to local Ollama model, construct and return chain

Source code in src/chain.py, lines 120-128
def create_moderation():
    """Set Moderation LLM to local Ollama model, construct and return chain"""
    moderation = ChatOllama(
        model=opt("moderation_model"),
        keep_alive=opt("keep_alive"),
        base_url=opt("moderation_url"),
    )
    moderation_chain = moderation | StrOutputParser()
    return moderation_chain
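
The returned chain maps a question string to the moderation model's verdict; check_question() in response.py treats any verdict starting with "safe" as allowed. A minimal sketch with an illustrative question:

moderation_chain = create_moderation()
verdict = moderation_chain.invoke("What does backup.sh do?")
allow_response = verdict[0:4] == "safe"  # mirrors check_question() in response.py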

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/chain.py, lines 21-23
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["chain"][option_name]

prepare_models()

Prepare the embeddings and chat models, setting num_gpu based on whether opt('embeddings_gpu') is True or False

Source code in src/chain.py, lines 26-51
def prepare_models():
    """Set num_gpu depending on whether opt('embeddings_gpu') is True or False"""
    if opt("embeddings_gpu"):
        num_gpu = None
    else:
        num_gpu = 0

    # Set Embedding LLM to local Ollama model
    embeddings = OllamaEmbeddings(
        model=opt("embeddings_model"),
        base_url=opt("embeddings_url"),
        show_progress=opt("show_progress"),
        num_gpu=num_gpu,
    )

    # Set LLM to local Ollama model
    model = ChatOllama(
        model=opt("chat_model"),
        keep_alive=opt("keep_alive"),
        base_url=opt("chat_url"),
        temperature=opt("temperature"),
        top_k=opt("top_k"),
        top_p=opt("top_p"),
    )

    return [embeddings, model]

prepare_prompts()

Define the contextualization prompt (for reformulating questions w/ chat history) and the question-answering prompt

Source code in src/chain.py, lines 54-80
def prepare_prompts():
    """Define the contextualization prompt for summarizing chat history"""
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Define the question_answer_chain
    system_prompt = "Answer the question using the following context: " "{context}"
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    return qa_prompt, contextualize_q_prompt
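
Both prompts expect an input question plus a chat_history list of messages; qa_prompt additionally takes {context}, which create_stuff_documents_chain fills with the retrieved documents. A minimal formatting sketch (all values illustrative):

qa_prompt, contextualize_q_prompt = prepare_prompts()
messages = qa_prompt.format_messages(
    context="backup.sh: rsync -a ~/docs /mnt/backup",  # illustrative stuffed context
    chat_history=[],
    input="What does backup.sh do?",
)
# messages[0] is the system message with the context appended;
# messages[-1] is the human message containing the question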

embeddings.py

Refreshes/generates embeddings based on the scripts in SCRIPTS_DIR

create_batches(all_splits, batch_size)

Breaks all_splits into batches of size <= batch_size

Source code in src/embeddings.py, lines 36-39
def create_batches(all_splits, batch_size):
    """Breaks all_splits into batches of size <= batch_size"""
    for i in range(0, len(all_splits), batch_size):
        yield all_splits[i : i + batch_size]
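
Because this is a generator, split() can pass the batches to generate() lazily. For example:

list(create_batches([1, 2, 3, 4, 5], batch_size=2))
# -> [[1, 2], [3, 4], [5]]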

generate()

Embed and store text documents

Source code in src/embeddings.py, lines 63-81
def generate():
    """Embed and store text documents"""
    docs = load()
    all_splits = split(docs)
    embeddings = prepare_model()

    # Remove Vector Store if it exists
    if os.path.exists(os.path.expanduser(EMBED_DIR)):
        shutil.rmtree(os.path.expanduser(EMBED_DIR))

    # Save to persistent ChromaDB Vector Store
    for batch in all_splits:
        # pylint: disable=unused-variable
        vectorstore = Chroma.from_documents(
            documents=batch,
            collection_name=opt("collection_name"),
            embedding=embeddings,
            persist_directory=os.path.expanduser(EMBED_DIR),
        )

load()

Loads documents in SCRIPTS_DIR

Source code in src/embeddings.py, lines 24-33
def load():
    """Loads documents in SCRIPTS_DIR"""
    loader = DirectoryLoader(
        path=os.path.expanduser(SCRIPTS_DIR),
        loader_cls=TextLoader,
        show_progress=opt("show_progress"),
        use_multithreading=opt("use_multithreading"),
    )
    docs = loader.load()
    return docs

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/embeddings.py, lines 19-21
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["embeddings"][option_name]

prepare_model()

Set and return Ollama embeddings model

Source code in src/embeddings.py, lines 53-60
def prepare_model():
    """Set and return Ollama embeddings model"""
    embeddings = OllamaEmbeddings(
        model=opt("embeddings_model"),
        base_url=opt("embeddings_url"),
        show_progress=opt("show_progress"),
    )
    return embeddings

split(docs)

Split documents, then divide into batches to avoid ChromaDB/SQLite batch size limitations

Source code in src/embeddings.py, lines 42-50
def split(docs):
    """Split documents, then divide into batches to avoid ChromaDB/SQLite batch size limitations"""
    text_splitter = TokenTextSplitter(
        chunk_size=opt("chunk_size"),
        chunk_overlap=opt("chunk_overlap"),
    )
    all_splits = text_splitter.split_documents(docs)
    all_splits = create_batches(all_splits, opt("batch_size"))
    return all_splits

multi_retriever.py

Define and return the rag-fusion retriever and output parser

prepare(num_queries)

Define output parser and MultiQueryRetriever

Source code in src/multi_retriever.py, lines 18-98
def prepare(num_queries):
    """Define output parser and MultiQueryRetriever"""

    # Define the output parser for rag-fusion. Adapted from multi_query.py
    class LineListOutputParser(BaseOutputParser[List[str]]):
        """Output parser for a list of lines."""

        def parse(self, text: str) -> List[str]:
            lines = text.strip().split("\n")
            return lines

    # Set the rag-fusion prompt, enabling customization
    # of number of queries. Adapted from multi_query.py
    default_query_prompt = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is 
        to generate """
        + str(num_queries - 1)
        + """ different versions of the given user 
        question to retrieve relevant documents from a vector  database. 
        By generating multiple perspectives on the user question, 
        your goal is to help the user overcome some of the limitations 
        of distance-based similarity search. Provide these alternative 
        questions separated by newlines. Original question: {question}""",
    )

    # Define the retriever for rag-fusion. Adapted from multi_query.py
    class MultiQueryRetriever(BaseRetriever):
        """Given a query, use an LLM to write several and retrieve unique docs."""

        retriever: BaseRetriever
        llm_chain: Runnable
        verbose: bool = True
        parser_key: str = "lines"
        include_original: bool = False

        @classmethod
        def from_llm(
            cls,
            retriever: BaseRetriever,
            llm: BaseLanguageModel,
            prompt: BasePromptTemplate = default_query_prompt,
            include_original: bool = False,
        ) -> "MultiQueryRetriever":
            """Initialize from llm using default template."""
            output_parser = LineListOutputParser()
            llm_chain = prompt | llm | output_parser
            return cls(
                retriever=retriever,
                llm_chain=llm_chain,
                include_original=include_original,
            )

        def _get_relevant_documents(
            self, query: str, *, run_manager: CallbackManagerForRetrieverRun
        ) -> List[Document]:
            """Get relevant docs from multiple derived queries"""
            # Generate queries
            response = self.llm_chain.invoke(
                {"question": query}, config={"callbacks": run_manager.get_child()}
            )
            if isinstance(self.llm_chain, LLMChain):
                lines = response["text"]
            else:
                lines = response
            queries = lines[: max(num_queries - 1, 0)]
            if self.include_original:
                queries.append(query)

            # Retrieve and combine documents for each query
            documents = []
            for query in queries:
                docs = self.retriever.invoke(
                    query, config={"callbacks": run_manager.get_child()}
                )
                documents.extend(docs)

            # Return unique union of retrieved documents
            return [doc for i, doc in enumerate(documents) if doc not in documents[:i]]

    return MultiQueryRetriever
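
chain.create() consumes the returned class roughly as follows when rag_fusion is enabled; this sketch mirrors that call site, with vectorstore and chat_model standing in for the objects built in chain.prepare_models() and chain.create():

MultiQueryRetriever = prepare(num_queries=3)
fusion_retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    llm=chat_model,
    include_original=True,
)
# 2 derived queries + the original question, de-duplicated union of results
docs = fusion_retriever.invoke("How do I rotate my logs?")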

options.py

Creates and reads options at ~/.config/chat-script/chat-script.ini

create()

Create options file at ~/.config/chat-script/chat-script.ini with defaults

Source code in src/options.py, lines 13-62
def create():
    """Create options file at ~/.config/chat-script/chat-script.ini with defaults"""
    configuration = ConfigParser()
    configuration["APP"] = {
        "share": "False",
        "server_name": "127.0.0.1",
        "server_port": "7860",
        "inbrowser": "True",
    }
    configuration["CHAIN"] = {
        "embeddings_model": "mxbai-embed-large",
        "chat_model": "mistral",
        "moderation_model": "llama-guard3:1b",
        "embeddings_url": "https://localhost:11434",
        "chat_url": "http://localhost:11434",
        "moderation_url": "http://localhost:11434",
        "show_progress": "False",
        "keep_alive": "5m",
        "temperature": "0.6",
        "top_k": "30",
        "top_p": "0.7",
        "collection_name": "rag-chroma",
        "top_n_results": "3",
        "rag_fusion": "True",
        "num_queries": "2",
        "top_n_results_fusion": "2",
        "embeddings_gpu": "True",
    }
    configuration["EMBEDDINGS"] = {
        "embeddings_model": "mxbai-embed-large",
        "embeddings_url": "https://localhost:11434",
        "show_progress": "True",
        "collection_name": "rag-chroma",
        "use_multithreading": "True",
        "chunk_size": "250",
        "chunk_overlap": "50",
        "batch_size": "41666",
    }
    configuration["RESPONSE"] = {
        "context_stream_delay": "0.075",
        "max_history": "2",
        "print_state": "True",
        "moderate": "False",
        "moderate_alert": "False",
    }
    with open(os.path.expanduser(CONFIG_FILE), "w", encoding="UTF-8") as configfile:
        configuration.write(configfile)
    print(
        f"\nCreated CONFIG_FILE at: {CONFIG_FILE} and populated it with default settings"
    )
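
The generated file is a standard ConfigParser INI, so options can be changed by hand or programmatically before the next launch. A minimal sketch that disables rag_fusion (section and option names as defined above):

from configparser import ConfigParser
import os

path = os.path.expanduser("~/.config/chat-script/chat-script.ini")
configuration = ConfigParser()
configuration.read(path)
configuration["CHAIN"]["rag_fusion"] = "False"
with open(path, "w", encoding="UTF-8") as configfile:
    configuration.write(configfile)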

read()

Read options from ~/.config/chat-script/chat-script.ini and save them in the global dict OPTIONS

Source code in src/options.py, lines 65-249
def read():
    """Read options from ~/.config/chat-script/chat-script.ini and save in global dict: options"""
    configuration = ConfigParser()
    configuration.read(os.path.expanduser(CONFIG_FILE))
    global OPTIONS
    OPTIONS = {
        "app": {
            "share": configuration.getboolean(
                "APP",
                "share",
                fallback=False,
            ),
            "server_name": configuration.get(
                "APP",
                "server_name",
                fallback="127.0.0.1",
            ),
            "server_port": configuration.getint(
                "APP",
                "server_port",
                fallback=7860,
            ),
            "inbrowser": configuration.getboolean(
                "APP",
                "inbrowser",
                fallback=True,
            ),
        },
        "chain": {
            "embeddings_model": configuration.get(
                "CHAIN",
                "embeddings_model",
                fallback="mxbai-embed-large",
            ),
            "chat_model": configuration.get(
                "CHAIN",
                "chat_model",
                fallback="mistral",
            ),
            "moderation_model": configuration.get(
                "CHAIN",
                "moderation_model",
                fallback="llama-guard3:1b",
            ),
            "embeddings_url": configuration.get(
                "CHAIN",
                "embeddings_url",
                fallback="http://localhost:11434",
            ),
            "chat_url": configuration.get(
                "CHAIN",
                "chat_url",
                fallback="http://localhost:11434",
            ),
            "moderation_url": configuration.get(
                "CHAIN",
                "moderation_url",
                fallback="http://localhost:11434",
            ),
            "show_progress": configuration.getboolean(
                "CHAIN",
                "show_progress",
                fallback=False,
            ),
            "keep_alive": configuration.get(
                "CHAIN",
                "keep_alive",
                fallback="5m",
            ),
            "temperature": configuration.getfloat(
                "CHAIN",
                "temperature",
                fallback=0.6,
            ),
            "top_k": configuration.getint(
                "CHAIN",
                "top_k",
                fallback=30,
            ),
            "top_p": configuration.getfloat(
                "CHAIN",
                "top_p",
                fallback=0.7,
            ),
            "collection_name": configuration.get(
                "CHAIN",
                "collection_name",
                fallback="rag-chroma",
            ),
            "top_n_results": configuration.getint(
                "CHAIN",
                "top_n_results",
                fallback=3,
            ),
            "rag_fusion": configuration.getboolean(
                "CHAIN",
                "rag_fusion",
                fallback=True,
            ),
            "num_queries": configuration.getint(
                "CHAIN",
                "num_queries",
                fallback=2,
            ),
            "top_n_results_fusion": configuration.getint(
                "CHAIN",
                "top_n_results_fusion",
                fallback=2,
            ),
            "embeddings_gpu": configuration.getboolean(
                "CHAIN",
                "embeddings_gpu",
                fallback=True,
            ),
        },
        "embeddings": {
            "embeddings_model": configuration.get(
                "EMBEDDINGS",
                "embeddings_model",
                fallback="mxbai-embed-large",
            ),
            "embeddings_url": configuration.get(
                "EMBEDDINGS",
                "embeddings_url",
                fallback="http://localhost:11434",
            ),
            "show_progress": configuration.getboolean(
                "EMBEDDINGS",
                "show_progress",
                fallback=True,
            ),
            "collection_name": configuration.get(
                "EMBEDDINGS",
                "collection_name",
                fallback="rag-chroma",
            ),
            "use_multithreading": configuration.getboolean(
                "EMBEDDINGS",
                "use_multithreading",
                fallback=True,
            ),
            "chunk_size": configuration.getint(
                "EMBEDDINGS",
                "chunk_size",
                fallback=250,
            ),
            "chunk_overlap": configuration.getint(
                "EMBEDDINGS",
                "chunk_overlap",
                fallback=50,
            ),
            "batch_size": configuration.getint(
                "EMBEDDINGS",
                "batch_size",
                fallback=41666,
            ),
        },
        "response": {
            "context_stream_delay": configuration.getfloat(
                "RESPONSE",
                "context_stream_delay",
                fallback=0.075,
            ),
            "max_history": configuration.getint(
                "RESPONSE",
                "max_history",
                fallback=2,
            ),
            "print_state": configuration.getboolean(
                "RESPONSE",
                "print_state",
                fallback=True,
            ),
            "moderate": configuration.getboolean(
                "RESPONSE",
                "moderate",
                fallback=False,
            ),
            "moderate_alert": configuration.getboolean(
                "RESPONSE",
                "moderate_alert",
                fallback=False,
            ),
        },
    }
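
After read(), each module accesses its own section through its local opt() helper, and values are already typed by the getboolean/getint/getfloat calls above. For example, with the defaults in place:

read()
OPTIONS["chain"]["chat_model"]      # "mistral"
OPTIONS["response"]["max_history"]  # 2 (int)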

response.py

Returns a response w/ citations from the RAG-enabled LLM, based on the user question passed from the app UI

check_question(question, request)

Determines whether a response may be generated based on config and user input

Source code in src/response.py, lines 34-43
def check_question(question, request):
    """Determines whether a response may be generated based on config and user input"""
    if request and opt("print_state"):
        print("\nIP address of user: ", request.client.host, sep="")
    allow_response = True
    if opt("moderate"):
        moderation_chain = chain.create_moderation()
        moderation_result = moderation_chain.invoke(question)
        allow_response = moderation_result[0:4] == "safe"
    return allow_response

convert_session_history(history)

Workaround for converting Gradio history to Langchain-compatible chat_history.

Source code in src/response.py, lines 46-62
def convert_session_history(history):
    """Workaround for converting Gradio history to Langchain-compatible chat_history."""
    global session_history
    session_history = ChatMessageHistory()

    # Remove unsafe messages from history if applicable
    if opt("moderate"):
        for msgs in history:
            if msgs[1] == f"{UNSAFE_RESPONSE} ":
                history.remove(msgs)

    # Trim history before converting to langchain format
    if len(history) > opt("max_history"):
        history = history[-int(opt("max_history")) :]
    for msgs in history:
        session_history.add_user_message(msgs[0])
        session_history.add_ai_message(msgs[1].split("\n\nRelevant Sources")[0])
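
Gradio passes history as [user, assistant] pairs (the chatbot uses type="tuples" in app.py), and any appended "Relevant Sources" block is stripped before the pair is stored. A minimal sketch with illustrative messages:

history = [
    ["What does backup.sh do?", "It syncs ~/docs to a backup drive.\n\nRelevant Sources: [1] backup.sh"],
]
convert_session_history(history)
session_history.messages
# a HumanMessage and an AIMessage, with the sources block removed from the AI reply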

format_context(context)

Formats and yields context passed to LLM in human-readable format

Source code in src/response.py, lines 97-113
def format_context(context):
    """Formats and yields context passed to LLM in human-readable format"""
    if opt("print_state"):
        print("Context: ", context, sep="")
    formatted_context = "Relevant Sources: "
    yield "\n\n"
    for index, chunk in enumerate(context):
        formatted_context += (
            f"[{str(index+1)}] {chunk.metadata['source'][SCRIPTS_DIR_LEN:]}"
        )
        for fmt_chunks in formatted_context.split():
            yield f"{fmt_chunks} "
            if (index == 0) and (fmt_chunks == "Sources:"):
                yield "\n"
            time.sleep(opt("context_stream_delay"))
        yield "\n"
        formatted_context = ""

generate(question, history, request)

Creates RAG + history chain w/ local LLM and streams chain's text response

Source code in src/response.py, lines 137-168
def generate(question, history, request: Request):
    """Creates RAG + history chain w/ local LLM and streams chain's text response"""
    if check_question(question, request):
        convert_session_history(history)
        rag_history_chain = prepare_rag_history()

        # Yield response and formatted context (if applicable) as a text stream
        result = rag_history_chain.stream(
            {"input": question}, config={"configurable": {"session_id": "unused"}}
        )
        response_stream = ""
        context = None
        for chunks in result:
            answer_chunks = chunks.get("answer")
            get_context = chunks.get("context")
            if answer_chunks:
                response_stream += answer_chunks
            if get_context:
                context = get_context
            yield response_stream
        if context:
            formatted_context = format_context(context)
            for context_chunks in formatted_context:
                response_stream += context_chunks
                yield response_stream
    else:
        reject(question, request)
        rejection = rejection_message()
        response_stream = ""
        for reject_chunks in rejection:
            response_stream += reject_chunks
            yield response_stream
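
Gradio's ChatInterface consumes generate() as a generator, re-rendering the growing response_stream on each yield. It can be driven the same way outside the UI, provided options.read() and chain.create() have already run (request may be None, per the guard in check_question()):

for partial in generate("What does backup.sh do?", history=[], request=None):
    last = partial
print(last)  # final answer text followed by the formatted "Relevant Sources" block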

get_session_history()

Manage chat history

Source code in src/response.py, lines 65-67
def get_session_history() -> BaseChatMessageHistory:
    """Manage chat history"""
    return session_history

inspect(state)

Print state between runnables and pass it on (includes: input, chat_history)

Source code in src/response.py, lines 70-74
def inspect(state):
    """Print state between runnables and pass it on (includes: input, chat_history)"""
    if opt("print_state"):
        print("State: ", state, sep="")
    return state

opt(option_name)

Syntactic sugar for retrieving options

Source code in src/response.py, lines 29-31
def opt(option_name):
    """Syntactic sugar for retrieving options"""
    return options.OPTIONS["response"][option_name]

prepare_rag_history()

Define retrieval chain w/ history

Source code in src/response.py, lines 77-94
def prepare_rag_history() -> RunnableWithMessageHistory:
    """Define retrieval chain w/ history"""
    rag_history_chain = RunnableWithMessageHistory(
        RunnableLambda(inspect) | chain.rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )
    # Old approach for including context in state -
    # investigate this further to prevent separate context printing
    # retrieve_docs = (lambda x: x["input"]) | retriever
    # chain = RunnablePassthrough.assign(
    #   context=retrieve_docs
    # ).assign(
    #   answer=rag_chain_from_docs
    # )
    return rag_history_chain

reject(question, request)

Display log, alert based on config

Source code in src/response.py, lines 116-124
def reject(question, request):
    """Display log, alert based on config"""
    if opt("moderate_alert") and platform.system() == "Linux":
        notify2.init("chat-script")
        alert = notify2.Notification("Unsafe question received")
        alert.show()
    if request and not opt("print_state"):
        print("\nIP address of user: ", request.client.host, sep="")
    print("Unsafe question: '", question, "'", sep="")

rejection_message()

Yield unsafe response info to user

Source code in src/response.py, lines 127-134
def rejection_message():
    """Yield unsafe response info to user"""
    response_stream = ""
    for chunks in UNSAFE_RESPONSE.split():
        response_stream += f"{chunks} "
        yield response_stream
        response_stream = ""
        time.sleep(opt("context_stream_delay"))