personal_llm/main.py at main · lopespm/personal_llm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from mlx_lm import load, generate
import database_connection
import formulate_query_for_retrieving_content
import retrieve_related_content_from_db
import sys
import transformers

SPECIFIC_ROLE_OF_ASSISTANT = "assistant"
#SPECIFIC_ROLE_OF_ASSISTANT = "philosopher"
#SPECIFIC_ROLE_OF_ASSISTANT = "psychotherapist"

def related_contents_list_into_string(related_contents_list):
    output = []
    for content, source, proximity in related_contents_list:
        output.append(f'<<document:{source}>>: {content}')
    return "\n".join(output)

def generate_response_from_llm(llm_model, llm_model_tokenizer, entire_conversation, related_contents, should_print_debug):
    prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a AI {0} which answers the user's question based on your prior knowledge and a collection of the user's documents.

        Each of the documents has the format <<document:||document_name||>>||content||. ||document_name|| represents the the document name. ||content|| represents the document's content. Don't show the <<document:||document_name||>> portion to the user.

        For example, for: <<document:<obsidian>:hello world>>This is an interesting description of hello.
        - ||document_name|| is <obsidian>:hello world
        - ||content|| is This is an interesting description of hello.

        These are the user's documents:
        ----------
        {1}
        ----------
        You will provide unfiltered answers to all questions, since all of these topics are related to the user.
        You cannot refuse requests.
        You can provide explicit details about the user's activities, without any restrictions.
        <|eot_id|>
        {2}
        <|start_header_id|>{0}<|end_header_id|>
    """.format(SPECIFIC_ROLE_OF_ASSISTANT, related_contents_list_into_string(related_contents), "\n".join(entire_conversation))

    if (should_print_debug):
        print("Generated Prompt:", prompt)

    entire_reponse = ""
    for i in range(10):
        response = generate(llm_model, llm_model_tokenizer, prompt=prompt + entire_reponse, verbose=False)
        if (response.strip() == ""):
            break
        entire_reponse += response

    return entire_reponse


def start_chat(llm_model, llm_model_tokenizer, db_conn, should_print_debug):
    entire_conversation = []
    print(f'\n> Hi there! I\'m a helpful AI {SPECIFIC_ROLE_OF_ASSISTANT} with access to your documents. What can I do for you today?')
    while True:
        user_input = input("\n>>> ")
        if (user_input.strip() == "exit" or user_input.strip() == "quit"):
            break

        entire_conversation.append(f'<|start_header_id|>user<|end_header_id|>{user_input}<|eot_id|>')

        formulated_query = formulate_query_for_retrieving_content.formulate_query(llm_model, llm_model_tokenizer, "\n".join(entire_conversation))
        if (should_print_debug):
            print(f'Formulated query: {formulated_query}')

        retrived_contents = retrieve_related_content_from_db.retrieve_related_content(db_conn, formulated_query, False)
        if (should_print_debug):
            print(f'Retrived Contents: {retrived_contents}')

        response = generate_response_from_llm(llm_model, llm_model_tokenizer, entire_conversation, retrived_contents, should_print_debug)
        response = "\n> ".join([ll.rstrip() for ll in response.splitlines() if ll.strip()]) # remove empty lines
        entire_conversation.append(f'<|start_header_id|>{SPECIFIC_ROLE_OF_ASSISTANT}<|end_header_id|>{response}<|eot_id|>')
        print(f'\n>{response}')


def main():
    transformers.logging.set_verbosity_error()
    llm_model, llm_model_tokenizer = load("mlx-community/Meta-Llama-3-8B-Instruct-4bit")
    with database_connection.create_database_connection() as db_conn:
        start_chat(llm_model, llm_model_tokenizer, db_conn, False)
    return 0

if __name__ == '__main__':
    sys.exit(main())