diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2bb299d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+**/__pycache__/
+*.py[cod]
+*$py.class
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..654fe21
--- /dev/null
+++ b/config.py
@@ -0,0 +1,14 @@
+"""Shared settings for the embedding model and the LanceDB memories table."""
+
+# Embedding endpoint and model name (presumably consumed by embedding.py,
+# which is not visible in this diff -- confirm).
+model_url = "http://localhost:11434/api/embed"
+model_name = "nomic-embed-text"
+
+# LanceDB database directory and the table opened by the scripts.
+memories_db_path = "./memories"
+memories_table = "knowledge_stories"
+
+# NOTE(review): not referenced by any script in this diff; presumably the
+# embedding dimension used when the table is created -- confirm.
+memories_vector_size = 768
diff --git a/memories_semantic_search.py b/memories_semantic_search.py
new file mode 100644
index 0000000..616d7c4
--- /dev/null
+++ b/memories_semantic_search.py
@@ -0,0 +1,29 @@
+"""Embed a sample question and print the three closest stored stories."""
+
+import lancedb
+
+import config
+import embedding
+
+db = lancedb.connect(config.memories_db_path)
+table = db.open_table(config.memories_table)
+
+question = "Email group Microsoft tidak muncul, apakah masuk spam?"
+query_vector = embedding.embed_text(question)
+
+# Search the "vector_title" column and keep the top 3 matches.
+results = (
+    table.search(query_vector, vector_column_name="vector_title")
+    .limit(3)
+    .to_list()
+)
+
+print("Pertanyaan:")
+print(question)
+print("Hasil paling mirip:")
+for row in results:
+    print("=" * 60)
+    print("ID:", row["id"])
+    print("Title:", row["title"])
+    print("Story:", row["story"])
+    print("Distance:", row["_distance"])
diff --git a/memories_test.py b/memories_test.py
new file mode 100644
index 0000000..911d61a
--- /dev/null
+++ b/memories_test.py
@@ -0,0 +1,15 @@
+"""Embed one sample sentence and print the resulting vector."""
+
+import config
+import embedding
+
+text = "Saya lupa password di HRIS AFMS2"
+# NOTE(review): memories_semantic_search.py and table_modify.py call
+# embedding.embed_text() with the text as the only argument; confirm which
+# signature embedding.py actually provides and align the call sites.
+vector = embedding.embed_text(config.model_url, config.model_name, text)
+
+print("Text:")
+print(text)
+print("Vector:")
+print(str(vector))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9a3182a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+lancedb
+pyarrow
diff --git a/table_modify.py b/table_modify.py
new file mode 100644
index 0000000..2628367
--- /dev/null
+++ b/table_modify.py
@@ -0,0 +1,52 @@
+"""Insert sample knowledge stories into the memories table."""
+
+import lancedb
+
+import config
+import embedding
+
+db = lancedb.connect(config.memories_db_path)
+table = db.open_table(config.memories_table)
+
+docs = [
+    {
+        "id": "wifi-001",
+        "title": "Perubahan password WiFi menjadi login portal",
+        "story": (
+            "Password WiFi kantor telah berubah. "
+            "Sekarang akses WiFi menggunakan login portal. "
+            "Akun login akan diberikan melalui chat pribadi masing-masing user."
+        ),
+    },
+    {
+        "id": "m365-001",
+        "title": "Cara cek spam pada Outlook Group",
+        "story": (
+            "Untuk mengecek spam pada Outlook Group, buka Outlook Web. "
+            "Masuk ke menu Groups, pilih group terkait, lalu cek folder Junk Email atau Spam."
+        ),
+    },
+    {
+        "id": "printer-001",
+        "title": "Printer tidak terdeteksi di komputer",
+        "story": (
+            "Jika printer tidak terdeteksi, cek koneksi kabel atau jaringan. "
+            "Pastikan driver printer sudah terinstall, lalu coba restart service Print Spooler."
+        ),
+    },
+]
+
+for doc in docs:
+    # Only the title is embedded; the story text is stored as-is.
+    row = {
+        "id": doc["id"],
+        "title": doc["title"],
+        "story": doc["story"],
+        "vector_title": embedding.embed_text(doc["title"]),
+    }
+    table.add([row])
+    print(f"Menambahkan record: {doc['title']}")
+
+print("\nTable berhasil diproses.")
+print("Nama table:", config.memories_table)
+print("Jumlah row:", table.count_rows())