|
19 | 19 | model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
|
20 | 20 | embeddings = model.encode(sentences)
|
21 | 21 |
|
| 22 | + conn = get_mssql_connection() |
| 23 | + |
22 | 24 | print('Cleaning up the database...')
|
23 | 25 | try:
|
24 |
| - conn = get_mssql_connection() |
25 |
| - conn.execute("DELETE FROM dbo.document_embeddings;") |
26 |
| - conn.execute("DELETE FROM dbo.documents;") |
27 |
| - conn.commit(); |
| 26 | + cursor = conn.cursor() |
| 27 | + cursor.execute("DELETE FROM dbo.document_embeddings;") |
| 28 | + cursor.execute("DELETE FROM dbo.documents;") |
| 29 | + cursor.commit(); |
28 | 30 | finally:
|
29 |
| - conn.close() |
| 31 | + cursor.close() |
30 | 32 |
|
31 | 33 | print('Saving documents and embeddings in the database...')
|
32 | 34 | try:
|
33 |
| - conn = get_mssql_connection() |
34 | 35 | cursor = conn.cursor()
|
35 | 36 |
|
36 |
| - for content, embedding in zip(sentences, embeddings): |
| 37 | + for id, (content, embedding) in enumerate(zip(sentences, embeddings)): |
37 | 38 | cursor.execute(f"""
|
38 |
| - INSERT INTO dbo.documents (content, embedding) VALUES (?, ?); |
39 |
| - INSERT INTO dbo.document_embeddings SELECT SCOPE_IDENTITY(), CAST([key] AS INT), CAST([value] AS FLOAT) FROM OPENJSON(?); |
| 39 | + DECLARE @id INT = ?; |
| 40 | + DECLARE @content NVARCHAR(MAX) = ?; |
| 41 | + DECLARE @embedding NVARCHAR(MAX) = ?; |
| 42 | + INSERT INTO dbo.documents (id, content, embedding) VALUES (@id, @content, @embedding); |
| 43 | + INSERT INTO dbo.document_embeddings SELECT @id, CAST([key] AS INT), CAST([value] AS FLOAT) FROM OPENJSON(@embedding); |
40 | 44 | """,
|
| 45 | + id, |
41 | 46 | content,
|
42 |
| - json.dumps(embedding.tolist()), |
43 | 47 | json.dumps(embedding.tolist())
|
44 | 48 | )
|
45 | 49 |
|
46 |
| - cursor.close() |
47 |
| - conn.commit() |
| 50 | + cursor.commit() |
48 | 51 | finally:
|
49 |
| - conn.close() |
| 52 | + cursor.close() |
50 | 53 |
|
51 | 54 | print('Searching for similar documents...')
|
52 | 55 | print('Getting embeddings...')
|
|
56 | 59 | print('Querying database...')
|
57 | 60 | k = 5
|
58 | 61 | try:
|
59 |
| - conn = get_mssql_connection() |
60 | 62 | cursor = conn.cursor()
|
61 | 63 |
|
62 | 64 | results = cursor.execute(f"""
|
|
95 | 97 | )
|
96 | 98 |
|
97 | 99 | for row in results:
|
98 |
| - print('document:', row[0], 'RRF score:', row[1]) |
| 100 | + print(f'Document: {row[0]} -> RRF score: {row[1]:0.4}') |
99 | 101 |
|
100 |
| - cursor.close() |
101 |
| - conn.commit() |
102 | 102 | finally:
|
103 |
| - conn.close() |
| 103 | + cursor.close() |
0 commit comments