|
1 |
| -DROP TABLE IF EXISTS dbo.document_embeddings |
2 |
| -DROP TABLE IF EXISTS dbo.documents |
| 1 | +drop table if exists dbo.document_embeddings |
| 2 | +drop table if exists dbo.documents |
3 | 3 | go
|
4 | 4 |
|
5 |
| -CREATE TABLE dbo.documents (id INT CONSTRAINT pk__documents PRIMARY KEY IDENTITY, content NVARCHAR(MAX), embedding NVARCHAR(MAX)) |
6 |
| -CREATE TABLE dbo.document_embeddings (id INT REFERENCES dbo.documents(id), vector_value_id INT, vector_value FLOAT) |
| 5 | +create table dbo.documents |
| 6 | +( |
| 7 | + id int constraint pk__documents primary key, |
| 8 | + content nvarchar(max), |
| 9 | + embedding nvarchar(max) |
| 10 | +) |
| 11 | +create table dbo.document_embeddings |
| 12 | +( |
| 13 | + id int references dbo.documents(id), |
| 14 | + vector_value_id int, |
| 15 | + vector_value float |
| 16 | +) |
7 | 17 | go
|
8 | 18 |
|
9 |
| -CREATE CLUSTERED COLUMNSTORE INDEX csi__document_embeddings ON dbo.document_embeddings ORDER (id) |
| 19 | +create clustered columnstore index csi__document_embeddings |
| 20 | + on dbo.document_embeddings order (id) |
10 | 21 | go
|
11 | 22 |
|
12 |
| -IF NOT EXISTS(SELECT * FROM sys.fulltext_catalogs WHERE [name] = 'FullTextCatalog') |
13 |
| -BEGIN |
14 |
| - CREATE FULLTEXT CATALOG [FullTextCatalog] AS DEFAULT; |
15 |
| -END |
| 23 | +if not exists(select * from sys.fulltext_catalogs where [name] = 'FullTextCatalog') |
| 24 | +begin |
| 25 | + create fulltext catalog [FullTextCatalog] as default; |
| 26 | +end |
16 | 27 | go
|
17 | 28 |
|
18 |
| -CREATE FULLTEXT INDEX ON dbo.documents (content) KEY INDEX pk__documents; |
| 29 | +create fulltext index on dbo.documents (content) key index pk__documents; |
19 | 30 | go
|
20 | 31 |
|
21 |
| -ALTER FULLTEXT INDEX ON dbo.documents ENABLE; |
| 32 | +alter fulltext index on dbo.documents enable; |
| 33 | +go |
| 34 | + |
| 35 | +create or alter function dbo.similar_documents(@vector nvarchar(max)) |
| 36 | +returns table |
| 37 | +as |
| 38 | +return |
| 39 | +with cteVector as |
| 40 | +( |
| 41 | + select |
| 42 | + cast([key] as int) as [vector_value_id], |
| 43 | + cast([value] as float) as [vector_value] |
| 44 | + from |
| 45 | + openjson(@vector) |
| 46 | +), |
| 47 | +cteSimilar as |
| 48 | +( |
| 49 | + select top (50) |
| 50 | + v2.id, |
| 51 | + 1-sum(v1.[vector_value] * v2.[vector_value]) / |
| 52 | + ( |
| 53 | + sqrt(sum(v1.[vector_value] * v1.[vector_value])) |
| 54 | + * |
| 55 | + sqrt(sum(v2.[vector_value] * v2.[vector_value])) |
| 56 | + ) as cosine_distance |
| 57 | + from |
| 58 | + cteVector v1 |
| 59 | + inner join |
| 60 | + dbo.document_embeddings v2 on v1.vector_value_id = v2.vector_value_id |
| 61 | + group by |
| 62 | + v2.id |
| 63 | + order by |
| 64 | + cosine_distance |
| 65 | +) |
| 66 | +select |
| 67 | + rank() over (order by r.cosine_distance) as rank, |
| 68 | + r.id, |
| 69 | + r.cosine_distance |
| 70 | +from |
| 71 | + cteSimilar r |
22 | 72 | go
|
0 commit comments