File tree Expand file tree Collapse file tree 5 files changed +58
-53
lines changed
Filter options
Expand file tree Collapse file tree 5 files changed +58
-53
lines changed
Original file line number Diff line number Diff line change
1
+ /*
2
+ Cleanup if needed
3
+ */
4
+ if not exists (select * from sys .symmetric_keys where [name] = ' ##MS_DatabaseMasterKey##' )
5
+ begin
6
+ create master key encryption by password = ' Pa$$w0rd!'
7
+ end
8
+ go
9
+ if exists (select * from sys.[external_data_sources] where name = ' openai_playground' )
10
+ begin
11
+ drop external data source [openai_playground];
12
+ end
13
+ go
14
+ if exists (select * from sys.[database_scoped_credentials] where name = ' openai_playground' )
15
+ begin
16
+ drop database scoped credential [openai_playground];
17
+ end
18
+ go
19
+
20
+ /*
21
+ Create database scoped credential and external data source.
22
+ File is assumed to be in a path like:
23
+ https://<myaccount>.blob.core.windows.net/playground/wikipedia/vector_database_wikipedia_articles_embedded.csv
24
+
25
+ Please note that it is recommened to avoid using SAS tokens: the best practice is to use Managed Identity as described here:
26
+ https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-bulk-data-by-using-bulk-insert-or-openrowset-bulk-sql-server?view=sql-server-ver16#bulk-importing-from-azure-blob-storage
27
+
28
+ */
29
+ create database scoped credential [openai_playground]
30
+ with identity = ' SHARED ACCESS SIGNATURE' ,
31
+ secret = ' <sas-token>' ; -- make sure not to include the ? at the beginning
32
+ go
33
+ create external data source [openai_playground]
34
+ with
35
+ (
36
+ type = blob_storage,
37
+ location = ' https://<account>.blob.core.windows.net/playground' ,
38
+ credential = [openai_playground]
39
+ );
40
+ go
Original file line number Diff line number Diff line change 1
- /*
2
- Cleanup if needed
3
- */
4
- if not exists (select * from sys .symmetric_keys where [name] = ' ##MS_DatabaseMasterKey##' )
5
- begin
6
- create master key encryption by password = ' Pa$$w0rd!'
7
- end
8
- go
9
- if exists (select * from sys.[external_data_sources] where name = ' openai_playground' )
10
- begin
11
- drop external data source [openai_playground];
12
- end
13
- go
14
- if exists (select * from sys.[database_scoped_credentials] where name = ' openai_playground' )
15
- begin
16
- drop database scoped credential [openai_playground];
17
- end
18
- go
19
-
20
- /*
21
- Create database scoped credential and external data source.
22
- File is assumed to be in a path like:
23
- https://<myaccount>.blob.core.windows.net/playground/wikipedia/vector_database_wikipedia_articles_embedded.csv
24
- */
25
- create database scoped credential [openai_playground]
26
- with identity = ' SHARED ACCESS SIGNATURE' ,
27
- secret = ' <sas-token>' ; -- make sure not to include the ? at the beginning
28
- go
29
- create external data source [openai_playground]
30
- with
31
- (
32
- type = blob_storage,
33
- location = ' https://<account>.blob.core.windows.net/playground' ,
34
- credential = [openai_playground]
35
- );
36
- go
37
-
38
1
/*
39
2
Create table
40
3
*/
73
36
Add primary key
74
37
*/
75
38
alter table [dbo].[wikipedia_articles_embeddings]
76
- add constraint pk__wikipedia_articles_embeddings primary key nonclustered (id)
39
+ add constraint pk__wikipedia_articles_embeddings primary key clustered (id)
40
+ go
41
+
42
+ /*
43
+ Add index on title
44
+ */
45
+ create index [ix_title] on [dbo].[wikipedia_articles_embeddings](title)
77
46
go
78
47
79
48
/*
80
49
Verify data
81
50
*/
82
- select top (100 ) * from [dbo].[wikipedia_articles_embeddings]
51
+ select top (10 ) * from [dbo].[wikipedia_articles_embeddings]
83
52
go
84
53
85
54
select * from [dbo].[wikipedia_articles_embeddings] where title = ' Alan Turing'
86
55
go
87
56
88
57
89
58
59
+
Original file line number Diff line number Diff line change 2
2
Add columns to store the native vectors
3
3
*/
4
4
alter table wikipedia_articles_embeddings
5
- add title_vector_native varbinary ( 8000 );
5
+ add title_vector_ada2 vector( 1536 );
6
6
7
7
alter table wikipedia_articles_embeddings
8
- add content_vector_native varbinary (8000 );
8
+ add content_vector_ada2 vector(1536 );
9
+ go
9
10
10
11
/*
11
12
Update the native vectors
12
13
*/
13
14
update
14
15
wikipedia_articles_embeddings
15
16
set
16
- title_vector_native = json_array_to_vector (title_vector),
17
- content_vector_native = json_array_to_vector (content_vector);
17
+ title_vector_ada2 = cast (title_vector as vector( 1536 ) ),
18
+ content_vector_ada2 = cast (content_vector as vector( 1536 ))
18
19
go
19
20
20
21
/*
@@ -28,16 +29,10 @@ alter table wikipedia_articles_embeddings
28
29
drop column content_vector;
29
30
go
30
31
31
- /*
32
- Rename the columns
33
- */
34
- EXEC sp_rename ' dbo.wikipedia_articles_embeddings.title_vector_native' , ' title_vector_ada2' , ' COLUMN' ;
35
- EXEC sp_rename ' dbo.wikipedia_articles_embeddings.content_vector_native' , ' content_vector_ada2' , ' COLUMN' ;
36
-
37
32
/*
38
33
Verify data
39
34
*/
40
- select top (100 ) * from [dbo].[wikipedia_articles_embeddings]
35
+ select top (10 ) * from [dbo].[wikipedia_articles_embeddings]
41
36
go
42
37
43
38
select * from [dbo].[wikipedia_articles_embeddings] where title = ' Alan Turing'
Original file line number Diff line number Diff line change 4
4
create or alter procedure dbo .get_embedding
5
5
@deployedModelName nvarchar (1000 ),
6
6
@inputText nvarchar (max ),
7
- @embedding varbinary ( 8000 ) output
7
+ @embedding vector( 1536 ) output
8
8
as
9
9
declare @retval int , @response nvarchar (max );
10
10
declare @payload nvarchar (max ) = json_object(' input' : @inputText);
@@ -23,7 +23,7 @@ end else begin
23
23
select @response as ' Error message from OpenAI API' ;
24
24
end
25
25
26
- set @embedding = json_array_to_vector (@re);
26
+ set @embedding = cast (@re as vector( 1536 ) );
27
27
28
28
return @retval
29
29
go
Original file line number Diff line number Diff line change 5
5
*/
6
6
7
7
declare @inputText nvarchar (max ) = ' the foundation series by isaac asimov' ;
8
- declare @retval int , @embedding varbinary ( 8000 );
8
+ declare @retval int , @embedding vector( 1536 );
9
9
10
10
exec @retval = dbo .get_embedding ' <deployment-id>' , @inputText, @embedding output ;
11
11
You can’t perform that action at this time.
0 commit comments