Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 1c6d242

Browse files
committed
Merge branch 'master' into dan-guards-fairings
2 parents ce3fd30 + 1843058 commit 1c6d242
Copy full SHA for 1c6d242

File tree

14 files changed

+352
-43
lines changed
Filter options

14 files changed

+352
-43
lines changed

‎README.md

Copy file name to clipboard | Expand all lines: README.md
+1 -1 | Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
<p align="center">
22
<a href="https://postgresml.org/">
3-
<img src="https://postgresml.org/static/images/owl_gradient.svg" width="175" alt="PostgresML">
3+
<img src="https://postgresml.org/dashboard/static/images/owl_gradient.svg" width="175" alt="PostgresML">
44
</a>
55
</p>
66

‎pgml-dashboard/src/api/docs.rs

Copy file name to clipboard | Expand all lines: pgml-dashboard/src/api/docs.rs
+2 | Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,8 @@ async fn blog_handler<'a>(
9797
current_user,
9898
&path,
9999
vec![
100+
NavLink::new("MindsDB vs PostgresML")
101+
.href("/blog/mindsdb-vs-postgresml"),
100102
NavLink::new("Introducing PostgresML Python SDK: Build End-to-End Vector Search Applications without OpenAI and Pinecone")
101103
.href("/blog/introducing-postgresml-python-sdk-build-end-to-end-vector-search-applications-without-openai-and-pinecone"),
102104
NavLink::new("PostgresML raises $4.7M to launch serverless AI application databases based on Postgres")

‎pgml-dashboard/src/utils/config.rs

Copy file name to clipboard | Expand all lines: pgml-dashboard/src/utils/config.rs
-7 | Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,13 +32,6 @@ pub fn static_dir() -> String {
3232
}
3333
}
3434

35-
// pub fn content_dir() -> String {
36-
// match var("CONTENT_DIRECTORY") {
37-
// Ok(dir) => dir,
38-
// Err(_) => "content".to_string(),
39-
// }
40-
// }
41-
4235
pub fn search_index_dir() -> String {
4336
match var("SEARCH_INDEX_DIRECTORY") {
4437
Ok(path) => path,

‎pgml-dashboard/static/blog/mindsdb-vs-postgresml.md

Copy file name to clipboard | Expand all lines: pgml-dashboard/static/blog/mindsdb-vs-postgresml.md
+308 | Lines changed: 308 additions & 0 deletions
Large diffs are not rendered by default.
Loading
Binary file not shown.
145 KB
Loading

‎pgml-dashboard/templates/layout/footer.html

Copy file name to clipboard | Expand all lines: pgml-dashboard/templates/layout/footer.html
+1 -1 | Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ <h5 class="h5 d-flex align-items-center gap-2 mb-5">
2424
<a class="nav-link text-white" href="/privacy" data-turbo="false">Privacy Policy</a>
2525
</nav>
2626
</div>
27-
<% } %>
27+
<% } %>
2828
</div>
2929
</div>
3030

‎pgml-dashboard/templates/layout/nav/top.html

Copy file name to clipboard | Expand all lines: pgml-dashboard/templates/layout/nav/top.html
+1 -1 | Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
<a class="nav-link" href="/docs/guides/setup/quick_start_with_docker/">Docs</a>
2222
</li>
2323
<li class="nav-item d-flex align-items-center">
24-
<a class="nav-link" href="/blog/postgresml-raises-4.7M-to-launch-serverless-ai-application-databases-based-on-postgres">Blog</a>
24+
<a class="nav-link" href="/blog/mindsdb-vs-postgresml">Blog</a>
2525
</li>
2626
<li class="nav-item d-flex align-items-center">
2727
<a class="nav-link" href="https://github.com/postgresml/postgresml" target="_blank">Open Source</a>

‎pgml-extension/docker/entrypoint.sh

Copy file name to clipboard | Expand all lines: pgml-extension/docker/entrypoint.sh
+9 | Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,15 @@ echo "Creating user and database..."
1515
echo "Installing pgml extension..."
1616
psql -U postgres -h 127.0.0.1 pgml_development -f sql/setup_examples.sql -P pager
1717

18+
19+
if [ -d "/docker-entrypoint-initdb.d" ]; then
20+
echo "Running custom scripts..."
21+
for f in /docker-entrypoint-initdb.d/*.sql; do
22+
echo "Running custom script ${f}"
23+
psql -U postgres -h 127.0.0.1 pgml_development -f "${f}"
24+
done
25+
fi
26+
1827
echo "Installing pgvector.. "
1928
psql -U postgres -h 127.0.0.1 pgml_development -c 'CREATE EXTENSION vector'
2029

‎pgml-extension/requirements.txt

Copy file name to clipboard | Expand all lines: pgml-extension/requirements.txt
+2 | Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ deepspeed==0.9.2
44
huggingface-hub==0.14.1
55
InstructorEmbedding==1.0.0
66
lightgbm==3.3.5
7+
orjson==3.9.0
78
pandas==2.0.1
89
rich==13.3.5
910
rouge==1.0.1
@@ -17,6 +18,7 @@ torchaudio==2.0.2
1718
torchvision==0.15.2
1819
tqdm==4.65.0
1920
transformers==4.29.2
21+
xformers==0.0.20
2022
xgboost==1.7.5
2123
langchain==0.0.180
2224
einops==0.6.1

‎pgml-extension/src/api.rs

Copy file name to clipboard | Expand all lines: pgml-extension/src/api.rs
+5 -5 | Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,7 @@ pub fn embed_batch(
577577
inputs: Vec<&str>,
578578
kwargs: default!(JsonB, "'{}'"),
579579
) -> Vec<Vec<f32>> {
580-
crate::bindings::transformers::embed(transformer, &inputs, &kwargs.0)
580+
crate::bindings::transformers::embed(transformer, inputs, &kwargs.0)
581581
}
582582

583583
#[pg_extern(immutable, parallel_safe)]
@@ -602,11 +602,11 @@ pub fn chunk(
602602
pub fn transform_json(
603603
task: JsonB,
604604
args: default!(JsonB, "'{}'"),
605-
inputs: default!(Vec<String>, "ARRAY[]::TEXT[]"),
605+
inputs: default!(Vec<&str>, "ARRAY[]::TEXT[]"),
606606
cache: default!(bool, false),
607607
) -> JsonB {
608608
JsonB(crate::bindings::transformers::transform(
609-
&task.0, &args.0, &inputs,
609+
&task.0, &args.0, inputs,
610610
))
611611
}
612612

@@ -616,14 +616,14 @@ pub fn transform_json(
616616
pub fn transform_string(
617617
task: String,
618618
args: default!(JsonB, "'{}'"),
619-
inputs: default!(Vec<String>, "ARRAY[]::TEXT[]"),
619+
inputs: default!(Vec<&str>, "ARRAY[]::TEXT[]"),
620620
cache: default!(bool, false),
621621
) -> JsonB {
622622
let mut task_map = HashMap::new();
623623
task_map.insert("task", task);
624624
let task_json = json!(task_map);
625625
JsonB(crate::bindings::transformers::transform(
626-
&task_json, &args.0, &inputs,
626+
&task_json, &args.0, inputs,
627627
))
628628
}
629629

‎pgml-extension/src/bindings/transformers.py

Copy file name to clipboard | Expand all lines: pgml-extension/src/bindings/transformers.py
+20 -24 | Lines changed: 20 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
1-
import os
2-
import json
31
import math
2+
import os
43
import shutil
54
import time
6-
import numpy as np
75

86
import datasets
97
from InstructorEmbedding import INSTRUCTOR
8+
import numpy
9+
import orjson
1010
from rouge import Rouge
1111
from sacrebleu.metrics import BLEU
1212
from sentence_transformers import SentenceTransformer
@@ -42,7 +42,6 @@
4242
__cache_sentence_transformer_by_name = {}
4343
__cache_transform_pipeline_by_task = {}
4444

45-
4645
DTYPE_MAP = {
4746
"uint8": torch.uint8,
4847
"int8": torch.int8,
@@ -58,6 +57,10 @@
5857
"bool": torch.bool,
5958
}
6059

60+
def orjson_default(obj):
61+
if isinstance(obj, numpy.float32):
62+
return float(obj)
63+
raise TypeError
6164

6265
def convert_dtype(kwargs):
6366
if "torch_dtype" in kwargs:
@@ -78,18 +81,10 @@ def ensure_device(kwargs):
7881
else:
7982
kwargs["device"] = "cpu"
8083

81-
82-
class NumpyJSONEncoder(json.JSONEncoder):
83-
def default(self, obj):
84-
if isinstance(obj, np.float32):
85-
return float(obj)
86-
return super().default(obj)
87-
88-
8984
def transform(task, args, inputs):
90-
task = json.loads(task)
91-
args = json.loads(args)
92-
inputs = json.loads(inputs)
85+
task = orjson.loads(task)
86+
args = orjson.loads(args)
87+
inputs = orjson.loads(inputs)
9388

9489
key = ",".join([f"{key}:{val}" for (key, val) in sorted(task.items())])
9590
if key not in __cache_transform_pipeline_by_task:
@@ -103,17 +98,18 @@ def transform(task, args, inputs):
10398
pipe = __cache_transform_pipeline_by_task[key]
10499

105100
if pipe.task == "question-answering":
106-
inputs = [json.loads(input) for input in inputs]
101+
inputs = [orjson.loads(input) for input in inputs]
107102

108103
convert_eos_token(pipe.tokenizer, args)
109104

110-
return json.dumps(pipe(inputs, **args), cls=NumpyJSONEncoder)
105+
results = pipe(inputs, **args)
106+
107+
return orjson.dumps(results, default=orjson_default).decode()
111108

112109

113110
def embed(transformer, inputs, kwargs):
114-
115-
inputs = json.loads(inputs)
116-
kwargs = json.loads(kwargs)
111+
kwargs = orjson.loads(kwargs)
112+
117113
ensure_device(kwargs)
118114
instructor = transformer.startswith("hkunlp/instructor")
119115

@@ -137,7 +133,7 @@ def embed(transformer, inputs, kwargs):
137133

138134

139135
def load_dataset(name, subset, limit: None, kwargs: "{}"):
140-
kwargs = json.loads(kwargs)
136+
kwargs = orjson.loads(kwargs)
141137

142138
if limit:
143139
dataset = datasets.load_dataset(
@@ -164,7 +160,7 @@ def load_dataset(name, subset, limit: None, kwargs: "{}"):
164160
else:
165161
raise PgMLException(f"Unhandled dataset type: {type(dataset)}")
166162

167-
return json.dumps({"data": data, "types": types})
163+
return orjson.dumps({"data": data, "types": types}).decode()
168164

169165

170166
def tokenize_text_classification(tokenizer, max_length, x, y):
@@ -421,7 +417,7 @@ def compute_metrics_text_generation(model, tokenizer, hyperparams, y):
421417

422418

423419
def tune(task, hyperparams, path, x_train, x_test, y_train, y_test):
424-
hyperparams = json.loads(hyperparams)
420+
hyperparams = orjson.loads(hyperparams)
425421
model_name = hyperparams.pop("model_name")
426422
tokenizer = AutoTokenizer.from_pretrained(model_name)
427423

@@ -562,7 +558,7 @@ def generate(model_id, data, config):
562558
result = get_transformer_by_model_id(model_id)
563559
tokenizer = result["tokenizer"]
564560
model = result["model"]
565-
config = json.loads(config)
561+
config = orjson.loads(config)
566562
all_preds = []
567563

568564
batch_size = 1 # TODO hyperparams

‎pgml-extension/src/bindings/transformers.rs

Copy file name to clipboard | Expand all lines: pgml-extension/src/bindings/transformers.rs
+3 -4 | Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,13 @@ static PY_MODULE: Lazy<Py<PyModule>> = Lazy::new(|| {
2424
pub fn transform(
2525
task: &serde_json::Value,
2626
args: &serde_json::Value,
27-
inputs: &Vec<String>,
27+
inputs: Vec<&str>,
2828
) -> serde_json::Value {
2929
crate::bindings::venv::activate();
3030

3131
let task = serde_json::to_string(task).unwrap();
3232
let args = serde_json::to_string(args).unwrap();
33-
let inputs = serde_json::to_string(inputs).unwrap();
33+
let inputs = serde_json::to_string(&inputs).unwrap();
3434

3535
let results = Python::with_gil(|py| -> String {
3636
let transform: Py<PyAny> = PY_MODULE.getattr(py, "transform").unwrap().into();
@@ -56,11 +56,10 @@ pub fn transform(
5656
serde_json::from_str(&results).unwrap()
5757
}
5858

59-
pub fn embed(transformer: &str, inputs: &[&str], kwargs: &serde_json::Value) -> Vec<Vec<f32>> {
59+
pub fn embed(transformer: &str, inputs: Vec<&str>, kwargs: &serde_json::Value) -> Vec<Vec<f32>> {
6060
crate::bindings::venv::activate();
6161

6262
let kwargs = serde_json::to_string(kwargs).unwrap();
63-
let inputs = serde_json::to_string(&inputs).unwrap();
6463
Python::with_gil(|py| -> Vec<Vec<f32>> {
6564
let embed: Py<PyAny> = PY_MODULE.getattr(py, "embed").unwrap().into();
6665
embed

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.