Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 82969b8

Browse files
committed
Use orjson instead of json, when available
For `mypy -c 'import torch'`, the cache load time goes from 0.44s to 0.25s as measured by manager's data_json_load_time If I time dump times specifically, I see a saving of 0.65s to 0.07s. Overall, a pretty reasonable perf win -- should we make it a required dependency? I don't know if the sqlite cache path is used at all, but let me know if I need a cleverer migration than renaming the table
1 parent 1a074b6 commit 82969b8
Copy full SHA for 82969b8

File tree

Expand file tree / Collapse file tree

4 files changed

+63
-53
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+63
-53
lines changed

‎mypy/build.py

Copy file name to clipboard — Expand all lines: mypy/build.py
+18-27Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
from mypy.stubinfo import legacy_bundled_packages, non_bundled_packages, stub_distribution_name
9696
from mypy.types import Type
9797
from mypy.typestate import reset_global_state, type_state
98+
from mypy.util import json_dumps, json_loads
9899
from mypy.version import __version__
99100

100101
# Switch to True to produce debug output related to fine-grained incremental
@@ -858,7 +859,7 @@ def load_fine_grained_deps(self, id: str) -> dict[str, set[str]]:
858859
t0 = time.time()
859860
if id in self.fg_deps_meta:
860861
# TODO: Assert deps file wasn't changed.
861-
deps = json.loads(self.metastore.read(self.fg_deps_meta[id]["path"]))
862+
deps = json_loads(self.metastore.read(self.fg_deps_meta[id]["path"]))
862863
else:
863864
deps = {}
864865
val = {k: set(v) for k, v in deps.items()}
@@ -911,8 +912,8 @@ def stats_summary(self) -> Mapping[str, object]:
911912
return self.stats
912913

913914

914-
def deps_to_json(x: dict[str, set[str]]) -> str:
915-
return json.dumps({k: list(v) for k, v in x.items()}, separators=(",", ":"))
def deps_to_json(x: dict[str, set[str]]) -> bytes:
    """Serialize a fine-grained dependency map to compact JSON bytes.

    Set values are converted to lists first, since JSON has no set type.
    """
    serializable = {trigger: list(targets) for trigger, targets in x.items()}
    return json_dumps(serializable)
916917

917918

918919
# File for storing metadata about all the fine-grained dependency caches
@@ -980,7 +981,7 @@ def write_deps_cache(
980981

981982
meta = {"snapshot": meta_snapshot, "deps_meta": fg_deps_meta}
982983

983-
if not metastore.write(DEPS_META_FILE, json.dumps(meta, separators=(",", ":"))):
984+
if not metastore.write(DEPS_META_FILE, json_dumps(meta)):
984985
manager.log(f"Error writing fine-grained deps meta JSON file {DEPS_META_FILE}")
985986
error = True
986987

@@ -1048,7 +1049,7 @@ def generate_deps_for_cache(manager: BuildManager, graph: Graph) -> dict[str, di
10481049

10491050
def write_plugins_snapshot(manager: BuildManager) -> None:
10501051
"""Write snapshot of versions and hashes of currently active plugins."""
1051-
snapshot = json.dumps(manager.plugins_snapshot, separators=(",", ":"))
1052+
snapshot = json_dumps(manager.plugins_snapshot)
10521053
if not manager.metastore.write(PLUGIN_SNAPSHOT_FILE, snapshot):
10531054
manager.errors.set_file(_cache_dir_prefix(manager.options), None, manager.options)
10541055
manager.errors.report(0, 0, "Error writing plugins snapshot", blocker=True)
@@ -1079,8 +1080,8 @@ def read_quickstart_file(
10791080
# just ignore it.
10801081
raw_quickstart: dict[str, Any] = {}
10811082
try:
1082-
with open(options.quickstart_file) as f:
1083-
raw_quickstart = json.load(f)
1083+
with open(options.quickstart_file, "rb") as f:
1084+
raw_quickstart = json_loads(f.read())
10841085

10851086
quickstart = {}
10861087
for file, (x, y, z) in raw_quickstart.items():
@@ -1148,10 +1149,10 @@ def _load_json_file(
11481149
manager.add_stats(metastore_read_time=time.time() - t0)
11491150
# Only bother to compute the log message if we are logging it, since it could be big
11501151
if manager.verbosity() >= 2:
1151-
manager.trace(log_success + data.rstrip())
1152+
manager.trace(log_success + data.rstrip().decode())
11521153
try:
11531154
t1 = time.time()
1154-
result = json.loads(data)
1155+
result = json_loads(data)
11551156
manager.add_stats(data_json_load_time=time.time() - t1)
11561157
except json.JSONDecodeError:
11571158
manager.errors.set_file(file, None, manager.options)
@@ -1343,8 +1344,8 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | No
13431344
# So that plugins can return data with tuples in it without
13441345
# things silently always invalidating modules, we round-trip
13451346
# the config data. This isn't beautiful.
1346-
plugin_data = json.loads(
1347-
json.dumps(manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True)))
1347+
plugin_data = json_loads(
1348+
json_dumps(manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True)))
13481349
)
13491350
if m.plugin_data != plugin_data:
13501351
manager.log(f"Metadata abandoned for {id}: plugin configuration differs")
@@ -1478,18 +1479,15 @@ def validate_meta(
14781479
"ignore_all": meta.ignore_all,
14791480
"plugin_data": meta.plugin_data,
14801481
}
1481-
if manager.options.debug_cache:
1482-
meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
1483-
else:
1484-
meta_str = json.dumps(meta_dict, separators=(",", ":"))
1482+
meta_bytes = json_dumps(meta_dict, manager.options.debug_cache)
14851483
meta_json, _, _ = get_cache_names(id, path, manager.options)
14861484
manager.log(
14871485
"Updating mtime for {}: file {}, meta {}, mtime {}".format(
14881486
id, path, meta_json, meta.mtime
14891487
)
14901488
)
14911489
t1 = time.time()
1492-
manager.metastore.write(meta_json, meta_str) # Ignore errors, just an optimization.
1490+
manager.metastore.write(meta_json, meta_bytes) # Ignore errors, just an optimization.
14931491
manager.add_stats(validate_update_time=time.time() - t1, validate_munging_time=t1 - t0)
14941492
return meta
14951493

@@ -1507,13 +1505,6 @@ def compute_hash(text: str) -> str:
15071505
return hash_digest(text.encode("utf-8"))
15081506

15091507

1510-
def json_dumps(obj: Any, debug_cache: bool) -> str:
1511-
if debug_cache:
1512-
return json.dumps(obj, indent=2, sort_keys=True)
1513-
else:
1514-
return json.dumps(obj, sort_keys=True, separators=(",", ":"))
1515-
1516-
15171508
def write_cache(
15181509
id: str,
15191510
path: str,
@@ -1566,8 +1557,8 @@ def write_cache(
15661557

15671558
# Serialize data and analyze interface
15681559
data = tree.serialize()
1569-
data_str = json_dumps(data, manager.options.debug_cache)
1570-
interface_hash = compute_hash(data_str)
1560+
data_bytes = json_dumps(data, manager.options.debug_cache)
1561+
interface_hash = hash_digest(data_bytes)
15711562

15721563
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
15731564

@@ -1591,7 +1582,7 @@ def write_cache(
15911582
manager.trace(f"Interface for {id} is unchanged")
15921583
else:
15931584
manager.trace(f"Interface for {id} has changed")
1594-
if not metastore.write(data_json, data_str):
1585+
if not metastore.write(data_json, data_bytes):
15951586
# Most likely the error is the replace() call
15961587
# (see https://github.com/python/mypy/issues/3215).
15971588
manager.log(f"Error writing data JSON file {data_json}")
@@ -3566,4 +3557,4 @@ def write_undocumented_ref_info(
35663557
assert not ref_info_file.startswith(".")
35673558

35683559
deps_json = get_undocumented_ref_info_json(state.tree, type_map)
3569-
metastore.write(ref_info_file, json.dumps(deps_json, separators=(",", ":")))
3560+
metastore.write(ref_info_file, json_dumps(deps_json))

‎mypy/metastore.py

Copy file name to clipboardExpand all lines: mypy/metastore.py
+15-22Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,14 @@ def getmtime(self, name: str) -> float:
3333
"""
3434

3535
@abstractmethod
36-
def read(self, name: str) -> str:
36+
def read(self, name: str) -> bytes:
3737
"""Read the contents of a metadata entry.
3838
3939
Raises FileNotFound if the entry does not exist.
4040
"""
4141

4242
@abstractmethod
43-
def write(self, name: str, data: str, mtime: float | None = None) -> bool:
43+
def write(self, name: str, data: bytes, mtime: float | None = None) -> bool:
4444
"""Write a metadata entry.
4545
4646
If mtime is specified, set it as the mtime of the entry. Otherwise,
@@ -86,16 +86,16 @@ def getmtime(self, name: str) -> float:
8686

8787
return int(os.path.getmtime(os.path.join(self.cache_dir_prefix, name)))
8888

89-
def read(self, name: str) -> str:
89+
def read(self, name: str) -> bytes:
9090
assert os.path.normpath(name) != os.path.abspath(name), "Don't use absolute paths!"
9191

9292
if not self.cache_dir_prefix:
9393
raise FileNotFoundError()
9494

95-
with open(os.path.join(self.cache_dir_prefix, name)) as f:
95+
with open(os.path.join(self.cache_dir_prefix, name), "rb") as f:
9696
return f.read()
9797

98-
def write(self, name: str, data: str, mtime: float | None = None) -> bool:
98+
def write(self, name: str, data: bytes, mtime: float | None = None) -> bool:
9999
assert os.path.normpath(name) != os.path.abspath(name), "Don't use absolute paths!"
100100

101101
if not self.cache_dir_prefix:
@@ -105,7 +105,7 @@ def write(self, name: str, data: str, mtime: float | None = None) -> bool:
105105
tmp_filename = path + "." + random_string()
106106
try:
107107
os.makedirs(os.path.dirname(path), exist_ok=True)
108-
with open(tmp_filename, "w") as f:
108+
with open(tmp_filename, "wb") as f:
109109
f.write(data)
110110
os.replace(tmp_filename, path)
111111
if mtime is not None:
@@ -135,27 +135,20 @@ def list_all(self) -> Iterable[str]:
135135

136136

137137
SCHEMA = """
138-
CREATE TABLE IF NOT EXISTS files (
138+
CREATE TABLE IF NOT EXISTS files2 (
139139
path TEXT UNIQUE NOT NULL,
140140
mtime REAL,
141-
data TEXT
141+
data BLOB
142142
);
143-
CREATE INDEX IF NOT EXISTS path_idx on files(path);
143+
CREATE INDEX IF NOT EXISTS path_idx on files2(path);
144144
"""
145-
# No migrations yet
146-
MIGRATIONS: list[str] = []
147145

148146

149147
def connect_db(db_file: str) -> sqlite3.Connection:
150148
import sqlite3.dbapi2
151149

152150
db = sqlite3.dbapi2.connect(db_file)
153151
db.executescript(SCHEMA)
154-
for migr in MIGRATIONS:
155-
try:
156-
db.executescript(migr)
157-
except sqlite3.OperationalError:
158-
pass
159152
return db
160153

161154

@@ -188,12 +181,12 @@ def getmtime(self, name: str) -> float:
188181
assert isinstance(mtime, float)
189182
return mtime
190183

191-
def read(self, name: str) -> str:
184+
def read(self, name: str) -> bytes:
192185
data = self._query(name, "data")
193-
assert isinstance(data, str)
186+
assert isinstance(data, bytes)
194187
return data
195188

196-
def write(self, name: str, data: str, mtime: float | None = None) -> bool:
189+
def write(self, name: str, data: bytes, mtime: float | None = None) -> bool:
197190
import sqlite3
198191

199192
if not self.db:
@@ -202,7 +195,7 @@ def write(self, name: str, data: str, mtime: float | None = None) -> bool:
202195
if mtime is None:
203196
mtime = time.time()
204197
self.db.execute(
205-
"INSERT OR REPLACE INTO files(path, mtime, data) VALUES(?, ?, ?)",
198+
"INSERT OR REPLACE INTO files2(path, mtime, data) VALUES(?, ?, ?)",
206199
(name, mtime, data),
207200
)
208201
except sqlite3.OperationalError:
@@ -213,13 +206,13 @@ def remove(self, name: str) -> None:
213206
if not self.db:
214207
raise FileNotFoundError()
215208

216-
self.db.execute("DELETE FROM files WHERE path = ?", (name,))
209+
self.db.execute("DELETE FROM files2 WHERE path = ?", (name,))
217210

218211
def commit(self) -> None:
219212
if self.db:
220213
self.db.commit()
221214

222215
def list_all(self) -> Iterable[str]:
223216
if self.db:
224-
for row in self.db.execute("SELECT path FROM files"):
217+
for row in self.db.execute("SELECT path FROM files2"):
225218
yield row[0]

‎mypy/util.py

Copy file name to clipboardExpand all lines: mypy/util.py
+27-1Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,23 @@
44

55
import hashlib
66
import io
7+
import json
78
import os
89
import pathlib
910
import re
1011
import shutil
1112
import sys
1213
import time
1314
from importlib import resources as importlib_resources
14-
from typing import IO, Callable, Container, Final, Iterable, Sequence, Sized, TypeVar
15+
from typing import Any, IO, Callable, Container, Final, Iterable, Sequence, Sized, TypeVar
1516
from typing_extensions import Literal
1617

18+
orjson: Any
19+
try:
20+
import orjson
21+
except ImportError:
22+
orjson = None
23+
1724
try:
1825
import curses
1926

@@ -874,3 +881,22 @@ def quote_docstring(docstr: str) -> str:
874881
return f"''{docstr_repr}''"
875882
else:
876883
return f'""{docstr_repr}""'
884+
885+
def json_dumps(obj: object, debug: bool = False) -> bytes:
    """Serialize obj to JSON bytes, using orjson when it is available.

    With debug=True the output is indented and human-readable; otherwise it
    is as compact as possible.

    Keys are sorted on every path: the previous build.py helper used
    sort_keys=True, and orjson does not sort keys by default, so without
    OPT_SORT_KEYS the serialized cache bytes (and hence the interface hash
    computed from them) would depend on dict insertion order, causing
    spurious cache invalidation between runs.
    """
    if orjson is not None:
        if debug:
            return orjson.dumps(obj, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS)  # type: ignore[no-any-return]
        else:
            return orjson.dumps(obj, option=orjson.OPT_SORT_KEYS)  # type: ignore[no-any-return]

    if debug:
        return json.dumps(obj, indent=2, sort_keys=True).encode("utf-8")
    else:
        return json.dumps(obj, sort_keys=True, separators=(",", ":")).encode("utf-8")
897+
def json_loads(data: bytes) -> Any:
    """Deserialize JSON bytes, using orjson when it is available."""
    if orjson is None:
        return json.loads(data)
    return orjson.loads(data)

‎mypyc/codegen/emitmodule.py

Copy file name to clipboardExpand all lines: mypyc/codegen/emitmodule.py
+3-3Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from mypy.nodes import MypyFile
2525
from mypy.options import Options
2626
from mypy.plugin import Plugin, ReportConfigContext
27-
from mypy.util import hash_digest
27+
from mypy.util import hash_digest, json_dumps
2828
from mypyc.codegen.cstring import c_string_initializer
2929
from mypyc.codegen.emit import Emitter, EmitterContext, HeaderDeclaration, c_array_initializer
3030
from mypyc.codegen.emitclass import generate_class, generate_class_type_decl
@@ -369,11 +369,11 @@ def write_cache(
369369
newpath = get_state_ir_cache_name(st)
370370
ir_data = {
371371
"ir": module.serialize(),
372-
"meta_hash": compute_hash(meta_data),
372+
"meta_hash": hash_digest(meta_data),
373373
"src_hashes": hashes[group_map[id]],
374374
}
375375

376-
result.manager.metastore.write(newpath, json.dumps(ir_data, separators=(",", ":")))
376+
result.manager.metastore.write(newpath, json_dumps(ir_data))
377377

378378
result.manager.metastore.commit()
379379

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.