Commit 7c27808

Use orjson instead of json, when available (#17955)
For `mypy -c 'import torch'`, the cache load time goes from 0.44s to 0.25s as measured by the manager's data_json_load_time stat. Timing the dumps specifically, the dump time drops from 0.65s to 0.07s. Overall a pretty reasonable perf win -- should we make it a required dependency? See also #3456
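
The new json_dumps/json_loads helpers live in mypy/util.py, whose hunk is not reproduced on this page (only four of the six changed files are shown below). Judging from the call sites in the diffs (json_dumps(obj) and json_dumps(obj, debug) returning bytes; json_loads accepting bytes), an orjson-with-fallback pair along these lines would fit; this is a sketch under those assumptions, not the actual patch:

# Sketch only: the real mypy/util.py hunk is not shown on this page.
from typing import Any

import json

try:
    import orjson  # optional fast path; stdlib json is the fallback
except ImportError:
    orjson = None  # type: ignore[assignment]


def json_dumps(obj: Any, debug: bool = False) -> bytes:
    if orjson is not None:
        if debug:
            return orjson.dumps(obj, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS)
        return orjson.dumps(obj, option=orjson.OPT_SORT_KEYS)  # compact by default
    if debug:
        return json.dumps(obj, indent=2, sort_keys=True).encode("utf-8")
    return json.dumps(obj, sort_keys=True, separators=(",", ":")).encode("utf-8")


def json_loads(data: bytes) -> Any:
    if orjson is not None:
        return orjson.loads(data)
    return json.loads(data)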
1 parent 2cd2406 commit 7c27808

File tree

6 files changed: +81 -68 lines changed
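
The timings quoted in the commit message can be sanity-checked with a standalone micro-benchmark along these lines (a hypothetical harness, not part of the commit; the payload shape is invented and not representative of mypy's real cache files):

# Hypothetical micro-benchmark, not part of the commit.
import json
import time

try:
    import orjson
except ImportError:
    orjson = None

payload = {f"mod{i}": {"deps": [f"m{j}" for j in range(30)]} for i in range(20000)}
text = json.dumps(payload)
blob = text.encode("utf-8")

t0 = time.time()
json.loads(text)
print("stdlib json loads:", time.time() - t0)

if orjson is not None:
    t0 = time.time()
    orjson.loads(blob)
    print("orjson loads:     ", time.time() - t0)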

misc/apply-cache-diff.py
6 additions & 6 deletions
@@ -8,13 +8,13 @@
 from __future__ import annotations

 import argparse
-import json
 import os
 import sys

 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore
+from mypy.util import json_dumps, json_loads


 def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
@@ -26,21 +26,21 @@ def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:

 def apply_diff(cache_dir: str, diff_file: str, sqlite: bool = False) -> None:
     cache = make_cache(cache_dir, sqlite)
-    with open(diff_file) as f:
-        diff = json.load(f)
+    with open(diff_file, "rb") as f:
+        diff = json_loads(f.read())

-    old_deps = json.loads(cache.read("@deps.meta.json"))
+    old_deps = json_loads(cache.read("@deps.meta.json"))

     for file, data in diff.items():
         if data is None:
             cache.remove(file)
         else:
             cache.write(file, data)
             if file.endswith(".meta.json") and "@deps" not in file:
-                meta = json.loads(data)
+                meta = json_loads(data)
                 old_deps["snapshot"][meta["id"]] = meta["hash"]

-    cache.write("@deps.meta.json", json.dumps(old_deps))
+    cache.write("@deps.meta.json", json_dumps(old_deps))

     cache.commit()

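Note on the mode changes above: json_dumps returns bytes (orjson's native output type) and json_loads accepts bytes, so diff files are now opened in binary mode; a text-mode write would raise TypeError. A minimal round trip, assuming the mypy.util helpers sketched earlier:

from mypy.util import json_dumps, json_loads

with open("cache.diff", "wb") as f:  # binary mode: json_dumps returns bytes
    f.write(json_dumps({"a": 1}))

with open("cache.diff", "rb") as f:
    assert json_loads(f.read()) == {"a": 1}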
misc/diff-cache.py
7 additions & 7 deletions
@@ -8,7 +8,6 @@
 from __future__ import annotations

 import argparse
-import json
 import os
 import sys
 from collections import defaultdict
@@ -17,6 +16,7 @@
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore
+from mypy.util import json_dumps, json_loads


 def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
@@ -33,7 +33,7 @@ def merge_deps(all: dict[str, set[str]], new: dict[str, set[str]]) -> None:

 def load(cache: MetadataStore, s: str) -> Any:
     data = cache.read(s)
-    obj = json.loads(data)
+    obj = json_loads(data)
     if s.endswith(".meta.json"):
         # For meta files, zero out the mtimes and sort the
         # dependencies to avoid spurious conflicts
@@ -73,7 +73,7 @@ def main() -> None:
     type_misses: dict[str, int] = defaultdict(int)
     type_hits: dict[str, int] = defaultdict(int)

-    updates: dict[str, str | None] = {}
+    updates: dict[str, bytes | None] = {}

     deps1: dict[str, set[str]] = {}
     deps2: dict[str, set[str]] = {}
@@ -96,7 +96,7 @@ def main() -> None:
         # so we can produce a much smaller direct diff of them.
         if ".deps." not in s:
             if obj2 is not None:
-                updates[s] = json.dumps(obj2)
+                updates[s] = json_dumps(obj2)
             else:
                 updates[s] = None
         elif obj2:
@@ -122,7 +122,7 @@ def main() -> None:
     merge_deps(new_deps, root_deps)

     new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
-    updates["@root.deps.json"] = json.dumps(new_deps_json)
+    updates["@root.deps.json"] = json_dumps(new_deps_json)

     # Drop updates to deps.meta.json for size reasons. The diff
     # applier will manually fix it up.
@@ -136,8 +136,8 @@ def main() -> None:
     print("hits", type_hits)
     print("misses", type_misses)

-    with open(args.output, "w") as f:
-        json.dump(updates, f)
+    with open(args.output, "wb") as f:
+        f.write(json_dumps(updates))


 if __name__ == "__main__":

mypy/build.py
18 additions & 27 deletions
@@ -95,6 +95,7 @@
 from mypy.stubinfo import legacy_bundled_packages, non_bundled_packages, stub_distribution_name
 from mypy.types import Type
 from mypy.typestate import reset_global_state, type_state
+from mypy.util import json_dumps, json_loads
 from mypy.version import __version__

 # Switch to True to produce debug output related to fine-grained incremental
@@ -858,7 +859,7 @@ def load_fine_grained_deps(self, id: str) -> dict[str, set[str]]:
         t0 = time.time()
         if id in self.fg_deps_meta:
             # TODO: Assert deps file wasn't changed.
-            deps = json.loads(self.metastore.read(self.fg_deps_meta[id]["path"]))
+            deps = json_loads(self.metastore.read(self.fg_deps_meta[id]["path"]))
         else:
             deps = {}
         val = {k: set(v) for k, v in deps.items()}
@@ -911,8 +912,8 @@ def stats_summary(self) -> Mapping[str, object]:
         return self.stats


-def deps_to_json(x: dict[str, set[str]]) -> str:
-    return json.dumps({k: list(v) for k, v in x.items()}, separators=(",", ":"))
+def deps_to_json(x: dict[str, set[str]]) -> bytes:
+    return json_dumps({k: list(v) for k, v in x.items()})


 # File for storing metadata about all the fine-grained dependency caches
@@ -980,7 +981,7 @@ def write_deps_cache(

     meta = {"snapshot": meta_snapshot, "deps_meta": fg_deps_meta}

-    if not metastore.write(DEPS_META_FILE, json.dumps(meta, separators=(",", ":"))):
+    if not metastore.write(DEPS_META_FILE, json_dumps(meta)):
         manager.log(f"Error writing fine-grained deps meta JSON file {DEPS_META_FILE}")
         error = True

@@ -1048,7 +1049,7 @@ def generate_deps_for_cache(manager: BuildManager, graph: Graph) -> dict[str, di

 def write_plugins_snapshot(manager: BuildManager) -> None:
     """Write snapshot of versions and hashes of currently active plugins."""
-    snapshot = json.dumps(manager.plugins_snapshot, separators=(",", ":"))
+    snapshot = json_dumps(manager.plugins_snapshot)
     if not manager.metastore.write(PLUGIN_SNAPSHOT_FILE, snapshot):
         manager.errors.set_file(_cache_dir_prefix(manager.options), None, manager.options)
         manager.errors.report(0, 0, "Error writing plugins snapshot", blocker=True)
@@ -1079,8 +1080,8 @@ def read_quickstart_file(
         # just ignore it.
         raw_quickstart: dict[str, Any] = {}
         try:
-            with open(options.quickstart_file) as f:
-                raw_quickstart = json.load(f)
+            with open(options.quickstart_file, "rb") as f:
+                raw_quickstart = json_loads(f.read())

             quickstart = {}
             for file, (x, y, z) in raw_quickstart.items():
@@ -1148,10 +1149,10 @@ def _load_json_file(
     manager.add_stats(metastore_read_time=time.time() - t0)
     # Only bother to compute the log message if we are logging it, since it could be big
     if manager.verbosity() >= 2:
-        manager.trace(log_success + data.rstrip())
+        manager.trace(log_success + data.rstrip().decode())
     try:
         t1 = time.time()
-        result = json.loads(data)
+        result = json_loads(data)
         manager.add_stats(data_json_load_time=time.time() - t1)
     except json.JSONDecodeError:
         manager.errors.set_file(file, None, manager.options)
@@ -1343,8 +1344,8 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | No
     # So that plugins can return data with tuples in it without
     # things silently always invalidating modules, we round-trip
     # the config data. This isn't beautiful.
-    plugin_data = json.loads(
-        json.dumps(manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True)))
+    plugin_data = json_loads(
+        json_dumps(manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True)))
     )
     if m.plugin_data != plugin_data:
         manager.log(f"Metadata abandoned for {id}: plugin configuration differs")
@@ -1478,18 +1479,15 @@ def validate_meta(
         "ignore_all": meta.ignore_all,
         "plugin_data": meta.plugin_data,
     }
-    if manager.options.debug_cache:
-        meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
-    else:
-        meta_str = json.dumps(meta_dict, separators=(",", ":"))
+    meta_bytes = json_dumps(meta_dict, manager.options.debug_cache)
     meta_json, _, _ = get_cache_names(id, path, manager.options)
     manager.log(
         "Updating mtime for {}: file {}, meta {}, mtime {}".format(
             id, path, meta_json, meta.mtime
         )
     )
     t1 = time.time()
-    manager.metastore.write(meta_json, meta_str)  # Ignore errors, just an optimization.
+    manager.metastore.write(meta_json, meta_bytes)  # Ignore errors, just an optimization.
     manager.add_stats(validate_update_time=time.time() - t1, validate_munging_time=t1 - t0)
     return meta

@@ -1507,13 +1505,6 @@ def compute_hash(text: str) -> str:
     return hash_digest(text.encode("utf-8"))


-def json_dumps(obj: Any, debug_cache: bool) -> str:
-    if debug_cache:
-        return json.dumps(obj, indent=2, sort_keys=True)
-    else:
-        return json.dumps(obj, sort_keys=True, separators=(",", ":"))
-
-
 def write_cache(
     id: str,
     path: str,
@@ -1566,8 +1557,8 @@ def write_cache(

     # Serialize data and analyze interface
     data = tree.serialize()
-    data_str = json_dumps(data, manager.options.debug_cache)
-    interface_hash = compute_hash(data_str)
+    data_bytes = json_dumps(data, manager.options.debug_cache)
+    interface_hash = hash_digest(data_bytes)

     plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))

@@ -1591,7 +1582,7 @@ def write_cache(
         manager.trace(f"Interface for {id} is unchanged")
     else:
         manager.trace(f"Interface for {id} has changed")
-        if not metastore.write(data_json, data_str):
+        if not metastore.write(data_json, data_bytes):
             # Most likely the error is the replace() call
             # (see https://github.com/python/mypy/issues/3215).
             manager.log(f"Error writing data JSON file {data_json}")
@@ -3568,4 +3559,4 @@ def write_undocumented_ref_info(
     assert not ref_info_file.startswith(".")

     deps_json = get_undocumented_ref_info_json(state.tree, type_map)
-    metastore.write(ref_info_file, json.dumps(deps_json, separators=(",", ":")))
+    metastore.write(ref_info_file, json_dumps(deps_json))
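
One subtlety in the write_cache hunk: the interface hash is now computed directly over the serialized bytes with hash_digest, instead of via compute_hash, which (per the hunk at old line 1507) just encodes a str and calls hash_digest. The two paths agree whenever the serialized output is byte-identical, though switching to orjson can change the bytes themselves (stdlib json escapes non-ASCII by default; orjson emits raw UTF-8), so previously cached hashes may not carry over. A self-contained sketch of the equivalence, using hashlib.sha256 as a stand-in for mypy's hash_digest (an assumption; the real helper lives in mypy.util):

import hashlib
import json


def hash_digest(data: bytes) -> str:
    # Stand-in for mypy.util.hash_digest; the real implementation may differ.
    return hashlib.sha256(data).hexdigest()


def compute_hash(text: str) -> str:
    # The str wrapper that build.py keeps (see the hunk at old line 1507).
    return hash_digest(text.encode("utf-8"))


data_bytes = json.dumps({"id": "m"}, sort_keys=True, separators=(",", ":")).encode("utf-8")
# Hashing bytes directly matches the old str-based path when the
# serialized output is byte-identical:
assert hash_digest(data_bytes) == compute_hash(data_bytes.decode("utf-8"))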

mypy/metastore.py
16 additions & 23 deletions
@@ -33,14 +33,14 @@ def getmtime(self, name: str) -> float:
         """

     @abstractmethod
-    def read(self, name: str) -> str:
+    def read(self, name: str) -> bytes:
         """Read the contents of a metadata entry.

         Raises FileNotFound if the entry does not exist.
         """

     @abstractmethod
-    def write(self, name: str, data: str, mtime: float | None = None) -> bool:
+    def write(self, name: str, data: bytes, mtime: float | None = None) -> bool:
         """Write a metadata entry.

         If mtime is specified, set it as the mtime of the entry. Otherwise,
@@ -86,16 +86,16 @@ def getmtime(self, name: str) -> float:

         return int(os.path.getmtime(os.path.join(self.cache_dir_prefix, name)))

-    def read(self, name: str) -> str:
+    def read(self, name: str) -> bytes:
         assert os.path.normpath(name) != os.path.abspath(name), "Don't use absolute paths!"

         if not self.cache_dir_prefix:
             raise FileNotFoundError()

-        with open(os.path.join(self.cache_dir_prefix, name)) as f:
+        with open(os.path.join(self.cache_dir_prefix, name), "rb") as f:
             return f.read()

-    def write(self, name: str, data: str, mtime: float | None = None) -> bool:
+    def write(self, name: str, data: bytes, mtime: float | None = None) -> bool:
         assert os.path.normpath(name) != os.path.abspath(name), "Don't use absolute paths!"

         if not self.cache_dir_prefix:
@@ -105,7 +105,7 @@ def write(self, name: str, data: str, mtime: float | None = None) -> bool:
         tmp_filename = path + "." + random_string()
         try:
             os.makedirs(os.path.dirname(path), exist_ok=True)
-            with open(tmp_filename, "w") as f:
+            with open(tmp_filename, "wb") as f:
                 f.write(data)
             os.replace(tmp_filename, path)
             if mtime is not None:
@@ -135,27 +135,20 @@ def list_all(self) -> Iterable[str]:


 SCHEMA = """
-CREATE TABLE IF NOT EXISTS files (
+CREATE TABLE IF NOT EXISTS files2 (
     path TEXT UNIQUE NOT NULL,
     mtime REAL,
-    data TEXT
+    data BLOB
 );
-CREATE INDEX IF NOT EXISTS path_idx on files(path);
+CREATE INDEX IF NOT EXISTS path_idx on files2(path);
 """
-# No migrations yet
-MIGRATIONS: list[str] = []


 def connect_db(db_file: str) -> sqlite3.Connection:
     import sqlite3.dbapi2

     db = sqlite3.dbapi2.connect(db_file)
     db.executescript(SCHEMA)
-    for migr in MIGRATIONS:
-        try:
-            db.executescript(migr)
-        except sqlite3.OperationalError:
-            pass
     return db


@@ -176,7 +169,7 @@ def _query(self, name: str, field: str) -> Any:
         if not self.db:
             raise FileNotFoundError()

-        cur = self.db.execute(f"SELECT {field} FROM files WHERE path = ?", (name,))
+        cur = self.db.execute(f"SELECT {field} FROM files2 WHERE path = ?", (name,))
         results = cur.fetchall()
         if not results:
             raise FileNotFoundError()
@@ -188,12 +181,12 @@ def getmtime(self, name: str) -> float:
         assert isinstance(mtime, float)
         return mtime

-    def read(self, name: str) -> str:
+    def read(self, name: str) -> bytes:
         data = self._query(name, "data")
-        assert isinstance(data, str)
+        assert isinstance(data, bytes)
         return data

-    def write(self, name: str, data: str, mtime: float | None = None) -> bool:
+    def write(self, name: str, data: bytes, mtime: float | None = None) -> bool:
         import sqlite3

         if not self.db:
@@ -202,7 +195,7 @@ def write(self, name: str, data: str, mtime: float | None = None) -> bool:
             if mtime is None:
                 mtime = time.time()
             self.db.execute(
-                "INSERT OR REPLACE INTO files(path, mtime, data) VALUES(?, ?, ?)",
+                "INSERT OR REPLACE INTO files2(path, mtime, data) VALUES(?, ?, ?)",
                 (name, mtime, data),
             )
         except sqlite3.OperationalError:
@@ -213,13 +206,13 @@ def remove(self, name: str) -> None:
         if not self.db:
             raise FileNotFoundError()

-        self.db.execute("DELETE FROM files WHERE path = ?", (name,))
+        self.db.execute("DELETE FROM files2 WHERE path = ?", (name,))

     def commit(self) -> None:
         if self.db:
             self.db.commit()

     def list_all(self) -> Iterable[str]:
         if self.db:
-            for row in self.db.execute("SELECT path FROM files"):
+            for row in self.db.execute("SELECT path FROM files2"):
                 yield row[0]
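
The rename from files to files2 sidesteps a schema migration: the data column changes from TEXT to BLOB, and old caches are simply abandoned rather than migrated, which is also why the never-used MIGRATIONS machinery can be dropped. A quick round trip through the new bytes-based store API (illustrative usage only; the cache path and payload are made up):

from mypy.metastore import SqliteMetadataStore
from mypy.util import json_dumps, json_loads

store = SqliteMetadataStore("/tmp/mypy-cache-demo")
payload = json_dumps({"id": "torch", "hash": "abc123"})  # compact bytes
assert store.write("demo.meta.json", payload)
store.commit()
assert isinstance(store.read("demo.meta.json"), bytes)  # BLOB round-trips as bytes
assert json_loads(store.read("demo.meta.json"))["id"] == "torch"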

0 commit comments