Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 87a9cf1

Browse files
dizzy57 and aignas authored
fix(py_wheel): produce deterministic wheel files (bazel-contrib#1453)
The current implementation does not produce deterministic output because:
- `ZipFile.writestr()` leaks the current date and time into the resulting zip archive.
- `ZipFile.write()` leaks the source file's mtime and mode bits (permissions) into the resulting zip archive.

By manually creating our own `ZipInfo` objects we can explicitly set the date and time fields to `Jan 1, 1980, 00:00` (the minimum value allowed by the zip file standard) and ensure that other file attributes are uniform across all entries in a zip file.

---------

Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
1 parent fde5fc1 commit 87a9cf1
Copy full SHA for 87a9cf1

File tree

3 files changed

+89
-11
lines changed
Filter options

3 files changed

+89
-11
lines changed

‎CHANGELOG.md

Copy file name to clipboardExpand all lines: CHANGELOG.md
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ A brief description of the categories of changes:
105105

106106
* (gazelle) Improve runfiles lookup hermeticity.
107107

108+
* (py_wheel) Produce deterministic wheel files
109+
108110
## [0.25.0] - 2023-08-22
109111

110112
### Changed

‎examples/wheel/wheel_test.py

Copy file name to clipboardExpand all lines: examples/wheel/wheel_test.py
+58Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import hashlib
1516
import os
1617
import platform
1718
import subprocess
@@ -43,9 +44,29 @@ def _get_path(self, filename):
4344
else:
4445
return path
4546

47+
def assertFileSha256Equal(self, filename, sha):
    """Assert that the SHA-256 digest of a file equals an expected value.

    Args:
        filename: path of the file whose contents are hashed.
        sha: expected digest, as the hexadecimal string produced by
            ``hashlib``'s ``hexdigest()``.

    Raises:
        AssertionError: raised through ``self.assertEqual`` when the
            computed digest differs from ``sha``.
    """
    # Named ``digest`` rather than ``hash`` so the builtin is not shadowed.
    digest = hashlib.sha256()
    with open(filename, "rb") as f:
        # Read in 1 MiB chunks so the whole file is never held in memory;
        # iteration stops at the empty bytes object returned at EOF.
        for chunk in iter(lambda: f.read(2**20), b""):
            digest.update(chunk)
    # Pass the file name as the message so a mismatch says which file changed.
    self.assertEqual(digest.hexdigest(), sha, msg=filename)
56+
57+
def assertAllEntriesHasReproducibleMetadata(self, zf):
    """Assert every entry of the open zip archive ``zf`` has fixed metadata.

    Each entry must carry the 1980-01-01 00:00:00 timestamp, a
    ``create_system`` of 3, ``0o777 << 16`` external attributes and
    DEFLATE compression. The entry's file name is used as the failure
    message of every assertion.
    """
    for entry in zf.infolist():
        name = entry.filename
        # (actual, expected) pairs, checked in this order for each entry.
        expectations = (
            (entry.date_time, (1980, 1, 1, 0, 0, 0)),
            (entry.create_system, 3),
            (entry.external_attr, 0o777 << 16),
            (entry.compress_type, zipfile.ZIP_DEFLATED),
        )
        for actual, expected in expectations:
            self.assertEqual(actual, expected, msg=name)
65+
4666
def test_py_library_wheel(self):
4767
filename = self._get_path("example_minimal_library-0.0.1-py3-none-any.whl")
4868
with zipfile.ZipFile(filename) as zf:
69+
self.assertAllEntriesHasReproducibleMetadata(zf)
4970
self.assertEqual(
5071
zf.namelist(),
5172
[
@@ -56,12 +77,16 @@ def test_py_library_wheel(self):
5677
"example_minimal_library-0.0.1.dist-info/RECORD",
5778
],
5879
)
80+
self.assertFileSha256Equal(
81+
filename, "6da8e06a3fdd9ae5ee9fa8f796610723c05a4b0d7fde0ec5179401e956204139"
82+
)
5983

6084
def test_py_package_wheel(self):
6185
filename = self._get_path(
6286
"example_minimal_package-0.0.1-py3-none-any.whl",
6387
)
6488
with zipfile.ZipFile(filename) as zf:
89+
self.assertAllEntriesHasReproducibleMetadata(zf)
6590
self.assertEqual(
6691
zf.namelist(),
6792
[
@@ -74,12 +99,16 @@ def test_py_package_wheel(self):
7499
"example_minimal_package-0.0.1.dist-info/RECORD",
75100
],
76101
)
102+
self.assertFileSha256Equal(
103+
filename, "2948b0b5e0aa421e0b40f78b74018bbc2f218165f211da0a4609e431e8e52bee"
104+
)
77105

78106
def test_customized_wheel(self):
79107
filename = self._get_path(
80108
"example_customized-0.0.1-py3-none-any.whl",
81109
)
82110
with zipfile.ZipFile(filename) as zf:
111+
self.assertAllEntriesHasReproducibleMetadata(zf)
83112
self.assertEqual(
84113
zf.namelist(),
85114
[
@@ -159,12 +188,16 @@ def test_customized_wheel(self):
159188
first = first.main:f
160189
second = second.main:s""",
161190
)
191+
self.assertFileSha256Equal(
192+
filename, "66f0c1bfe2cedb2f4cf08d4fe955096860186c0a2f3524e0cb02387a55ac3e63"
193+
)
162194

163195
def test_legacy_filename_escaping(self):
164196
filename = self._get_path(
165197
"file_name_escaping-0.0.1_r7-py3-none-any.whl",
166198
)
167199
with zipfile.ZipFile(filename) as zf:
200+
self.assertAllEntriesHasReproducibleMetadata(zf)
168201
self.assertEquals(
169202
zf.namelist(),
170203
[
@@ -193,6 +226,9 @@ def test_legacy_filename_escaping(self):
193226
UNKNOWN
194227
""",
195228
)
229+
self.assertFileSha256Equal(
230+
filename, "593c6ab58627f2446d0f1ef2956fd6d42104eedce4493c72d462f7ebf8cb74fa"
231+
)
196232

197233
def test_filename_escaping(self):
198234
filename = self._get_path(
@@ -234,6 +270,7 @@ def test_custom_package_root_wheel(self):
234270
)
235271

236272
with zipfile.ZipFile(filename) as zf:
273+
self.assertAllEntriesHasReproducibleMetadata(zf)
237274
self.assertEqual(
238275
zf.namelist(),
239276
[
@@ -255,13 +292,17 @@ def test_custom_package_root_wheel(self):
255292
# Ensure RECORD files do not have leading forward slashes
256293
for line in record_contents.splitlines():
257294
self.assertFalse(line.startswith("/"))
295+
self.assertFileSha256Equal(
296+
filename, "1b1fa3a4e840211084ef80049d07947b845c99bedb2778496d30e0c1524686ac"
297+
)
258298

259299
def test_custom_package_root_multi_prefix_wheel(self):
260300
filename = self._get_path(
261301
"example_custom_package_root_multi_prefix-0.0.1-py3-none-any.whl",
262302
)
263303

264304
with zipfile.ZipFile(filename) as zf:
305+
self.assertAllEntriesHasReproducibleMetadata(zf)
265306
self.assertEqual(
266307
zf.namelist(),
267308
[
@@ -282,13 +323,17 @@ def test_custom_package_root_multi_prefix_wheel(self):
282323
# Ensure RECORD files do not have leading forward slashes
283324
for line in record_contents.splitlines():
284325
self.assertFalse(line.startswith("/"))
326+
self.assertFileSha256Equal(
327+
filename, "f0422d7a338de3c76bf2525927fd93c0f47f2e9c60ecc0944e3e32b642c28137"
328+
)
285329

286330
def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
287331
filename = self._get_path(
288332
"example_custom_package_root_multi_prefix_reverse_order-0.0.1-py3-none-any.whl",
289333
)
290334

291335
with zipfile.ZipFile(filename) as zf:
336+
self.assertAllEntriesHasReproducibleMetadata(zf)
292337
self.assertEqual(
293338
zf.namelist(),
294339
[
@@ -309,12 +354,16 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
309354
# Ensure RECORD files do not have leading forward slashes
310355
for line in record_contents.splitlines():
311356
self.assertFalse(line.startswith("/"))
357+
self.assertFileSha256Equal(
358+
filename, "4f9e8c917b4050f121ac81e9a2bb65723ef09a1b90b35d93792ac3a62a60efa3"
359+
)
312360

313361
def test_python_requires_wheel(self):
314362
filename = self._get_path(
315363
"example_python_requires_in_a_package-0.0.1-py3-none-any.whl",
316364
)
317365
with zipfile.ZipFile(filename) as zf:
366+
self.assertAllEntriesHasReproducibleMetadata(zf)
318367
metadata_contents = zf.read(
319368
"example_python_requires_in_a_package-0.0.1.dist-info/METADATA"
320369
)
@@ -330,6 +379,9 @@ def test_python_requires_wheel(self):
330379
UNKNOWN
331380
""",
332381
)
382+
self.assertFileSha256Equal(
383+
filename, "9bfe8197d379f88715458a75e45c1f521a8b9d3cc43fe19b407c4ab207228b7c"
384+
)
333385

334386
def test_python_abi3_binary_wheel(self):
335387
arch = "amd64"
@@ -346,6 +398,7 @@ def test_python_abi3_binary_wheel(self):
346398
f"example_python_abi3_binary_wheel-0.0.1-cp38-abi3-{os_string}_{arch}.whl",
347399
)
348400
with zipfile.ZipFile(filename) as zf:
401+
self.assertAllEntriesHasReproducibleMetadata(zf)
349402
metadata_contents = zf.read(
350403
"example_python_abi3_binary_wheel-0.0.1.dist-info/METADATA"
351404
)
@@ -380,6 +433,7 @@ def test_rule_creates_directory_and_is_included_in_wheel(self):
380433
)
381434

382435
with zipfile.ZipFile(filename) as zf:
436+
self.assertAllEntriesHasReproducibleMetadata(zf)
383437
self.assertEqual(
384438
zf.namelist(),
385439
[
@@ -390,13 +444,17 @@ def test_rule_creates_directory_and_is_included_in_wheel(self):
390444
"use_rule_with_dir_in_outs-0.0.1.dist-info/RECORD",
391445
],
392446
)
447+
self.assertFileSha256Equal(
448+
filename, "8ad5f639cc41ac6ac67eb70f6553a7fdecabaf3a1b952c3134eaea59610c2a64"
449+
)
393450

394451
def test_rule_expands_workspace_status_keys_in_wheel_metadata(self):
395452
filename = self._get_path(
396453
"example_minimal_library_BUILD_USER_-0.1._BUILD_TIMESTAMP_-py3-none-any.whl"
397454
)
398455

399456
with zipfile.ZipFile(filename) as zf:
457+
self.assertAllEntriesHasReproducibleMetadata(zf)
400458
metadata_file = None
401459
for f in zf.namelist():
402460
self.assertNotIn("_BUILD_TIMESTAMP_", f)

‎tools/wheelmaker.py

Copy file name to clipboardExpand all lines: tools/wheelmaker.py
+29-11Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414

1515
import argparse
1616
import base64
17-
import collections
1817
import hashlib
1918
import os
2019
import re
2120
import sys
2221
import zipfile
2322
from pathlib import Path
2423

24+
_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
25+
2526

2627
def commonpath(path1, path2):
2728
ret = []
@@ -189,7 +190,8 @@ def add_string(self, filename, contents):
189190
"""Add given 'contents' as filename to the distribution."""
190191
if sys.version_info[0] > 2 and isinstance(contents, str):
191192
contents = contents.encode("utf-8", "surrogateescape")
192-
self._zipfile.writestr(filename, contents)
193+
zinfo = self._zipinfo(filename)
194+
self._zipfile.writestr(zinfo, contents)
193195
hash = hashlib.sha256()
194196
hash.update(contents)
195197
self._add_to_record(filename, self._serialize_digest(hash), len(contents))
@@ -219,20 +221,36 @@ def arcname_from(name):
219221
return
220222

221223
arcname = arcname_from(package_filename)
224+
zinfo = self._zipinfo(arcname)
222225

223-
self._zipfile.write(real_filename, arcname=arcname)
224-
# Find the hash and length
226+
# Write file to the zip archive while computing the hash and length
225227
hash = hashlib.sha256()
226228
size = 0
227-
with open(real_filename, "rb") as f:
228-
while True:
229-
block = f.read(2**20)
230-
if not block:
231-
break
232-
hash.update(block)
233-
size += len(block)
229+
with open(real_filename, "rb") as fsrc:
230+
with self._zipfile.open(zinfo, "w") as fdst:
231+
while True:
232+
block = fsrc.read(2**20)
233+
if not block:
234+
break
235+
fdst.write(block)
236+
hash.update(block)
237+
size += len(block)
234238
self._add_to_record(arcname, self._serialize_digest(hash), size)
235239

240+
def _zipinfo(self, filename):
    """Build a ZipInfo for ``filename`` whose metadata is fixed and reproducible."""
    # ZipInfo.from_file drops leading path separators from archive names;
    # do the same here for the primary and, if defined, alternate separator.
    leading = os.path.sep
    if os.path.altsep is not None:
        leading = leading + os.path.altsep

    entry = zipfile.ZipInfo(filename.lstrip(leading), _ZIP_EPOCH)
    # Use the archive's own compression setting for every entry.
    entry.compress_type = self._zipfile.compression
    entry.external_attr = 0o777 << 16  # permissions: rwxrwxrwx
    entry.create_system = 3  # mark the entry as created on a unix-y system
    return entry
253+
236254
def add_wheelfile(self):
237255
"""Write WHEEL file to the distribution"""
238256
# TODO(pstradomski): Support non-purelib wheels.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.