7
7
import pathlib
8
8
import subprocess
9
9
import sys
10
+ import urllib .request
10
11
import typing
11
- import zipfile
12
- from urllib .request import urlopen
13
12
14
13
CPYTHON_ROOT_DIR = pathlib .Path (__file__ ).parent .parent .parent
15
14
@@ -125,30 +124,41 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]:
125
124
return sorted ([line .split ()[- 1 ] for line in git_check_ignore_lines if line .startswith ("::" )])
126
125
127
126
128
- def main () -> None :
129
- sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json"
130
- sbom_data = json .loads (sbom_path .read_bytes ())
127
def get_externals() -> list[str]:
    """
    Parse 'PCbuild/get_externals.bat' for external libraries.

    Returns the git tags appended to the 'libraries' variable in the batch
    file, as a flat list of strings in the order they appear in the file.
    """
    get_externals_bat_path = CPYTHON_ROOT_DIR / "PCbuild/get_externals.bat"
    # Each external is declared as 'set libraries=%libraries% <git-tag>'.
    # Only the tag is captured; the pattern requires whitespace after the
    # tag, so a tag at the very end of the file without a trailing newline
    # is not matched.
    return re.findall(
        r"set\s+libraries\s*=\s*%libraries%\s+([a-zA-Z0-9.-]+)\s",
        # Explicit encoding so the result doesn't depend on the locale.
        get_externals_bat_path.read_text(encoding="utf-8"),
    )
131
138
132
- # We regenerate all of this information. Package information
133
- # should be preserved though since that is edited by humans.
134
- sbom_data ["files" ] = []
135
- sbom_data ["relationships" ] = []
136
139
137
- # Ensure all packages in this tool are represented also in the SBOM file.
138
- actual_names = {package ["name" ] for package in sbom_data ["packages" ]}
139
- expected_names = set (PACKAGE_TO_FILES )
140
- error_if (
141
- actual_names != expected_names ,
142
- f"Packages defined in SBOM tool don't match those defined in SBOM file: { actual_names } , { expected_names } " ,
143
- )
140
+ def check_sbom_packages (sbom_data : dict [str , typing .Any ]) -> None :
141
+ """Make a bunch of assertions about the SBOM package data to ensure it's consistent."""
144
142
145
- # Make a bunch of assertions about the SBOM data to ensure it's consistent.
146
143
for package in sbom_data ["packages" ]:
147
144
# Properties and ID must be properly formed.
148
145
error_if (
149
146
"name" not in package ,
150
147
"Package is missing the 'name' field"
151
148
)
149
+
150
+ # Verify that the checksum matches the expected value
151
+ # and that the download URL is valid.
152
+ if "checksums" not in package or "CI" in os .environ :
153
+ download_location = package ["downloadLocation" ]
154
+ resp = urllib .request .urlopen (download_location )
155
+ error_if (resp .status != 200 , f"Couldn't access URL: { download_location } '" )
156
+
157
+ package ["checksums" ] = [{
158
+ "algorithm" : "SHA256" ,
159
+ "checksumValue" : hashlib .sha256 (resp .read ()).hexdigest ()
160
+ }]
161
+
152
162
missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set (package .keys ())
153
163
error_if (
154
164
bool (missing_required_keys ),
@@ -180,6 +190,26 @@ def main() -> None:
180
190
f"License identifier must be 'NOASSERTION'"
181
191
)
182
192
193
+
194
+ def create_source_sbom () -> None :
195
+ sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json"
196
+ sbom_data = json .loads (sbom_path .read_bytes ())
197
+
198
+ # We regenerate all of this information. Package information
199
+ # should be preserved though since that is edited by humans.
200
+ sbom_data ["files" ] = []
201
+ sbom_data ["relationships" ] = []
202
+
203
+ # Ensure all packages in this tool are represented also in the SBOM file.
204
+ actual_names = {package ["name" ] for package in sbom_data ["packages" ]}
205
+ expected_names = set (PACKAGE_TO_FILES )
206
+ error_if (
207
+ actual_names != expected_names ,
208
+ f"Packages defined in SBOM tool don't match those defined in SBOM file: { actual_names } , { expected_names } " ,
209
+ )
210
+
211
+ check_sbom_packages (sbom_data )
212
+
183
213
# We call 'sorted()' here a lot to avoid filesystem scan order issues.
184
214
for name , files in sorted (PACKAGE_TO_FILES .items ()):
185
215
package_spdx_id = spdx_id (f"SPDXRef-PACKAGE-{ name } " )
@@ -224,5 +254,49 @@ def main() -> None:
224
254
sbom_path .write_text (json .dumps (sbom_data , indent = 2 , sort_keys = True ))
225
255
226
256
257
def create_externals_sbom() -> None:
    """
    Regenerate 'Misc/externals.spdx.json' from the externals returned by
    get_externals().

    For every package in the SBOM file the 'versionInfo' and
    'downloadLocation' fields are rewritten from the matching git tag. When
    the download location is new or has changed, the stale 'checksums' entry
    is dropped so that check_sbom_packages() computes a fresh one. The
    updated document is written back to disk.
    """
    sbom_path = CPYTHON_ROOT_DIR / "Misc/externals.spdx.json"
    sbom_data = json.loads(sbom_path.read_bytes())

    externals_name_to_version = {}
    externals_name_to_git_tag = {}
    for git_tag in get_externals():
        # Split on the last '-' so that names which themselves contain a
        # dash keep it; everything after the last dash is the version.
        name, _, version = git_tag.rpartition("-")
        externals_name_to_version[name] = version
        externals_name_to_git_tag[name] = git_tag

    # Ensure all packages in this tool are represented also in the SBOM file.
    actual_names = {package["name"] for package in sbom_data["packages"]}
    expected_names = set(externals_name_to_version)
    error_if(
        actual_names != expected_names,
        f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}",
    )

    # Set the versionInfo and downloadLocation fields for all packages.
    # NOTE(review): the lookups below assume error_if() aborts on a name
    # mismatch -- confirm against its definition.
    for package in sbom_data["packages"]:
        package_name = package["name"]
        package["versionInfo"] = externals_name_to_version[package_name]
        download_location = (
            "https://github.com/python/cpython-source-deps/archive/refs/tags/"
            f"{externals_name_to_git_tag[package_name]}.tar.gz"
        )

        # If the download URL has changed we want the checksum to get
        # recalculated. A package entry without a 'downloadLocation' yet
        # counts as changed instead of raising KeyError.
        if package.get("downloadLocation") != download_location:
            package.pop("checksums", None)
        package["downloadLocation"] = download_location

    check_sbom_packages(sbom_data)

    # Update the SBOM on disk
    sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True))
294
+
295
+
296
def main() -> None:
    """Regenerate both SBOM documents: the source SBOM first, then externals."""
    for regenerate in (create_source_sbom, create_externals_sbom):
        regenerate()
299
+
300
+
227
301
if __name__ == "__main__" :
228
302
main ()
0 commit comments