Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit eef839b

Browse filesBrowse files
WillMorrisonaignas
andauthored
fix: Avoid creating URLs with empty path segments from index URLs in environment variables (bazel-contrib#2557)
This change updates `_read_simpleapi` such that it correctly handles the case where the index URL is specified in an environment variable and contains a trailing slash. The URL construction would have introduced an empty path segment, which is now removed. Fixes: bazel-contrib#2554 --------- Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
1 parent c0bd668 commit eef839b
Copy full SHA for eef839b

File tree

Expand file treeCollapse file tree

3 files changed

+153
-7
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+153
-7
lines changed

‎CHANGELOG.md

Copy file name to clipboardExpand all lines: CHANGELOG.md
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ Unreleased changes template.
5757
{#v0-0-0-fixed}
5858
### Fixed
5959
* (gazelle) Providing multiple input requirements files to `gazelle_python_manifest` now works correctly.
60+
* (pypi) Handle trailing slashes in pip index URLs in environment variables,
61+
fixes [#2554](https://github.com/bazelbuild/rules_python/issues/2554).
6062

6163
{#v0-0-0-added}
6264
### Added

‎python/private/pypi/simpleapi_download.bzl

Copy file name to clipboardExpand all lines: python/private/pypi/simpleapi_download.bzl
+31-4Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ A file that houses private functions used in the `bzlmod` extension with the sam
1717
"""
1818

1919
load("@bazel_features//:features.bzl", "bazel_features")
20-
load("//python/private:auth.bzl", "get_auth")
20+
load("//python/private:auth.bzl", _get_auth = "get_auth")
2121
load("//python/private:envsubst.bzl", "envsubst")
2222
load("//python/private:normalize_name.bzl", "normalize_name")
2323
load("//python/private:text_util.bzl", "render")
@@ -30,6 +30,7 @@ def simpleapi_download(
3030
cache,
3131
parallel_download = True,
3232
read_simpleapi = None,
33+
get_auth = None,
3334
_fail = fail):
3435
"""Download Simple API HTML.
3536
@@ -59,6 +60,7 @@ def simpleapi_download(
5960
parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
6061
read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
6162
Used in tests.
63+
get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
6264
_fail: a function to print a failure. Used in tests.
6365
6466
Returns:
@@ -98,6 +100,7 @@ def simpleapi_download(
98100
),
99101
attr = attr,
100102
cache = cache,
103+
get_auth = get_auth,
101104
**download_kwargs
102105
)
103106
if hasattr(result, "wait"):
@@ -144,7 +147,7 @@ def simpleapi_download(
144147

145148
return contents
146149

147-
def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
150+
def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
148151
"""Read SimpleAPI.
149152
150153
Args:
@@ -157,6 +160,7 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
157160
* auth_patterns: The auth_patterns parameter for ctx.download, see
158161
http_file for docs.
159162
cache: A dict for storing the results.
163+
get_auth: A function to get auth information. Used in tests.
160164
**download_kwargs: Any extra params to ctx.download.
161165
Note that output and auth will be passed for you.
162166
@@ -169,11 +173,11 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
169173
# them to ctx.download if we want to correctly handle the relative URLs.
170174
# TODO: Add a test that env subbed index urls do not leak into the lock file.
171175

172-
real_url = envsubst(
176+
real_url = strip_empty_path_segments(envsubst(
173177
url,
174178
attr.envsubst,
175179
ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
176-
)
180+
))
177181

178182
cache_key = real_url
179183
if cache_key in cache:
@@ -194,6 +198,8 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
194198

195199
output = ctx.path(output_str.strip("_").lower() + ".html")
196200

201+
get_auth = get_auth or _get_auth
202+
197203
# NOTE: this may have block = True or block = False in the download_kwargs
198204
download = ctx.download(
199205
url = [real_url],
@@ -211,6 +217,27 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
211217

212218
return _read_index_result(ctx, download, output, real_url, cache, cache_key)
213219

220+
def strip_empty_path_segments(url):
221+
"""Removes empty path segments from a URL. Does nothing for urls with no scheme.
222+
223+
Public only for testing.
224+
225+
Args:
226+
url: The url to remove empty path segments from
227+
228+
Returns:
229+
The url with empty path segments removed and any trailing slash preserved.
230+
If the url had no scheme it is returned unchanged.
231+
"""
232+
scheme, _, rest = url.partition("://")
233+
if rest == "":
234+
return url
235+
stripped = "/".join([p for p in rest.split("/") if p])
236+
if url.endswith("/"):
237+
return "{}://{}/".format(scheme, stripped)
238+
else:
239+
return "{}://{}".format(scheme, stripped)
240+
214241
def _read_index_result(ctx, result, output, url, cache, cache_key):
215242
if not result.success:
216243
return struct(success = False)

‎tests/pypi/simpleapi_download/simpleapi_download_tests.bzl

Copy file name to clipboardExpand all lines: tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+120-3Lines changed: 120 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,18 @@
1515
""
1616

1717
load("@rules_testing//lib:test_suite.bzl", "test_suite")
18-
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download") # buildifier: disable=bzl-visibility
18+
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility
1919

2020
_tests = []
2121

2222
def _test_simple(env):
2323
calls = []
2424

25-
def read_simpleapi(ctx, url, attr, cache, block):
25+
def read_simpleapi(ctx, url, attr, cache, get_auth, block):
2626
_ = ctx # buildifier: disable=unused-variable
2727
_ = attr
2828
_ = cache
29+
_ = get_auth
2930
env.expect.that_bool(block).equals(False)
3031
calls.append(url)
3132
if "foo" in url and "main" in url:
@@ -73,10 +74,11 @@ def _test_fail(env):
7374
calls = []
7475
fails = []
7576

76-
def read_simpleapi(ctx, url, attr, cache, block):
77+
def read_simpleapi(ctx, url, attr, cache, get_auth, block):
7778
_ = ctx # buildifier: disable=unused-variable
7879
_ = attr
7980
_ = cache
81+
_ = get_auth
8082
env.expect.that_bool(block).equals(False)
8183
calls.append(url)
8284
if "foo" in url:
@@ -119,6 +121,121 @@ def _test_fail(env):
119121

120122
_tests.append(_test_fail)
121123

124+
def _test_download_url(env):
125+
downloads = {}
126+
127+
def download(url, output, **kwargs):
128+
_ = kwargs # buildifier: disable=unused-variable
129+
downloads[url[0]] = output
130+
return struct(success = True)
131+
132+
simpleapi_download(
133+
ctx = struct(
134+
os = struct(environ = {}),
135+
download = download,
136+
read = lambda i: "contents of " + i,
137+
path = lambda i: "path/for/" + i,
138+
),
139+
attr = struct(
140+
index_url_overrides = {},
141+
index_url = "https://example.com/main/simple/",
142+
extra_index_urls = [],
143+
sources = ["foo", "bar", "baz"],
144+
envsubst = [],
145+
),
146+
cache = {},
147+
parallel_download = False,
148+
get_auth = lambda ctx, urls, ctx_attr: struct(),
149+
)
150+
151+
env.expect.that_dict(downloads).contains_exactly({
152+
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
153+
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
154+
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
155+
})
156+
157+
_tests.append(_test_download_url)
158+
159+
def _test_download_url_parallel(env):
160+
downloads = {}
161+
162+
def download(url, output, **kwargs):
163+
_ = kwargs # buildifier: disable=unused-variable
164+
downloads[url[0]] = output
165+
return struct(wait = lambda: struct(success = True))
166+
167+
simpleapi_download(
168+
ctx = struct(
169+
os = struct(environ = {}),
170+
download = download,
171+
read = lambda i: "contents of " + i,
172+
path = lambda i: "path/for/" + i,
173+
),
174+
attr = struct(
175+
index_url_overrides = {},
176+
index_url = "https://example.com/main/simple/",
177+
extra_index_urls = [],
178+
sources = ["foo", "bar", "baz"],
179+
envsubst = [],
180+
),
181+
cache = {},
182+
parallel_download = True,
183+
get_auth = lambda ctx, urls, ctx_attr: struct(),
184+
)
185+
186+
env.expect.that_dict(downloads).contains_exactly({
187+
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
188+
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
189+
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
190+
})
191+
192+
_tests.append(_test_download_url_parallel)
193+
194+
def _test_download_envsubst_url(env):
195+
downloads = {}
196+
197+
def download(url, output, **kwargs):
198+
_ = kwargs # buildifier: disable=unused-variable
199+
downloads[url[0]] = output
200+
return struct(success = True)
201+
202+
simpleapi_download(
203+
ctx = struct(
204+
os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
205+
download = download,
206+
read = lambda i: "contents of " + i,
207+
path = lambda i: "path/for/" + i,
208+
),
209+
attr = struct(
210+
index_url_overrides = {},
211+
index_url = "$INDEX_URL",
212+
extra_index_urls = [],
213+
sources = ["foo", "bar", "baz"],
214+
envsubst = ["INDEX_URL"],
215+
),
216+
cache = {},
217+
parallel_download = False,
218+
get_auth = lambda ctx, urls, ctx_attr: struct(),
219+
)
220+
221+
env.expect.that_dict(downloads).contains_exactly({
222+
"https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
223+
"https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
224+
"https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
225+
})
226+
227+
_tests.append(_test_download_envsubst_url)
228+
229+
def _test_strip_empty_path_segments(env):
230+
env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
231+
env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
232+
env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
233+
env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
234+
env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
235+
env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
236+
237+
_tests.append(_test_strip_empty_path_segments)
238+
122239
def simpleapi_download_test_suite(name):
123240
"""Create the test suite.
124241

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.