Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit cea0edd

Browse files
filipchristiansenCopilot
authored and committed
refactor: centralize PAT validation, streamline repo checks & misc cleanup (#349)
* refactor: centralize PAT validation, streamline repo checks & housekeeping * `.venv*` to `.gitignore` * `# type: ignore[attr-defined]` hints in `compat_typing.py` for IDE-agnostic imports * Helpful PAT string in `InvalidGitHubTokenError` for easier debugging * Bump **ruff-pre-commit** hook → `v0.12.1` * CONTRIBUTING: * Require **Python 3.9+** * Recommend signed (`-S`) commits * PAT validation now happens **only** in entry points (`utils.auth.resolve_token` for CLI/lib, `server.process_query` for Web UI) * Unified `_check_github_repo_exists` into `check_repo_exists`, replacing `curl -I` with `curl --silent --location --write-out %{http_code} -o /dev/null` * Broaden `_GITHUB_PAT_PATTERN` * `create_git_auth_header` raises `ValueError` when hostname is missing * Tests updated to expect raw HTTP-code output * Superfluous “token can be set via `GITHUB_TOKEN`” notes in docstrings * `.gitingestignore` & `.terraform` from `DEFAULT_IGNORE_PATTERNS` * Token validation inside `create_git_command` * Obsolete `test_create_git_command_invalid_token` * Adjust `test_clone.py` and `test_git_utils.py` for new status-code handling * Consolidate mocks after token-validation relocation BREAKING CHANGE: `create_git_command` no longer validates GitHub tokens; callers must ensure tokens are valid (via `validate_github_token`) before invoking lower-level git helpers. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 6fe62cc commit cea0edd
Copy full SHA for cea0edd

File tree

Expand file treeCollapse file tree

5 files changed

+81
-39
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

5 files changed

+81
-39
lines changed
Open diff view settings
Collapse file

‎.pre-commit-config.yaml‎

Copy file name to clipboardExpand all lines: .pre-commit-config.yaml
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,4 @@ repos:
151151
- repo: meta
152152
hooks:
153153
- id: check-hooks-apply
154-
- id: check-useless-excludes
154+
- id: check-useless-excludes
Collapse file

‎CONTRIBUTING.md‎

Copy file name to clipboardExpand all lines: CONTRIBUTING.md
+1-1Lines changed: 1 addition & 1 deletion
  • Display the source diff
  • Display the rich diff
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,4 @@ If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK
8989

9090
13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.
9191

92-
*(Optional) Invite a maintainer to your branch for easier collaboration.*
92+
*(Optional) Invite a maintainer to your branch for easier collaboration.*
Collapse file

‎src/gitingest/utils/git_utils.py‎

Copy file name to clipboardExpand all lines: src/gitingest/utils/git_utils.py
+45-30Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,27 @@
44

55
import asyncio
66
import base64
7+
import os
78
import re
89
from typing import Final
910
import sys
11+
from typing import Final
1012
from urllib.parse import urlparse
1113

1214
import httpx
1315
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
1416

1517
from gitingest.utils.compat_func import removesuffix
18+
19+
from starlette.status import (
20+
HTTP_200_OK,
21+
HTTP_301_MOVED_PERMANENTLY,
22+
HTTP_302_FOUND,
23+
HTTP_401_UNAUTHORIZED,
24+
HTTP_403_FORBIDDEN,
25+
HTTP_404_NOT_FOUND,
26+
)
27+
1628
from gitingest.utils.exceptions import InvalidGitHubTokenError
1729
from server.server_utils import Colors
1830

@@ -130,28 +142,46 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
130142
If the host returns an unrecognised status code.
131143
132144
"""
133-
headers = {}
145+
# TODO: use `requests` instead of `curl`
146+
cmd: list[str] = [
147+
"curl",
148+
"--silent",
149+
"--location",
150+
"--head",
151+
"--write-out",
152+
"%{http_code}",
153+
"-o",
154+
os.devnull,
155+
]
134156

135157
if token and is_github_host(url):
136158
host, owner, repo = _parse_github_url(url)
137159
# Public GitHub vs. GitHub Enterprise
138160
base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3"
139161
url = f"{base_api}/repos/{owner}/{repo}"
140-
headers["Authorization"] = f"Bearer {token}"
162+
cmd += [f"Authorization: Bearer {token}"]
141163

142-
async with httpx.AsyncClient(follow_redirects=True) as client:
143-
try:
144-
response = await client.head(url, headers=headers)
145-
except httpx.RequestError:
146-
return False
164+
cmd.append(url)
147165

148-
status_code = response.status_code
166+
proc = await asyncio.create_subprocess_exec(
167+
*cmd,
168+
stdout=asyncio.subprocess.PIPE,
169+
stderr=asyncio.subprocess.PIPE,
170+
)
171+
stdout, _ = await proc.communicate()
149172

150-
if status_code == HTTP_200_OK:
173+
if proc.returncode != 0:
174+
return False
175+
176+
status = int(stdout.decode().strip())
177+
if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}:
151178
return True
152-
if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}:
179+
# TODO: handle 302 redirects
180+
if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}:
181+
return False
182+
if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}:
153183
return False
154-
msg = f"Unexpected HTTP status {status_code} for {url}"
184+
msg = f"Unexpected HTTP status {status} for {url}"
155185
raise RuntimeError(msg)
156186

157187

@@ -183,7 +213,7 @@ def _parse_github_url(url: str) -> tuple[str, str, str]:
183213
msg = f"Un-recognised GitHub hostname: {parsed.hostname!r}"
184214
raise ValueError(msg)
185215

186-
parts = removesuffix(parsed.path, ".git").strip("/").split("/")
216+
parts = parsed.path.strip("/").removesuffix(".git").split("/")
187217
expected_path_length = 2
188218
if len(parts) != expected_path_length:
189219
msg = f"Path must look like /<owner>/<repo>: {parsed.path!r}"
@@ -216,28 +246,13 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
216246
If the ``ref_type`` parameter is not "branches" or "tags".
217247
218248
"""
219-
if ref_type not in ("branches", "tags"):
220-
msg = f"Invalid fetch type: {ref_type}"
221-
raise ValueError(msg)
222-
223249
cmd = ["git"]
224250

225251
# Add authentication if needed
226252
if token and is_github_host(url):
227253
cmd += ["-c", create_git_auth_header(token, url=url)]
228254

229-
cmd += ["ls-remote"]
230-
231-
fetch_tags = ref_type == "tags"
232-
to_fetch = "tags" if fetch_tags else "heads"
233-
234-
cmd += [f"--{to_fetch}"]
235-
236-
# `--refs` filters out the peeled tag objects (those ending with "^{}") (for tags)
237-
if fetch_tags:
238-
cmd += ["--refs"]
239-
240-
cmd += [url]
255+
cmd += ["ls-remote", "--heads", url]
241256

242257
await ensure_git_installed()
243258
stdout, _ = await run_command(*cmd)
@@ -246,9 +261,9 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
246261
# - Skip empty lines and lines that don't contain "refs/{to_fetch}/"
247262
# - Extract the branch or tag name after "refs/{to_fetch}/"
248263
return [
249-
line.split(f"refs/{to_fetch}/", 1)[1]
264+
line.split("refs/heads/", 1)[1]
250265
for line in stdout.decode().splitlines()
251-
if line.strip() and f"refs/{to_fetch}/" in line
266+
if line.strip() and "refs/heads/" in line
252267
]
253268

254269

Collapse file

‎src/server/query_processor.py‎

Copy file name to clipboardExpand all lines: src/server/query_processor.py
+12-3Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
from gitingest.ingestion import ingest_query
1010
from gitingest.query_parser import IngestionQuery, parse_query
1111
from gitingest.utils.git_utils import validate_github_token
12-
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse
13-
from server.server_config import MAX_DISPLAY_SIZE
12+
from server.server_config import (
13+
DEFAULT_FILE_SIZE_KB,
14+
EXAMPLE_REPOS,
15+
MAX_DISPLAY_SIZE,
16+
templates,
17+
)
1418
from server.server_utils import Colors, log_slider_to_size
1519

1620

@@ -63,6 +67,8 @@ async def process_query(
6367
if token:
6468
validate_github_token(token)
6569

70+
template = "index.jinja" if is_index else "git.jinja"
71+
template_response = partial(templates.TemplateResponse, name=template)
6672
max_file_size = log_slider_to_size(slider_position)
6773

6874
query: IngestionQuery | None = None
@@ -99,7 +105,10 @@ async def process_query(
99105
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
100106
print(f"{Colors.RED}{exc}{Colors.END}")
101107

102-
return IngestErrorResponse(error=str(exc), repo_url=short_repo_url)
108+
context["error_message"] = f"Error: {exc}"
109+
if "405" in str(exc):
110+
context["error_message"] = "Repository not found. Please make sure it is public."
111+
return template_response(context=context)
103112

104113
if len(content) > MAX_DISPLAY_SIZE:
105114
content = (
Collapse file

‎tests/test_clone.py‎

Copy file name to clipboardExpand all lines: tests/test_clone.py
+22-4Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,9 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None
9191
@pytest.mark.parametrize(
9292
("status_code", "expected"),
9393
[
94-
(HTTP_200_OK, True),
95-
(HTTP_401_UNAUTHORIZED, False),
96-
(HTTP_403_FORBIDDEN, False),
97-
(HTTP_404_NOT_FOUND, False),
94+
(b"200\n", 0, True), # Existing repo
95+
(b"404\n", 0, False), # Non-existing repo
96+
(b"200\n", 1, False), # Failed request
9897
],
9998
)
10099
async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None:
@@ -209,6 +208,25 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None:
209208
assert repo_exists is False
210209

211210

211+
@pytest.mark.asyncio
212+
async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None:
213+
"""Test ``check_repo_exists`` when a permanent redirect (301) is returned.
214+
215+
Given a URL that responds with "301 Moved Permanently":
216+
When ``check_repo_exists`` is called,
217+
Then it should return ``True``, indicating the repo may exist at the new location.
218+
"""
219+
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
220+
mock_process = AsyncMock()
221+
mock_process.communicate.return_value = (b"301\n", b"")
222+
mock_process.returncode = 0 # Simulate successful request
223+
mock_exec.return_value = mock_process
224+
225+
repo_exists = await check_repo_exists(DEMO_URL)
226+
227+
assert repo_exists
228+
229+
212230
@pytest.mark.asyncio
213231
async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None:
214232
"""Test cloning a repository when a timeout occurs.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.