From 537f99bafa5ab4e74a038b730a21c54619f2e58f Mon Sep 17 00:00:00 2001
From: Peter Simpson
Date: Tue, 23 Sep 2025 14:07:50 +0100
Subject: [PATCH] initial hackathon commit
---
code_utils/code_checker.py | 372 ++++++++++++++++++++++++++
code_utils/docker/build.bat | 34 ---
code_utils/docker/build.sh | 35 ---
code_utils/extract_code_from_files.py | 12 -
code_utils/main.py | 26 +-
5 files changed, 394 insertions(+), 85 deletions(-)
delete mode 100644 code_utils/docker/build.bat
delete mode 100644 code_utils/docker/build.sh
delete mode 100644 code_utils/extract_code_from_files.py
diff --git a/code_utils/code_checker.py b/code_utils/code_checker.py
index 46153a925..92e24433c 100644
--- a/code_utils/code_checker.py
+++ b/code_utils/code_checker.py
@@ -5,6 +5,7 @@
import os
import sys
+import subprocess
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import docker
@@ -342,4 +343,375 @@ def check_complete_snippets(self) -> Dict[str, Dict[str, any]]:
return results
+    def check_incomplete_snippets(self, limit: int = 10, languages: Optional[set] = None) -> Dict[str, Dict[str, any]]:
+        """
+        Analyze incomplete code snippets (missing imports or incomplete structure).
+
+        For every selected language the snippet files under
+        `<base_dir>/temp/<dir>/incomplete` are listed in sorted order and at most
+        `limit` of them are handed to `_analyze_incomplete_snippet`.
+
+        Args:
+            limit: Maximum number of files to analyze per language (default: 10).
+                Negative values are treated as 0.
+            languages: Set of programming languages to analyze. If None or empty,
+                all languages are analyzed. Unknown names are reported and ignored.
+                Valid options: {'python', 'typescript', 'csharp'}
+
+        Returns:
+            Dictionary with one entry per known language (languages that were not
+            selected keep an empty entry):
+            {
+                'python': {'count': int, 'files': List[str], 'issues': List[str]},
+                'typescript': {'count': int, 'files': List[str], 'issues': List[str]},
+                'csharp': {'count': int, 'files': List[str], 'issues': List[str]}
+            }
+            'count' is the number of files analyzed, not the number found. When the
+            WSL/cursor prerequisites are not met, every entry has count 0 and the
+            single issue 'Prerequisites not met'.
+        """
+        # A negative limit would make the slice below drop files from the END of
+        # the sorted list instead of capping the amount of work.
+        limit = max(limit, 0)
+
+        # Known languages and where their incomplete snippets live on disk.
+        all_languages = {
+            'python': {'dir': 'python', 'extension': '.py'},
+            'typescript': {'dir': 'javascript', 'extension': '.ts'},
+            'csharp': {'dir': 'csharp', 'extension': '.cs'}
+        }
+
+        print("š Analyzing incomplete code snippets...")
+        print(f"š Analysis limit: {limit} files per language")
+        if languages:
+            print(f"🎯 Target languages: {', '.join(sorted(languages))}")
+        else:
+            print("🎯 Target languages: all (python, typescript, csharp)")
+
+        # Every analysis goes through the cursor CLI inside WSL, so bail out early
+        # when that tool chain is unusable.
+        print("\n🔧 Checking prerequisites...")
+        if not self._check_wsl_cursor_prerequisites():
+            print("❌ Prerequisites not met. Cannot proceed with analysis.")
+            return {
+                lang: {'count': 0, 'files': [], 'issues': ['Prerequisites not met']}
+                for lang in all_languages
+            }
+
+        print("=" * 60)
+
+        results = {lang: {'count': 0, 'files': [], 'issues': []} for lang in all_languages}
+
+        if languages:
+            # Unknown names are only warned about; the valid ones are still analyzed.
+            invalid_languages = languages - set(all_languages)
+            if invalid_languages:
+                print(f"⚠️ Warning: Invalid language(s) specified: {', '.join(sorted(invalid_languages))}")
+                print(f" Valid options are: {', '.join(all_languages)}")
+            target_languages = {lang: info for lang, info in all_languages.items() if lang in languages}
+        else:
+            target_languages = all_languages
+
+        temp_dir = self.base_dir / "temp"
+
+        for lang_name, lang_info in target_languages.items():
+            incomplete_dir = temp_dir / lang_info['dir'] / 'incomplete'
+
+            print(f"š {lang_name.upper()} incomplete snippets:")
+
+            if not incomplete_dir.is_dir():
+                print(f" Directory not found: {incomplete_dir}")
+                print()  # Add spacing between languages
+                continue
+
+            # Sorted so repeated runs analyze the same files in the same order.
+            all_files = sorted(incomplete_dir.glob(f"*{lang_info['extension']}"))
+            if not all_files:
+                print(f" No incomplete {lang_name} files found")
+                print()
+                continue
+
+            files_to_analyze = all_files[:limit]
+            total_count = len(all_files)
+            analyzed_count = len(files_to_analyze)
+
+            results[lang_name]['count'] = analyzed_count
+            results[lang_name]['files'] = [f.name for f in files_to_analyze]
+
+            if total_count > analyzed_count:
+                print(f" Found {total_count} incomplete {lang_name} files (analyzing first {analyzed_count}):")
+            else:
+                print(f" Found {total_count} incomplete {lang_name} files:")
+
+            for file in files_to_analyze:
+                print(f" - {file.name}")
+                issues = self._analyze_incomplete_snippet(file, lang_name)
+                if issues:
+                    results[lang_name]['issues'].extend(issues)
+
+            if total_count > analyzed_count:
+                print(f" ... and {total_count - analyzed_count} more files (not analyzed due to limit)")
+
+            print()  # Add spacing between languages
+
+        # Summary covers only the languages that were actually selected.
+        analyzed_languages = list(target_languages)
+        total_analyzed = sum(results[lang]['count'] for lang in analyzed_languages)
+        print("=" * 60)
+        print("š Summary:")
+        print(f" Total incomplete snippets analyzed: {total_analyzed}")
+        for lang in analyzed_languages:
+            print(f" {lang.capitalize()}: {results[lang]['count']} files analyzed")
+
+        return results
+    def _check_wsl_cursor_prerequisites(self) -> bool:
+        """
+        Check that WSL is available, cursor CLI is installed, and user is signed in.
+
+        Runs four probe commands through `wsl`, in order, each with a 10 second
+        timeout, and stops at the first one that exits with a non-zero status.
+
+        Returns:
+            bool: True if all prerequisites are met, False otherwise (including
+            when a probe times out or the `wsl` executable cannot be started)
+        """
+        # Each entry: (progress line, command, success line, failure lines).
+        # "{out}" / "{err}" are replaced by the command's stripped stdout / stderr.
+        checks = [
+            (
+                " š Testing WSL availability...",
+                ["wsl", "echo", "WSL is working"],
+                " ✅ WSL is available and working",
+                [
+                    " ❌ WSL is not available or not working properly",
+                    " Error: {err}",
+                ],
+            ),
+            (
+                " š Checking cursor CLI installation...",
+                ["wsl", "cursor", "--version"],
+                " ✅ Cursor CLI is installed: {out}",
+                [
+                    " ❌ Cursor CLI is not installed in WSL",
+                    " Please install cursor CLI in your WSL environment",
+                    " You can install it with: curl -fsSL https://cursor.sh/install | sh",
+                ],
+            ),
+            (
+                " š Checking cursor chat command...",
+                ["wsl", "cursor", "chat", "--help"],
+                " ✅ Cursor chat command is available",
+                [
+                    " ❌ Cursor chat command not available",
+                    " Please ensure you have the latest version of cursor CLI",
+                ],
+            ),
+            (
+                " š Checking cursor authentication...",
+                ["wsl", "cursor", "auth", "status"],
+                " ✅ Signed in to cursor: {out}",
+                [
+                    " ❌ Not signed in to cursor",
+                    " Please sign in with: cursor auth login",
+                ],
+            ),
+        ]
+
+        try:
+            for progress, command, success, failure_lines in checks:
+                print(progress)
+                outcome = subprocess.run(
+                    command,
+                    capture_output=True,
+                    text=True,
+                    timeout=10
+                )
+                out = outcome.stdout.strip()
+                err = outcome.stderr.strip()
+
+                if outcome.returncode != 0:
+                    for line in failure_lines:
+                        print(line.format(out=out, err=err))
+                    return False
+                print(success.format(out=out, err=err))
+
+            print(" š All prerequisites met!")
+            return True
+
+        except subprocess.TimeoutExpired:
+            print(" ❌ Timeout while checking prerequisites")
+            return False
+        except FileNotFoundError:
+            # Raised when the `wsl` executable itself cannot be found.
+            print(" ❌ WSL command not found. Please ensure WSL is installed.")
+            return False
+        except Exception as e:
+            # Deliberately broad: this is a yes/no probe, so any unexpected
+            # failure is reported and treated as "not ready" instead of crashing.
+            print(f" ❌ Error checking prerequisites: {str(e)}")
+            return False
+
+
+    def _analyze_incomplete_snippet(self, file_path: Path, language: str) -> List[str]:
+        """
+        Ask the cursor CLI (inside WSL) for a static analysis of one snippet file.
+
+        The file is copied to /tmp inside WSL, its text is embedded in a prompt
+        passed to `cursor chat`, and the reply is printed. If the direct call
+        fails or prints nothing, the prompt is written to a question file in WSL
+        and the call is retried. Files created in WSL are removed afterwards on
+        every path, including read errors, timeouts and unexpected exceptions.
+
+        Args:
+            file_path: Path to the incomplete snippet file
+            language: Programming language (python, typescript, csharp)
+
+        Returns:
+            List of issue descriptions for this file. It is empty when the reply
+            contains neither "error" nor "issue" (case-insensitive); otherwise it
+            holds one entry describing what was flagged or which step failed.
+        """
+        # Translate e.g. C:\dir\file.py to /mnt/c/dir/file.py using the file's own
+        # drive letter; paths without a drive letter are passed through as-is.
+        resolved = file_path.resolve()
+        posix_path = resolved.as_posix()
+        drive = resolved.drive
+        if len(drive) == 2 and drive[1] == ':':
+            wsl_source_path = f"/mnt/{drive[0].lower()}{posix_path[2:]}"
+        else:
+            wsl_source_path = posix_path
+        wsl_temp_path = f"/tmp/{file_path.name}"
+        question_file = f"/tmp/question_{file_path.name}.txt"
+
+        # Track what may exist in WSL so the `finally` block removes exactly that.
+        staged = False
+        question_written = False
+
+        try:
+            # Step 1: Copy file to WSL temp directory. The flag is set first
+            # because a timed-out copy can still leave a partial file behind.
+            staged = True
+            copy_result = subprocess.run(
+                ["wsl", "cp", wsl_source_path, wsl_temp_path],
+                capture_output=True,
+                text=True,
+                timeout=10
+            )
+            if copy_result.returncode != 0:
+                staged = False
+                print(f" Copy Error: {copy_result.stderr.strip()}")
+                return [f"Failed to copy {file_path.name} to WSL"]
+            print(f" ✅ Copied {file_path.name} to WSL")
+
+            # Step 2: Read the file content and analyze with cursor CLI
+            try:
+                file_content = file_path.read_text(encoding='utf-8')
+            except (OSError, UnicodeDecodeError) as e:
+                print(f" ❌ Error reading file: {e}")
+                return [f"Failed to read {file_path.name}"]
+
+            analysis_question = (
+                f"IMPORTANT: DO NOT EXECUTE OR RUN THIS CODE. Please perform STATIC ANALYSIS ONLY.\n\n"
+                f"The following is an incomplete snippet of {language} code from {file_path.name}. "
+                f"Please analyze the code WITHOUT running it. As best you can infer, is the code "
+                f"syntactically correct and logically sound, bearing in mind that imports and some "
+                f"variable assignments may be missing? The code will be utilizing the appropriate "
+                f"codat sdk for the given extension @https://github.com/codatio\n\n"
+                f"Please provide STATIC ANALYSIS only - check for:\n"
+                f"- Syntax correctness\n"
+                f"- Logical flow\n"
+                f"- Potential issues\n"
+                f"- Missing imports that would be needed\n\n"
+                f"Code to analyze:\n```{language}\n{file_content}\n```"
+            )
+
+            # First attempt: hand the whole prompt to cursor as one argument.
+            analysis_result = subprocess.run(
+                ["wsl", "cursor", "chat", analysis_question],
+                capture_output=True,
+                text=True,
+                timeout=60  # Increased timeout for LLM response
+            )
+
+            # Second attempt: park the prompt in a file inside WSL.
+            if analysis_result.returncode != 0 or not analysis_result.stdout.strip():
+                print(f" š Basic chat failed, trying file-based approach...")
+
+                # The prompt is sent over stdin, so quotes, backticks or `$` in
+                # the snippet cannot break (or inject into) a shell command line.
+                question_written = True
+                write_result = subprocess.run(
+                    ["wsl", "tee", question_file],
+                    input=analysis_question,
+                    capture_output=True,
+                    text=True,
+                    encoding='utf-8',
+                    timeout=10
+                )
+
+                if write_result.returncode == 0:
+                    # NOTE(review): this only works if wsl.exe lets a shell expand
+                    # $(cat ...) before cursor sees the argument - confirm.
+                    analysis_result = subprocess.run(
+                        ["wsl", "cursor", "chat", f"$(cat {question_file})"],
+                        capture_output=True,
+                        text=True,
+                        timeout=60
+                    )
+
+            if analysis_result.returncode != 0:
+                print(f" ❌ Analysis Error: {analysis_result.stderr.strip()}")
+                print(f" š Debug: Return code: {analysis_result.returncode}")
+                return [f"Failed to analyze {file_path.name}"]
+
+            analysis_output = analysis_result.stdout.strip()
+            print(f" 🤖 Cursor Analysis:")
+            print(f" š Debug: Return code: {analysis_result.returncode}")
+            print(f" š Debug: Stdout length: {len(analysis_output)}")
+            print(f" š Debug: Stderr: '{analysis_result.stderr.strip()}'")
+
+            if not analysis_output:
+                print(f" ⚠️ No output received from cursor CLI")
+                print(f" š Command was: cursor chat '{analysis_question}'")
+                return [f"No analysis output for {file_path.name}"]
+
+            # Format the output for better readability
+            for line in analysis_output.split('\n'):
+                if line.strip():
+                    print(f" {line.strip()}")
+
+            # Keyword heuristic only: the reply is free text, and the prompt itself
+            # asks about "issues", so a match does not prove a real defect.
+            lowered = analysis_output.lower()
+            if "error" in lowered or "issue" in lowered:
+                return [f"Cursor identified potential issues in {file_path.name}"]
+            return []
+
+        except subprocess.TimeoutExpired:
+            print(f" CLI Timeout when processing {file_path.name}")
+            return [f"Timeout analyzing {file_path.name}"]
+        except Exception as e:
+            # Deliberately broad: one bad file must not abort the whole batch.
+            print(f" CLI Exception: {str(e)}")
+            return [f"Exception analyzing {file_path.name}: {str(e)}"]
+        finally:
+            # Step 3: Remove whatever was created in WSL. Best effort: a failed
+            # cleanup is reported but never replaces the analysis result.
+            leftovers = []
+            if staged:
+                leftovers.append(wsl_temp_path)
+            if question_written:
+                leftovers.append(question_file)
+            if leftovers:
+                try:
+                    rm_result = subprocess.run(
+                        ["wsl", "rm", "-f", *leftovers],
+                        capture_output=True,
+                        text=True,
+                        timeout=10
+                    )
+                    if rm_result.returncode == 0:
+                        print(f" 🗑️ Removed {file_path.name} from WSL")
+                    else:
+                        print(f" Remove Error: {rm_result.stderr.strip()}")
+                except (subprocess.TimeoutExpired, OSError) as cleanup_error:
+                    print(f" Remove Error: {cleanup_error}")
\ No newline at end of file
diff --git a/code_utils/docker/build.bat b/code_utils/docker/build.bat
deleted file mode 100644
index 583050e05..000000000
--- a/code_utils/docker/build.bat
+++ /dev/null
@@ -1,34 +0,0 @@
-@echo off
-REM Build script for the Code Snippets Docker container (Windows)
-REM This script builds a Docker image with Python, TypeScript, and .NET Core
-REM and copies all complete code snippets from the temp directory
-
-echo Building Code Snippets Docker container...
-echo This container includes:
-echo - Python 3.11
-echo - Node.js 18.x with TypeScript
-echo - .NET 8.0 SDK
-echo - Complete code snippets organized by language
-echo.
-
-REM Build the Docker image from the code_utils directory
-cd ..
-docker build -f docker/Dockerfile -t code-snippets:latest .
-
-if %ERRORLEVEL% EQU 0 (
- echo.
- echo ✅ Docker image built successfully!
- echo.
- echo To run the container:
- echo docker run -it code-snippets:latest
- echo.
- echo To run with volume mount for development:
- echo docker run -it -v %cd%:/host code-snippets:latest
- echo.
- echo To inspect the code snippets:
- echo docker run -it code-snippets:latest find /workspace/code-snippets -name "*.py" -o -name "*.ts" -o -name "*.cs"
-) else (
- echo.
- echo ❌ Docker build failed!
- exit /b 1
-)
diff --git a/code_utils/docker/build.sh b/code_utils/docker/build.sh
deleted file mode 100644
index 74065394e..000000000
--- a/code_utils/docker/build.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-# Build script for the Code Snippets Docker container
-# This script builds a Docker image with Python, TypeScript, and .NET Core
-# and copies all complete code snippets from the temp directory
-
-echo "Building Code Snippets Docker container..."
-echo "This container includes:"
-echo " - Python 3.11"
-echo " - Node.js 18.x with TypeScript"
-echo " - .NET 8.0 SDK"
-echo " - Complete code snippets organized by language"
-echo ""
-
-# Build the Docker image from the code_utils directory
-cd ..
-docker build -f docker/Dockerfile -t code-snippets:latest .
-
-if [ $? -eq 0 ]; then
- echo ""
- echo "✅ Docker image built successfully!"
- echo ""
- echo "To run the container:"
- echo " docker run -it code-snippets:latest"
- echo ""
- echo "To run with volume mount for development:"
- echo " docker run -it -v \$(pwd):/host code-snippets:latest"
- echo ""
- echo "To inspect the code snippets:"
- echo " docker run -it code-snippets:latest find /workspace/code-snippets -name '*.py' -o -name '*.ts' -o -name '*.cs'"
-else
- echo ""
- echo "❌ Docker build failed!"
- exit 1
-fi
diff --git a/code_utils/extract_code_from_files.py b/code_utils/extract_code_from_files.py
deleted file mode 100644
index d3904e104..000000000
--- a/code_utils/extract_code_from_files.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to walk through the 'docs' directory and find markdown files with code snippets.
-Looks for code blocks with specific programming languages: python, javascript, csharp, go.
-Saves matching file paths to files_with_code.txt.
-"""
-from code_finder import CodeFinder
-
-if __name__ == "__main__":
- finder = CodeFinder()
- finder.find_files_with_code()
- finder.extract_code()
diff --git a/code_utils/main.py b/code_utils/main.py
index 6b6a00def..64d0ef53b 100644
--- a/code_utils/main.py
+++ b/code_utils/main.py
@@ -17,10 +17,7 @@ def cli() -> None:
@click.option('--languages', '-l',
multiple=True,
help='Programming languages to extract (can be specified multiple times)')
-@click.option('--remove', '-x',
- multiple=True,
- help='Programming languages to remove (can be specified multiple times)')
-def extract(languages: Tuple[str, ...], exclude: Tuple[str, ...]) -> None:
+def extract(languages: Tuple[str, ...]) -> None:
"""Extract code snippets from markdown files in the docs directory."""
# Convert languages tuple to set if provided, otherwise use defaults
@@ -44,5 +41,26 @@ def check() -> None:
click.echo(result)
+@cli.command()
+@click.option('--limit', '-l',
+              default=10,
+              type=click.IntRange(min=1),
+              help='Maximum number of files to analyze per language (default: 10)')
+@click.option('--languages', '-lang',
+              multiple=True,
+              help='Programming languages to analyze (can be specified multiple times). Available: python, typescript, csharp')
+def check_incomplete(limit: int, languages: Tuple[str, ...]) -> None:
+    """
+    Check and validate incomplete code snippets.
+    Analyzes snippets that are missing imports or have incomplete structure.
+    We will be using an LLM to do this, which is expensive so we will limit the number of files to analyze.
+    """
+    # An empty tuple means no --languages option was given: analyze everything.
+    target_languages = set(languages) if languages else None
+
+    # The checker prints its own progress and summary, so the returned
+    # dictionary is intentionally not echoed here.
+    checker = CodeChecker()
+    checker.check_incomplete_snippets(limit=limit, languages=target_languages)
+
+
if __name__ == '__main__':
cli()