From 537f99bafa5ab4e74a038b730a21c54619f2e58f Mon Sep 17 00:00:00 2001 From: Peter Simpson Date: Tue, 23 Sep 2025 14:07:50 +0100 Subject: [PATCH] initial hackathon commit --- code_utils/code_checker.py | 372 ++++++++++++++++++++++++++ code_utils/docker/build.bat | 34 --- code_utils/docker/build.sh | 35 --- code_utils/extract_code_from_files.py | 12 - code_utils/main.py | 26 +- 5 files changed, 394 insertions(+), 85 deletions(-) delete mode 100644 code_utils/docker/build.bat delete mode 100644 code_utils/docker/build.sh delete mode 100644 code_utils/extract_code_from_files.py diff --git a/code_utils/code_checker.py b/code_utils/code_checker.py index 46153a925..92e24433c 100644 --- a/code_utils/code_checker.py +++ b/code_utils/code_checker.py @@ -5,6 +5,7 @@ import os import sys +import subprocess from pathlib import Path from typing import Dict, List, Tuple, Optional import docker @@ -342,4 +343,375 @@ def check_complete_snippets(self) -> Dict[str, Dict[str, any]]: return results + def check_incomplete_snippets(self, limit: int = 10, languages: Optional[set] = None) -> Dict[str, Dict[str, any]]: + """ + Check and analyze incomplete code snippets that are missing imports or have incomplete structure. + + Args: + limit: Maximum number of files to analyze per language (default: 10) + languages: Set of programming languages to analyze. If None, analyzes all languages. + Valid options: {'python', 'typescript', 'csharp'} + + Returns: + Dictionary with analysis results for each language: + { + 'python': {'count': int, 'files': List[str], 'issues': List[str]}, + 'typescript': {'count': int, 'files': List[str], 'issues': List[str]}, + 'csharp': {'count': int, 'files': List[str], 'issues': List[str]} + } + """ + print("šŸ” Analyzing incomplete code snippets...") + print(f"šŸ“‹ Analysis limit: {limit} files per language") + if languages: + print(f"šŸŽÆ Target languages: {', '.join(sorted(languages))}") + else: + print("šŸŽÆ Target languages: all (python, typescript, csharp)") + + # Check WSL and Cursor CLI prerequisites + print("\nšŸ”§ Checking prerequisites...") + wsl_ready = self._check_wsl_cursor_prerequisites() + if not wsl_ready: + print("āŒ Prerequisites not met. Cannot proceed with analysis.") + return { + 'python': {'count': 0, 'files': [], 'issues': ['Prerequisites not met']}, + 'typescript': {'count': 0, 'files': [], 'issues': ['Prerequisites not met']}, + 'csharp': {'count': 0, 'files': [], 'issues': ['Prerequisites not met']} + } + + print("=" * 60) + + results = { + 'python': {'count': 0, 'files': [], 'issues': []}, + 'typescript': {'count': 0, 'files': [], 'issues': []}, + 'csharp': {'count': 0, 'files': [], 'issues': []} + } + + # Define the languages and their corresponding directories/extensions + all_languages = { + 'python': {'dir': 'python', 'extension': '.py'}, + 'typescript': {'dir': 'javascript', 'extension': '.ts'}, + 'csharp': {'dir': 'csharp', 'extension': '.cs'} + } + + # Filter languages based on user selection + if languages: + # Validate language names + invalid_languages = languages - set(all_languages.keys()) + if invalid_languages: + print(f"āš ļø Warning: Invalid language(s) specified: {', '.join(invalid_languages)}") + print(f" Valid options are: {', '.join(all_languages.keys())}") + + # Filter to only requested languages + target_languages = {lang: info for lang, info in all_languages.items() if lang in languages} + else: + target_languages = all_languages + + temp_dir = self.base_dir / "temp" + + for lang_name, lang_info in target_languages.items(): + incomplete_dir = temp_dir / lang_info['dir'] / 'incomplete' + + print(f"šŸ“ {lang_name.upper()} incomplete snippets:") + + if incomplete_dir.exists() and incomplete_dir.is_dir(): + # Get all files with the appropriate extension + all_files = list(incomplete_dir.glob(f"*{lang_info['extension']}")) + all_files.sort() # Sort for consistent output + + # Apply limit to files to analyze + files_to_analyze = all_files[:limit] + + total_count = len(all_files) + analyzed_count = len(files_to_analyze) + + results[lang_name]['count'] = analyzed_count + results[lang_name]['files'] = [f.name for f in files_to_analyze] + + if all_files: + if total_count > limit: + print(f" Found {total_count} incomplete {lang_name} files (analyzing first {analyzed_count}):") + else: + print(f" Found {total_count} incomplete {lang_name} files:") + + # Analyze each file within the limit + for file in files_to_analyze: + print(f" - {file.name}") + # Perform actual analysis of the incomplete snippet + issues = self._analyze_incomplete_snippet(file, lang_name) + if issues: + results[lang_name]['issues'].extend(issues) + + if total_count > limit: + print(f" ... and {total_count - limit} more files (not analyzed due to limit)") + else: + print(f" No incomplete {lang_name} files found") + else: + print(f" Directory not found: {incomplete_dir}") + + print() # Add spacing between languages + + # Summary + analyzed_languages = list(target_languages.keys()) + total_analyzed = sum(results[lang]['count'] for lang in analyzed_languages) + print("=" * 60) + print("šŸ“Š Summary:") + print(f" Total incomplete snippets analyzed: {total_analyzed}") + for lang in analyzed_languages: + count = results[lang]['count'] + print(f" {lang.capitalize()}: {count} files analyzed") + + return results + def _check_wsl_cursor_prerequisites(self) -> bool: + """ + Check that WSL is available, cursor CLI is installed, and user is signed in. + + Returns: + bool: True if all prerequisites are met, False otherwise + """ + try: + # Step 1: Check WSL is available + print(" šŸ” Testing WSL availability...") + wsl_test = subprocess.run( + ["wsl", "echo", "WSL is working"], + capture_output=True, + text=True, + timeout=10 + ) + + if wsl_test.returncode != 0: + print(" āŒ WSL is not available or not working properly") + print(f" Error: {wsl_test.stderr.strip()}") + return False + else: + print(" āœ… WSL is available and working") + + # Step 2: Check cursor CLI is installed + print(" šŸ” Checking cursor CLI installation...") + cursor_check = subprocess.run( + ["wsl", "cursor", "--version"], + capture_output=True, + text=True, + timeout=10 + ) + + if cursor_check.returncode != 0: + print(" āŒ Cursor CLI is not installed in WSL") + print(" Please install cursor CLI in your WSL environment") + print(" You can install it with: curl -fsSL https://cursor.sh/install | sh") + return False + else: + version = cursor_check.stdout.strip() + print(f" āœ… Cursor CLI is installed: {version}") + + # Step 2b: Check cursor chat command is available + print(" šŸ” Checking cursor chat command...") + chat_check = subprocess.run( + ["wsl", "cursor", "chat", "--help"], + capture_output=True, + text=True, + timeout=10 + ) + + if chat_check.returncode != 0: + print(" āŒ Cursor chat command not available") + print(" Please ensure you have the latest version of cursor CLI") + return False + else: + print(" āœ… Cursor chat command is available") + + # Step 3: Check if user is signed in + print(" šŸ” Checking cursor authentication...") + auth_check = subprocess.run( + ["wsl", "cursor", "auth", "status"], + capture_output=True, + text=True, + timeout=10 + ) + + if auth_check.returncode != 0: + print(" āŒ Not signed in to cursor") + print(" Please sign in with: cursor auth login") + return False + else: + auth_info = auth_check.stdout.strip() + print(f" āœ… Signed in to cursor: {auth_info}") + + print(" šŸŽ‰ All prerequisites met!") + return True + + except subprocess.TimeoutExpired: + print(" āŒ Timeout while checking prerequisites") + return False + except FileNotFoundError: + print(" āŒ WSL command not found. Please ensure WSL is installed.") + return False + except Exception as e: + print(f" āŒ Error checking prerequisites: {str(e)}") + return False + + def _analyze_incomplete_snippet(self, file_path: Path, language: str) -> List[str]: + """ + Analyze a single incomplete code snippet file to identify issues. + + Args: + file_path: Path to the incomplete snippet file + language: Programming language (python, typescript, csharp) + + Returns: + List of issues found in the snippet (e.g., missing imports, syntax errors, etc.) + """ + # Copy file to WSL, echo name, then remove it + try: + # Convert Windows path to WSL-accessible path + windows_path = str(file_path).replace('\\', '/') + wsl_windows_path = f"/mnt/c{windows_path[2:]}" # Convert C:\... to /mnt/c/... + wsl_temp_path = f"/tmp/{file_path.name}" + + # Step 1: Copy file to WSL temp directory + copy_result = subprocess.run( + ["wsl", "cp", wsl_windows_path, wsl_temp_path], + capture_output=True, + text=True, + timeout=10 + ) + + if copy_result.returncode == 0: + print(f" āœ… Copied {file_path.name} to WSL") + + # Step 2: Read the file content and analyze with cursor CLI + try: + with open(file_path, 'r', encoding='utf-8') as f: + file_content = f.read() + except Exception as e: + print(f" āŒ Error reading file: {e}") + return [f"Failed to read {file_path.name}"] + + analysis_question = ( + f"IMPORTANT: DO NOT EXECUTE OR RUN THIS CODE. Please perform STATIC ANALYSIS ONLY.\n\n" + f"The following is an incomplete snippet of {language} code from {file_path.name}. " + f"Please analyze the code WITHOUT running it. As best you can infer, is the code " + f"syntactically correct and logically sound, bearing in mind that imports and some " + f"variable assignments may be missing? The code will be utilizing the appropriate " + f"codat sdk for the given extension @https://github.com/codatio\n\n" + f"Please provide STATIC ANALYSIS only - check for:\n" + f"- Syntax correctness\n" + f"- Logical flow\n" + f"- Potential issues\n" + f"- Missing imports that would be needed\n\n" + f"Code to analyze:\n```{language}\n{file_content}\n```" + ) + + # Try cursor CLI without the problematic flags first + print(f" šŸ” Testing cursor CLI command structure...") + + # Test 1: Try basic cursor chat command + test_result = subprocess.run( + ["wsl", "cursor", "chat", "--help"], + capture_output=True, + text=True, + timeout=10 + ) + print(f" šŸ” Help command result: {test_result.returncode}") + if test_result.stdout: + print(f" šŸ” Available options: {test_result.stdout[:200]}...") + + # Try the analysis with just basic chat command + analysis_result = subprocess.run( + ["wsl", "cursor", "chat", analysis_question], + capture_output=True, + text=True, + timeout=60 # Increased timeout for LLM response + ) + + # If that doesn't work, try using a temp file approach + if analysis_result.returncode != 0 or not analysis_result.stdout.strip(): + print(f" šŸ” Basic chat failed, trying file-based approach...") + + # Write question to a temp file in WSL + question_file = f"/tmp/question_{file_path.name}.txt" + + # Create the question file + write_result = subprocess.run( + ["wsl", "bash", "-c", f"echo '{analysis_question}' > {question_file}"], + capture_output=True, + text=True, + timeout=10 + ) + + if write_result.returncode == 0: + # Try cursor with file input (if such option exists) + analysis_result = subprocess.run( + ["wsl", "cursor", "chat", f"$(cat {question_file})"], + capture_output=True, + text=True, + timeout=60 + ) + + # Clean up question file + subprocess.run(["wsl", "rm", question_file], capture_output=True, timeout=5) + + if analysis_result.returncode == 0: + analysis_output = analysis_result.stdout.strip() + print(f" šŸ¤– Cursor Analysis:") + print(f" šŸ” Debug: Return code: {analysis_result.returncode}") + print(f" šŸ” Debug: Stdout length: {len(analysis_output)}") + print(f" šŸ” Debug: Stderr: '{analysis_result.stderr.strip()}'") + + if analysis_output: + # Format the output for better readability + for line in analysis_output.split('\n'): + if line.strip(): + print(f" {line.strip()}") + + # Extract potential issues for return value + if "error" in analysis_output.lower() or "issue" in analysis_output.lower(): + issues_found = [f"Cursor identified potential issues in {file_path.name}"] + else: + issues_found = [] + else: + print(f" āš ļø No output received from cursor CLI") + print(f" šŸ” Command was: cursor chat -p --output-format text '{analysis_question}'") + issues_found = [f"No analysis output for {file_path.name}"] + else: + print(f" āŒ Analysis Error: {analysis_result.stderr.strip()}") + print(f" šŸ” Debug: Return code: {analysis_result.returncode}") + issues_found = [f"Failed to analyze {file_path.name}"] + + # Step 3: Remove the file from WSL + rm_result = subprocess.run( + ["wsl", "rm", wsl_temp_path], + capture_output=True, + text=True, + timeout=10 + ) + + if rm_result.returncode == 0: + print(f" šŸ—‘ļø Removed {file_path.name} from WSL") + return issues_found + else: + print(f" Remove Error: {rm_result.stderr.strip()}") + return issues_found # Still return analysis results even if cleanup fails + + else: + print(f" Copy Error: {copy_result.stderr.strip()}") + return [f"Failed to copy {file_path.name} to WSL"] + + except subprocess.TimeoutExpired: + print(f" CLI Timeout when processing {file_path.name}") + # Attempt cleanup on timeout + try: + subprocess.run(["wsl", "rm", f"/tmp/{file_path.name}"], + capture_output=True, timeout=5) + except: + pass # Ignore cleanup errors + return [f"Timeout analyzing {file_path.name}"] + except Exception as e: + print(f" CLI Exception: {str(e)}") + # Attempt cleanup on exception + try: + subprocess.run(["wsl", "rm", f"/tmp/{file_path.name}"], + capture_output=True, timeout=5) + except: + pass # Ignore cleanup errors + return [f"Exception analyzing {file_path.name}: {str(e)}"] \ No newline at end of file diff --git a/code_utils/docker/build.bat b/code_utils/docker/build.bat deleted file mode 100644 index 583050e05..000000000 --- a/code_utils/docker/build.bat +++ /dev/null @@ -1,34 +0,0 @@ -@echo off -REM Build script for the Code Snippets Docker container (Windows) -REM This script builds a Docker image with Python, TypeScript, and .NET Core -REM and copies all complete code snippets from the temp directory - -echo Building Code Snippets Docker container... -echo This container includes: -echo - Python 3.11 -echo - Node.js 18.x with TypeScript -echo - .NET 8.0 SDK -echo - Complete code snippets organized by language -echo. - -REM Build the Docker image from the code_utils directory -cd .. -docker build -f docker/Dockerfile -t code-snippets:latest . - -if %ERRORLEVEL% EQU 0 ( - echo. - echo āœ… Docker image built successfully! - echo. - echo To run the container: - echo docker run -it code-snippets:latest - echo. - echo To run with volume mount for development: - echo docker run -it -v %cd%:/host code-snippets:latest - echo. - echo To inspect the code snippets: - echo docker run -it code-snippets:latest find /workspace/code-snippets -name "*.py" -o -name "*.ts" -o -name "*.cs" -) else ( - echo. - echo āŒ Docker build failed! - exit /b 1 -) diff --git a/code_utils/docker/build.sh b/code_utils/docker/build.sh deleted file mode 100644 index 74065394e..000000000 --- a/code_utils/docker/build.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -# Build script for the Code Snippets Docker container -# This script builds a Docker image with Python, TypeScript, and .NET Core -# and copies all complete code snippets from the temp directory - -echo "Building Code Snippets Docker container..." -echo "This container includes:" -echo " - Python 3.11" -echo " - Node.js 18.x with TypeScript" -echo " - .NET 8.0 SDK" -echo " - Complete code snippets organized by language" -echo "" - -# Build the Docker image from the code_utils directory -cd .. -docker build -f docker/Dockerfile -t code-snippets:latest . - -if [ $? -eq 0 ]; then - echo "" - echo "āœ… Docker image built successfully!" - echo "" - echo "To run the container:" - echo " docker run -it code-snippets:latest" - echo "" - echo "To run with volume mount for development:" - echo " docker run -it -v \$(pwd):/host code-snippets:latest" - echo "" - echo "To inspect the code snippets:" - echo " docker run -it code-snippets:latest find /workspace/code-snippets -name '*.py' -o -name '*.ts' -o -name '*.cs'" -else - echo "" - echo "āŒ Docker build failed!" - exit 1 -fi diff --git a/code_utils/extract_code_from_files.py b/code_utils/extract_code_from_files.py deleted file mode 100644 index d3904e104..000000000 --- a/code_utils/extract_code_from_files.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to walk through the 'docs' directory and find markdown files with code snippets. -Looks for code blocks with specific programming languages: python, javascript, csharp, go. -Saves matching file paths to files_with_code.txt. -""" -from code_finder import CodeFinder - -if __name__ == "__main__": - finder = CodeFinder() - finder.find_files_with_code() - finder.extract_code() diff --git a/code_utils/main.py b/code_utils/main.py index 6b6a00def..64d0ef53b 100644 --- a/code_utils/main.py +++ b/code_utils/main.py @@ -17,10 +17,7 @@ def cli() -> None: @click.option('--languages', '-l', multiple=True, help='Programming languages to extract (can be specified multiple times)') -@click.option('--remove', '-x', - multiple=True, - help='Programming languages to remove (can be specified multiple times)') -def extract(languages: Tuple[str, ...], exclude: Tuple[str, ...]) -> None: +def extract(languages: Tuple[str, ...]) -> None: """Extract code snippets from markdown files in the docs directory.""" # Convert languages tuple to set if provided, otherwise use defaults @@ -44,5 +41,26 @@ def check() -> None: click.echo(result) +@cli.command() +@click.option('--limit', '-l', + default=10, + type=int, + help='Maximum number of files to analyze per language (default: 10)') +@click.option('--languages', '-lang', + multiple=True, + help='Programming languages to analyze (can be specified multiple times). Available: python, typescript, csharp') +def check_incomplete(limit: int, languages: Tuple[str, ...]) -> None: + """ + Check and validate incomplete code snippets. + Analyzes snippets that are missing imports or have incomplete structure. + We will be using an LLM to do this, which is expensive so we will limit the number of files to analyze. + """ + # Convert languages tuple to set if provided, otherwise use all languages + target_languages = set(languages) if languages else None + + checker = CodeChecker() + result = checker.check_incomplete_snippets(limit=limit, languages=target_languages) + + if __name__ == '__main__': cli()