From 537f99bafa5ab4e74a038b730a21c54619f2e58f Mon Sep 17 00:00:00 2001
From: Peter Simpson <p.simpson@codat.io>
Date: Tue, 23 Sep 2025 14:07:50 +0100
Subject: [PATCH] initial hackathon commit

---
 code_utils/code_checker.py            | 372 ++++++++++++++++++++++++++
 code_utils/docker/build.bat           |  34 ---
 code_utils/docker/build.sh            |  35 ---
 code_utils/extract_code_from_files.py |  12 -
 code_utils/main.py                    |  26 +-
 5 files changed, 394 insertions(+), 85 deletions(-)
 delete mode 100644 code_utils/docker/build.bat
 delete mode 100644 code_utils/docker/build.sh
 delete mode 100644 code_utils/extract_code_from_files.py

diff --git a/code_utils/code_checker.py b/code_utils/code_checker.py
index 46153a925..92e24433c 100644
--- a/code_utils/code_checker.py
+++ b/code_utils/code_checker.py
@@ -5,6 +5,7 @@
 
 import os
 import sys
+import subprocess
 from pathlib import Path
 from typing import Dict, List, Tuple, Optional
 import docker
@@ -342,4 +343,375 @@ def check_complete_snippets(self) -> Dict[str, Dict[str, any]]:
         
         return results
 
+    def check_incomplete_snippets(self, limit: int = 10, languages: Optional[set] = None) -> Dict[str, Dict[str, any]]:
+        """
+        Check and analyze incomplete code snippets that are missing imports or have incomplete structure.
+
+        Args:
+            limit: Maximum number of files to analyze per language (default: 10); negative values count as 0.
+            languages: Programming languages to analyze (any collection of names). If None or empty, analyzes all.
+                      Valid options: {'python', 'typescript', 'csharp'}; unknown names are reported and skipped.
+
+        Returns:
+            Dictionary with analysis results for each language (all three keys are always present):
+            {
+                'python': {'count': int, 'files': List[str], 'issues': List[str]},
+                'typescript': {'count': int, 'files': List[str], 'issues': List[str]},
+                'csharp': {'count': int, 'files': List[str], 'issues': List[str]}
+            }
+        """
+        limit = max(limit, 0)  # a negative limit would make the slice below drop files from the end
+        print("🔍 Analyzing incomplete code snippets...")
+        print(f"📋 Analysis limit: {limit} files per language")
+        if languages:
+            print(f"🎯 Target languages: {', '.join(sorted(languages))}")
+        else:
+            print("🎯 Target languages: all (python, typescript, csharp)")
+
+        # Check WSL and Cursor CLI prerequisites
+        print("\n🔧 Checking prerequisites...")
+        wsl_ready = self._check_wsl_cursor_prerequisites()
+        if not wsl_ready:
+            print("❌ Prerequisites not met. Cannot proceed with analysis.")
+            return {
+                lang: {'count': 0, 'files': [], 'issues': ['Prerequisites not met']}
+                for lang in ('python', 'typescript', 'csharp')
+            }
+
+        print("=" * 60)
+
+        results = {
+            'python': {'count': 0, 'files': [], 'issues': []},
+            'typescript': {'count': 0, 'files': [], 'issues': []},
+            'csharp': {'count': 0, 'files': [], 'issues': []}
+        }
+
+        # Define the languages and their corresponding directories/extensions
+        all_languages = {
+            'python': {'dir': 'python', 'extension': '.py'},
+            'typescript': {'dir': 'javascript', 'extension': '.ts'},
+            'csharp': {'dir': 'csharp', 'extension': '.cs'}
+        }
+
+        # Filter languages based on user selection
+        if languages:
+            # Validate language names (set() so that lists and tuples are accepted too)
+            invalid_languages = set(languages) - set(all_languages)
+            if invalid_languages:
+                print(f"⚠️  Warning: Invalid language(s) specified: {', '.join(sorted(invalid_languages))}")
+                print(f"   Valid options are: {', '.join(all_languages.keys())}")
+
+            # Filter to only requested languages
+            target_languages = {lang: info for lang, info in all_languages.items() if lang in languages}
+        else:
+            target_languages = all_languages
+
+        temp_dir = self.base_dir / "temp"
+
+        for lang_name, lang_info in target_languages.items():
+            incomplete_dir = temp_dir / lang_info['dir'] / 'incomplete'
+
+            print(f"📁 {lang_name.upper()} incomplete snippets:")
+
+            if incomplete_dir.exists() and incomplete_dir.is_dir():
+                # Get all files with the appropriate extension
+                all_files = list(incomplete_dir.glob(f"*{lang_info['extension']}"))
+                all_files.sort()  # Sort for consistent output
+
+                # Apply limit to files to analyze
+                files_to_analyze = all_files[:limit]
+
+                total_count = len(all_files)
+                analyzed_count = len(files_to_analyze)
+
+                results[lang_name]['count'] = analyzed_count
+                results[lang_name]['files'] = [f.name for f in files_to_analyze]
+
+                if all_files:
+                    if total_count > limit:
+                        print(f"   Found {total_count} incomplete {lang_name} files (analyzing first {analyzed_count}):")
+                    else:
+                        print(f"   Found {total_count} incomplete {lang_name} files:")
+
+                    # Analyze each file within the limit
+                    for file in files_to_analyze:
+                        print(f"   - {file.name}")
+                        # Perform actual analysis of the incomplete snippet
+                        issues = self._analyze_incomplete_snippet(file, lang_name)
+                        if issues:
+                            results[lang_name]['issues'].extend(issues)
+
+                    if total_count > limit:
+                        print(f"   ... and {total_count - limit} more files (not analyzed due to limit)")
+                else:
+                    print(f"   No incomplete {lang_name} files found")
+            else:
+                print(f"   Directory not found: {incomplete_dir}")
+
+            print()  # Add spacing between languages
+
+        # Summary
+        analyzed_languages = list(target_languages.keys())
+        total_analyzed = sum(results[lang]['count'] for lang in analyzed_languages)
+        print("=" * 60)
+        print("📊 Summary:")
+        print(f"   Total incomplete snippets analyzed: {total_analyzed}")
+        for lang in analyzed_languages:
+            count = results[lang]['count']
+            print(f"   {lang.capitalize()}: {count} files analyzed")
+
+        return results
 
+    def _check_wsl_cursor_prerequisites(self) -> bool:
+        """
+        Verify the external tooling that the incomplete-snippet analysis depends on.
+
+        Four probes run in order, each as a `wsl ...` subprocess with a 10 second
+        timeout; the first probe that exits non-zero ends the check with False:
+          1. `wsl echo ...`            - WSL itself can run a command
+          2. `wsl cursor --version`    - the cursor CLI is installed inside WSL
+          3. `wsl cursor chat --help`  - the `chat` subcommand is available
+          4. `wsl cursor auth status`  - the user is signed in to cursor
+
+        Progress and remediation hints are printed as the probes run. A timeout, a
+        missing `wsl` executable or any other exception is printed and yields False,
+        so no Exception-derived error is raised to the caller.
+
+        Returns:
+            bool: True if all prerequisites are met, False otherwise
+        """
+        # Local helper so that the four probes below differ only in their arguments.
+        def run_in_wsl(*command: str) -> subprocess.CompletedProcess:
+            # Every probe shares the same settings: captured text output, 10 second timeout.
+            return subprocess.run(
+                ["wsl", *command],
+                capture_output=True,
+                text=True,
+                timeout=10
+            )
+
+        try:
+            # Probe 1: WSL must be able to run a trivial command
+            # (stderr is shown on failure)
+            print("   🔍 Testing WSL availability...")
+            wsl_probe = run_in_wsl("echo", "WSL is working")
+
+            if wsl_probe.returncode != 0:
+                print("   ❌ WSL is not available or not working properly")
+                print(f"      Error: {wsl_probe.stderr.strip()}")
+                return False
+            print("   ✅ WSL is available and working")
+
+            # Probe 2: the cursor CLI must be installed inside WSL
+            # (stdout of `--version` is echoed back as the installed version)
+            print("   🔍 Checking cursor CLI installation...")
+            version_probe = run_in_wsl("cursor", "--version")
+
+            if version_probe.returncode != 0:
+                print("   ❌ Cursor CLI is not installed in WSL")
+                print("      Please install cursor CLI in your WSL environment")
+                print("      You can install it with: curl -fsSL https://cursor.sh/install | sh")
+                return False
+            cli_version = version_probe.stdout.strip()
+            print(f"   ✅ Cursor CLI is installed: {cli_version}")
+
+            # Probe 3: the chat subcommand must exist
+            print("   🔍 Checking cursor chat command...")
+            chat_probe = run_in_wsl("cursor", "chat", "--help")
+
+            if chat_probe.returncode != 0:
+                print("   ❌ Cursor chat command not available")
+                print("      Please ensure you have the latest version of cursor CLI")
+                return False
+            print("   ✅ Cursor chat command is available")
+
+            # Probe 4: the user must be signed in
+            # (stdout of `auth status` is echoed back on success)
+            print("   🔍 Checking cursor authentication...")
+            auth_probe = run_in_wsl("cursor", "auth", "status")
+
+            if auth_probe.returncode != 0:
+                print("   ❌ Not signed in to cursor")
+                print("      Please sign in with: cursor auth login")
+                return False
+            auth_status = auth_probe.stdout.strip()
+            print(f"   ✅ Signed in to cursor: {auth_status}")
+
+            print("   🎉 All prerequisites met!")
+            return True
+
+        except subprocess.TimeoutExpired:
+            print("   ❌ Timeout while checking prerequisites")
+            return False
+        except FileNotFoundError:
+            print("   ❌ WSL command not found. Please ensure WSL is installed.")
+            return False
+        except Exception as exc:
+            print(f"   ❌ Error checking prerequisites: {exc!s}")
+            return False
+
+    def _analyze_incomplete_snippet(self, file_path: Path, language: str) -> List[str]:
+        """
+        Analyze a single incomplete code snippet file to identify issues.
+
+        The snippet is copied into WSL's /tmp, its text is embedded in a static-analysis
+        prompt and sent to `cursor chat` inside WSL. If that call exits non-zero or prints
+        nothing, the prompt is written to a file in WSL and passed via `$(cat ...)` instead.
+        Cursor's answer is printed; the snippet is flagged when the answer contains the
+        words "error" or "issue". The WSL copy is removed again on every exit path.
+
+        Args:
+            file_path: Path to the incomplete snippet file
+            language: Programming language (python, typescript, csharp)
+
+        Returns:
+            List of issues found in the snippet: empty when cursor's answer mentions no
+            error/issue, otherwise one message naming the flagged file or the step that
+            failed (copy, read, analysis, timeout, unexpected exception).
+
+        Note:
+            Only meaningful on Windows with WSL: the path mapping expects a drive-letter
+            path and every external command is launched through `wsl`.
+        """
+        import shlex  # function-scope import: only this method needs shell quoting
+
+        wsl_temp_path = f"/tmp/{file_path.name}"
+        cleanup_needed = False
+        try:
+            # Convert the Windows path to its WSL mount point (C:\dir\x.py -> /mnt/c/dir/x.py).
+            # The drive letter is read from the absolute path instead of being assumed to be C:.
+            windows_path = os.path.abspath(file_path).replace('\\', '/')
+            if windows_path[1:2] != ':':
+                # UNC and POSIX-style paths have no /mnt/<drive> equivalent
+                print(f"     Copy Error: cannot map {file_path} to a WSL path")
+                return [f"Failed to copy {file_path.name} to WSL"]
+            wsl_windows_path = f"/mnt/{windows_path[0].lower()}{windows_path[2:]}"
+
+            # Step 1: Copy file to WSL temp directory
+            cleanup_needed = True  # from here on a (possibly partial) copy may exist in WSL
+            copy_result = subprocess.run(
+                ["wsl", "cp", wsl_windows_path, wsl_temp_path],
+                capture_output=True,
+                text=True,
+                timeout=10
+            )
+            if copy_result.returncode != 0:
+                cleanup_needed = False
+                print(f"     Copy Error: {copy_result.stderr.strip()}")
+                return [f"Failed to copy {file_path.name} to WSL"]
+            print(f"     ✅ Copied {file_path.name} to WSL")
+
+            # Step 2: Read the file content and analyze with cursor CLI
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    file_content = f.read()
+            except (OSError, UnicodeDecodeError) as e:
+                print(f"     ❌ Error reading file: {e}")
+                return [f"Failed to read {file_path.name}"]
+
+            analysis_question = (
+                f"IMPORTANT: DO NOT EXECUTE OR RUN THIS CODE. Please perform STATIC ANALYSIS ONLY.\n\n"
+                f"The following is an incomplete snippet of {language} code from {file_path.name}. "
+                f"Please analyze the code WITHOUT running it. As best you can infer, is the code "
+                f"syntactically correct and logically sound, bearing in mind that imports and some "
+                f"variable assignments may be missing? The code will be utilizing the appropriate "
+                f"codat sdk for the given extension @https://github.com/codatio\n\n"
+                f"Please provide STATIC ANALYSIS only - check for:\n"
+                f"- Syntax correctness\n"
+                f"- Logical flow\n"
+                f"- Potential issues\n"
+                f"- Missing imports that would be needed\n\n"
+                f"Code to analyze:\n```{language}\n{file_content}\n```"
+            )
+
+            # First attempt: pass the prompt directly as the chat argument
+            analysis_result = subprocess.run(
+                ["wsl", "cursor", "chat", analysis_question],
+                capture_output=True,
+                encoding="utf-8",  # decode UTF-8 explicitly instead of using the Windows locale codec
+                errors="replace",
+                timeout=60  # Increased timeout for LLM response
+            )
+
+            # If that doesn't work, try using a temp file approach
+            if analysis_result.returncode != 0 or not analysis_result.stdout.strip():
+                print(f"     🔍 Basic chat failed, trying file-based approach...")
+                question_file = f"/tmp/question_{file_path.name}.txt"
+                quoted_question_file = shlex.quote(question_file)
+                try:
+                    # Stream the prompt through stdin: interpolating it into `echo '...'` broke
+                    # (and could inject shell commands) whenever the snippet contained a quote.
+                    write_result = subprocess.run(
+                        ["wsl", "bash", "-c", f"cat > {quoted_question_file}"],
+                        input=analysis_question,
+                        capture_output=True,
+                        encoding="utf-8",
+                        timeout=10
+                    )
+                    if write_result.returncode == 0:
+                        # NOTE(review): relies on wsl.exe handing the argument to a shell that
+                        # expands $(cat ...) - confirm, or use a file/stdin option if cursor has one.
+                        analysis_result = subprocess.run(
+                            ["wsl", "cursor", "chat", f"$(cat {quoted_question_file})"],
+                            capture_output=True,
+                            encoding="utf-8",
+                            errors="replace",
+                            timeout=60
+                        )
+                finally:
+                    # Clean up question file, also when the chat call above timed out
+                    subprocess.run(["wsl", "rm", question_file], capture_output=True, timeout=5)
+
+            if analysis_result.returncode == 0:
+                analysis_output = analysis_result.stdout.strip()
+                print(f"     🤖 Cursor Analysis:")
+                print(f"     🔍 Debug: Return code: {analysis_result.returncode}")
+                print(f"     🔍 Debug: Stdout length: {len(analysis_output)}")
+                print(f"     🔍 Debug: Stderr: '{analysis_result.stderr.strip()}'")
+
+                if analysis_output:
+                    # Format the output for better readability
+                    for line in analysis_output.split('\n'):
+                        if line.strip():
+                            print(f"         {line.strip()}")
+
+                    # Extract potential issues for return value (plain keyword heuristic)
+                    # NOTE(review): the prompt itself says "Potential issues", so answers may echo the word
+                    if "error" in analysis_output.lower() or "issue" in analysis_output.lower():
+                        issues_found = [f"Cursor identified potential issues in {file_path.name}"]
+                    else:
+                        issues_found = []
+                else:
+                    print(f"         ⚠️  No output received from cursor CLI")
+                    print(f"         🔍 Command was: cursor chat <analysis prompt for {file_path.name}>")
+                    issues_found = [f"No analysis output for {file_path.name}"]
+            else:
+                print(f"     ❌ Analysis Error: {analysis_result.stderr.strip()}")
+                print(f"     🔍 Debug: Return code: {analysis_result.returncode}")
+                issues_found = [f"Failed to analyze {file_path.name}"]
+
+            return issues_found
+
+        except subprocess.TimeoutExpired:
+            print(f"     CLI Timeout when processing {file_path.name}")
+            return [f"Timeout analyzing {file_path.name}"]
+        except Exception as e:
+            print(f"     CLI Exception: {str(e)}")
+            return [f"Exception analyzing {file_path.name}: {str(e)}"]
+        finally:
+            # Step 3: Remove the file from WSL. Runs on every exit path and is best effort,
+            # so a failed cleanup never replaces the analysis result.
+            if cleanup_needed:
+                try:
+                    rm_result = subprocess.run(
+                        ["wsl", "rm", wsl_temp_path],
+                        capture_output=True,
+                        text=True,
+                        timeout=10
+                    )
+                    if rm_result.returncode == 0:
+                        print(f"     🗑️  Removed {file_path.name} from WSL")
+                    else:
+                        print(f"     Remove Error: {rm_result.stderr.strip()}")
+                except (subprocess.SubprocessError, OSError) as cleanup_error:
+                    print(f"     Remove Error: {cleanup_error}")
\ No newline at end of file
diff --git a/code_utils/docker/build.bat b/code_utils/docker/build.bat
deleted file mode 100644
index 583050e05..000000000
--- a/code_utils/docker/build.bat
+++ /dev/null
@@ -1,34 +0,0 @@
-@echo off
-REM Build script for the Code Snippets Docker container (Windows)
-REM This script builds a Docker image with Python, TypeScript, and .NET Core
-REM and copies all complete code snippets from the temp directory
-
-echo Building Code Snippets Docker container...
-echo This container includes:
-echo   - Python 3.11
-echo   - Node.js 18.x with TypeScript
-echo   - .NET 8.0 SDK
-echo   - Complete code snippets organized by language
-echo.
-
-REM Build the Docker image from the code_utils directory
-cd ..
-docker build -f docker/Dockerfile -t code-snippets:latest .
-
-if %ERRORLEVEL% EQU 0 (
-    echo.
-    echo ✅ Docker image built successfully!
-    echo.
-    echo To run the container:
-    echo   docker run -it code-snippets:latest
-    echo.
-    echo To run with volume mount for development:
-    echo   docker run -it -v %cd%:/host code-snippets:latest
-    echo.
-    echo To inspect the code snippets:
-    echo   docker run -it code-snippets:latest find /workspace/code-snippets -name "*.py" -o -name "*.ts" -o -name "*.cs"
-) else (
-    echo.
-    echo ❌ Docker build failed!
-    exit /b 1
-)
diff --git a/code_utils/docker/build.sh b/code_utils/docker/build.sh
deleted file mode 100644
index 74065394e..000000000
--- a/code_utils/docker/build.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-# Build script for the Code Snippets Docker container
-# This script builds a Docker image with Python, TypeScript, and .NET Core
-# and copies all complete code snippets from the temp directory
-
-echo "Building Code Snippets Docker container..."
-echo "This container includes:"
-echo "  - Python 3.11"
-echo "  - Node.js 18.x with TypeScript"
-echo "  - .NET 8.0 SDK"
-echo "  - Complete code snippets organized by language"
-echo ""
-
-# Build the Docker image from the code_utils directory
-cd ..
-docker build -f docker/Dockerfile -t code-snippets:latest .
-
-if [ $? -eq 0 ]; then
-    echo ""
-    echo "✅ Docker image built successfully!"
-    echo ""
-    echo "To run the container:"
-    echo "  docker run -it code-snippets:latest"
-    echo ""
-    echo "To run with volume mount for development:"
-    echo "  docker run -it -v \$(pwd):/host code-snippets:latest"
-    echo ""
-    echo "To inspect the code snippets:"
-    echo "  docker run -it code-snippets:latest find /workspace/code-snippets -name '*.py' -o -name '*.ts' -o -name '*.cs'"
-else
-    echo ""
-    echo "❌ Docker build failed!"
-    exit 1
-fi
diff --git a/code_utils/extract_code_from_files.py b/code_utils/extract_code_from_files.py
deleted file mode 100644
index d3904e104..000000000
--- a/code_utils/extract_code_from_files.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to walk through the 'docs' directory and find markdown files with code snippets.
-Looks for code blocks with specific programming languages: python, javascript, csharp, go.
-Saves matching file paths to files_with_code.txt.
-"""
-from code_finder import CodeFinder
-
-if __name__ == "__main__":
-    finder = CodeFinder()
-    finder.find_files_with_code()
-    finder.extract_code()
diff --git a/code_utils/main.py b/code_utils/main.py
index 6b6a00def..64d0ef53b 100644
--- a/code_utils/main.py
+++ b/code_utils/main.py
@@ -17,10 +17,7 @@ def cli() -> None:
 @click.option('--languages', '-l', 
               multiple=True, 
               help='Programming languages to extract (can be specified multiple times)')
-@click.option('--remove', '-x',
-              multiple=True,
-              help='Programming languages to remove (can be specified multiple times)')
-def extract(languages: Tuple[str, ...], exclude: Tuple[str, ...]) -> None:
+def extract(languages: Tuple[str, ...]) -> None:
     """Extract code snippets from markdown files in the docs directory."""
     
     # Convert languages tuple to set if provided, otherwise use defaults
@@ -44,5 +41,26 @@ def check() -> None:
     click.echo(result)
 
 
+@cli.command()
+@click.option('--limit', '-l',
+              default=10,
+              type=click.IntRange(min=0),
+              help='Maximum number of files to analyze per language (default: 10)')
+@click.option('--languages', '-lang',
+              multiple=True,
+              help='Programming languages to analyze (can be specified multiple times). Available: python, typescript, csharp')
+def check_incomplete(limit: int, languages: Tuple[str, ...]) -> None:
+    """
+    Check and validate incomplete code snippets.
+    Analyzes snippets that are missing imports or have incomplete structure.
+    We will be using an LLM to do this, which is expensive so we will limit the number of files to analyze.
+    """
+    # Convert languages tuple to set if provided, otherwise use all languages
+    target_languages = set(languages) if languages else None
+    # The checker prints its own report, so the returned dict is not echoed here.
+    checker = CodeChecker()
+    checker.check_incomplete_snippets(limit=limit, languages=target_languages)
+
+
 if __name__ == '__main__':
     cli()