#!/usr/bin/env python

import os
import sys
import grp
import fire
import json
import tqdm
import shutil
import tarfile
import logging
import zipfile
import tempfile
import requests

from pathlib import Path
from gitbug.project import Project
from gitbugactions.docker.client import DockerClient
from gitbug.bug import Bug
from gitbug.bug import get_project_root
from typing import Optional


class GitBugJavaCli(object):
    """GitBug-Java CLI"""

    def __init__(self, verbose: bool = False):
        """
        Configure logging and load every project found under ``data/bugs``.

        Each ``<pid>.json`` file is read as JSON Lines: every non-blank line is
        parsed and wrapped in a ``Bug`` that is added to the ``Project`` named
        after the file stem.

        Args:
            verbose: log at DEBUG level when true, ERROR level otherwise.
        """
        self.__init_logging(verbose)
        self.__projects = {}

        # Load the GitBug-Java dataset
        for project_file in Path(get_project_root(), "data", "bugs").glob("*.json"):
            pid = project_file.stem
            project = Project(pid)
            with project_file.open("r") as f:
                for line in f:
                    # Tolerate blank lines (e.g. a trailing newline at EOF)
                    if not line.strip():
                        continue
                    project.add_bug(Bug(json.loads(line)))
            self.__projects[pid] = project

        # Sort the projects by pid
        self.__projects = dict(sorted(self.__projects.items(), key=lambda x: x[0]))

    def __init_logging(self, verbose: bool = False):
        """Set up root logging: DEBUG when verbose, ERROR otherwise."""
        level = logging.DEBUG if verbose else logging.ERROR
        logging.basicConfig(format="[%(asctime)s] %(message)s", level=level)

    def __get_bug(self, bid: str):
        """
        Resolve a bug id to its bug object.

        The project id is everything before the last "-" in ``bid``.

        Raises:
            ValueError: if ``bid`` has no "-", the project id is unknown, or
                the project has no bug with that id.
        """
        # Split bid on the last occurrence of "-"
        pid, sep, _ = bid.rpartition("-")
        if not sep:
            raise ValueError(f"Invalid bug id {bid}: expected <project id>-<suffix>")

        # Check the pid and bid exist
        if pid not in self.__projects:
            raise ValueError(f"Unknown project id {pid}")

        bug = self.__projects[pid].get_bug(bid)
        if bug is None:
            raise ValueError(f"Unknown bug id {bid} for project {pid}")
        return bug

    def __setup_base_image(self):
        """
        Build the local ``gitbug-java:base`` Docker image if it is missing.

        The image derives from ``nunosaavedra/gitbug-actions:setup`` and remaps
        the ``runner`` user to the uid/gid of the current host user.
        """
        base_image = "nunosaavedra/gitbug-actions:setup"
        runner_image = "gitbug-java:base"

        client = DockerClient.getInstance()
        # Return if image already exists
        if len(client.images.list(name=runner_image)) > 0:
            return

        gid = os.getgid()
        dockerfile = "".join(
            [
                f"FROM {base_image}\n",
                # HACK: We set runneradmin to an arbitrarily large uid to avoid
                # conflicts with the host's
                "RUN sudo usermod -u 4000000 runneradmin\n",
                f"RUN sudo groupadd -o -g {gid} {grp.getgrgid(gid).gr_name}\n",
                f"RUN sudo usermod -G {gid} runner\n",
                f"RUN sudo usermod -o -u {os.getuid()} runner\n",
            ]
        )

        tmp_dir = tempfile.mkdtemp()
        try:
            with Path(tmp_dir, "Dockerfile").open("w") as f:
                f.write(dockerfile)
            client.images.build(path=tmp_dir, tag=runner_image, forcerm=True)
        finally:
            # Always drop the build context, even when the build fails
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def __download(self, url: str, filename: str):
        """
        Stream ``url`` into ``filename`` while showing a progress bar.

        The payload is written to a ``.part`` sibling and only moved onto
        ``filename`` once the transfer completes, so a failed or interrupted
        download never leaves a truncated file that a later existence check
        would mistake for a finished one.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        partial = f"{filename}.part"
        try:
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                total = int(r.headers.get("content-length", 0))

                # tqdm has many interesting parameters. Feel free to experiment!
                tqdm_params = {
                    "desc": url,
                    "total": total,
                    "miniters": 1,
                    "unit": "B",
                    "unit_scale": True,
                    "unit_divisor": 1024,
                }
                with open(partial, "wb") as f, tqdm.tqdm(**tqdm_params) as pb:
                    for chunk in r.iter_content(chunk_size=8192):
                        pb.update(len(chunk))
                        f.write(chunk)
            os.replace(partial, filename)
        finally:
            # After a successful replace the partial file is gone; anything
            # still here is the residue of a failed transfer.
            if os.path.exists(partial):
                os.remove(partial)

    def __setup_exports(self, name: str, download_url: str):
        """
        Download ``<name>.tar.gz`` into ``data/`` (if absent), unpack it and
        merge its contents into ``data/``; the archive and the intermediate
        extraction directory are removed afterwards.
        """
        data_dir = Path(get_project_root(), "data")
        gitbug = Path(data_dir, f"{name}.tar.gz")
        if not gitbug.exists():
            self.__download(download_url, gitbug)

        # Extract the dataset
        source = os.path.join(data_dir, f"{name}")
        # NOTE(review): extractall is called without an extraction filter, so
        # the archive is fully trusted. Consider a tarfile extraction filter
        # once it is confirmed that it does not reject any archive member.
        with tarfile.open(gitbug) as tar:
            tar.extractall(source)
        shutil.copytree(source, data_dir, dirs_exist_ok=True)
        shutil.rmtree(source)
        os.remove(gitbug)

    def __setup_act_cache(self, name: str, download_url: str):
        """
        Download ``<name>.zip`` into the project root (if absent), extract it
        into the ``<name>`` directory there, then delete the zip.
        """
        # Download the zip
        zip_path = Path(get_project_root(), f"{name}.zip")
        if not zip_path.exists():
            self.__download(download_url, zip_path)

        # Extract the zip
        cache_path = Path(get_project_root(), name)
        cache_path.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(cache_path)
        os.remove(zip_path)

    def pids(self):
        """
        List all available project ids in GitBug-Java
        """
        for project in self.__projects:
            print(project)

    def bids(self, pid: Optional[str] = None):
        """
        List all available bug ids in GitBug-Java (for a given project id if specified)
        """
        for ppid, project in self.__projects.items():
            if pid is None or ppid == pid:
                for bug in project.get_bugs():
                    print(bug.bid)

    def info(self, bid: str):
        """
        Get information about a specific bug in GitBug-Java

        Raises ValueError if the bug id is malformed or unknown.
        """
        print(self.__get_bug(bid).info())

    def checkout(self, bid: str, workdir: str, fixed: bool = False):
        """
        Checkout a specific bug in GitBug-Java

        The workdir is created if needed and must be empty. Raises ValueError
        if the bug id is malformed or unknown, or if the workdir is not empty.
        """
        bug = self.__get_bug(bid)

        # Create the workdir if it does not exist
        os.makedirs(workdir, exist_ok=True)

        # Make sure the workdir is empty
        if os.listdir(workdir):
            raise ValueError(f"Workdir {workdir} is not empty")

        # Checkout the bug
        bug.checkout(workdir, fixed=fixed)

    def run(
        self,
        workdir: str,
        output: str = "report",
        act_cache_dir: Optional[str] = f"{get_project_root()}/act-cache",
        timeout: int = 0,
    ) -> bool:
        """
        Run the bug checked-out in workdir

        The bug is rebuilt from the ``gitbug.json`` file found in workdir; the
        output directory is created if it does not exist.
        """
        os.makedirs(output, exist_ok=True)

        # Read the bug info from the workdir
        with Path(workdir, "gitbug.json").open("r") as f:
            bug_info = json.load(f)
            bug = Bug(bug_info)

        # Run the bug
        return bug.run(workdir, output, act_cache_dir=act_cache_dir, timeout=timeout)

    def setup(self):
        """
        Setup the GitBug-Java dataset

        Builds the base Docker image, then downloads and unpacks the two
        offline-environment archives and the act cache.
        """
        os.makedirs(os.path.join(get_project_root(), "data"), exist_ok=True)

        self.__setup_base_image()

        # TODO: check if exports are already downloaded
        self.__setup_exports(
            "gitbug-java_offline_environments_1",
            "https://zenodo.org/records/10578602/files/gitbug-java_offline_environments_1.tar.gz?download=1",
        )
        self.__setup_exports(
            "gitbug-java_offline_environments_2",
            "https://zenodo.org/records/10578617/files/gitbug-java_offline_environments_2.tar.gz?download=1",
        )
        self.__setup_act_cache(
            "act-cache",
            "https://zenodo.org/records/10592626/files/act-cache.zip?download=1",
        )


def main():
    """Expose GitBugJavaCli on the command line through Python Fire."""
    fire.Fire(GitBugJavaCli)


if __name__ == "__main__":
    sys.exit(main())