diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..1f487de --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: techgaun diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..98b90f0 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,13 @@ +### Please include all of the following fields when adding dorks/patterns +- Search URL: https://github.com/search?q= +- Number of search results at time of PR: +- Impact of data disclosed (see table below): +- Description of data disclosed: + +| Icon/Name | Description | Examples | +|-----------|---------------------------------------------------------------------------------------------------------|----------------------------------------------------------------| +❓ Unknown | The impact of this data is highly variable or unknown | N/A | +➖ Low | This data will provide minimal access or mostly public information | Non-stored XSS, Limited scope + read-only API access | +➕ Moderate | This data will provide some access or information | Stored XSS in some cases, read-only or limited write API access| +⚠️ High | This data will provide single-user access or secret information | Usernames/passwords, OAuth tokens | +❗️ Critical | This data will provide complete control, access to several users, or confidential/personal information | Credential database dumps, AWS keys diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..8476c21 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,34 @@ +name: Docker Build & Test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + uses: 
docker/build-push-action@v5 + with: + context: . + load: true + tags: github-dorks:test + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Test Docker image + run: | + # Smoke-test the image by running it with the version flag + docker run github-dorks:test -v + + - name: Verify image size + run: docker image ls github-dorks:test diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0d8ecb9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# Use Python 3.8 as base - this version has good compatibility with older packages +FROM python:3.8-slim + +# Set working directory +WORKDIR /app + +# Install git (needed for pip install from git repos) +RUN apt-get update && \ + apt-get install -y git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Copy only the necessary files +COPY github-dork.py /app/ +COPY github-dorks.txt /app/ +COPY setup.py /app/ +COPY README.md /app/ +COPY requirements.txt /app/ + +# Install dependencies +# Using the specific version of github3.py that's known to work +RUN pip install --no-cache-dir github3.py==1.0.0a2 feedparser==6.0.2 + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONIOENCODING=UTF-8 + +# Create volume for potential output files +VOLUME ["/app/output"] + +ENTRYPOINT ["python", "github-dork.py"] \ No newline at end of file diff --git a/README.md b/README.md index 5d1647c..eb36e4f 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,79 @@ +[![Docker Build & Test](https://github.com/techgaun/github-dorks/actions/workflows/docker-build.yml/badge.svg)](https://github.com/techgaun/github-dorks/actions/workflows/docker-build.yml) + # Github Dorks -[Github search](https://github.com/search) is quite powerful and useful feature and can be used to search sensitive data on the repositories. Collection of github dorks that can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. 
This list is supposed to be useful for assessing security and performing pen-testing of systems. -### GitHub Dork Search Tool -[github-dork.py](github-dork.py) is a simple python tool that can search through your repository or your organization/user repositories. Its not a perfect tool at the moment but provides a basic functionality to automate the search on your repositories against the dorks specified in text file. +[Github Search](https://github.com/search) is a quite powerful and useful feature that can be used to search for sensitive data on repositories. Collection of Github dorks can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. This list is supposed to be useful for assessing security and performing pen-testing of systems. + +## GitHub Dork Search Tool + +[github-dork.py](github-dork.py) is a simple python tool that can search through your repository or your organization/user repositories. It's not a perfect tool at the moment but provides basic functionality to automate the search on your repositories against the dorks specified in the text file. + +### Installation -#### Installation This tool uses [github3.py](https://github.com/sigmavirus24/github3.py) to talk with GitHub Search API. Clone this repository and run: + +```shell +pip install . +``` + +### Docker Installation + +You can also run github-dorks using Docker for a consistent environment: + ```shell -pip install -r requirements.txt +# Build the Docker image +docker build -t github-dorks . 
+ +# Run with a GitHub token (recommended) +docker run -e GH_TOKEN=your_github_token github-dorks -u someuser + +# Run with username/password +docker run -e GH_USER=your_username -e GH_PWD=your_password github-dorks -u someuser + +# Save results to a CSV file +docker run -v $(pwd)/output:/app/output -e GH_TOKEN=your_github_token github-dorks -u someuser -o /app/output/results.csv ``` -#### Usage +### Usage + ``` -GH_USER - Environment variable to specify github user -GH_PWD - Environment variable to specify password -GH_TOKEN - Environment variable to specify github token +GH_USER - Environment variable to specify Github user +GH_PWD - Environment variable to specify a password +GH_TOKEN - Environment variable to specify Github token GH_URL - Environment variable to specify GitHub Enterprise base URL ``` Some example usages are listed below: ```shell -python github-dork.py -r techgaun/github-dorks # search single repo +github-dork.py -r techgaun/github-dorks # search a single repo -python github-dork.py -u techgaun # search all repos of user +github-dork.py -u techgaun # search all repos of a user -python github-dork.py -u dev-nepal # search all repos of an organization +github-dork.py -u dev-nepal # search all repos of an organization -GH_USER=techgaun GH_PWD= python github-dork.py -u dev-nepal # search as authenticated user +GH_USER=techgaun GH_PWD= github-dork.py -u dev-nepal # search as authenticated user -GH_TOKEN= python github-dork.py -u dev-nepal # search using auth token +GH_TOKEN= github-dork.py -u dev-nepal # search using auth token -GH_URL=https://github.example.com python github-dork.py -u dev-nepal # search a GitHub Enterprise instance +GH_URL=https://github.example.com github-dork.py -u dev-nepal # search a GitHub Enterprise instance ``` -#### Limitations +### Limitations - Authenticated requests get a higher rate limit. But, since this tool waits for the api rate limit to be reset (which is usually less than a minute), it can be slightly slow. 
- Output formatting is not great. PR welcome - ~~Handle rate limit and retry. PR welcome~~ ### Contribution -Please consider contributing the dorks that can reveal potentially sensitive information in github. + +Please consider contributing dorks that can reveal potentially sensitive information on Github. ### List of Dorks -I am not categorizing at the moment. Instead I am going to just the list of dorks with a description. Many of the dorks can be modified to make the search more specific or generic. You can see more options [here](https://github.com/search#search_cheatsheet_pane). + +I am not categorizing at the moment. Instead, I am just going to list the dorks with a description. Many of the dorks can be modified to make the search more specific or generic. You can see more options [here](https://github.com/search#search_cheatsheet_pane). Dork | Description ------------------------------------------------|-------------------------------------------------------------------------- @@ -73,8 +100,8 @@ extension:json api.forecast.io | try variations, find api keys/ extension:json mongolab.com | mongolab credentials in json configs extension:yaml mongolab.com | mongolab credentials in yaml configs (try with yml) jsforce extension:js conn.login | possible salesforce credentials in nodejs projects -SF_USERNAME "salesforce" | possible salesforce credentials -filename:.tugboat NOT "_tugboat" | Digital Ocean tugboat config +SF_USERNAME salesforce | possible salesforce credentials +filename:.tugboat NOT _tugboat | Digital Ocean tugboat config HEROKU_API_KEY language:shell | Heroku api keys HEROKU_API_KEY language:json | Heroku api keys in json files filename:.netrc password | netrc that possibly holds sensitive credentials @@ -100,8 +127,37 @@ filename:.history | history file (often used by ma filename:.sh_history | korn shell history filename:sshd_config | OpenSSH server config filename:dhcpd.conf | DHCP service config -filename:prod.exs NOT "prod.secret.exs" | 
Phoenix prod configuration file +filename:prod.exs NOT prod.secret.exs | Phoenix prod configuration file filename:prod.secret.exs | Phoenix prod secret filename:configuration.php JConfig password | Joomla configuration file filename:config.php dbpasswd | PHP application database password (e.g., phpBB forum software) path:sites databases password | Drupal website database credentials +shodan_api_key language:python | Shodan API keys (try other languages too) +filename:shadow path:etc | Contains encrypted passwords and account information of new unix systems +filename:passwd path:etc | Contains user account information including encrypted passwords of traditional unix systems +extension:avastlic "support.avast.com" | Contains license keys for Avast! Antivirus +filename:dbeaver-data-sources.xml | DBeaver config containing MySQL Credentials +filename:.esmtprc password | esmtp configuration +extension:json googleusercontent client_secret | OAuth credentials for accessing Google APIs +HOMEBREW_GITHUB_API_TOKEN language:shell | Github token usually set by homebrew users +xoxp OR xoxb | Slack bot and private tokens +.mlab.com password | MLAB Hosted MongoDB Credentials +filename:logins.json | Firefox saved password collection (key3.db usually in same repo) +filename:CCCam.cfg | CCCam Server config file +msg nickserv identify filename:config | Possible IRC login passwords +filename:settings.py SECRET_KEY | Django secret keys (usually allows for session hijacking, RCE, etc) +filename:secrets.yml password | Usernames/passwords, Rails applications +filename:master.key path:config | Rails master key (used for decrypting `credentials.yml.enc` for Rails 5.2+) +filename:deployment-config.json | Created by sftp-deployment for Atom, contains server details and credentials +filename:.ftpconfig | Created by remote-ssh for Atom, contains SFTP/SSH server details and credentials +filename:.remote-sync.json | Created by remote-sync for Atom, contains FTP and/or SCP/SFTP/SSH server details 
and credentials +filename:sftp.json path:.vscode | Created by vscode-sftp for VSCode, contains SFTP/SSH server details and credentials +filename:sftp-config.json | Created by SFTP for Sublime Text, contains FTP/FTPS or SFTP/SSH server details and credentials +filename:WebServers.xml | Created by Jetbrains IDEs, contains webserver credentials with encoded passwords ([not encrypted!](https://intellij-support.jetbrains.com/hc/en-us/community/posts/207074025/comments/207034775)) +"api_hash" "api_id" | Telegram API token +"https://hooks.slack.com/services/" | Slack service URLs often have a secret API token as a suffix +filename:github-recovery-codes.txt | GitHub recovery key +filename:gitlab-recovery-codes.txt | GitLab recovery key +filename:discord_backup_codes.txt | Discord recovery key +extension:yaml cloud.redislabs.com | Redis credentials provided by Redis Labs found in a YAML file +extension:json cloud.redislabs.com | Redis credentials provided by Redis Labs found in a JSON file diff --git a/github-dork.py b/github-dork.py index 9e2e83f..713cbc1 100644 --- a/github-dork.py +++ b/github-dork.py @@ -1,14 +1,13 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- - import github3 as github import os import argparse import time +import feedparser from copy import copy -from sys import stderr - +from sys import stderr, prefix gh_user = os.getenv('GH_USER', None) gh_pass = os.getenv('GH_PWD', None) @@ -18,7 +17,9 @@ if gh_url is None: gh = github.GitHub(username=gh_user, password=gh_pass, token=gh_token) else: - gh = github.GitHubEnterprise(url=gh_url, username=gh_user, password=gh_pass, token=gh_token) + gh = github.GitHubEnterprise( + url=gh_url, username=gh_user, password=gh_pass, token=gh_token) + def search_wrapper(gen): while True: @@ -26,27 +27,88 @@ def search_wrapper(gen): try: yield next(gen) except StopIteration: - raise + return except github.exceptions.ForbiddenError as e: search_rate_limit = gh.rate_limit()['resources']['search'] - limit_remaining = 
search_rate_limit['remaining'] + # limit_remaining = search_rate_limit['remaining'] reset_time = search_rate_limit['reset'] current_time = int(time.time()) sleep_time = reset_time - current_time + 1 - stderr.write('GitHub Search API rate limit reached. Sleeping for %d seconds.\n\n' %(sleep_time)) + stderr.write( + 'GitHub Search API rate limit reached. Sleeping for %d seconds.\n\n' + % (sleep_time)) time.sleep(sleep_time) yield next(gen_back) except Exception as e: raise e -def search(repo_to_search=None, user_to_search=None, gh_dorks_file=None): + +def metasearch(repo_to_search=None, + user_to_search=None, + gh_dorks_file=None, + active_monit=None, + output_filename=None, + refresh_time=60): + if active_monit is None: + search(repo_to_search, user_to_search, gh_dorks_file, active_monit, output_filename) + else: + monit(gh_dorks_file, active_monit, refresh_time) + + +def monit(gh_dorks_file=None, active_monit=None, refresh_time=60): + if gh_user is None: + raise Exception('Error, env Github user variable needed') + else: + print( + 'Monitoring user private feed searching new code to be dorked.' + + 'Every new merged pull request trigger user scan.' 
+ ) + print('-----') + items_history = list() + gh_private_feed = "https://github.com/{}.private.atom?token={}".format( + gh_user, active_monit) + while True: + feed = feedparser.parse(gh_private_feed) + for i in feed['items']: + if 'merged pull' in i['title']: + if i['title'] not in items_history: + search( + user_to_search=i['author_detail']['name'], + gh_dorks_file=gh_dorks_file) + items_history.append(i['title']) + print('Waiting for new items...') + time.sleep(refresh_time) + + +def search(repo_to_search=None, + user_to_search=None, + gh_dorks_file=None, + active_monit=None, + output_filename=None): + if gh_dorks_file is None: - gh_dorks_file = 'github-dorks.txt' + for path_prefix in ['.', os.path.join(prefix, 'github-dorks/')]: + filename = os.path.join(path_prefix, 'github-dorks.txt') + if os.path.isfile(filename): + gh_dorks_file = filename + break + if not os.path.isfile(gh_dorks_file): raise Exception('Error, the dorks file path is not valid') - + if user_to_search: + print("Scanning User: ", user_to_search) + if repo_to_search: + print("Scanning Repo: ", repo_to_search) found = False + + outputFile = None + if output_filename: + outputFile = open(output_filename, 'w') + with open(gh_dorks_file, 'r') as dork_file: + # Write CSV Header + if outputFile: + outputFile.write('Issue Type (Dork), Text Matches, File Path, Score/Relevance, URL of File\n') for dork in dork_file: dork = dork.strip() if not dork or dork[0] in '#;': @@ -69,15 +131,18 @@ def search(repo_to_search=None, user_to_search=None, gh_dorks_file=None): 'score': search_result.score, 'url': search_result.html_url } - result = '\n'.join([ - 'Found result for {dork}', - 'Text matches: {text_matches}', - 'File path: {path}', - 'Score/Relevance: {score}', - 'URL of File: {url}', - '' - ]).format(**fmt_args) - print(result) + + # Either write to file or print output + if outputFile: + outputFile.write('{dork}, {text_matches}, {path}, {score}, {url}\n'.format(**fmt_args)) + else: + result = '\n'.join([ 
+ 'Found result for {dork}', + 'Text matches: {text_matches}', 'File path: {path}', + 'Score/Relevance: {score}', 'URL of File: {url}', '' + ]).format(**fmt_args) + print(result) + except github.exceptions.GitHubError as e: print('GitHubError encountered on search of dork: ' + dork) print(e) @@ -93,15 +158,10 @@ def search(repo_to_search=None, user_to_search=None, gh_dorks_file=None): def main(): parser = argparse.ArgumentParser( description='Search github for github dorks', - epilog='Use responsibly, Enjoy pentesting' - ) + epilog='Use responsibly, Enjoy pentesting') parser.add_argument( - '-v', - '--version', - action='version', - version='%(prog)s 0.1.0' - ) + '-v', '--version', action='version', version='%(prog)s 0.1.1') group = parser.add_mutually_exclusive_group(required=True) group.add_argument( @@ -109,31 +169,46 @@ def main(): '--user', dest='user_to_search', action='store', - help='Github user/org to search within. Eg: techgaun' - ) + help='Github user/org to search within. Eg: techgaun') group.add_argument( '-r', '--repo', dest='repo_to_search', action='store', - help='Github repo to search within. Eg: techgaun/github-dorks' - ) + help='Github repo to search within. Eg: techgaun/github-dorks') parser.add_argument( '-d', '--dork', dest='gh_dorks_file', action='store', - help='Github dorks file. Eg: github-dorks.txt' + help='Github dorks file. Eg: github-dorks.txt') + + group.add_argument( + '-m', + '--monit', + dest='active_monit', + action='store', + help='Monitors Github user private feed with feed token' + ) + + parser.add_argument( + '-o', + '--outputFile', + dest='output_filename', + action='store', + help='CSV File to write results to. This overwrites the file provided! 
Eg: out.csv' ) args = parser.parse_args() - search( + metasearch( repo_to_search=args.repo_to_search, user_to_search=args.user_to_search, - gh_dorks_file=args.gh_dorks_file - ) + gh_dorks_file=args.gh_dorks_file, + active_monit=args.active_monit, + output_filename=args.output_filename) + if __name__ == '__main__': main() diff --git a/github-dorks.txt b/github-dorks.txt index e7ccc43..c5625a6 100644 --- a/github-dorks.txt +++ b/github-dorks.txt @@ -21,8 +21,8 @@ extension:json api.forecast.io extension:json mongolab.com extension:yaml mongolab.com jsforce extension:js conn.login -SF_USERNAME "salesforce" -filename:.tugboat NOT "_tugboat" +SF_USERNAME salesforce +filename:.tugboat NOT _tugboat HEROKU_API_KEY language:shell HEROKU_API_KEY language:json filename:.netrc password @@ -48,7 +48,7 @@ filename:.history filename:.sh_history filename:sshd_config filename:dhcpd.conf -filename:prod.exs NOT "prod.secret.exs" +filename:prod.exs NOT prod.secret.exs filename:prod.secret.exs filename:configuration.php JConfig password filename:config.php dbpasswd @@ -59,3 +59,32 @@ shodan_api_key language:shell shodan_api_key language:json shodan_api_key language:ruby filename:shadow path:etc +filename:passwd path:etc +extension:avastlic "support.avast.com" +filename:dbeaver-data-sources.xml +filename:sftp-config.json +filename:.esmtprc password +extension:json googleusercontent client_secret +HOMEBREW_GITHUB_API_TOKEN language:shell +xoxp OR xoxb +.mlab.com password +filename:logins.json +filename:CCCam.cfg +msg nickserv identify filename:config +filename:settings.py SECRET_KEY +filename:secrets.yml password +filename:master.key path:config +filename:deployment-config.json +filename:.ftpconfig +filename:.remote-sync.json +filename:sftp.json path:.vscode +filename:WebServers.xml +filename:jupyter_notebook_config.json +"api_hash" "api_id" +"https://hooks.slack.com/services/" +filename:github-recovery-codes.txt +filename:gitlab-recovery-codes.txt +filename:discord_backup_codes.txt 
+extension:yaml cloud.redislabs.com +extension:json cloud.redislabs.com +DATADOG_API_KEY language:shell diff --git a/requirements.txt b/requirements.txt index 2aeb53d..cfe346b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ github3.py==1.0.0a2 +feedparser==6.0.2 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6deafc2 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 120 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..979e932 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +with open('README.md', 'r') as f: + long_description = f.read() + +setup( + name='github-dorks', + version='0.1', + description='Find leaked secrets via github search.', + license='Apache License 2.0', + long_description=long_description, + author='Samar Dhwoj Acharya (@techgaun)', + long_description_content_type='text/markdown', + scripts=['github-dork.py'], + data_files=[('github-dorks', ['github-dorks.txt'])], + install_requires=[ + 'github3.py==4.0.1', + 'feedparser==6.0.2', + ], +)