# Google SERP scraper

Google SERP scraper code written in Python. The first version drives a real Chrome browser with Selenium:

```python
# SERP scraper
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# Set up the Chrome WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Load the Google search page
url = 'https://www.google.com/'
driver.get(url)

# Type the keyword into the search box and submit
search_box = driver.find_element(By.NAME, 'q')
search_term = 'scrapingbypass'
search_box.send_keys(search_term)
search_box.send_keys(Keys.RETURN)

num = 1

# Scrape the first 5 pages of results
for page in range(1, 6):
    # Wait for the search results to load
    try:
        element_present = EC.presence_of_element_located((By.CSS_SELECTOR, '.g'))
        WebDriverWait(driver, 10).until(element_present)
    except TimeoutException:
        print("Timed out waiting for page to load")
        break

    # Parse the search results, skipping containers that lack a title or link
    search_results = driver.find_elements(By.CSS_SELECTOR, '.g')
    for result in search_results:
        try:
            link = result.find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
            title = result.find_element(By.CSS_SELECTOR, 'h3').text
        except NoSuchElementException:
            continue
        print(num)
        print(title)
        print(link)
        num += 1

    # Click through to the next page; stop when there is no next button
    try:
        next_button = driver.find_element(By.CSS_SELECTOR, '#pnnext')
        next_button.click()
    except NoSuchElementException:
        break

# Close the WebDriver
driver.quit()
```
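If you don't need to watch the browser, Chrome can also run headless. A minimal tweak to the options above (in recent Chrome the flag is `--headless=new`; older releases use plain `--headless`):

```python
# Optional: run Chrome without a visible window. --start-maximized has no
# effect in headless mode, so set an explicit window size instead.
options.add_argument("--headless=new")
options.add_argument("--window-size=1920,1080")
```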

Or, without driving a browser at all, the same results can be fetched with requests and BeautifulSoup:

```python
import requests
from bs4 import BeautifulSoup

# Search keyword
search_term = "scrapingbypass"

# Request headers: a desktop browser User-Agent so Google serves the normal HTML page
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

# Number of result pages to fetch
num_pages = 5
no = 1

for page in range(num_pages):
    # Each result page is 10 results further along (start=0, 10, 20, ...)
    url = f"https://www.google.com/search?q={search_term}&start={page * 10}"

    # Request the page and fail loudly on HTTP errors
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # Parse the HTML
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract the search results
    search_results = soup.select(".yuRUbf")

    # Print each result's title and link
    for result in search_results:
        title_tag = result.select_one("h3")
        link_tag = result.select_one("a")
        if title_tag is None or link_tag is None:
            continue
        print(f"{no}: {title_tag.text}: {link_tag['href']}")
        no += 1
```
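
Google throttles and eventually CAPTCHA-blocks rapid repeated queries, so in practice the request loop above needs pacing. A minimal sketch of one way to do it (`fetch_serp_page` is a hypothetical helper, and the delay bounds are illustrative, not tuned values):

```python
import random
import time

import requests


def fetch_serp_page(url: str, headers: dict) -> requests.Response:
    """Fetch one results page, pausing a random interval first.

    The 2-6 second delay bounds are illustrative, not tuned values.
    """
    time.sleep(random.uniform(2, 6))
    response = requests.get(url, headers=headers, timeout=15)
    if response.status_code == 429:
        # Google returns HTTP 429 when it starts throttling; stop rather
        # than hammering the endpoint and getting the IP blocked outright.
        raise RuntimeError("Rate limited by Google (HTTP 429)")
    response.raise_for_status()
    return response
```

Swapping this in for the bare `requests.get` call in the loop above spreads the five page fetches over a more human-looking timescale.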