diff --git a/README.md b/README.md index f4eeb3f..9513b3a 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,41 @@ -# Wikipedia Population Table Data Extraction +# Sensex Table Extraction -This project demonstrates web scraping using Selenium WebDriver to extract data from an HTML table on a Wikipedia page. Specifically, it retrieves the list of countries and territories by total population, presenting a real-world example of data extraction and automation using Selenium. +This project automates the extraction of data from the Indian Indices table on the MoneyControl website. The extracted data provides insights into NIFTY 50 market trends, including real-time values such as the Last Traded Price (LTP), percentage change, volume and more. The data is displayed in a neatly formatted table in the console. ## Features -- Automates navigation to a Wikipedia page on country populations. -- Extracts data from an HTML table, including: - - **Location** (Country or territory) - - **Population** - - **Percentage of world population** - - **Date** of population data - - **Source** of the data - - **Notes** -- Processes table rows dynamically to handle updates to the table structure or content. -- Uses JavaScript for smooth scrolling to the target table. - -## Prerequisites -Ensure you have the following before running the project: -1. **Java Development Kit (JDK)** - Version 8 or above. -2. **Google Chrome** - Latest stable version. -3. **ChromeDriver** - Version compatible with your Chrome browser. -4. **Selenium WebDriver** - Included in the project dependencies. + +### Indian Indices Extraction +- Extracts live data from the **Indian Indices - NIFTY 50** table on the MoneyControl website. +- Columns extracted include: + - **Name**: Name of the stock or index. + - **LTP**: Last traded price. + - **%Chg**: Percentage change in price. + - **Chg**: Change in price. + - **Volume**: Total trading volume. + - **Buy Price**: Current buy price. 
+ - **Sell Price**: Current sell price. + - **Buy Qty**: Quantity available for buying. + - **Sell Qty**: Quantity available for selling. + - **Analysis**: Additional analysis details. + +### Console Output +- Displays the extracted data in a visually formatted table with proper alignment and separators for easy readability. + +### Scroll Functionality +- Uses JavaScript Executor to scroll the webpage to ensure that the dynamic content is fully loaded before extraction. + +## Pre-requisites + +Before running this project, ensure the following are installed: +1. **Java Development Kit (JDK)** - Version 8 or higher. +2. **Selenium WebDriver** - Include Selenium libraries in your project. +3. **Google Chrome** - Latest version of the Chrome browser. +4. **ChromeDriver** - Ensure the ChromeDriver version matches your Chrome browser. +5. **Integrated Development Environment (IDE)** - IntelliJ IDEA, Eclipse, or any other IDE for Java development. ## Technologies Used -- **Java** - The programming language for the project. -- **Selenium WebDriver** - For web element interaction and automation. -- **Google Chrome & ChromeDriver** - For browser-based automation. -- **JavaScript Executor** - For advanced browser interactions like scrolling. + +- **Java** - Programming language used for development. +- **Selenium WebDriver** - To interact with and extract data from the MoneyControl website. +- **Google Chrome & ChromeDriver** - For browsing and interacting with the webpage. +- **JavaScript Executor** - For scrolling functionality. 
package sensex_table;

import java.time.Duration;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 * Opens the MoneyControl Indian Indices page in Chrome, reads the table with
 * id {@code indicesTable} and prints its rows to the console as a fixed-width,
 * box-drawn text table.
 */
public class Sensex_Table_Extraction {

    /** Column titles, in the order the cells are read from each table row. */
    private static final String[] HEADERS = {
        "Name", "LTP", "%Chg", "Chg", "Volume",
        "Buy Price", "Sell Price", "Buy Qty", "Sell Qty", "Analysis"
    };

    /** Number of {@code td} cells a row must have before it is printed. */
    private static final int COLUMN_COUNT = HEADERS.length;

    /** Shared layout for the header row and every data row; widths match {@link #printLine()}. */
    private static final String ROW_FORMAT =
            "| %-20s | %-12s | %-8s | %-8s | %-12s | %-12s | %-12s | %-10s | %-10s | %-15s |\n";

    /**
     * Runs the extraction: loads the page, scrolls, waits for the table and
     * prints it. The browser is always closed, even when extraction fails.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WebDriver driver = new ChromeDriver();
        driver.manage().window().maximize();

        try {
            WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
            driver.get("https://www.moneycontrol.com/markets/indian-indices/");
            waitForTheUser();

            // Scroll down 400px before looking for the table.
            JavascriptExecutor scrollDownOne = (JavascriptExecutor) driver;
            scrollDownOne.executeScript("window.scrollBy(0,400)");
            waitForTheUser();

            // Locate the table by XPath (waits up to 10 seconds).
            WebElement table = wait.until(
                    ExpectedConditions.elementToBeClickable(By.xpath("//table[@id='indicesTable']")));
            waitForTheUser();

            List<WebElement> rows = table.findElements(By.tagName("tr"));

            // Print the message before extraction
            System.out.println("Indian Indices - Markets Terminal : NIFTY 50");
            System.out.println();

            printLine(); // Top border
            System.out.printf(ROW_FORMAT, (Object[]) HEADERS);
            printLine(); // Header separator

            // Start at 1: the first row is treated as the header row.
            for (int i = 1; i < rows.size(); i++) {
                printRow(rows.get(i).findElements(By.tagName("td")));
            }
            printLine(); // Bottom border
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            driver.quit();
        }
    }

    /**
     * Prints one data row, or nothing when the row has fewer than
     * {@link #COLUMN_COUNT} cells. Checking for the full column count (rather
     * than "more than one cell") prevents an {@code IndexOutOfBoundsException}
     * on short rows from aborting the rest of the table.
     *
     * @param cells the {@code td} elements of a single table row
     */
    private static void printRow(List<WebElement> cells) {
        if (cells.size() < COLUMN_COUNT) {
            return;
        }
        Object[] values = new Object[COLUMN_COUNT];
        for (int c = 0; c < COLUMN_COUNT; c++) {
            values[c] = cells.get(c).getText().trim();
        }
        System.out.printf(ROW_FORMAT, values);
    }

    /** Prints a horizontal border line matching the column widths of the table rows. */
    public static void printLine() {
        System.out.println("+----------------------+--------------+----------+----------+--------------+--------------+--------------+------------+------------+-----------------+");
    }

    /**
     * Pauses the current thread for two seconds. If the thread is interrupted
     * while sleeping, the interrupt status is restored so callers can see it.
     */
    public static void waitForTheUser() {
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Do not swallow the interruption: re-assert the flag for the caller.
            Thread.currentThread().interrupt();
        }
    }
}