forked from avinashkranjan/Amazing-Python-Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprogram.py
More file actions
67 lines (65 loc) · 2.91 KB
/
program.py
File metadata and controls
67 lines (65 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from bs4 import BeautifulSoup
import requests
import csv
# Internshala internship scraper.
# Prompts for a category and a page count, scrapes the category's listing
# pages, and appends one CSV row per internship to internshala.csv.

pages = int(input('How many pages do you want to scrape ? : '))

# Menu number -> category name.
CATEGORY_BY_NUMBER = {
    1: 'Computer Science',
    2: 'Marketing',
    3: 'Finance Internship',
    4: 'Mechanical Internship',
    5: 'HR Internship',
    6: 'Digital Marketing Internship',
    7: 'Electronics Internship',
    8: 'Content Writing Internship',
    9: 'Civil Internship',
}

# Category name -> listing URL.
# NOTE: renamed from `dict`, which shadowed the builtin type.
CATEGORY_URLS = {
    'Computer Science': 'https://internshala.com/internships/computer%20science-internship',
    'Marketing': 'https://internshala.com/internships/marketing-internship',
    'Finance Internship': 'https://internshala.com/internships/finance-internship',
    'Mechanical Internship': 'https://internshala.com/internships/mechanical-internship',
    'HR Internship': 'https://internshala.com/internships/hr-internship',
    'Digital Marketing Internship': 'https://internshala.com/internships/digital%20marketing-internship',
    'Electronics Internship': 'https://internshala.com/internships/electronics-internship',
    'Content Writing Internship': 'https://internshala.com/internships/content%20writing-internship',
    'Civil Internship': 'https://internshala.com/internships/civil-internship',
}

# Show the numbered menu (was a manual `x += 1` counter loop).
for number, name in CATEGORY_BY_NUMBER.items():
    print(number, name)

ch = int(input("Enter the category. eg 1 for Computer Science : "))
url = CATEGORY_URLS[CATEGORY_BY_NUMBER[ch]]
print('--------URL : ' + url)

# newline='' is required by the csv module to avoid blank rows on Windows;
# an explicit encoding keeps the output stable across platforms.
with open('internshala.csv', mode='a', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Company', 'Profile', 'Location/s', 'From', 'Upto', 'Duration', 'Stipend', 'Link'])
    for page in range(1, pages + 1):
        print('Page', page)
        resp = requests.get(url + "/page-" + str(page))
        # NOTE: 'lxml' requires the lxml package to be installed; swap for
        # 'html.parser' if that dependency is unwanted.
        data = BeautifulSoup(resp.content, 'lxml')
        companies = data.findAll("div", {"class": "heading_6 company_name"})
        profiles = data.findAll("div", {"class": "heading_4_5 profile"})
        locations = data.findAll("div", {"id": "location_names"})
        details = data.findAll("div", {"class": "internship_other_details_container"})
        links = data.findAll("a", {"class": "view_detail_button"})
        # zip() iterates the five result lists in lockstep and stops at the
        # shortest one, so a partially-parsed page no longer raises
        # IndexError the way index-based access did.
        for company_tag, profile_tag, location_tag, detail_tag, link_tag in zip(
                companies, profiles, locations, details, links):
            company = company_tag.text.strip()
            profile = profile_tag.text.strip()
            location = location_tag.text.strip()
            link = 'www.internshala.com/' + link_tag['href']
            # The details container is a newline-separated label/value dump;
            # keep only the non-empty lines.
            extracted = [line.strip() for line in detail_tag.text.split('\n') if line.strip()]
            info = [company, profile, location]
            # Fixed positions in `extracted`: 1=start date, 7=stipend? order
            # mirrors the original scrape — fragile if Internshala changes
            # its markup; TODO confirm against a live page.
            info.append(extracted[1].replace('immediatelyImmediately', 'Immediately'))
            info.append(extracted[7])
            info.append(extracted[3])
            info.append(extracted[5])
            info.append(link)
            writer.writerow(info)

input('Done!\nAll the best ;-)')