Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

History

History
History
67 lines (65 loc) · 2.91 KB

File metadata and controls

67 lines (65 loc) · 2.91 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from bs4 import BeautifulSoup
import requests
import csv

# Scrape internship listings from internshala.com for a user-chosen category
# and append one CSV row per listing to internshala.csv with the columns:
# Company, Profile, Location/s, From, Upto, Duration, Stipend, Link.

pages = int(input('How many pages do you want to scrape ? : '))

# Menu number -> category name shown to the user.
categories = {
    1: 'Computer Science',
    2: 'Marketing',
    3: 'Finance Internship',
    4: 'Mechanical Internship',
    5: 'HR Internship',
    6: 'Digital Marketing Internship',
    7: 'Electronics Internship',
    8: 'Content Writing Internship',
    9: 'Civil Internship',
}
# Category name -> listing-index URL (page suffix is appended per page below).
# Renamed from `dict`, which shadowed the builtin type.
category_urls = {
    'Computer Science': 'https://internshala.com/internships/computer%20science-internship',
    'Marketing': 'https://internshala.com/internships/marketing-internship',
    'Finance Internship': 'https://internshala.com/internships/finance-internship',
    'Mechanical Internship': 'https://internshala.com/internships/mechanical-internship',
    'HR Internship': 'https://internshala.com/internships/hr-internship',
    'Digital Marketing Internship': 'https://internshala.com/internships/digital%20marketing-internship',
    'Electronics Internship': 'https://internshala.com/internships/electronics-internship',
    'Content Writing Internship': 'https://internshala.com/internships/content%20writing-internship',
    'Civil Internship': 'https://internshala.com/internships/civil-internship',
}

# Show the numbered menu (same "1 Computer Science" format as before).
for number, name in sorted(categories.items()):
    print(number, name)
ch = int(input("Enter the category. eg 1 for Computer Science : "))
url = category_urls[categories[ch]]
print('--------URL : ' + url)

# newline='' prevents the csv module from emitting blank rows on Windows
# (recommended by the csv module documentation). Mode 'a' appends, so the
# header row is re-written on every run — kept to preserve existing behavior.
with open('internshala.csv', mode='a', newline='') as f:
    writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Company', 'Profile', 'Location/s', 'From', 'Upto',
                     'Duration', 'Stipend', 'Link'])
    for page in range(1, pages + 1):
        print('Page', page)
        # Timeout so a stalled connection cannot hang the script forever.
        resp = requests.get(url + "/page-" + str(page), timeout=30)
        data = BeautifulSoup(resp.content, 'lxml')
        # find_all is the modern name for the deprecated findAll alias.
        companies = data.find_all("div", {"class": "heading_6 company_name"})
        profiles = data.find_all("div", {"class": "heading_4_5 profile"})
        locations = data.find_all("div", {"id": "location_names"})
        details = data.find_all("div", {"class": "internship_other_details_container"})
        links = data.find_all("a", {"class": "view_detail_button"})
        for idx in range(len(companies)):
            company = companies[idx].text.strip()
            profile = profiles[idx].text.strip()
            location = locations[idx].text.strip()
            # NOTE(review): href presumably starts with '/', making this a
            # scheme-less relative-looking URL — confirm the intended format.
            link = 'www.internshala.com/' + links[idx]['href']
            # The details blob is newline-separated; keep the non-empty lines.
            extracted = [line.strip()
                         for line in details[idx].text.split('\n')
                         if line.strip()]
            # Positions 1/7/3/5 map to From/Stipend/Upto/Duration in the
            # scraped markup; the replace() undoes a known label duplication.
            info = [company, profile, location,
                    extracted[1].replace('immediatelyImmediately', 'Immediately'),
                    extracted[7],
                    extracted[3],
                    extracted[5],
                    link]
            writer.writerow(info)
input('Done!\nAll the best ;-)')
Morty Proxy This is a proxified and sanitized view of the page, visit original site.