# script.py
# Logs in to LinkedIn, runs a Google search for profile URLs, then visits
# each result and extracts basic profile fields.
import csv
from time import sleep

from parsel import Selector
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

import parameters
# Start a Chrome session driven by the chromedriver binary at this path.
driver = webdriver.Chrome('/Users/username/bin/chromedriver')

# --- Log in to LinkedIn with the credentials from the parameters module ---
driver.get('https://www.linkedin.com')

username = driver.find_element_by_class_name('login-email')
username.send_keys(parameters.linkedin_username)
sleep(0.5)

password = driver.find_element_by_class_name('login-password')
password.send_keys(parameters.linkedin_password)
sleep(0.5)

sign_in_button = driver.find_element_by_xpath('//*[@type="submit"]')
sign_in_button.click()
sleep(0.5)

# --- Run the configured query on Google ---
# The URL needs the "//" after the scheme; 'https:www.google.com' is malformed.
driver.get('https://www.google.com')
sleep(3)

search_query = driver.find_element_by_name('q')
search_query.send_keys(parameters.search_query)
sleep(0.5)

search_query.send_keys(Keys.RETURN)
sleep(3)

# Collect the visible text of every result element with class 'iUh30'.
# NOTE(review): that text is later passed to driver.get() as-is, so it is
# assumed to be a complete, navigable URL - confirm against the live page.
linkedin_urls = driver.find_elements_by_class_name('iUh30')
linkedin_urls = [url.text for url in linkedin_urls]
sleep(0.5)
def _extract_text(sel, class_prefix, default='No results'):
    """Return the stripped text of the first node whose class starts with class_prefix.

    sel is the parsel Selector for the current page. When no matching text
    node exists, or the text is empty after stripping, default is returned so
    that callers always receive a string they can print or write out.
    """
    text = sel.xpath(
        '//*[starts-with(@class, "{}")]/text()'.format(class_prefix)
    ).extract_first()
    if text:
        text = text.strip()
    return text or default


# Open the output file once, before the loop: reopening it in write mode for
# every profile would truncate it each time and keep only the last row.
# Text mode with newline='' is what the csv module expects on Python 3, and
# the encoding argument replaces the per-field .encode('utf-8') calls.
with open(parameters.file_name, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    # Header row, written a single time.
    writer.writerow(['Name', 'Job Title', 'Company', 'College', 'Location', 'URL'])

    # Visit each collected URL and extract the profile fields.
    for linkedin_url in linkedin_urls:
        driver.get(linkedin_url)
        # Pause 5 seconds to give the page time to load.
        sleep(5)

        # Parse the rendered page source.
        sel = Selector(text=driver.page_source)

        name = _extract_text(sel, 'pv-top-card-section__name')
        job_title = _extract_text(sel, 'pv-top-card-section__headline')
        company = _extract_text(
            sel,
            'pv-top-card-v2-section__entity-name '
            'pv-top-card-v2-section__company-name',
        )
        college = _extract_text(
            sel,
            'pv-top-card-v2-section__entity-name '
            'pv-top-card-v2-section__school-name',
        )
        location = _extract_text(sel, 'pv-top-card-section__location')

        # Record the URL the browser actually ended up on.
        linkedin_url = driver.current_url

        # Print the extracted fields to the terminal.
        print('\n')
        print('Name: ' + name)
        print('Job Title: ' + job_title)
        print('Company: ' + company)
        print('College: ' + college)
        print('Location: ' + location)
        print('URL: ' + linkedin_url)
        print('\n')

        # One CSV row per profile, in header order.
        writer.writerow(
            [name, job_title, company, college, location, linkedin_url]
        )
def validate_field(field):
    """Ensure a scraped data field has a printable value.

    Returns field unchanged when it is truthy. Any falsy value (None, an
    empty string, ...) is replaced by the placeholder 'No results'.
    """
    if field:
        return field
    return 'No results'
# Pass every scraped field through validate_field and rebind the results.
# NOTE(review): as positioned, these statements come after the printing and
# CSV-writing code - confirm whether validation was meant to run before output.
(name, job_title, company,
 college, location, linkedin_url) = map(
    validate_field,
    (name, job_title, company, college, location, linkedin_url),
)

# Close the browser and end the WebDriver session.
driver.quit()