From 20d70b5c9db42d5dd700d6444bcb9577f2507b91 Mon Sep 17 00:00:00 2001
From: Kaif
Date: Mon, 20 Jul 2020 03:10:51 +0530
Subject: [PATCH 1/4] Adding job scraping algorithm to web programming

---
 web_programming/fetch_jobs.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 web_programming/fetch_jobs.py

diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py
new file mode 100644
index 000000000000..9cb25c14c326
--- /dev/null
+++ b/web_programming/fetch_jobs.py
@@ -0,0 +1,33 @@
+'''
+Scraping jobs given job title and location from indeed website
+'''
+
+import requests
+from bs4 import BeautifulSoup
+
+
+url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=mumbai"
+
+
+def fetch_jobs():
+    #request the url
+    res = requests.get(url)
+    soup = BeautifulSoup(res.content, 'lxml')
+
+    # This attribute finds out all the specifics listed in a job
+    results = soup.find_all('div', attrs={'data-tn-component': 'organicJob'})
+
+    for x in results:
+        #Job Title
+        job = x.find('a', attrs={'data-tn-element': "jobTitle"})
+        #company name
+        company = x.find('span', {"class" : "company"})
+        print("Job is {} and Company is {}".format(job.text.strip(), company.text.strip()))
+
+
+
+
+if __name__ == "__main__":
+    fetch_jobs()
+
+

From d033aa09c71e1335c0ce44e03acd6617298168ed Mon Sep 17 00:00:00 2001
From: Kaif Kohari
Date: Tue, 21 Jul 2020 15:26:50 +0530
Subject: [PATCH 2/4] Delete fetch_jobs.py

---
 web_programming/fetch_jobs.py | 33 ---------------------------------
 1 file changed, 33 deletions(-)
 delete mode 100644 web_programming/fetch_jobs.py

diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py
deleted file mode 100644
index 9cb25c14c326..000000000000
--- a/web_programming/fetch_jobs.py
+++ /dev/null
@@ -1,33 +0,0 @@
-'''
-Scraping jobs given job title and location from indeed website
-'''
-
-import requests
-from bs4 import BeautifulSoup
-
-
-url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=mumbai"
-
-
-def fetch_jobs():
-    #request the url
-    res = requests.get(url)
-    soup = BeautifulSoup(res.content, 'lxml')
-
-    # This attribute finds out all the specifics listed in a job
-    results = soup.find_all('div', attrs={'data-tn-component': 'organicJob'})
-
-    for x in results:
-        #Job Title
-        job = x.find('a', attrs={'data-tn-element': "jobTitle"})
-        #company name
-        company = x.find('span', {"class" : "company"})
-        print("Job is {} and Company is {}".format(job.text.strip(), company.text.strip()))
-
-
-
-
-if __name__ == "__main__":
-    fetch_jobs()
-
-

From 0855e89c1b5ae73c9e13adbe1c6cc7bc358e44dc Mon Sep 17 00:00:00 2001
From: Kaif
Date: Tue, 21 Jul 2020 15:44:52 +0530
Subject: [PATCH 3/4] Adding Jobs Scraping to web programming

---
 web_programming/fetch_jobs.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 web_programming/fetch_jobs.py

diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py
new file mode 100644
index 000000000000..c3df2085921c
--- /dev/null
+++ b/web_programming/fetch_jobs.py
@@ -0,0 +1,33 @@
+'''
+Scraping jobs given job title and location from indeed website
+'''
+
+import requests
+from bs4 import BeautifulSoup
+
+
+url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=mumbai"
+
+
+def fetch_jobs():
+    #request the url
+    res = requests.get(url)
+    soup = BeautifulSoup(res.content, 'lxml')
+
+    # This attribute finds out all the specifics listed in a job
+    results = soup.find_all('div', attrs={'data-tn-component': 'organicJob'})
+
+    for x in results:
+        #Job Title
+        job = x.find('a', attrs={'data-tn-element': "jobTitle"})
+        #company name
+        company = x.find('span', {"class" : "company"})
+        print(f"Job is {job.text.strip()} and Company is {company.text.strip()}")
+
+
+
+
+if __name__ == "__main__":
+    fetch_jobs()
+
+
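Note: none of the fetch_jobs() versions in this series pass a timeout or request headers to requests.get(), so a slow response can hang the script and the site may serve a bot-block page. A minimal defensive-fetch sketch; the User-Agent string and the 10-second timeout are illustrative values, not taken from the patches:

    import requests

    URL = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=mumbai"


    def fetch_html(url: str = URL) -> str:
        # Illustrative header and timeout; neither value appears in the patches.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (fetch_jobs example)"},
            timeout=10,  # seconds; fail fast instead of hanging
        )
        response.raise_for_status()  # raise on 4xx/5xx instead of parsing an error page
        return response.text


    if __name__ == "__main__":
        print(len(fetch_html()), "bytes of HTML fetched")
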
From af6c05fe2e509ec2d1b4af923d2d1f9cab1c25fd Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Tue, 21 Jul 2020 13:38:04 +0200
Subject: [PATCH 4/4] Add Python type hints

---
 web_programming/fetch_jobs.py | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py
index c3df2085921c..888f41294974 100644
--- a/web_programming/fetch_jobs.py
+++ b/web_programming/fetch_jobs.py
@@ -1,33 +1,23 @@
-'''
+"""
 Scraping jobs given job title and location from indeed website
-'''
+"""
+from typing import Generator, Tuple
 
 import requests
 from bs4 import BeautifulSoup
-
 
-url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=mumbai"
+url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l="
 
 
-def fetch_jobs():
-    #request the url
-    res = requests.get(url)
-    soup = BeautifulSoup(res.content, 'lxml')
-
+def fetch_jobs(location: str = "mumbai") -> Generator[Tuple[str, str], None, None]:
+    soup = BeautifulSoup(requests.get(url + location).content, "html.parser")
     # This attribute finds out all the specifics listed in a job
-    results = soup.find_all('div', attrs={'data-tn-component': 'organicJob'})
+    for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}):
+        job_title = job.find("a", attrs={"data-tn-element": "jobTitle"}).text.strip()
+        company_name = job.find("span", {"class": "company"}).text.strip()
+        yield job_title, company_name
+
 
-    for x in results:
-        #Job Title
-        job = x.find('a', attrs={'data-tn-element': "jobTitle"})
-        #company name
-        company = x.find('span', {"class" : "company"})
-        print(f"Job is {job.text.strip()} and Company is {company.text.strip()}")
-
-
-
-
 if __name__ == "__main__":
-    fetch_jobs()
-
-
+    for i, job in enumerate(fetch_jobs("Bangalore"), 1):
+        print(f"Job {i:>2} is {job[0]} at {job[1]}")
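The parsing logic of the final version can be exercised offline by feeding BeautifulSoup a hand-written fragment that uses the same data-tn-component / data-tn-element attributes and company class as the patches above. A minimal sketch; the sample markup and the parse_jobs helper are illustrative and not part of this pull request:

    from typing import Generator, Tuple

    from bs4 import BeautifulSoup

    SAMPLE_HTML = """
    <div data-tn-component="organicJob">
      <a data-tn-element="jobTitle"> Android Developer </a>
      <span class="company"> Example Apps Pvt Ltd </span>
    </div>
    <div data-tn-component="organicJob">
      <a data-tn-element="jobTitle"> Flutter Engineer </a>
      <span class="company"> Another Studio </span>
    </div>
    """


    def parse_jobs(html: str) -> Generator[Tuple[str, str], None, None]:
        # Mirrors the selectors used by fetch_jobs() in the final patch.
        soup = BeautifulSoup(html, "html.parser")
        for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}):
            job_title = job.find("a", attrs={"data-tn-element": "jobTitle"}).text.strip()
            company_name = job.find("span", {"class": "company"}).text.strip()
            yield job_title, company_name


    if __name__ == "__main__":
        for title, company in parse_jobs(SAMPLE_HTML):
            print(f"{title} at {company}")
        # Prints:
        # Android Developer at Example Apps Pvt Ltd
        # Flutter Engineer at Another Studio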