From 38b1590bd0edd5427c809034fb02ed3315c79819 Mon Sep 17 00:00:00 2001 From: nishithshowri006 <58651995+nishithshowri006@users.noreply.github.com> Date: Mon, 6 Jan 2020 01:05:52 +0530 Subject: [PATCH 1/4] Create get_imdb_top_250_movies_csv.py --- .../get_imdb_top_250_movies_csv.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 web_programming/get_imdb_top_250_movies_csv.py diff --git a/web_programming/get_imdb_top_250_movies_csv.py b/web_programming/get_imdb_top_250_movies_csv.py new file mode 100644 index 000000000000..8654a01eddbb --- /dev/null +++ b/web_programming/get_imdb_top_250_movies_csv.py @@ -0,0 +1,47 @@ +import requests +import csv +import os +from bs4 import BeautifulSoup + + +def movies(filename,path): + + os.chdir(filename) + titlelist=[] + connect=requests.get(""" https://www.imdb.com/chart/top/?ref_=nv_mv_250 """) + soup=BeautifulSoup(connect.text,'html.parser') + elements_title=soup.find_all('td',attrs='titleColumn') + elements_rating=soup.find_all('td',class_='ratingColumn imdbRating') + Movies={} + + + for i in range(len(elements_title)): + title=elements_title[i].find('a').text + rating=elements_rating[i].find('strong').text + Movies[title]=Movies.get(title,rating) + + + with open ('Top_250_Movies_IMDB.csv','w') as newfile: + Writer=csv.writer(newfile) + for Movie,Ratings in Movies.items(): + Writer.writerow([Movie,Ratings]) + newfile.close() + + + +if __name__ == "__main__": + + + + try: + filename=input("Please enter a name for the file to be stored:\n")+".csv" + path=input('Enter the path you want to store the file:\n') + movies(filename,path) + print("Done") + + + except: + filename='Top_250_Movies_IMDB.csv' + path=os.path.join(os.environ["HOMEPATH"], "Desktop") + movies(filename,path) + print("Done!") From a7aec61506a539637bc9bcece0b266b43c8e5702 Mon Sep 17 00:00:00 2001 From: nishithshowri006 <58651995+nishithshowri006@users.noreply.github.com> Date: Mon, 6 Jan 2020 01:23:10 +0530 Subject: [PATCH 2/4] Update get_imdb_top_250_movies_csv.py --- web_programming/get_imdb_top_250_movies_csv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/web_programming/get_imdb_top_250_movies_csv.py b/web_programming/get_imdb_top_250_movies_csv.py index 8654a01eddbb..25a2395cf115 100644 --- a/web_programming/get_imdb_top_250_movies_csv.py +++ b/web_programming/get_imdb_top_250_movies_csv.py @@ -7,7 +7,6 @@ def movies(filename,path): os.chdir(filename) - titlelist=[] connect=requests.get(""" https://www.imdb.com/chart/top/?ref_=nv_mv_250 """) soup=BeautifulSoup(connect.text,'html.parser') elements_title=soup.find_all('td',attrs='titleColumn') From 3d4cea93023cbcd4b8e3f77ea8f3d0504e0e44d3 Mon Sep 17 00:00:00 2001 From: nishithshowri006 <58651995+nishithshowri006@users.noreply.github.com> Date: Mon, 6 Jan 2020 01:25:11 +0530 Subject: [PATCH 3/4] Update get_imdb_top_250_movies_csv.py --- web_programming/get_imdb_top_250_movies_csv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/web_programming/get_imdb_top_250_movies_csv.py b/web_programming/get_imdb_top_250_movies_csv.py index 25a2395cf115..b32d49a00a18 100644 --- a/web_programming/get_imdb_top_250_movies_csv.py +++ b/web_programming/get_imdb_top_250_movies_csv.py @@ -24,7 +24,6 @@ def movies(filename,path): Writer=csv.writer(newfile) for Movie,Ratings in Movies.items(): Writer.writerow([Movie,Ratings]) - newfile.close() From 5f18bcc803fb8005b060d5cdca23209659312801 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 5 Jan 2020 22:22:45 +0100 Subject: [PATCH 4/4] get_imdb_top_250_movies() --- .../get_imdb_top_250_movies_csv.py | 56 +++++++------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/web_programming/get_imdb_top_250_movies_csv.py b/web_programming/get_imdb_top_250_movies_csv.py index b32d49a00a18..811c21fb00e4 100644 --- a/web_programming/get_imdb_top_250_movies_csv.py +++ b/web_programming/get_imdb_top_250_movies_csv.py @@ -1,45 +1,29 @@ -import requests import csv -import os -from bs4 import BeautifulSoup - +from typing import Dict -def movies(filename,path): - - os.chdir(filename) - connect=requests.get(""" https://www.imdb.com/chart/top/?ref_=nv_mv_250 """) - soup=BeautifulSoup(connect.text,'html.parser') - elements_title=soup.find_all('td',attrs='titleColumn') - elements_rating=soup.find_all('td',class_='ratingColumn imdbRating') - Movies={} +import requests +from bs4 import BeautifulSoup - for i in range(len(elements_title)): - title=elements_title[i].find('a').text - rating=elements_rating[i].find('strong').text - Movies[title]=Movies.get(title,rating) +def get_imdb_top_250_movies(url: str = "") -> Dict[str, float]: + url = url or "https://www.imdb.com/chart/top/?ref_=nv_mv_250" + soup = BeautifulSoup(requests.get(url).text, "html.parser") + titles = soup.find_all("td", attrs="titleColumn") + ratings = soup.find_all("td", class_="ratingColumn imdbRating") + return { + title.a.text: float(rating.strong.text) + for title, rating in zip(titles, ratings) + } - with open ('Top_250_Movies_IMDB.csv','w') as newfile: - Writer=csv.writer(newfile) - for Movie,Ratings in Movies.items(): - Writer.writerow([Movie,Ratings]) - +def write_movies(filename: str = "IMDb_Top_250_Movies.csv") -> None: + movies = get_imdb_top_250_movies() + with open(filename, "w", newline="") as out_file: + writer = csv.writer(out_file) + writer.writerow(["Movie title", "IMDb rating"]) + for title, rating in movies.items(): + writer.writerow([title, rating]) if __name__ == "__main__": - - - - try: - filename=input("Please enter a name for the file to be stored:\n")+".csv" - path=input('Enter the path you want to store the file:\n') - movies(filename,path) - print("Done") - - - except: - filename='Top_250_Movies_IMDB.csv' - path=os.path.join(os.environ["HOMEPATH"], "Desktop") - movies(filename,path) - print("Done!") + write_movies()