From e61b31b47dd8717b0a8c125d7d03f4d9606c924b Mon Sep 17 00:00:00 2001 From: Jaydeep Das Date: Sun, 31 Oct 2021 09:24:01 +0530 Subject: [PATCH 1/6] Rewritten reddit.py --- web_programming/reddit.py | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 web_programming/reddit.py diff --git a/web_programming/reddit.py b/web_programming/reddit.py new file mode 100644 index 000000000000..33409f1923ac --- /dev/null +++ b/web_programming/reddit.py @@ -0,0 +1,58 @@ +from logging import raiseExceptions + +import requests + + +def get_data( + sub: str, limit: int = 1, age: str = "new", wanted_data: list = [] +) -> dict: + """ + sub : Subreddit to query + limit : Number of posts to fetch + age : ["new", "top", "hot"] + wanted_data : Get only the required data in the list + Possible values: + [ + 'approved_at_utc', 'subreddit', 'selftext', + 'author_fullname', 'saved', mod_reason_title', + 'gilded', 'clicked', 'title', 'link_flair_richtext', + 'subreddit_name_prefixed', 'hidden', 'pwls', + 'link_flair_css_class', 'downs', 'top_awarded_type', + 'hide_score', 'name', 'quarantine', 'link_flair_text_color', + 'upvote_ratio', 'author_flair_background_color', + 'subreddit_type', 'ups', 'total_awards_received', + 'media_embed', 'author_flair_template_id', + 'is_original_content', 'user_reports', 'secure_media', + 'is_reddit_media_domain', 'is_meta', 'category', + 'secure_media_embed', 'link_flair_text', 'can_mod_post', + 'score', 'approved_by', 'is_created_from_ads_ui', + 'author_premium', 'thumbnail', 'edited', + 'author_flair_css_class', 'author_flair_richtext', + 'gildings', 'content_categories','url', 'is_video', + 'created_utc', 'permalink' + ] + """ + response = requests.get(f"https://reddit.com/r/{sub}/{age}.json?limit={limit}") + if response.raise_for_status(): + raiseExceptions + + data = response.json() + data_dict = {} + + if wanted_data == []: + for id_ in range(limit): + data_dict[id_] = data["data"]["children"][id_] + else: + for id_ in range(limit): + singleton = {} + for item in wanted_data: + singleton[item] = data["data"]["children"][id_]["data"][item] + + data_dict[id_] = singleton + + return data_dict + + +if __name__ == "__main__": + # If you get Error 429, that means you are rate limited.Try after some time + print(get_data("learnpython", wanted_data=["title", "url", "selftext"])) From 8bcb3e24e37427c9b1dac419952a66944b9fc08b Mon Sep 17 00:00:00 2001 From: Jaydeep Das Date: Sun, 31 Oct 2021 09:43:05 +0530 Subject: [PATCH 2/6] Removed logging module import --- web_programming/reddit.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/web_programming/reddit.py b/web_programming/reddit.py index 33409f1923ac..eda5a2362978 100644 --- a/web_programming/reddit.py +++ b/web_programming/reddit.py @@ -1,5 +1,3 @@ -from logging import raiseExceptions - import requests @@ -34,7 +32,7 @@ def get_data( """ response = requests.get(f"https://reddit.com/r/{sub}/{age}.json?limit={limit}") if response.raise_for_status(): - raiseExceptions + raise requests.HTTPError data = response.json() data_dict = {} From 280dcfe13148a45a339ad4f331f70bdc7b52d9e4 Mon Sep 17 00:00:00 2001 From: Jaydeep Das Date: Sun, 31 Oct 2021 09:58:56 +0530 Subject: [PATCH 3/6] Fixed minor bug which was causing extreme rate limiting --- web_programming/reddit.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/web_programming/reddit.py b/web_programming/reddit.py index eda5a2362978..0b17702923a0 100644 --- a/web_programming/reddit.py +++ b/web_programming/reddit.py @@ -30,8 +30,11 @@ def get_data( 'created_utc', 'permalink' ] """ - response = requests.get(f"https://reddit.com/r/{sub}/{age}.json?limit={limit}") - if response.raise_for_status(): + response = requests.get( + f"https://reddit.com/r/{sub}/{age}.json?limit={limit}", + headers={"User-agent": "A random string"}, + ) + if response.status_code == 429: raise requests.HTTPError data = response.json() From 9b914d0c9ce5772696e6b7d05168f01ddf309422 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 4 Nov 2021 16:15:37 +0100 Subject: [PATCH 4/6] Update reddit.py --- web_programming/reddit.py | 67 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/web_programming/reddit.py b/web_programming/reddit.py index 0b17702923a0..2a7dc2c78c7c 100644 --- a/web_programming/reddit.py +++ b/web_programming/reddit.py @@ -1,59 +1,52 @@ +from __future__ import annotations + import requests -def get_data( - sub: str, limit: int = 1, age: str = "new", wanted_data: list = [] +valid_terms = set( + """approved_at_utc approved_by author_flair_background_color +author_flair_css_class author_flair_richtext author_flair_template_id author_fullname +author_premium can_mod_post category clicked content_categories created_utc downs +edited gilded gildings hidden hide_score is_created_from_ads_ui is_meta +is_original_content is_reddit_media_domain is_video link_flair_css_class +link_flair_richtext link_flair_text link_flair_text_color media_embed mod_reason_title +name permalink pwls quarantine saved score secure_media secure_media_embed selftext +subreddit subreddit_name_prefixed subreddit_type thumbnail title top_awarded_type +total_awards_received ups upvote_ratio url user_reports""".split() +) + + +def get_subreddit_data( + subreddit: str, limit: int = 1, age: str = "new", wanted_data: list | None = None ) -> dict: """ - sub : Subreddit to query + subreddit : Subreddit to query limit : Number of posts to fetch age : ["new", "top", "hot"] wanted_data : Get only the required data in the list - Possible values: - [ - 'approved_at_utc', 'subreddit', 'selftext', - 'author_fullname', 'saved', mod_reason_title', - 'gilded', 'clicked', 'title', 'link_flair_richtext', - 'subreddit_name_prefixed', 'hidden', 'pwls', - 'link_flair_css_class', 'downs', 'top_awarded_type', - 'hide_score', 'name', 'quarantine', 'link_flair_text_color', - 'upvote_ratio', 'author_flair_background_color', - 'subreddit_type', 'ups', 'total_awards_received', - 'media_embed', 'author_flair_template_id', - 'is_original_content', 'user_reports', 'secure_media', - 'is_reddit_media_domain', 'is_meta', 'category', - 'secure_media_embed', 'link_flair_text', 'can_mod_post', - 'score', 'approved_by', 'is_created_from_ads_ui', - 'author_premium', 'thumbnail', 'edited', - 'author_flair_css_class', 'author_flair_richtext', - 'gildings', 'content_categories','url', 'is_video', - 'created_utc', 'permalink' - ] """ + wanted_data = wanted_data or [] + if invalid_search_terms := ", ".join(sorted(set(wanted_data) - valid_terms)): + raise ValueError(f"Invalid search term: {invalid_search_terms}") response = requests.get( - f"https://reddit.com/r/{sub}/{age}.json?limit={limit}", + f"https://reddit.com/r/{subreddit}/{age}.json?limit={limit}", headers={"User-agent": "A random string"}, ) if response.status_code == 429: raise requests.HTTPError data = response.json() - data_dict = {} - - if wanted_data == []: - for id_ in range(limit): - data_dict[id_] = data["data"]["children"][id_] - else: - for id_ in range(limit): - singleton = {} - for item in wanted_data: - singleton[item] = data["data"]["children"][id_]["data"][item] - - data_dict[id_] = singleton + if not wanted_data: + return {id_: data["data"]["children"][id_] for id_ in range(limit)} + data_dict = {} + for id_ in range(limit): + data_dict[id_] = { + item: data["data"]["children"][id_]["data"][item] for item in wanted_data + } return data_dict if __name__ == "__main__": # If you get Error 429, that means you are rate limited.Try after some time - print(get_data("learnpython", wanted_data=["title", "url", "selftext"])) + print(get_subreddit_data("learnpython", wanted_data=["title", "url", "selftext"])) From 3ad0bdaa6d943a63a72e03c7b16b03c5e12d5506 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 4 Nov 2021 16:21:40 +0100 Subject: [PATCH 5/6] Update reddit.py --- web_programming/reddit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web_programming/reddit.py b/web_programming/reddit.py index 2a7dc2c78c7c..affd03601fb4 100644 --- a/web_programming/reddit.py +++ b/web_programming/reddit.py @@ -2,7 +2,6 @@ import requests - valid_terms = set( """approved_at_utc approved_by author_flair_background_color author_flair_css_class author_flair_richtext author_flair_template_id author_fullname @@ -24,6 +23,8 @@ def get_subreddit_data( limit : Number of posts to fetch age : ["new", "top", "hot"] wanted_data : Get only the required data in the list + + >>> pass """ wanted_data = wanted_data or [] if invalid_search_terms := ", ".join(sorted(set(wanted_data) - valid_terms)): From 81556cd51e33eff6e18ea1861eab1da9f9b5068a Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 4 Nov 2021 16:33:05 +0100 Subject: [PATCH 6/6] Update reddit.py --- web_programming/reddit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_programming/reddit.py b/web_programming/reddit.py index affd03601fb4..672109f1399d 100644 --- a/web_programming/reddit.py +++ b/web_programming/reddit.py @@ -23,7 +23,7 @@ def get_subreddit_data( limit : Number of posts to fetch age : ["new", "top", "hot"] wanted_data : Get only the required data in the list - + >>> pass """ wanted_data = wanted_data or []