-
-
Notifications
You must be signed in to change notification settings - Fork 47.1k
Added reddit.py to get data from reddit #5698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e61b31b
8bcb3e2
280dcfe
9b914d0
3ad0bda
81556cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,53 @@ | ||||||
from __future__ import annotations | ||||||
|
||||||
import requests | ||||||
|
||||||
# Names of the post fields that get_subreddit_data() accepts in ``wanted_data``.
valid_terms = {
    "approved_at_utc",
    "approved_by",
    "author_flair_background_color",
    "author_flair_css_class",
    "author_flair_richtext",
    "author_flair_template_id",
    "author_fullname",
    "author_premium",
    "can_mod_post",
    "category",
    "clicked",
    "content_categories",
    "created_utc",
    "downs",
    "edited",
    "gilded",
    "gildings",
    "hidden",
    "hide_score",
    "is_created_from_ads_ui",
    "is_meta",
    "is_original_content",
    "is_reddit_media_domain",
    "is_video",
    "link_flair_css_class",
    "link_flair_richtext",
    "link_flair_text",
    "link_flair_text_color",
    "media_embed",
    "mod_reason_title",
    "name",
    "permalink",
    "pwls",
    "quarantine",
    "saved",
    "score",
    "secure_media",
    "secure_media_embed",
    "selftext",
    "subreddit",
    "subreddit_name_prefixed",
    "subreddit_type",
    "thumbnail",
    "title",
    "top_awarded_type",
    "total_awards_received",
    "ups",
    "upvote_ratio",
    "url",
    "user_reports",
}
|
||||||
|
||||||
def _fetch_subreddit_listing(subreddit: str, limit: int, age: str) -> dict:
    """Download and decode the JSON listing for one subreddit.

    subreddit : Subreddit to query
    limit : Number of posts to ask Reddit for
    age : Listing to read, e.g. "new", "top" or "hot"

    Raises requests.HTTPError when Reddit answers with an error status
    (including 429, which means the caller is being rate limited).
    """
    response = requests.get(
        f"https://reddit.com/r/{subreddit}/{age}.json?limit={limit}",
        headers={"User-agent": "A random string"},
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    # Raising matters here: an error response carries no listing, so continuing
    # would only fail later with a far less helpful decoding or lookup error.
    if response.status_code == 429:
        raise requests.HTTPError(
            "Rate limited by Reddit (HTTP 429); try again after some time",
            response=response,
        )
    response.raise_for_status()
    return response.json()


def get_subreddit_data(
    subreddit: str, limit: int = 1, age: str = "new", wanted_data: list | None = None
) -> dict:
    """
    Fetch posts from a subreddit, optionally keeping only selected fields.

    subreddit : Subreddit to query
    limit : Number of posts to fetch
    age : ["new", "top", "hot"]
    wanted_data : Get only the required data in the list

    Returns a dict keyed by post position (0, 1, ...). Without wanted_data each
    value is the complete post entry from the listing; with wanted_data each
    value is a dict holding just the requested fields. If Reddit returns fewer
    posts than ``limit``, only the posts that exist are included.

    Raises ValueError if wanted_data names a field that is not in valid_terms,
    and requests.HTTPError if Reddit answers with an error status.

    >>> get_subreddit_data("learnpython", wanted_data=["not_a_field"])
    Traceback (most recent call last):
        ...
    ValueError: Invalid search term: not_a_field
    """
    wanted_data = wanted_data or []
    # Validate before touching the network so bad input fails fast.
    if invalid_search_terms := ", ".join(sorted(set(wanted_data) - valid_terms)):
        raise ValueError(f"Invalid search term: {invalid_search_terms}")

    listing = _fetch_subreddit_listing(subreddit, limit, age)
    children = listing["data"]["children"]
    # zip() stops at the shorter input, so a subreddit with fewer than ``limit``
    # posts no longer triggers an IndexError.
    numbered_posts = zip(range(limit), children)

    if not wanted_data:
        return dict(numbered_posts)

    return {
        position: {item: post["data"][item] for item in wanted_data}
        for position, post in numbered_posts
    }
|
||||||
|
||||||
if __name__ == "__main__":
    # An HTTP 429 error means Reddit is rate limiting you; wait a while and retry.
    requested_fields = ["title", "url", "selftext"]
    latest_posts = get_subreddit_data("learnpython", wanted_data=requested_fields)
    print(latest_posts)
Uh oh!
There was an error while loading. Please reload this page.