diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 288e3f591403..394b75269363 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -55,7 +55,6 @@ repos:
           - --ignore-missing-imports
           - --install-types  # See mirrors-mypy README.md
           - --non-interactive
-        additional_dependencies: [types-requests]

   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: "v4.0.0-alpha.8"
diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index 1d11e5a9cc2b..5b1e663116cc 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -8,8 +8,16 @@
 Rating). We try to best fit a line through dataset and estimate the parameters.
 """

+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "httpx",
+#     "numpy",
+# ]
+# ///
+
+import httpx
 import numpy as np
-import requests


 def collect_dataset():
@@ -17,7 +25,7 @@ def collect_dataset():
     The dataset contains ADR vs Rating of a Player
     :return : dataset obtained from the link, as matrix
     """
-    response = requests.get(
+    response = httpx.get(
         "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
         "master/Week1/ADRvsRating.csv",
         timeout=10,
diff --git a/requirements.txt b/requirements.txt
index b104505e01bc..66b5d8a6b94e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 beautifulsoup4
 fake-useragent
+httpx
 imageio
 keras
 lxml
@@ -8,7 +9,6 @@ numpy
 opencv-python
 pandas
 pillow
-requests
 rich
 scikit-learn
 sphinx-pyproject
diff --git a/scripts/validate_solutions.py b/scripts/validate_solutions.py
index c3f872203591..f426153b5683 100755
--- a/scripts/validate_solutions.py
+++ b/scripts/validate_solutions.py
@@ -3,8 +3,8 @@
 # /// script
 # requires-python = ">=3.13"
 # dependencies = [
+#     "httpx",
 #     "pytest",
-#     "requests",
 # ]
 # ///

@@ -15,8 +15,8 @@
 import pathlib
 from types import ModuleType

+import httpx
 import pytest
-import requests

 PROJECT_EULER_DIR_PATH = pathlib.Path.cwd().joinpath("project_euler")
 PROJECT_EULER_ANSWERS_PATH = pathlib.Path.cwd().joinpath(
@@ -66,7 +66,7 @@ def added_solution_file_path() -> list[pathlib.Path]:
         "Accept": "application/vnd.github.v3+json",
         "Authorization": "token " + os.environ["GITHUB_TOKEN"],
     }
-    files = requests.get(get_files_url(), headers=headers, timeout=10).json()
+    files = httpx.get(get_files_url(), headers=headers, timeout=10).json()
     for file in files:
         filepath = pathlib.Path.cwd().joinpath(file["filename"])
         if (
diff --git a/web_programming/co2_emission.py b/web_programming/co2_emission.py
index 19af70489d1d..4f62df8447b0 100644
--- a/web_programming/co2_emission.py
+++ b/web_programming/co2_emission.py
@@ -2,22 +2,29 @@
 Get CO2 emission data from the UK CarbonIntensity API
 """

+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "httpx",
+# ]
+# ///
+
 from datetime import date

-import requests
+import httpx

 BASE_URL = "https://api.carbonintensity.org.uk/intensity"


 # Emission in the last half hour
 def fetch_last_half_hour() -> str:
-    last_half_hour = requests.get(BASE_URL, timeout=10).json()["data"][0]
+    last_half_hour = httpx.get(BASE_URL, timeout=10).json()["data"][0]
     return last_half_hour["intensity"]["actual"]


 # Emissions in a specific date range
 def fetch_from_to(start, end) -> list:
-    return requests.get(f"{BASE_URL}/{start}/{end}", timeout=10).json()["data"]
+    return httpx.get(f"{BASE_URL}/{start}/{end}", timeout=10).json()["data"]


 if __name__ == "__main__":
diff --git a/web_programming/covid_stats_via_xpath.py b/web_programming/covid_stats_via_xpath.py
index c27a5d12bb3f..f7db51b63169 100644
--- a/web_programming/covid_stats_via_xpath.py
+++ b/web_programming/covid_stats_via_xpath.py
@@ -4,9 +4,17 @@
 more convenient to use in Python web projects (e.g. Django or Flask-based)
 """

+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "httpx",
+#     "lxml",
+# ]
+# ///
+
 from typing import NamedTuple

-import requests
+import httpx
 from lxml import html


@@ -19,7 +27,7 @@ class CovidData(NamedTuple):
 def covid_stats(url: str = "https://www.worldometers.info/coronavirus/") -> CovidData:
     xpath_str = '//div[@class = "maincounter-number"]/span/text()'
     return CovidData(
-        *html.fromstring(requests.get(url, timeout=10).content).xpath(xpath_str)
+        *html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str)
     )


diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
index cb75d450ff82..0ae13792e048 100644
--- a/web_programming/crawl_google_results.py
+++ b/web_programming/crawl_google_results.py
@@ -1,14 +1,28 @@
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "beautifulsoup4",
+#     "fake-useragent",
+#     "httpx",
+# ]
+# ///
+
 import sys
 import webbrowser

-import requests
+import httpx
 from bs4 import BeautifulSoup
 from fake_useragent import UserAgent

 if __name__ == "__main__":
     print("Googling.....")
     url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])
-    res = requests.get(url, headers={"UserAgent": UserAgent().random}, timeout=10)
+    res = httpx.get(
+        url,
+        headers={"UserAgent": UserAgent().random},
+        timeout=10,
+        follow_redirects=True,
+    )
     # res.raise_for_status()
     with open("project1a.html", "wb") as out_file:  # only for knowing the class
         for data in res.iter_content(10000):
diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
index 5f2ccad5f414..77d7be00c7b6 100644
--- a/web_programming/crawl_google_scholar_citation.py
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -3,7 +3,15 @@
 using title and year of publication, and volume and pages of journal.
 """

-import requests
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "beautifulsoup4",
+#     "httpx",
+# ]
+# ///
+
+import httpx
 from bs4 import BeautifulSoup


@@ -12,7 +20,7 @@ def get_citation(base_url: str, params: dict) -> str:
     Return the citation number.
""" soup = BeautifulSoup( - requests.get(base_url, params=params, timeout=10).content, "html.parser" + httpx.get(base_url, params=params, timeout=10).content, "html.parser" ) div = soup.find("div", attrs={"class": "gs_ri"}) anchors = div.find("div", attrs={"class": "gs_fl"}).find_all("a") diff --git a/web_programming/currency_converter.py b/web_programming/currency_converter.py index 9623504b89ea..a007c9c35a65 100644 --- a/web_programming/currency_converter.py +++ b/web_programming/currency_converter.py @@ -3,9 +3,16 @@ https://www.amdoren.com """ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + import os -import requests +import httpx URL_BASE = "https://www.amdoren.com/api/currency.php" @@ -176,7 +183,7 @@ def convert_currency( params = locals() # from is a reserved keyword params["from"] = params.pop("from_") - res = requests.get(URL_BASE, params=params, timeout=10).json() + res = httpx.get(URL_BASE, params=params, timeout=10).json() return str(res["amount"]) if res["error"] == 0 else res["error_message"] diff --git a/web_programming/current_stock_price.py b/web_programming/current_stock_price.py index 573e1f575c8e..16b0b6772a9c 100644 --- a/web_programming/current_stock_price.py +++ b/web_programming/current_stock_price.py @@ -1,4 +1,12 @@ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + +import httpx from bs4 import BeautifulSoup """ @@ -20,8 +28,8 @@ def stock_price(symbol: str = "AAPL") -> str: True """ url = f"https://finance.yahoo.com/quote/{symbol}?p={symbol}" - yahoo_finance_source = requests.get( - url, headers={"USER-AGENT": "Mozilla/5.0"}, timeout=10 + yahoo_finance_source = httpx.get( + url, headers={"USER-AGENT": "Mozilla/5.0"}, timeout=10, follow_redirects=True ).text soup = BeautifulSoup(yahoo_finance_source, "html.parser") diff --git a/web_programming/current_weather.py b/web_programming/current_weather.py index 4a8fa5e3c845..001eaf9020f4 100644 --- a/web_programming/current_weather.py +++ b/web_programming/current_weather.py @@ -1,4 +1,11 @@ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + +import httpx # Put your API key(s) here OPENWEATHERMAP_API_KEY = "" @@ -19,13 +26,13 @@ def current_weather(location: str) -> list[dict]: weather_data = [] if OPENWEATHERMAP_API_KEY: params_openweathermap = {"q": location, "appid": OPENWEATHERMAP_API_KEY} - response_openweathermap = requests.get( + response_openweathermap = httpx.get( OPENWEATHERMAP_URL_BASE, params=params_openweathermap, timeout=10 ) weather_data.append({"OpenWeatherMap": response_openweathermap.json()}) if WEATHERSTACK_API_KEY: params_weatherstack = {"query": location, "access_key": WEATHERSTACK_API_KEY} - response_weatherstack = requests.get( + response_weatherstack = httpx.get( WEATHERSTACK_URL_BASE, params=params_weatherstack, timeout=10 ) weather_data.append({"Weatherstack": response_weatherstack.json()}) diff --git a/web_programming/daily_horoscope.py b/web_programming/daily_horoscope.py index 75e637d8e52c..b84ea13238cc 100644 --- a/web_programming/daily_horoscope.py +++ b/web_programming/daily_horoscope.py @@ -1,4 +1,12 @@ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + +import httpx from bs4 import BeautifulSoup @@ -7,7 +15,7 @@ def horoscope(zodiac_sign: int, day: str) -> str: "https://www.horoscope.com/us/horoscopes/general/" 
f"horoscope-general-daily-{day}.aspx?sign={zodiac_sign}" ) - soup = BeautifulSoup(requests.get(url, timeout=10).content, "html.parser") + soup = BeautifulSoup(httpx.get(url, timeout=10).content, "html.parser") return soup.find("div", class_="main-horoscope").p.text diff --git a/web_programming/download_images_from_google_query.py b/web_programming/download_images_from_google_query.py index 235cd35763ef..659cf6a398a3 100644 --- a/web_programming/download_images_from_google_query.py +++ b/web_programming/download_images_from_google_query.py @@ -1,10 +1,18 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + import json import os import re import sys import urllib.request -import requests +import httpx from bs4 import BeautifulSoup headers = { @@ -39,7 +47,7 @@ def download_images_from_google_query(query: str = "dhaka", max_images: int = 5) "ijn": "0", } - html = requests.get( + html = httpx.get( "https://www.google.com/search", params=params, headers=headers, timeout=10 ) soup = BeautifulSoup(html.text, "html.parser") diff --git a/web_programming/emails_from_url.py b/web_programming/emails_from_url.py index d41dc4893608..3fa24085df95 100644 --- a/web_programming/emails_from_url.py +++ b/web_programming/emails_from_url.py @@ -1,5 +1,12 @@ """Get the site emails from URL.""" +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + from __future__ import annotations __author__ = "Muhammad Umer Farooq" @@ -13,7 +20,7 @@ from html.parser import HTMLParser from urllib import parse -import requests +import httpx class Parser(HTMLParser): @@ -72,7 +79,7 @@ def emails_from_url(url: str = "https://github.com") -> list[str]: try: # Open URL - r = requests.get(url, timeout=10) + r = httpx.get(url, timeout=10, follow_redirects=True) # pass the raw HTML to the parser to get links parser.feed(r.text) @@ -81,9 +88,15 @@ def emails_from_url(url: str = "https://github.com") -> list[str]: valid_emails = set() for link in parser.urls: # open URL. - # read = requests.get(link) + # Check if the link is already absolute + if not link.startswith("http://") and not link.startswith("https://"): + # Prepend protocol only if link starts with domain, normalize otherwise + if link.startswith(domain): + link = f"https://{link}" + else: + link = parse.urljoin(f"https://{domain}", link) try: - read = requests.get(link, timeout=10) + read = httpx.get(link, timeout=10, follow_redirects=True) # Get the valid email. emails = re.findall("[a-zA-Z0-9]+@" + domain, read.text) # If not in list then append it. diff --git a/web_programming/fetch_anime_and_play.py b/web_programming/fetch_anime_and_play.py index e56b7124eeb5..616116595697 100644 --- a/web_programming/fetch_anime_and_play.py +++ b/web_programming/fetch_anime_and_play.py @@ -1,8 +1,17 @@ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "fake-useragent", +# "httpx", +# ] +# /// + +import httpx from bs4 import BeautifulSoup, NavigableString, Tag from fake_useragent import UserAgent -BASE_URL = "https://ww1.gogoanime2.org" +BASE_URL = "https://ww7.gogoanime2.org" def search_scraper(anime_name: str) -> list: @@ -25,9 +34,9 @@ def search_scraper(anime_name: str) -> list: """ # concat the name to form the search url. 
- search_url = f"{BASE_URL}/search/{anime_name}" + search_url = f"{BASE_URL}/search?keyword={anime_name}" - response = requests.get( + response = httpx.get( search_url, headers={"UserAgent": UserAgent().chrome}, timeout=10 ) # request the url. @@ -82,7 +91,7 @@ def search_anime_episode_list(episode_endpoint: str) -> list: request_url = f"{BASE_URL}{episode_endpoint}" - response = requests.get( + response = httpx.get( url=request_url, headers={"UserAgent": UserAgent().chrome}, timeout=10 ) response.raise_for_status() @@ -133,7 +142,7 @@ def get_anime_episode(episode_endpoint: str) -> list: episode_page_url = f"{BASE_URL}{episode_endpoint}" - response = requests.get( + response = httpx.get( url=episode_page_url, headers={"User-Agent": UserAgent().chrome}, timeout=10 ) response.raise_for_status() diff --git a/web_programming/fetch_bbc_news.py b/web_programming/fetch_bbc_news.py index e5cd864a9d83..f3121fee7db4 100644 --- a/web_programming/fetch_bbc_news.py +++ b/web_programming/fetch_bbc_news.py @@ -1,13 +1,20 @@ # Created by sarathkaul on 12/11/19 -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + +import httpx _NEWS_API = "https://newsapi.org/v1/articles?source=bbc-news&sortBy=top&apiKey=" def fetch_bbc_news(bbc_news_api_key: str) -> None: # fetching a list of articles in json format - bbc_news_page = requests.get(_NEWS_API + bbc_news_api_key, timeout=10).json() + bbc_news_page = httpx.get(_NEWS_API + bbc_news_api_key, timeout=10).json() # each article in the list is a dict for i, article in enumerate(bbc_news_page["articles"], 1): print(f"{i}.) {article['title']}") diff --git a/web_programming/fetch_github_info.py b/web_programming/fetch_github_info.py index 25d44245bb58..1d3d344e177e 100644 --- a/web_programming/fetch_github_info.py +++ b/web_programming/fetch_github_info.py @@ -18,12 +18,19 @@ export USER_TOKEN="" """ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + from __future__ import annotations import os from typing import Any -import requests +import httpx BASE_URL = "https://api.github.com" @@ -36,13 +43,13 @@ def fetch_github_info(auth_token: str) -> dict[Any, Any]: """ - Fetch GitHub info of a user using the requests module + Fetch GitHub info of a user using the httpx module """ headers = { "Authorization": f"token {auth_token}", "Accept": "application/vnd.github.v3+json", } - return requests.get(AUTHENTICATED_USER_ENDPOINT, headers=headers, timeout=10).json() + return httpx.get(AUTHENTICATED_USER_ENDPOINT, headers=headers, timeout=10).json() if __name__ == "__main__": # pragma: no cover diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py index 3753d25bbe5f..7a95f997078d 100644 --- a/web_programming/fetch_jobs.py +++ b/web_programming/fetch_jobs.py @@ -2,20 +2,26 @@ Scraping jobs given job title and location from indeed website """ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + from __future__ import annotations from collections.abc import Generator -import requests +import httpx from bs4 import BeautifulSoup url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=" def fetch_jobs(location: str = "mumbai") -> Generator[tuple[str, str]]: - soup = BeautifulSoup( - requests.get(url + location, timeout=10).content, "html.parser" - ) + soup = BeautifulSoup(httpx.get(url + location, timeout=10).content, "html.parser") # This attribute finds out all the specifics listed in a job for job in 
soup.find_all("div", attrs={"data-tn-component": "organicJob"}): job_title = job.find("a", attrs={"data-tn-element": "jobTitle"}).text.strip() diff --git a/web_programming/fetch_quotes.py b/web_programming/fetch_quotes.py index cf0add43f002..38113c888d89 100644 --- a/web_programming/fetch_quotes.py +++ b/web_programming/fetch_quotes.py @@ -6,19 +6,26 @@ https://zenquotes.io/ """ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + import pprint -import requests +import httpx API_ENDPOINT_URL = "https://zenquotes.io/api" def quote_of_the_day() -> list: - return requests.get(API_ENDPOINT_URL + "/today", timeout=10).json() + return httpx.get(API_ENDPOINT_URL + "/today", timeout=10).json() def random_quotes() -> list: - return requests.get(API_ENDPOINT_URL + "/random", timeout=10).json() + return httpx.get(API_ENDPOINT_URL + "/random", timeout=10).json() if __name__ == "__main__": diff --git a/web_programming/get_amazon_product_data.py b/web_programming/get_amazon_product_data.py index b98ff2c030af..02db708e02bf 100644 --- a/web_programming/get_amazon_product_data.py +++ b/web_programming/get_amazon_product_data.py @@ -4,9 +4,18 @@ information will include title, URL, price, ratings, and the discount available. """ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# "pandas", +# ] +# /// + from itertools import zip_longest -import requests +import httpx from bs4 import BeautifulSoup from pandas import DataFrame @@ -25,7 +34,7 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: "Accept-Language": "en-US, en;q=0.5", } soup = BeautifulSoup( - requests.get(url, headers=header, timeout=10).text, features="lxml" + httpx.get(url, headers=header, timeout=10).text, features="lxml" ) # Initialize a Pandas dataframe with the column titles data_frame = DataFrame( diff --git a/web_programming/get_imdb_top_250_movies_csv.py b/web_programming/get_imdb_top_250_movies_csv.py index c914b29cb3b3..4cd29f0e1426 100644 --- a/web_programming/get_imdb_top_250_movies_csv.py +++ b/web_programming/get_imdb_top_250_movies_csv.py @@ -1,16 +1,24 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + from __future__ import annotations import csv -import requests +import httpx from bs4 import BeautifulSoup def get_imdb_top_250_movies(url: str = "") -> dict[str, float]: url = url or "https://www.imdb.com/chart/top/?ref_=nv_mv_250" - soup = BeautifulSoup(requests.get(url, timeout=10).text, "html.parser") - titles = soup.find_all("td", attrs="titleColumn") - ratings = soup.find_all("td", class_="ratingColumn imdbRating") + soup = BeautifulSoup(httpx.get(url, timeout=10).text, "html.parser") + titles = soup.find_all("h3", class_="ipc-title__text") + ratings = soup.find_all("span", class_="ipc-rating-star--rating") return { title.a.text: float(rating.strong.text) for title, rating in zip(titles, ratings) diff --git a/web_programming/get_imdbtop.py.DISABLED b/web_programming/get_imdbtop.py.DISABLED index 5f7105f83239..35ba2d5edfcf 100644 --- a/web_programming/get_imdbtop.py.DISABLED +++ b/web_programming/get_imdbtop.py.DISABLED @@ -1,5 +1,13 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + import bs4 -import requests +import httpx def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]: @@ -35,7 +43,7 @@ def get_imdb_top_movies(num_movies: int = 5) -> tuple: 
"https://www.imdb.com/search/title?title_type=" f"feature&sort=num_votes,desc&count={num_movies}" ) - source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser") + source = bs4.BeautifulSoup(httpx.get(base_url).content, "html.parser") return tuple( get_movie_data_from_soup(movie) for movie in source.find_all("div", class_="lister-item mode-advanced") diff --git a/web_programming/get_ip_geolocation.py b/web_programming/get_ip_geolocation.py index 574d287f0db1..153655257324 100644 --- a/web_programming/get_ip_geolocation.py +++ b/web_programming/get_ip_geolocation.py @@ -1,4 +1,11 @@ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + +import httpx # Function to get geolocation data for an IP address @@ -8,7 +15,7 @@ def get_ip_geolocation(ip_address: str) -> str: url = f"https://ipinfo.io/{ip_address}/json" # Send a GET request to the API - response = requests.get(url, timeout=10) + response = httpx.get(url, timeout=10) # Check if the HTTP request was successful response.raise_for_status() @@ -23,7 +30,7 @@ def get_ip_geolocation(ip_address: str) -> str: location = "Location data not found." return location - except requests.exceptions.RequestException as e: + except httpx.RequestError as e: # Handle network-related exceptions return f"Request error: {e}" except ValueError as e: diff --git a/web_programming/get_top_billionaires.py b/web_programming/get_top_billionaires.py index 99f6e0be948a..b6f3bf0ca5be 100644 --- a/web_programming/get_top_billionaires.py +++ b/web_programming/get_top_billionaires.py @@ -3,9 +3,17 @@ This works for some of us but fails for others. """ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# "rich", +# ] +# /// + from datetime import UTC, date, datetime -import requests +import httpx from rich import box from rich import console as rich_console from rich import table as rich_table @@ -57,7 +65,7 @@ def get_forbes_real_time_billionaires() -> list[dict[str, int | str]]: Returns: List of top 10 realtime billionaires data. 
""" - response_json = requests.get(API_URL, timeout=10).json() + response_json = httpx.get(API_URL, timeout=10).json() return [ { "Name": person["personName"], diff --git a/web_programming/get_top_hn_posts.py b/web_programming/get_top_hn_posts.py index f5d4f874c6c6..7d6308ae70c1 100644 --- a/web_programming/get_top_hn_posts.py +++ b/web_programming/get_top_hn_posts.py @@ -1,11 +1,18 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + from __future__ import annotations -import requests +import httpx def get_hackernews_story(story_id: str) -> dict: url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json?print=pretty" - return requests.get(url, timeout=10).json() + return httpx.get(url, timeout=10).json() def hackernews_top_stories(max_stories: int = 10) -> list[dict]: @@ -13,7 +20,7 @@ def hackernews_top_stories(max_stories: int = 10) -> list[dict]: Get the top max_stories posts from HackerNews - https://news.ycombinator.com/ """ url = "https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty" - story_ids = requests.get(url, timeout=10).json()[:max_stories] + story_ids = httpx.get(url, timeout=10).json()[:max_stories] return [get_hackernews_story(story_id) for story_id in story_ids] diff --git a/web_programming/giphy.py b/web_programming/giphy.py index 2bf3e3ea9c0b..90f4e51a7242 100644 --- a/web_programming/giphy.py +++ b/web_programming/giphy.py @@ -1,5 +1,13 @@ #!/usr/bin/env python3 -import requests + +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + +import httpx giphy_api_key = "YOUR API KEY" # Can be fetched from https://developers.giphy.com/dashboard/ @@ -11,7 +19,7 @@ def get_gifs(query: str, api_key: str = giphy_api_key) -> list: """ formatted_query = "+".join(query.split()) url = f"https://api.giphy.com/v1/gifs/search?q={formatted_query}&api_key={api_key}" - gifs = requests.get(url, timeout=10).json()["data"] + gifs = httpx.get(url, timeout=10).json()["data"] return [gif["url"] for gif in gifs] diff --git a/web_programming/instagram_crawler.py b/web_programming/instagram_crawler.py index df62735fb328..68271c1c4643 100644 --- a/web_programming/instagram_crawler.py +++ b/web_programming/instagram_crawler.py @@ -1,9 +1,19 @@ #!/usr/bin/env python3 + +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "fake-useragent", +# "httpx", +# ] +# /// + from __future__ import annotations import json -import requests +import httpx from bs4 import BeautifulSoup from fake_useragent import UserAgent @@ -39,7 +49,7 @@ def get_json(self) -> dict: """ Return a dict of user information """ - html = requests.get(self.url, headers=headers, timeout=10).text + html = httpx.get(self.url, headers=headers, timeout=10).text scripts = BeautifulSoup(html, "html.parser").find_all("script") try: return extract_user_profile(scripts[4]) diff --git a/web_programming/instagram_pic.py b/web_programming/instagram_pic.py index 292cacc16c04..b6918f35515b 100644 --- a/web_programming/instagram_pic.py +++ b/web_programming/instagram_pic.py @@ -1,6 +1,14 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + from datetime import UTC, datetime -import requests +import httpx from bs4 import BeautifulSoup @@ -15,9 +23,9 @@ def download_image(url: str) -> str: A message indicating the result of the operation. 
""" try: - response = requests.get(url, timeout=10) + response = httpx.get(url, timeout=10) response.raise_for_status() - except requests.exceptions.RequestException as e: + except httpx.RequestError as e: return f"An error occurred during the HTTP request to {url}: {e!r}" soup = BeautifulSoup(response.text, "html.parser") @@ -30,13 +38,13 @@ def download_image(url: str) -> str: return f"Image URL not found in meta tag {image_meta_tag}." try: - image_data = requests.get(image_url, timeout=10).content - except requests.exceptions.RequestException as e: + image_data = httpx.get(image_url, timeout=10).content + except httpx.RequestError as e: return f"An error occurred during the HTTP request to {image_url}: {e!r}" if not image_data: return f"Failed to download the image from {image_url}." - file_name = f"{datetime.now(tz=UTC).astimezone():%Y-%m-%d_%H:%M:%S}.jpg" + file_name = f"{datetime.now(tz=UTC).astimezone():%Y-%m-%d_%H-%M-%S}.jpg" with open(file_name, "wb") as out_file: out_file.write(image_data) return f"Image downloaded and saved in the file {file_name}" diff --git a/web_programming/instagram_video.py b/web_programming/instagram_video.py index a4cddce25138..c6f72aef455a 100644 --- a/web_programming/instagram_video.py +++ b/web_programming/instagram_video.py @@ -1,17 +1,24 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + from datetime import UTC, datetime -import requests +import httpx def download_video(url: str) -> bytes: base_url = "https://downloadgram.net/wp-json/wppress/video-downloader/video?url=" - video_url = requests.get(base_url + url, timeout=10).json()[0]["urls"][0]["src"] - return requests.get(video_url, timeout=10).content + video_url = httpx.get(base_url + url, timeout=10) + return httpx.get(video_url, timeout=10).content if __name__ == "__main__": url = input("Enter Video/IGTV url: ").strip() - file_name = f"{datetime.now(tz=UTC).astimezone():%Y-%m-%d_%H:%M:%S}.mp4" + file_name = f"{datetime.now(tz=UTC).astimezone():%Y-%m-%d_%H-%M-%S}.mp4" with open(file_name, "wb") as fp: fp.write(download_video(url)) print(f"Done. 
Video saved to disk as {file_name}.") diff --git a/web_programming/nasa_data.py b/web_programming/nasa_data.py index 33a6406c52a6..1025d2b4ab5f 100644 --- a/web_programming/nasa_data.py +++ b/web_programming/nasa_data.py @@ -1,6 +1,11 @@ -import shutil +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// -import requests +import httpx def get_apod_data(api_key: str) -> dict: @@ -9,17 +14,17 @@ def get_apod_data(api_key: str) -> dict: Get your API Key from: https://api.nasa.gov/ """ url = "https://api.nasa.gov/planetary/apod" - return requests.get(url, params={"api_key": api_key}, timeout=10).json() + return httpx.get(url, params={"api_key": api_key}, timeout=10).json() def save_apod(api_key: str, path: str = ".") -> dict: apod_data = get_apod_data(api_key) img_url = apod_data["url"] img_name = img_url.split("/")[-1] - response = requests.get(img_url, stream=True, timeout=10) + response = httpx.get(img_url, timeout=10) with open(f"{path}/{img_name}", "wb+") as img_file: - shutil.copyfileobj(response.raw, img_file) + img_file.write(response.content) del response return apod_data @@ -29,7 +34,7 @@ def get_archive_data(query: str) -> dict: Get the data of a particular query from NASA archives """ url = "https://images-api.nasa.gov/search" - return requests.get(url, params={"q": query}, timeout=10).json() + return httpx.get(url, params={"q": query}, timeout=10).json() if __name__ == "__main__": diff --git a/web_programming/open_google_results.py b/web_programming/open_google_results.py index 52dd37d7b91a..bcc315a63686 100644 --- a/web_programming/open_google_results.py +++ b/web_programming/open_google_results.py @@ -1,8 +1,17 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "fake-useragent", +# "httpx", +# ] +# /// + import webbrowser from sys import argv from urllib.parse import parse_qs, quote -import requests +import httpx from bs4 import BeautifulSoup from fake_useragent import UserAgent @@ -13,16 +22,17 @@ url = f"https://www.google.com/search?q={query}&num=100" - res = requests.get( + res = httpx.get( url, headers={"User-Agent": str(UserAgent().random)}, timeout=10, ) + print(BeautifulSoup(res.text, "html.parser")) try: link = ( BeautifulSoup(res.text, "html.parser") - .find("div", attrs={"class": "yuRUbf"}) + .find("div") .find("a") .get("href") ) @@ -30,7 +40,7 @@ except AttributeError: link = parse_qs( BeautifulSoup(res.text, "html.parser") - .find("div", attrs={"class": "kCrYT"}) + .find("div") .find("a") .get("href") )["url"][0] diff --git a/web_programming/random_anime_character.py b/web_programming/random_anime_character.py index aed932866258..b94894e7ffe1 100644 --- a/web_programming/random_anime_character.py +++ b/web_programming/random_anime_character.py @@ -1,6 +1,15 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "fake-useragent", +# "httpx", +# ] +# /// + import os -import requests +import httpx from bs4 import BeautifulSoup from fake_useragent import UserAgent @@ -12,7 +21,7 @@ def save_image(image_url: str, image_title: str) -> None: """ Saves the image of anime character """ - image = requests.get(image_url, headers=headers, timeout=10) + image = httpx.get(image_url, headers=headers, timeout=10) with open(image_title, "wb") as file: file.write(image.content) @@ -22,7 +31,7 @@ def random_anime_character() -> tuple[str, str, str]: Returns the Title, Description, and Image Title of a random anime character . 
""" soup = BeautifulSoup( - requests.get(URL, headers=headers, timeout=10).text, "html.parser" + httpx.get(URL, headers=headers, timeout=10).text, "html.parser" ) title = soup.find("meta", attrs={"property": "og:title"}).attrs["content"] image_url = soup.find("meta", attrs={"property": "og:image"}).attrs["content"] diff --git a/web_programming/recaptcha_verification.py b/web_programming/recaptcha_verification.py index 168862204fa9..c3a53da0b3da 100644 --- a/web_programming/recaptcha_verification.py +++ b/web_programming/recaptcha_verification.py @@ -32,7 +32,14 @@ recaptcha verification. """ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + +import httpx try: from django.contrib.auth import authenticate, login @@ -56,7 +63,7 @@ def login_using_recaptcha(request): client_key = request.POST.get("g-recaptcha-response") # post recaptcha response to Google's recaptcha api - response = requests.post( + response = httpx.post( url, data={"secret": secret_key, "response": client_key}, timeout=10 ) # if the recaptcha api verified our keys diff --git a/web_programming/reddit.py b/web_programming/reddit.py index 6cc1a6b62009..863b44f60b16 100644 --- a/web_programming/reddit.py +++ b/web_programming/reddit.py @@ -1,6 +1,13 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + from __future__ import annotations -import requests +import httpx valid_terms = set( """approved_at_utc approved_by author_flair_background_color @@ -28,13 +35,14 @@ def get_subreddit_data( if invalid_search_terms := ", ".join(sorted(set(wanted_data) - valid_terms)): msg = f"Invalid search term: {invalid_search_terms}" raise ValueError(msg) - response = requests.get( - f"https://reddit.com/r/{subreddit}/{age}.json?limit={limit}", + response = httpx.get( + f"https://www.reddit.com/r/{subreddit}/{age}.json?limit={limit}", headers={"User-agent": "A random string"}, timeout=10, ) + response.raise_for_status() if response.status_code == 429: - raise requests.HTTPError(response=response) + raise httpx.HTTPError(response=response) data = response.json() if not wanted_data: diff --git a/web_programming/search_books_by_isbn.py b/web_programming/search_books_by_isbn.py index 6b69018e6639..02d096181fff 100644 --- a/web_programming/search_books_by_isbn.py +++ b/web_programming/search_books_by_isbn.py @@ -4,9 +4,16 @@ ISBN: https://en.wikipedia.org/wiki/International_Standard_Book_Number """ -from json import JSONDecodeError # Workaround for requests.exceptions.JSONDecodeError +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// -import requests +from json import JSONDecodeError + +import httpx def get_openlibrary_data(olid: str = "isbn/0140328726") -> dict: @@ -25,7 +32,9 @@ def get_openlibrary_data(olid: str = "isbn/0140328726") -> dict: if new_olid.count("/") != 1: msg = f"{olid} is not a valid Open Library olid" raise ValueError(msg) - return requests.get(f"https://openlibrary.org/{new_olid}.json", timeout=10).json() + return httpx.get( + f"https://openlibrary.org/{new_olid}.json", timeout=10, follow_redirects=True + ).json() def summarize_book(ol_book_data: dict) -> dict: @@ -36,8 +45,7 @@ def summarize_book(ol_book_data: dict) -> dict: "title": "Title", "publish_date": "Publish date", "authors": "Authors", - "number_of_pages": "Number of pages:", - "first_sentence": "First sentence", + "number_of_pages": "Number of pages", "isbn_10": "ISBN (10)", "isbn_13": "ISBN (13)", } @@ -45,7 +53,6 @@ def 
summarize_book(ol_book_data: dict) -> dict: data["Authors"] = [ get_openlibrary_data(author["key"])["name"] for author in data["Authors"] ] - data["First sentence"] = data["First sentence"]["value"] for key, value in data.items(): if isinstance(value, list): data[key] = ", ".join(value) @@ -71,5 +78,5 @@ def summarize_book(ol_book_data: dict) -> dict: try: book_summary = summarize_book(get_openlibrary_data(f"isbn/{isbn}")) print("\n".join(f"{key}: {value}" for key, value in book_summary.items())) - except JSONDecodeError: # Workaround for requests.exceptions.RequestException: + except JSONDecodeError: print(f"Sorry, there are no results for ISBN: {isbn}.") diff --git a/web_programming/slack_message.py b/web_programming/slack_message.py index d4d5658898ac..b00dac16688f 100644 --- a/web_programming/slack_message.py +++ b/web_programming/slack_message.py @@ -1,11 +1,18 @@ # Created by sarathkaul on 12/11/19 -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "httpx", +# ] +# /// + +import httpx def send_slack_message(message_body: str, slack_url: str) -> None: headers = {"Content-Type": "application/json"} - response = requests.post( + response = httpx.post( slack_url, json={"text": message_body}, headers=headers, timeout=10 ) if response.status_code != 200: diff --git a/web_programming/test_fetch_github_info.py b/web_programming/test_fetch_github_info.py index 2da97c782df7..2c433952fe10 100644 --- a/web_programming/test_fetch_github_info.py +++ b/web_programming/test_fetch_github_info.py @@ -1,6 +1,6 @@ import json -import requests +import httpx from .fetch_github_info import AUTHENTICATED_USER_ENDPOINT, fetch_github_info @@ -21,7 +21,7 @@ def mock_response(*args, **kwargs): assert "Accept" in kwargs["headers"] return FakeResponse(b'{"login":"test","id":1}') - monkeypatch.setattr(requests, "get", mock_response) + monkeypatch.setattr(httpx, "get", mock_response) result = fetch_github_info("token") assert result["login"] == "test" assert result["id"] == 1 diff --git a/web_programming/world_covid19_stats.py b/web_programming/world_covid19_stats.py index 4948d8cfd43c..99383577d627 100644 --- a/web_programming/world_covid19_stats.py +++ b/web_programming/world_covid19_stats.py @@ -5,19 +5,31 @@ This data is being scrapped from 'https://www.worldometers.info/coronavirus/'. """ -import requests +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "beautifulsoup4", +# "httpx", +# ] +# /// + +import httpx from bs4 import BeautifulSoup -def world_covid19_stats(url: str = "https://www.worldometers.info/coronavirus") -> dict: +def world_covid19_stats( + url: str = "https://www.worldometers.info/coronavirus/", +) -> dict: """ Return a dict of current worldwide COVID-19 statistics """ - soup = BeautifulSoup(requests.get(url, timeout=10).text, "html.parser") - keys = soup.findAll("h1") - values = soup.findAll("div", {"class": "maincounter-number"}) - keys += soup.findAll("span", {"class": "panel-title"}) - values += soup.findAll("div", {"class": "number-table-main"}) + soup = BeautifulSoup( + httpx.get(url, timeout=10, follow_redirects=True).text, "html.parser" + ) + keys = soup.find_all("h1") + values = soup.find_all("div", {"class": "maincounter-number"}) + keys += soup.find_all("span", {"class": "panel-title"}) + values += soup.find_all("div", {"class": "number-table-main"}) return {key.text.strip(): value.text.strip() for key, value in zip(keys, values)}
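
Note on the pattern applied throughout this diff: unlike requests, httpx does not follow redirects by default, which is why `follow_redirects=True` is added at call sites whose endpoints redirect, and the requests exception hierarchy maps to `httpx.RequestError` (transport failures) and `httpx.HTTPStatusError` (raised by `response.raise_for_status()`). A minimal sketch of the idiom, using a placeholder URL rather than any endpoint touched above:

    import httpx

    # httpx keeps redirect handling explicit; requests followed redirects by default.
    try:
        response = httpx.get("https://example.com", timeout=10, follow_redirects=True)
        response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
        print(response.text[:80])
    except httpx.RequestError as exc:  # DNS failures, timeouts, connection resets, ...
        print(f"Request error: {exc}")
    except httpx.HTTPStatusError as exc:
        print(f"Got status {exc.response.status_code} for {exc.request.url}")

The `# /// script` headers added to each file are PEP 723 inline script metadata, so a PEP 723-aware runner (for example `uv run web_programming/co2_emission.py`) can resolve httpx and the other listed dependencies without installing requirements.txt.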