-
-
Notifications
You must be signed in to change notification settings - Fork 47.2k
Create emails_from_url.py #1756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
0f96b74
090cd66
f86fd16
df2938d
13ec4e5
a0af0fd
47d15bb
f0f0118
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,12 +4,12 @@ | |
__version__ = "1.0.0" | ||
__maintainer__ = "Muhammad Umer Farooq" | ||
__email__ = "contact@muhammadumerfarooq.me" | ||
__status__ = "Production" | ||
__status__ = "Alpha" | ||
|
||
import re | ||
import requests | ||
from urllib import parse | ||
from html.parser import HTMLParser | ||
from urllib import parse | ||
|
||
|
||
class Parser(HTMLParser): | ||
|
@@ -20,8 +20,7 @@ def __init__(self, domain): | |
|
||
def handle_starttag(self, tag, attrs): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add type hints as discussed in CONTRIBUTING.md. |
||
""" | ||
This function parse html to take takes url from | ||
tags | ||
This function parse html to take takes url from tags | ||
""" | ||
# Only parse the 'anchor' tag. | ||
if tag == "a": | ||
|
@@ -38,15 +37,15 @@ def handle_starttag(self, tag, attrs): | |
# Get main domain name (example.com) | ||
def get_domain_name(url): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add type hints and doctests as discussed in CONTRIBUTING.md. |
||
""" | ||
This function get the main domain name | ||
This function get the main domain name | ||
""" | ||
return ".".join(get_sub_domain_name(url).split(".")[-2:]) | ||
|
||
|
||
# Get sub domain name (sub.example.com) | ||
def get_sub_domain_name(url): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add type hints and doctests as discussed in CONTRIBUTING.md. |
||
""" | ||
This function get sub domin name | ||
This function get sub domin name | ||
""" | ||
return parse.urlparse(url).netloc | ||
|
||
|
@@ -55,8 +54,6 @@ def emails_from_url(url: str = "https://github.com") -> list: | |
""" | ||
This function takes url and return all valid urls | ||
""" | ||
# Get the url | ||
url = "https://github.com" | ||
# Get the base domain from the url | ||
domain = get_domain_name(url) | ||
|
||
|
@@ -98,4 +95,6 @@ def emails_from_url(url: str = "https://github.com") -> list: | |
|
||
|
||
if __name__ == "__main__": | ||
emails_from_url("https://github.com") | ||
emails = emails_from_url("https://github.com") | ||
print(f"{len(emails)} emails found:") | ||
print("\n".join(sorted(emails))) |
Uh oh!
There was an error while loading. Please reload this page.