Skip to content

Create emails_from_url.py #1756

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 26, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
0 emails found:
  • Loading branch information
cclauss authored Feb 26, 2020
commit df2938dc3a399469c354f261e4d1122e81ce7170
17 changes: 8 additions & 9 deletions web_programming/emails_from_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
__version__ = "1.0.0"
__maintainer__ = "Muhammad Umer Farooq"
__email__ = "contact@muhammadumerfarooq.me"
__status__ = "Production"
__status__ = "Alpha"

import re
import requests
from urllib import parse
from html.parser import HTMLParser
from urllib import parse


class Parser(HTMLParser):
Expand All @@ -20,8 +20,7 @@ def __init__(self, domain):

def handle_starttag(self, tag, attrs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add type hints as discussed in CONTRIBUTING.md.

"""
This function parses HTML to take URLs from
tags
This function parses HTML to take URLs from tags
"""
# Only parse the 'anchor' tag.
if tag == "a":
Expand All @@ -38,15 +37,15 @@ def handle_starttag(self, tag, attrs):
# Get main domain name (example.com)
def get_domain_name(url):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add type hints and doctests as discussed in CONTRIBUTING.md.

"""
This function gets the main domain name
This function gets the main domain name
"""
return ".".join(get_sub_domain_name(url).split(".")[-2:])


# Get sub domain name (sub.example.com)
def get_sub_domain_name(url):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add type hints and doctests as discussed in CONTRIBUTING.md.

"""
This function gets the subdomain name
This function gets the subdomain name
"""
return parse.urlparse(url).netloc

Expand All @@ -55,8 +54,6 @@ def emails_from_url(url: str = "https://github.com") -> list:
"""
This function takes url and return all valid urls
"""
# Get the url
url = "https://github.com"
# Get the base domain from the url
domain = get_domain_name(url)

Expand Down Expand Up @@ -98,4 +95,6 @@ def emails_from_url(url: str = "https://github.com") -> list:


if __name__ == "__main__":
emails_from_url("https://github.com")
emails = emails_from_url("https://github.com")
print(f"{len(emails)} emails found:")
print("\n".join(sorted(emails)))