From 79b5940eeb8d47a32e51063e253d03a87763e506 Mon Sep 17 00:00:00 2001
From: dtuffin <36351452+dtuffin@users.noreply.github.com>
Date: Thu, 8 Feb 2024 06:01:19 -0600
Subject: [PATCH 1/2] Create port-scanner.py

A basic port scanner that tests a single port across a range of IP
addresses and logs any open ports to open_ports.txt.

---
 port-scanner.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 port-scanner.py

diff --git a/port-scanner.py b/port-scanner.py
new file mode 100644
index 000000000..48cb669e8
--- /dev/null
+++ b/port-scanner.py
@@ -0,0 +1,40 @@
+import socket
+
+def scan_port(ip, port):
+    try:
+        # Create a TCP socket object
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        # Set a timeout for the connection attempt
+        sock.settimeout(1)
+        # Attempt to connect to the specified IP and port
+        sock.connect((ip, port))
+        # If successful, mark the port as open
+        print(f"Port {port} is open on {ip}")
+        # Log the IP and port in a text file
+        with open("open_ports.txt", "a") as file:
+            file.write(f"{ip}:{port}\n")
+        # Close the socket
+        sock.close()
+    except socket.error:
+        # If the connection attempt fails, mark the port as closed
+        print(f"Port {port} is closed on {ip}")
+
+
+def scan_ip_range(start_ip, end_ip, port):
+    # Convert the dotted-quad start and end IPs to 32-bit integers
+    start_ip_int = sum(int(octet) << (8 * (3 - i)) for i, octet in enumerate(start_ip.split('.')))
+    end_ip_int = sum(int(octet) << (8 * (3 - i)) for i, octet in enumerate(end_ip.split('.')))
+
+    # Loop through the IP range and scan each IP for the specified port
+    for ip_int in range(start_ip_int, end_ip_int + 1):
+        ip = '.'.join(str((ip_int >> (8 * i)) & 0xFF) for i in range(3, -1, -1))
+        scan_port(ip, port)
+
+if __name__ == "__main__":
+    # Get user input for IP range and port
+    start_ip = input("Enter starting IP: ")
+    end_ip = input("Enter ending IP: ")
+    port = int(input("Enter port to scan: "))
+
+    # Call the function to scan the IP range for the specified port
+    scan_ip_range(start_ip, end_ip, port)

From 9fa64e20be83b871f5a91662b38a190710024d44 Mon Sep 17 00:00:00 2001
From: dtuffin <36351452+dtuffin@users.noreply.github.com>
Date: Fri, 9 Feb 2024 04:16:00 -0600
Subject: [PATCH 2/2] simple-web-scraper

When run, the program asks for a website URL, crawls up to 10 pages of
the site, and saves the links and images it finds to a file named
index.json.

---
 web-scraper.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 web-scraper.py

diff --git a/web-scraper.py b/web-scraper.py
new file mode 100644
index 000000000..368328084
--- /dev/null
+++ b/web-scraper.py
@@ -0,0 +1,75 @@
+# Remember to install the dependencies first:
+# pip install requests beautifulsoup4
+
+import requests
+from bs4 import BeautifulSoup
+import json
+
+class WebCrawler:
+    def __init__(self, base_url):
+        self.base_url = base_url
+        self.visited_links = set()
+        self.links = []
+        self.images = []
+
+    def get_page_content(self, url):
+        try:
+            response = requests.get(url)
+            if response.status_code == 200:
+                return response.text
+            else:
+                print(f"Failed to fetch {url}. Status code: {response.status_code}")
+        except Exception as e:
+            print(f"Error fetching {url}: {e}")
+        return None
+
+    def extract_links_and_images(self, html_content):
+        if html_content:
+            soup = BeautifulSoup(html_content, 'html.parser')
+            for link in soup.find_all('a', href=True):
+                self.links.append(link['href'])
+            for img in soup.find_all('img', src=True):
+                self.images.append(img['src'])
+
+    def crawl_page(self, url):
+        if url not in self.visited_links:
+            print(f"Crawling: {url}")
+            html_content = self.get_page_content(url)
+            if html_content:
+                self.extract_links_and_images(html_content)
+                self.visited_links.add(url)
+
+    def save_results_to_json(self):
+        results = {
+            "links": list(set(self.links)),
+            "images": list(set(self.images))
+        }
+        with open("index.json", "w") as index_file:
+            json.dump(results, index_file, indent=2)
+        print("Results saved to index.json")
+
+    def crawl_site(self, max_pages=10):
+        queue = [self.base_url]
+
+        while queue and len(self.visited_links) < max_pages:
+            current_url = queue.pop(0)
+            self.crawl_page(current_url)
+
+            # Extract links from the current page and add them to the queue
+            html_content = self.get_page_content(current_url)
+            if html_content:
+                soup = BeautifulSoup(html_content, 'html.parser')
+                for link in soup.find_all('a', href=True):
+                    absolute_url = link['href']
+                    if absolute_url.startswith(self.base_url) and absolute_url not in self.visited_links:
+                        queue.append(absolute_url)
+
+        self.save_results_to_json()
+
+if __name__ == '__main__':
+    # Prompt for the target website URL, e.g. https://example.com
+    base_url = input("Enter the website URL: ")
+
+    # Create an instance of WebCrawler and crawl the site (adjust max_pages as needed)
+    web_crawler = WebCrawler(base_url)
+    web_crawler.crawl_site(max_pages=10)
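
Usage (a minimal sketch, assuming Python 3 with pip available; the exact
interpreter and pip commands may differ on your system):

    pip install requests beautifulsoup4
    python3 port-scanner.py   # prompts for a starting IP, an ending IP, and a port
    python3 web-scraper.py    # prompts for a website URL and writes index.json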