From 79b5940eeb8d47a32e51063e253d03a87763e506 Mon Sep 17 00:00:00 2001
From: dtuffin <36351452+dtuffin@users.noreply.github.com>
Date: Thu, 8 Feb 2024 06:01:19 -0600
Subject: [PATCH 1/2] Create port-scanner.py

A basic port scanner that tests a single port across a range of IP
addresses and logs any open ports to open_ports.txt.

---
 port-scanner.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 port-scanner.py

diff --git a/port-scanner.py b/port-scanner.py
new file mode 100644
index 000000000..48cb669e8
--- /dev/null
+++ b/port-scanner.py
@@ -0,0 +1,40 @@
+import socket
+
+def scan_port(ip, port):
+    try:
+        # Create a TCP socket object
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        # Set a timeout for the connection attempt
+        sock.settimeout(1)
+        # Attempt to connect to the specified IP and port
+        sock.connect((ip, port))
+        # If successful, mark the port as open
+        print(f"Port {port} is open on {ip}")
+        # Log the IP and port in a text file
+        with open("open_ports.txt", "a") as file:
+            file.write(f"{ip}:{port}\n")
+        # Close the socket
+        sock.close()
+    except socket.error:
+        # If the connection attempt fails, mark the port as closed
+        print(f"Port {port} is closed on {ip}")
+
+
+def scan_ip_range(start_ip, end_ip, port):
+    # Convert the dotted-quad start and end IPs to 32-bit integers
+    start_ip_int = sum(int(octet) << (8 * (3 - i)) for i, octet in enumerate(start_ip.split('.')))
+    end_ip_int = sum(int(octet) << (8 * (3 - i)) for i, octet in enumerate(end_ip.split('.')))
+
+    # Loop through the IP range and scan each IP for the specified port
+    for ip_int in range(start_ip_int, end_ip_int + 1):
+        ip = '.'.join(str((ip_int >> (8 * i)) & 0xFF) for i in range(3, -1, -1))
+        scan_port(ip, port)
+
+if __name__ == "__main__":
+    # Get user input for IP range and port
+    start_ip = input("Enter starting IP: ")
+    end_ip = input("Enter ending IP: ")
+    port = int(input("Enter port to scan: "))
+
+    # Call the function to scan the IP range for the specified port
+    scan_ip_range(start_ip, end_ip, port)

From 9fa64e20be83b871f5a91662b38a190710024d44 Mon Sep 17 00:00:00 2001
From: dtuffin <36351452+dtuffin@users.noreply.github.com>
Date: Fri, 9 Feb 2024 04:16:00 -0600
Subject: [PATCH 2/2] simple-web-scraper

When run, the program asks for a website URL, crawls up to 10 pages of
the site, and saves the links and images it finds to a file named
index.json.

---
 web-scraper.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 web-scraper.py

diff --git a/web-scraper.py b/web-scraper.py
new file mode 100644
index 000000000..368328084
--- /dev/null
+++ b/web-scraper.py
@@ -0,0 +1,75 @@
+# Remember to install the dependencies first:
+# pip install requests beautifulsoup4
+
+import requests
+from bs4 import BeautifulSoup
+import json
+
+class WebCrawler:
+    def __init__(self, base_url):
+        self.base_url = base_url
+        self.visited_links = set()
+        self.links = []
+        self.images = []
+
+    def get_page_content(self, url):
+        try:
+            response = requests.get(url)
+            if response.status_code == 200:
+                return response.text
+            else:
+                print(f"Failed to fetch {url}. Status code: {response.status_code}")
+        except Exception as e:
+            print(f"Error fetching {url}: {e}")
+        return None
+
+    def extract_links_and_images(self, html_content):
+        if html_content:
+            soup = BeautifulSoup(html_content, 'html.parser')
+            for link in soup.find_all('a', href=True):
+                self.links.append(link['href'])
+            for img in soup.find_all('img', src=True):
+                self.images.append(img['src'])
+
+    def crawl_page(self, url):
+        if url not in self.visited_links:
+            print(f"Crawling: {url}")
+            html_content = self.get_page_content(url)
+            if html_content:
+                self.extract_links_and_images(html_content)
+                self.visited_links.add(url)
+
+    def save_results_to_json(self):
+        results = {
+            "links": list(set(self.links)),
+            "images": list(set(self.images))
+        }
+        with open("index.json", "w") as index_file:
+            json.dump(results, index_file, indent=2)
+        print("Results saved to index.json")
+
+    def crawl_site(self, max_pages=10):
+        queue = [self.base_url]
+
+        while queue and len(self.visited_links) < max_pages:
+            current_url = queue.pop(0)
+            self.crawl_page(current_url)
+
+            # Extract links from the current page and add them to the queue
+            html_content = self.get_page_content(current_url)
+            if html_content:
+                soup = BeautifulSoup(html_content, 'html.parser')
+                for link in soup.find_all('a', href=True):
+                    absolute_url = link['href']
+                    if absolute_url.startswith(self.base_url) and absolute_url not in self.visited_links:
+                        queue.append(absolute_url)
+
+        self.save_results_to_json()
+
+if __name__ == '__main__':
+    # Prompt for the target website URL, e.g. https://example.com
+    base_url = input("Enter the website URL: ")
+
+    # Create an instance of WebCrawler and crawl the site (adjust max_pages as needed)
+    web_crawler = WebCrawler(base_url)
+    web_crawler.crawl_site(max_pages=10)
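
Usage (a minimal sketch, assuming Python 3 with pip available; the exact
interpreter and pip commands may differ on your system):

    pip install requests beautifulsoup4
    python3 port-scanner.py   # prompts for a starting IP, an ending IP, and a port
    python3 web-scraper.py    # prompts for a website URL and writes index.json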