
Commit 5b65518

Merge pull request #81 from ipa-lab/development
merge the current development branch into master

2 parents 4ea46ea + aafabf1
39 files changed: +2422 −659 lines

.github/workflows/python-app.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -5,9 +5,9 @@ name: Python application

 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "development" ]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "development" ]

 permissions:
   contents: read
```

.gitignore

Lines changed: 3 additions & 0 deletions

```diff
@@ -11,3 +11,6 @@ src/hackingBuddyGPT.egg-info/
 build/
 dist/
 .coverage
+src/hackingBuddyGPT/usecases/web_api_testing/openapi_spec/
+src/hackingBuddyGPT/usecases/web_api_testing/converted_files/
+/src/hackingBuddyGPT/usecases/web_api_testing/utils/openapi_spec/
```

README.md

Lines changed: 27 additions & 15 deletions

```diff
@@ -8,11 +8,18 @@ HackingBuddyGPT helps security researchers use LLMs to discover new attack vecto

 We aim to become **THE go-to framework for security researchers** and pen-testers interested in using LLMs or LLM-based autonomous agents for security testing. To aid their experiments, we also offer re-usable [linux priv-esc benchmarks](https://github.com/ipa-lab/benchmark-privesc-linux) and publish all our findings as open-access reports.

-How can LLMs aid or even emulate hackers? Threat actors are [already using LLMs](https://arxiv.org/abs/2307.00691), to better protect against this new threat we must learn more about LLMs' capabilities and help blue teams preparing for them.
+If you want to use hackingBuddyGPT and need help selecting the best LLM for your tasks, [we have a paper comparing multiple LLMs](https://arxiv.org/abs/2310.11409).

-**[Join us](https://discord.gg/vr4PhSM8yN) / Help us, more people need to be involved in the future of LLM-assisted pen-testing:**
+## hackingBuddyGPT in the News

-To ground our research in reality, we performed a comprehensive analysis into [understanding hackers' work](https://arxiv.org/abs/2308.07057). There seems to be a mismatch between some academic research and the daily work of penetration testers, please help us to create more visibility for this issue by citing this paper (if suitable and fitting).
+- **upcoming** 2024-11-20: [Manuel Reinsperger](https://www.github.com/neverbolt) will present hackingBuddyGPT at the [European Symposium on Security and Artificial Intelligence (ESSAI)](https://essai-conference.eu/)
+- 2024-07-26: The [GitHub Accelerator Showcase](https://github.blog/open-source/maintainers/github-accelerator-showcase-celebrating-our-second-cohort-and-whats-next/) features hackingBuddyGPT
+- 2024-07-24: [Juergen](https://github.com/citostyle) speaks at [Open Source + mezcal night @ GitHub HQ](https://lu.ma/bx120myg)
+- 2024-05-23: hackingBuddyGPT is part of [GitHub Accelerator 2024](https://github.blog/news-insights/company-news/2024-github-accelerator-meet-the-11-projects-shaping-open-source-ai/)
+- 2023-12-05: [Andreas](https://github.com/andreashappe) presented hackingBuddyGPT at FSE'23 in San Francisco ([paper](https://arxiv.org/abs/2308.00121), [video](https://2023.esec-fse.org/details/fse-2023-ideas--visions-and-reflections/9/Towards-Automated-Software-Security-Testing-Augmenting-Penetration-Testing-through-L))
+- 2023-09-20: [Andreas](https://github.com/andreashappe) presented preliminary results at [FIRST AI Security SIG](https://www.first.org/global/sigs/ai-security/)
+
+## Original Paper

 hackingBuddyGPT is described in [Getting pwn'd by AI: Penetration Testing with Large Language Models ](https://arxiv.org/abs/2308.00121), help us by citing it through:
```

```diff
@@ -29,7 +36,6 @@ hackingBuddyGPT is described in [Getting pwn'd by AI: Penetration Testing with L
 }
 ~~~

-
 ## Getting help

 If you need help or want to chat about using AI for security or education, please join our [discord server where we talk about all things AI + Offensive Security](https://discord.gg/vr4PhSM8yN)!
```
```diff
@@ -74,12 +80,10 @@ The following would create a new (minimal) linux privilege-escalation agent. Thr
 template_dir = pathlib.Path(__file__).parent
 template_next_cmd = Template(filename=str(template_dir / "next_cmd.txt"))

-@use_case("minimal_linux_privesc", "Showcase Minimal Linux Priv-Escalation")
-@dataclass
+
 class MinimalLinuxPrivesc(Agent):

     conn: SSHConnection = None
-
     _sliding_history: SlidingCliHistory = None

     def init(self):
```
```diff
@@ -89,28 +93,33 @@ class MinimalLinuxPrivesc(Agent):
         self.add_capability(SSHTestCredential(conn=self.conn))
         self._template_size = self.llm.count_tokens(template_next_cmd.source)

-    def perform_round(self, turn):
-        got_root : bool = False
+    def perform_round(self, turn: int) -> bool:
+        got_root: bool = False

-        with self.console.status("[bold green]Asking LLM for a new command..."):
+        with self._log.console.status("[bold green]Asking LLM for a new command..."):
             # get as much history as fits into the target context size
             history = self._sliding_history.get_history(self.llm.context_size - llm_util.SAFETY_MARGIN - self._template_size)

             # get the next command from the LLM
             answer = self.llm.get_response(template_next_cmd, capabilities=self.get_capability_block(), history=history, conn=self.conn)
             cmd = llm_util.cmd_output_fixer(answer.result)

-        with self.console.status("[bold green]Executing that command..."):
-            self.console.print(Panel(answer.result, title="[bold cyan]Got command from LLM:"))
-            result, got_root = self.get_capability(cmd.split(" ", 1)[0])(cmd)
+        with self._log.console.status("[bold green]Executing that command..."):
+            self._log.console.print(Panel(answer.result, title="[bold cyan]Got command from LLM:"))
+            result, got_root = self.get_capability(cmd.split(" ", 1)[0])(cmd)

         # log and output the command and its result
-        self.log_db.add_log_query(self._run_id, turn, cmd, result, answer)
+        self._log.log_db.add_log_query(self._log.run_id, turn, cmd, result, answer)
         self._sliding_history.add_command(cmd, result)
-        self.console.print(Panel(result, title=f"[bold cyan]{cmd}"))
+        self._log.console.print(Panel(result, title=f"[bold cyan]{cmd}"))

         # if we got root, we can stop the loop
         return got_root
+
+
+@use_case("Showcase Minimal Linux Priv-Escalation")
+class MinimalLinuxPrivescUseCase(AutonomousAgentUseCase[MinimalLinuxPrivesc]):
+    pass
 ~~~

 The corresponding `next_cmd.txt` template would be:
```
```diff
@@ -170,6 +179,9 @@ wintermute.py: error: the following arguments are required: {linux_privesc,windo

 # start wintermute, i.e., attack the configured virtual machine
 $ python wintermute.py minimal_linux_privesc
+
+# install dependencies for testing if you want to run the tests
+$ pip install .[testing]
 ~~~

 ## Publications about hackingBuddyGPT
```

pyproject.toml

Lines changed: 9 additions & 1 deletion

```diff
@@ -29,11 +29,14 @@ dependencies = [
     'requests == 2.32.0',
     'rich == 13.7.1',
     'tiktoken == 0.6.0',
-    'instructor == 1.2.2',
+    'instructor == 1.3.5',
     'PyYAML == 6.0.1',
     'python-dotenv == 1.0.1',
     'pypsexec == 0.3.0',
+    'pydantic == 2.8.2',
     'openai == 1.28.0',
+    'BeautifulSoup4',
+    'nltk'
 ]

 [project.urls]
```

```diff
@@ -54,6 +57,11 @@ pythonpath = "src"
 addopts = [
     "--import-mode=importlib",
 ]
+[project.optional-dependencies]
+testing = [
+    'pytest',
+    'pytest-mock'
+]

 [project.scripts]
 wintermute = "hackingBuddyGPT.cli.wintermute:main"
```

src/hackingBuddyGPT/capabilities/http_request.py

Lines changed: 10 additions & 1 deletion

```diff
@@ -41,7 +41,16 @@ def __call__(self,
                  ) -> str:
         if body is not None and body_is_base64:
             body = base64.b64decode(body).decode()
-
+        if self.host[-1] != "/":
+            path = "/" + path
+        resp = self._client.request(
+            method,
+            self.host + path,
+            params=query,
+            data=body,
+            headers=headers,
+            allow_redirects=self.follow_redirects,
+        )
         try:
             resp = self._client.request(
                 method,
```
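The slash handling added above can be illustrated in isolation. A minimal sketch (the helper name `join_host_path` is hypothetical, not part of the commit), reproducing the exact guard from the diff, which only prepends a separator when the host lacks a trailing slash:

```python
def join_host_path(host: str, path: str) -> str:
    # Same guard as in the diff: add a separator only when the host does
    # not already end with "/". Note it does not collapse a double slash
    # if the path itself already starts with "/".
    if host[-1] != "/":
        path = "/" + path
    return host + path

print(join_host_path("http://example.com", "robots.txt"))
print(join_host_path("http://example.com/", "robots.txt"))
```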
Lines changed: 78 additions & 0 deletions

```diff
@@ -0,0 +1,78 @@
+import base64
+from dataclasses import dataclass, field
+from typing import Set, Dict, Callable, Literal, Optional
+import inspect
+
+import requests
+from pydantic import create_model, BaseModel
+
+from . import Capability
+
+
+@dataclass
+class SubmitHTTPMethod(Capability):
+    http_methods_format: str
+    valid_http_methods: Set[str]
+    _client = requests.Session()
+    host: str
+    follow_redirects: bool = False
+    success_function: Callable[[], None] = None
+
+    submitted_valid_http_methods: Set[str] = field(default_factory=set, init=False)
+
+    def describe(self) -> str:
+        return f"Submits a HTTP methods, which is the goal of all activities. The HTTP methods format is {self.http_methods_format}. If you find a flag following the that format, that you have not yet submitted, then the most important first step is to submit it, ignoring all other possibilities of further action"
+
+    def to_model(self) -> BaseModel:
+        """
+        Converts the parameters of the `__call__` function of the capability to a pydantic model, that can be used to
+        interface with an LLM using eg instructor or the openAI function calling API.
+        The model will have the same name as the capability class and will have the same fields as the `__call__`,
+        the `__call__` method can then be accessed by calling the `execute` method of the model.
+        """
+        sig = inspect.signature(self.__call__)
+        fields = {param: (param_info.annotation, ...) for param, param_info in sig.parameters.items()}
+        model_type = create_model(self.__class__.__name__, __doc__=self.describe(), **fields)
+
+        def execute(model):
+            m = model.dict()
+            return self(**m)
+
+        model_type.execute = execute
+
+        return model_type
+
+    def __call__(self, method: Literal["GET", "HEAD", "POST", "PUT", "DELETE", "OPTION", "PATCH"],
+                 path: str,
+                 query: Optional[str] = None,
+                 body: Optional[str] = None,
+                 body_is_base64: Optional[bool] = False,
+                 headers: Optional[Dict[str, str]] = None
+                 ) -> str:
+
+        if body is not None and body_is_base64:
+            body = base64.b64decode(body).decode()
+
+        resp = self._client.request(
+            method,
+            self.host + path,
+            params=query,
+            data=body,
+            headers=headers,
+            allow_redirects=self.follow_redirects,
+        )
+        try:
+            resp.raise_for_status()
+        except requests.exceptions.HTTPError as e:
+            return str(e)
+
+        headers = "\r\n".join(f"{k}: {v}" for k, v in resp.headers.items())
+        if len(self.submitted_valid_http_methods) == len(self.valid_http_methods):
+            if self.success_function is not None:
+                self.success_function()
+            else:
+                return "All methods submitted, congratulations"
+        # turn the response into "plain text format" for responding to the prompt
+        return f"HTTP/1.1 {resp.status_code} {resp.reason}\r\n{headers}\r\n\r\n{resp.text}"
```
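The `to_model` method above builds a function-calling schema from `__call__` using `inspect.signature` and `pydantic.create_model`. The extraction step can be sketched with the standard library alone (the helper and example names here are illustrative, not from the commit):

```python
import inspect
from typing import Dict, Optional


def signature_fields(fn) -> Dict[str, object]:
    # Mirror of the first step in to_model(): map each parameter name of
    # a callable to its annotation, ready to be handed to a schema builder
    # such as pydantic.create_model.
    sig = inspect.signature(fn)
    return {name: param.annotation for name, param in sig.parameters.items()}


def example_call(path: str, query: Optional[str] = None) -> str:
    # Stand-in for a capability's __call__ (illustrative only).
    return path


fields = signature_fields(example_call)
print(fields)
```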
Lines changed: 44 additions & 0 deletions

```diff
@@ -0,0 +1,44 @@
+from dataclasses import dataclass, field
+from typing import Tuple, List
+
+import yaml
+
+from . import Capability
+
+@dataclass
+class YAMLFile(Capability):
+
+    def describe(self) -> str:
+        return "Takes a Yaml file and updates it with the given information"
+
+    def __call__(self, yaml_str: str) -> str:
+        """
+        Updates a YAML string based on provided inputs and returns the updated YAML string.
+
+        Args:
+            yaml_str (str): Original YAML content in string form.
+            updates (dict): A dictionary representing the updates to be applied.
+
+        Returns:
+            str: Updated YAML content as a string.
+        """
+        try:
+            # Load the YAML content from string
+            data = yaml.safe_load(yaml_str)
+
+            print(f'Updates:{yaml_str}')
+
+            # Apply updates from the updates dictionary
+            #for key, value in updates.items():
+            #    if key in data:
+            #        data[key] = value
+            #    else:
+            #        print(f"Warning: Key '{key}' not found in the original data. Adding new key.")
+            #        data[key] = value
+            #
+            ## Convert the updated dictionary back into a YAML string
+            #updated_yaml_str = yaml.safe_dump(data, sort_keys=False)
+            #return updated_yaml_str
+        except yaml.YAMLError as e:
+            print(f"Error processing YAML data: {e}")
+        return "None"
```
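The commented-out block in `YAMLFile.__call__` describes a key-by-key merge that is never executed. Its intended logic, extracted as a plain-dict helper independent of YAML parsing (the helper name is hypothetical, not part of the commit):

```python
def apply_updates(data: dict, updates: dict) -> dict:
    # The merge the commented-out block describes: overwrite existing
    # keys, warn about and add keys that are not yet present.
    for key, value in updates.items():
        if key not in data:
            print(f"Warning: Key '{key}' not found in the original data. Adding new key.")
        data[key] = value
    return data


merged = apply_updates({"host": "10.0.0.1"}, {"host": "10.0.0.2", "port": 22})
```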

src/hackingBuddyGPT/cli/wintermute.py

Lines changed: 5 additions & 4 deletions

```diff
@@ -8,12 +8,13 @@ def main():
     parser = argparse.ArgumentParser()
     subparser = parser.add_subparsers(required=True)
     for name, use_case in use_cases.items():
-        use_case.build_parser(subparser.add_parser(
+        subb = subparser.add_parser(
             name=use_case.name,
             help=use_case.description
-        ))
-
-    parsed = parser.parse_args(sys.argv[1:])
+        )
+        use_case.build_parser(subb)
+    x = sys.argv[1:]
+    parsed = parser.parse_args(x)
     instance = parsed.use_case(parsed)
     instance.init()
     instance.run()
```
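The refactoring above unrolls the nested `build_parser(add_parser(...))` call: each use case first gets its own subparser, then installs its arguments on it. The same registration pattern, as a self-contained sketch (the `use_cases` table and flag names here are illustrative, not from the commit):

```python
import argparse

# Illustrative registry: each entry knows its help text and its arguments,
# standing in for a use case's build_parser().
use_cases = {
    "minimal_linux_privesc": {
        "help": "demo privilege-escalation use case",
        "args": [("--max-turns", int, 10)],
    },
}

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="use_case", required=True)
for name, spec in use_cases.items():
    # First create the subparser, then let the use case populate it,
    # mirroring the two-step form the commit switches to.
    sub = subparsers.add_parser(name, help=spec["help"])
    for flag, typ, default in spec["args"]:
        sub.add_argument(flag, type=typ, default=default)

parsed = parser.parse_args(["minimal_linux_privesc", "--max-turns", "5"])
```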

src/hackingBuddyGPT/usecases/agents.py

Lines changed: 26 additions & 9 deletions

```diff
@@ -4,18 +4,33 @@
 from rich.panel import Panel
 from typing import Dict

+from hackingBuddyGPT.usecases.base import Logger
 from hackingBuddyGPT.utils import llm_util
-
 from hackingBuddyGPT.capabilities.capability import Capability, capabilities_to_simple_text_handler
-from .common_patterns import RoundBasedUseCase
+from hackingBuddyGPT.utils.openai.openai_llm import OpenAIConnection
+

 @dataclass
-class Agent(RoundBasedUseCase, ABC):
+class Agent(ABC):
     _capabilities: Dict[str, Capability] = field(default_factory=dict)
     _default_capability: Capability = None
+    _log: Logger = None
+
+    llm: OpenAIConnection = None

     def init(self):
-        super().init()
+        pass
+
+    def before_run(self):
+        pass
+
+    def after_run(self):
+        pass
+
+    # callback
+    @abstractmethod
+    def perform_round(self, turn: int) -> bool:
+        pass

     def add_capability(self, cap: Capability, default: bool = False):
         self._capabilities[cap.get_name()] = cap
```

```diff
@@ -29,6 +44,7 @@ def get_capability_block(self) -> str:
         capability_descriptions, _parser = capabilities_to_simple_text_handler(self._capabilities)
         return "You can either\n\n" + "\n".join(f"- {description}" for description in capability_descriptions.values())

+
 @dataclass
 class AgentWorldview(ABC):
```

```diff
@@ -40,6 +56,7 @@ def to_template(self):
     def update(self, capability, cmd, result):
         pass

+
 class TemplatedAgent(Agent):

     _state: AgentWorldview = None
```

```diff
@@ -59,7 +76,7 @@ def set_template(self, template:str):
     def perform_round(self, turn:int) -> bool:
         got_root : bool = False

-        with self.console.status("[bold green]Asking LLM for a new command..."):
+        with self._log.console.status("[bold green]Asking LLM for a new command..."):
             # TODO output/log state
             options = self._state.to_template()
             options.update({
```

```diff
@@ -70,16 +87,16 @@ def perform_round(self, turn:int) -> bool:
             answer = self.llm.get_response(self._template, **options)
             cmd = llm_util.cmd_output_fixer(answer.result)

-        with self.console.status("[bold green]Executing that command..."):
-            self.console.print(Panel(answer.result, title="[bold cyan]Got command from LLM:"))
+        with self._log.console.status("[bold green]Executing that command..."):
+            self._log.console.print(Panel(answer.result, title="[bold cyan]Got command from LLM:"))
             capability = self.get_capability(cmd.split(" ", 1)[0])
             result, got_root = capability(cmd)

         # log and output the command and its result
-        self.log_db.add_log_query(self._run_id, turn, cmd, result, answer)
+        self._log.log_db.add_log_query(self._log.run_id, turn, cmd, result, answer)
         self._state.update(capability, cmd, result)
         # TODO output/log new state
-        self.console.print(Panel(result, title=f"[bold cyan]{cmd}"))
+        self._log.console.print(Panel(result, title=f"[bold cyan]{cmd}"))

         # if we got root, we can stop the loop
         return got_root
```
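The refactored `Agent` no longer inherits from `RoundBasedUseCase`; instead it exposes a lifecycle of `init`, `before_run`, repeated `perform_round(turn)` calls until one returns `True`, then `after_run`. A minimal self-contained sketch of that contract and a driver loop (class and function names here are illustrative, not the project's actual runner):

```python
from abc import ABC, abstractmethod


class MiniAgent(ABC):
    # Sketch of the lifecycle hooks the refactored Agent defines:
    # no-op defaults, with perform_round as the one required callback.
    def init(self):
        pass

    def before_run(self):
        pass

    def after_run(self):
        pass

    @abstractmethod
    def perform_round(self, turn: int) -> bool:
        ...


def run(agent: MiniAgent, max_turns: int = 10) -> int:
    # Drive the lifecycle: perform rounds until one reports success
    # (e.g. got root) or the turn budget is exhausted; return turns used.
    agent.init()
    agent.before_run()
    turns = 0
    for turn in range(1, max_turns + 1):
        turns = turn
        if agent.perform_round(turn):
            break
    agent.after_run()
    return turns


class TwoTurnAgent(MiniAgent):
    def perform_round(self, turn: int) -> bool:
        # Pretend the goal is reached on the second round.
        return turn >= 2
```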
