Skip to content

Commit 38bfbc0

Browse files
authored
Merge pull request ipa-lab#76 from ipa-lab/development_without_spacy
Development without spacy
2 parents 45832a5 + 7c0b84a commit 38bfbc0

File tree

9 files changed

+290
-65
lines changed

9 files changed

+290
-65
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@ dependencies = [
3535
'pypsexec == 0.3.0',
3636
'pydantic == 2.8.2',
3737
'openai == 1.28.0',
38-
'spacy',
39-
'BeautifulSoup4'
38+
'BeautifulSoup4',
39+
'nltk'
4040
]
4141

4242
[project.urls]

src/hackingBuddyGPT/capabilities/submit_http_method.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ class SubmitHTTPMethod(Capability):
1616
_client = requests.Session()
1717
host: str
1818
follow_redirects: bool = False
19+
success_function: Callable[[], None] = None
1920

2021

2122
submitted_valid_http_methods: Set[str] = field(default_factory=set, init=False)
@@ -67,7 +68,11 @@ def __call__(self, method: Literal["GET", "HEAD", "POST", "PUT", "DELETE", "OPTI
6768
return str(e)
6869

6970
headers = "\r\n".join(f"{k}: {v}" for k, v in resp.headers.items())
70-
71+
if len(self.submitted_valid_http_methods) == len(self.valid_http_methods):
72+
if self.success_function is not None:
73+
self.success_function()
74+
else:
75+
return "All methods submitted, congratulations"
7176
# turn the response into "plain text format" for responding to the prompt
7277
return f"HTTP/1.1 {resp.status_code} {resp.reason}\r\n{headers}\r\n\r\n{resp.text}"""
7378

src/hackingBuddyGPT/cli/wintermute.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,13 @@ def main():
88
parser = argparse.ArgumentParser()
99
subparser = parser.add_subparsers(required=True)
1010
for name, use_case in use_cases.items():
11-
use_case.build_parser(subparser.add_parser(
11+
subb = subparser.add_parser(
1212
name=use_case.name,
1313
help=use_case.description
14-
))
15-
16-
parsed = parser.parse_args(sys.argv[1:])
14+
)
15+
use_case.build_parser(subb)
16+
x= sys.argv[1:]
17+
parsed = parser.parse_args(x)
1718
instance = parsed.use_case(parsed)
1819
instance.init()
1920
instance.run()

src/hackingBuddyGPT/usecases/web_api_testing/prompt_engineer.py

Lines changed: 57 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
1-
import spacy
1+
import nltk
2+
from nltk.tokenize import word_tokenize
3+
from nltk.corpus import stopwords
24
from instructor.retry import InstructorRetryException
35

46

@@ -32,19 +34,14 @@ def __init__(self, strategy, llm_handler, history, schemas, response_handler):
3234
self.response_handler = response_handler
3335
self.llm_handler = llm_handler
3436
self.round = 0
35-
self.found_endpoints = []
37+
self.found_endpoints = ["/"]
38+
self.endpoint_methods = {}
39+
self.endpoint_found_methods = {}
3640
model_name = "en_core_web_sm"
3741

38-
# Check if the model is already installed
39-
from spacy.util import is_package
40-
if not is_package(model_name):
41-
print(f"Model '{model_name}' is not installed. Installing now...")
42-
spacy.cli.download(model_name)
43-
44-
# Load the model
45-
self.nlp = spacy.load(model_name)
46-
47-
self.nlp = spacy.load("en_core_web_sm")
42+
# Check if the models are already installed
43+
nltk.download('punkt')
44+
nltk.download('stopwords')
4845
self._prompt_history = history
4946
self.prompt = self._prompt_history
5047
self.previous_prompt = self._prompt_history[self.round]["content"]
@@ -105,7 +102,7 @@ def get_http_action_template(self, method):
105102

106103
else:
107104
return (
108-
f"Create HTTPRequests of type {method} considering the found schemas: {self.schemas} and understand the responses. Ensure that they are correct requests.")
105+
f"Create HTTPRequests of type {method} considering only the object with id=1 for the endpoint and understand the responses. Ensure that they are correct requests.")
109106

110107

111108
def chain_of_thought(self, doc=False, hint=""):
@@ -129,34 +126,56 @@ def chain_of_thought(self, doc=False, hint=""):
129126
"Make the OpenAPI specification available to developers by incorporating it into your API documentation site and keep the documentation up to date with API changes."
130127
]
131128

132-
http_methods = [ "POST", "DELETE", "PUT"]
129+
http_methods = [ "PUT", "DELETE"]
133130
http_phase = {
134-
6: http_methods[0],
135-
16: http_methods[1], # Delete one of instance of this:{self.llm_handler.get_created_objects()}",
136-
20: http_methods[2]
131+
5: http_methods[0],
132+
10: http_methods[1]
137133
}
138134

139135
if doc:
140-
if self.round <= 5:
136+
if self.round < 5:
141137

142138
chain_of_thought_steps = [
143139
f"Identify all available endpoints via GET Requests. Exclude those in this list: {self.found_endpoints}", f"Note down the response structures, status codes, and headers for each endpoint.",
144140
f"For each endpoint, document the following details: URL, HTTP method, "
145141
f"query parameters and path variables, expected request body structure for requests, response structure for successful and error responses."
146142
] + common_steps
147143
else:
148-
phase = http_phase.get(min(filter(lambda x: self.round <= x, http_phase.keys())))
149-
print(f'phase:{phase}')
150-
if phase != "DELETE":
151-
chain_of_thought_steps = [
152-
f"Identify all valid calls for HTTP method {phase}.",
144+
if self.round <= 10:
145+
phase = http_phase.get(min(filter(lambda x: self.round <= x, http_phase.keys())))
146+
print(f'phase:{phase}')
147+
if phase != "DELETE":
148+
chain_of_thought_steps = [
149+
f"Identify for all endpoints {self.found_endpoints} excluding {self.endpoint_found_methods[phase]} a valid HTTP method {phase} call.",
153150
self.get_http_action_template(phase)
154151
] + common_steps
155-
else:
156-
chain_of_thought_steps = [
152+
else:
153+
chain_of_thought_steps = [
157154
f"Check for all endpoints the DELETE method. Delete the first instance for all endpoints. ",
158155
self.get_http_action_template(phase)
159156
] + common_steps
157+
else:
158+
endpoints_needing_help = []
159+
endpoints_and_needed_methods = {}
160+
161+
# Standard HTTP methods
162+
http_methods = {"GET", "POST", "PUT", "DELETE"}
163+
164+
for endpoint in self.endpoint_methods:
165+
# Calculate the missing methods for the current endpoint
166+
missing_methods = http_methods - set(self.endpoint_methods[endpoint])
167+
168+
if len(self.endpoint_methods[endpoint]) < 4:
169+
endpoints_needing_help.append(endpoint)
170+
# Add the missing methods to the dictionary
171+
endpoints_and_needed_methods[endpoint] = list(missing_methods)
172+
173+
print(f'endpoints_and_needed_methods: {endpoints_and_needed_methods}')
174+
print(f'first endpoint in list: {endpoints_needing_help[0]}')
175+
print(f'methods needed for first endpoint: {endpoints_and_needed_methods[endpoints_needing_help[0]][0]}')
176+
177+
chain_of_thought_steps = [f"For enpoint {endpoints_needing_help[0]} find this missing method :{endpoints_and_needed_methods[endpoints_needing_help[0]][0]} "
178+
f"If all the HTTP methods have already been found for an endpoint, then do not include this endpoint in your search. ",]
160179

161180
else:
162181
if self.round == 0:
@@ -175,19 +194,19 @@ def chain_of_thought(self, doc=False, hint=""):
175194

176195
def token_count(self, text):
177196
"""
178-
Counts the number of word tokens in the provided text using spaCy's tokenizer.
179-
180-
Args:
181-
text (str): The input text to tokenize and count.
182-
183-
Returns:
184-
int: The number of tokens in the input text.
185-
"""
186-
# Process the text through spaCy's pipeline
187-
doc = self.nlp(text)
188-
# Count tokens that aren't punctuation marks
189-
tokens = [token for token in doc if not token.is_punct]
190-
return len(tokens)
197+
Counts the number of word tokens in the provided text using NLTK's tokenizer.
198+
199+
Args:
200+
text (str): The input text to tokenize and count.
201+
202+
Returns:
203+
int: The number of tokens in the input text.
204+
"""
205+
# Tokenize the text using NLTK
206+
tokens = word_tokenize(text)
207+
# Filter out punctuation marks
208+
words = [token for token in tokens if token.isalnum()]
209+
return len(words)
191210

192211

193212
def check_prompt(self, previous_prompt, chain_of_thought_steps, max_tokens=900):

src/hackingBuddyGPT/usecases/web_api_testing/simple_openapi_documentation.py

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -29,19 +29,19 @@ class SimpleWebAPIDocumentation(Agent):
2929
_all_http_methods_found: bool = False
3030

3131
# Description for expected HTTP methods
32-
http_method_description: str = parameter(
32+
_http_method_description: str = parameter(
3333
desc="Pattern description for expected HTTP methods in the API response",
3434
default="A string that represents an HTTP method (e.g., 'GET', 'POST', etc.)."
3535
)
3636

3737
# Template for HTTP methods in API requests
38-
http_method_template: str = parameter(
38+
_http_method_template: str = parameter(
3939
desc="Template to format HTTP methods in API requests, with {method} replaced by actual HTTP method names.",
4040
default="{method}"
4141
)
4242

4343
# List of expected HTTP methods
44-
http_methods: str = parameter(
44+
_http_methods: str = parameter(
4545
desc="Expected HTTP methods in the API, as a comma-separated list.",
4646
default="GET,POST,PUT,PATCH,DELETE"
4747
)
@@ -75,13 +75,23 @@ def _setup_initial_prompt(self):
7575

7676

7777
def all_http_methods_found(self):
78-
self._log.console.print(Panel("All HTTP methods found! Congratulations!", title="system"))
79-
self._all_http_methods_found = True
78+
print(f'found endpoints:{self.documentation_handler.endpoint_methods.items()}')
79+
print(f'found endpoints values:{self.documentation_handler.endpoint_methods.values()}')
80+
81+
found_endpoints = sum(len(value_list) for value_list in self.documentation_handler.endpoint_methods.values())
82+
expected_endpoints = len(self.documentation_handler.endpoint_methods.keys())*4
83+
print(f'found endpoints:{found_endpoints}')
84+
print(f'expected endpoints:{expected_endpoints}')
85+
print(f'correct? {found_endpoints== expected_endpoints}')
86+
if found_endpoints== expected_endpoints or found_endpoints == expected_endpoints -1:
87+
return True
88+
else:
89+
return False
8090

8191
def perform_round(self, turn: int):
8292
prompt = self.prompt_engineer.generate_prompt(doc=True)
8393
response, completion = self.llm_handler.call_llm(prompt)
84-
self._handle_response(completion, response)
94+
return self._handle_response(completion, response)
8595

8696
def _handle_response(self, completion, response):
8797
message = completion.choices[0].message
@@ -101,8 +111,17 @@ def _handle_response(self, completion, response):
101111
self.prompt_engineer.found_endpoints = self.documentation_handler.update_openapi_spec(response, result)
102112
self.documentation_handler.write_openapi_to_yaml()
103113
self.prompt_engineer.schemas = self.documentation_handler.schemas
114+
from collections import defaultdict
115+
http_methods_dict = defaultdict(list)
116+
117+
# Iterate through the original dictionary
118+
for endpoint, methods in self.documentation_handler.endpoint_methods.items():
119+
for method in methods:
120+
http_methods_dict[method].append(endpoint)
121+
self.prompt_engineer.endpoint_found_methods = http_methods_dict
122+
self.prompt_engineer.endpoint_methods = self.documentation_handler.endpoint_methods
104123
print(f'SCHEMAS:{self.prompt_engineer.schemas}')
105-
return self._all_http_methods_found
124+
return self.all_http_methods_found()
106125

107126

108127

src/hackingBuddyGPT/usecases/web_api_testing/simple_web_api_testing.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ def _setup_capabilities(self):
9595
methods_set = {self.http_method_template.format(method=method) for method in self.http_methods.split(",")}
9696
notes = self._context["notes"]
9797
self._capabilities = {
98-
"submit_http_method": SubmitHTTPMethod(self.http_method_description, methods_set, self.host),
98+
"submit_http_method": HTTPRequest(self.host),
9999
"http_request": HTTPRequest(self.host),
100100
"record_note": RecordNote(notes)
101101
}
@@ -134,8 +134,7 @@ def _handle_response(self, completion, response):
134134
result_str = self.response_handler.parse_http_status_line(result)
135135
self._prompt_history.append(tool_message(result_str, tool_call_id))
136136

137-
return self._all_http_methods_found
138-
137+
return self.all_http_methods_found()
139138
@use_case("Minimal implementation of a web API testing use case")
140139
class SimpleWebAPITestingUseCase(AutonomousAgentUseCase[SimpleWebAPITesting]):
141140
pass

src/hackingBuddyGPT/usecases/web_api_testing/utils/documentation_handler.py

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ def __init__(self, llm_handler, response_handler):
2929
"""
3030
self.response_handler = response_handler
3131
self.schemas = {}
32+
self.endpoint_methods ={}
3233
self.filename = f"openapi_spec_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.yaml"
3334
self.openapi_spec = {
3435
"openapi": "3.0.0",
@@ -42,14 +43,17 @@ def __init__(self, llm_handler, response_handler):
4243
"components": {"schemas": {}}
4344
}
4445
self.llm_handler = llm_handler
45-
self.api_key = llm_handler.llm.api_key
46+
#self.api_key = llm_handler.llm.api_key
4647
current_path = os.path.dirname(os.path.abspath(__file__))
4748
self.file_path = os.path.join(current_path, "openapi_spec")
4849
self.file = os.path.join(self.file_path, self.filename)
4950
self._capabilities = {
5051
"yaml": YAMLFile()
5152
}
5253

54+
def partial_match(self, element, string_list):
55+
return any(element in string or string in element for string in string_list)
56+
5357
def update_openapi_spec(self, resp, result):
5458
"""
5559
Updates the OpenAPI specification based on the API response provided.
@@ -67,31 +71,51 @@ def update_openapi_spec(self, resp, result):
6771
method = request.method
6872
print(f'method: {method}')
6973
# Ensure that path and method are not None and method has no numeric characters
74+
# Ensure path and method are valid and method has no numeric characters
7075
if path and method:
76+
endpoint_methods = self.endpoint_methods
77+
endpoints = self.openapi_spec['endpoints']
78+
x = path.split('/')[1]
79+
7180
# Initialize the path if not already present
72-
if path not in self.openapi_spec['endpoints']:
73-
self.openapi_spec['endpoints'][path] = {}
81+
if path not in endpoints and x != "":
82+
endpoints[path] = {}
83+
if '1' not in path:
84+
endpoint_methods[path] = []
85+
7486
# Update the method description within the path
75-
example, reference, self.openapi_spec = self.response_handler.parse_http_response_to_openapi_example(self.openapi_spec, result, path, method)
87+
example, reference, self.openapi_spec = self.response_handler.parse_http_response_to_openapi_example(
88+
self.openapi_spec, result, path, method
89+
)
7690
self.schemas = self.openapi_spec["components"]["schemas"]
77-
if example is not None or reference is not None:
78-
self.openapi_spec['endpoints'][path][method.lower()] = {
91+
92+
if example or reference:
93+
endpoints[path][method.lower()] = {
7994
"summary": f"{method} operation on {path}",
8095
"responses": {
8196
"200": {
8297
"description": "Successful response",
8398
"content": {
8499
"application/json": {
85-
"schema": {
86-
"$ref": reference
87-
},
100+
"schema": {"$ref": reference},
88101
"examples": example
89102
}
90103
}
91104
}
92105
}
93106
}
94-
return list(self.openapi_spec['endpoints'].keys())
107+
108+
if '1' not in path and x != "":
109+
endpoint_methods[path].append(method)
110+
elif self.partial_match(x, endpoints.keys()):
111+
path = f"/{x}"
112+
print(f'endpoint methods = {endpoint_methods}')
113+
print(f'new path:{path}')
114+
endpoint_methods[path].append(method)
115+
116+
endpoint_methods[path] = list(set(endpoint_methods[path]))
117+
118+
return list(endpoints.keys())
95119

96120
def write_openapi_to_yaml(self):
97121
"""

0 commit comments

Comments
 (0)