diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 507912c504..4b6cc3c898 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.42.0"
+  ".": "1.46.0"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 185585b675..2fc39385e9 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 68
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8ff62fa1091460d68fbd36d72c17d91b709917bebf2983c9c4de5784bc384a2e.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-ff407aa10917e62f2b0c12d1ad2c4f1258ed083bd45753c70eaaf5b1cf8356ae.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6daddd33fe..c1f852327c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,85 @@
 # Changelog
 
+## 1.46.0 (2024-09-17)
+
+Full Changelog: [v1.45.1...v1.46.0](https://github.com/openai/openai-python/compare/v1.45.1...v1.46.0)
+
+### Features
+
+* **client:** add ._request_id property to object responses ([#1707](https://github.com/openai/openai-python/issues/1707)) ([8b3da05](https://github.com/openai/openai-python/commit/8b3da05a35b33245aec98693a0540ace6218a61b))
+
+
+### Documentation
+
+* **readme:** add examples for chat with image content ([#1703](https://github.com/openai/openai-python/issues/1703)) ([192b8f2](https://github.com/openai/openai-python/commit/192b8f2b6a49f462e48c1442858931875524ab49))
+
+## 1.45.1 (2024-09-16)
+
+Full Changelog: [v1.45.0...v1.45.1](https://github.com/openai/openai-python/compare/v1.45.0...v1.45.1)
+
+### Chores
+
+* **internal:** bump pyright / mypy version ([#1717](https://github.com/openai/openai-python/issues/1717)) ([351af85](https://github.com/openai/openai-python/commit/351af85c5b813391910301a5049edddc8c9e70dd))
+* **internal:** bump ruff ([#1714](https://github.com/openai/openai-python/issues/1714)) ([aceaf64](https://github.com/openai/openai-python/commit/aceaf641eedd092ed42e4aaf031e8cfbf37e4212))
+* **internal:** update spec link ([#1716](https://github.com/openai/openai-python/issues/1716)) ([ca58c7f](https://github.com/openai/openai-python/commit/ca58c7f83a7cede0367dec2500127573c9b00d1f))
+
+
+### Documentation
+
+* update CONTRIBUTING.md ([#1710](https://github.com/openai/openai-python/issues/1710)) ([4d45eb5](https://github.com/openai/openai-python/commit/4d45eb5eb794bcc5076c022be09e06fae103abcc))
+
+## 1.45.0 (2024-09-12)
+
+Full Changelog: [v1.44.1...v1.45.0](https://github.com/openai/openai-python/compare/v1.44.1...v1.45.0)
+
+### Features
+
+* **api:** add o1 models ([#1708](https://github.com/openai/openai-python/issues/1708)) ([06bd42e](https://github.com/openai/openai-python/commit/06bd42e77121a6abd4826a79ce1848812d956576))
+* **errors:** include completion in LengthFinishReasonError ([#1701](https://github.com/openai/openai-python/issues/1701)) ([b0e3256](https://github.com/openai/openai-python/commit/b0e32562aff9aceafec994d3b047f7c2a9f11524))
+
+
+### Bug Fixes
+
+* **types:** correctly mark stream discriminator as optional ([#1706](https://github.com/openai/openai-python/issues/1706)) ([80f02f9](https://github.com/openai/openai-python/commit/80f02f9e5f83fac9cd2f4172b733a92ad01399b2))
+
+## 1.44.1 (2024-09-09)
+
+Full Changelog: [v1.44.0...v1.44.1](https://github.com/openai/openai-python/compare/v1.44.0...v1.44.1)
+
+### Chores
+
+* add docstrings to raw response properties ([#1696](https://github.com/openai/openai-python/issues/1696)) ([1d2a19b](https://github.com/openai/openai-python/commit/1d2a19b0e8acab54c35ef2171d33321943488fdc))
+
+
+### Documentation
+
+* **readme:** add section on determining installed version ([#1697](https://github.com/openai/openai-python/issues/1697)) ([0255735](https://github.com/openai/openai-python/commit/0255735930d9c657c78e85e7f03fd1eb98a1e378))
+* **readme:** improve custom `base_url` example ([#1694](https://github.com/openai/openai-python/issues/1694)) ([05eec8a](https://github.com/openai/openai-python/commit/05eec8a0b7fcdc8651021f2e685214a353b861d1))
+
+## 1.44.0 (2024-09-06)
+
+Full Changelog: [v1.43.1...v1.44.0](https://github.com/openai/openai-python/compare/v1.43.1...v1.44.0)
+
+### Features
+
+* **vector store:** improve chunking strategy type names ([#1690](https://github.com/openai/openai-python/issues/1690)) ([e82cd85](https://github.com/openai/openai-python/commit/e82cd85ac4962e36cb3b139c503069b56918688f))
+
+## 1.43.1 (2024-09-05)
+
+Full Changelog: [v1.43.0...v1.43.1](https://github.com/openai/openai-python/compare/v1.43.0...v1.43.1)
+
+### Chores
+
+* pyproject.toml formatting changes ([#1687](https://github.com/openai/openai-python/issues/1687)) ([3387ede](https://github.com/openai/openai-python/commit/3387ede0b896788bf1197378b01941c75bd6e179))
+
+## 1.43.0 (2024-08-29)
+
+Full Changelog: [v1.42.0...v1.43.0](https://github.com/openai/openai-python/compare/v1.42.0...v1.43.0)
+
+### Features
+
+* **api:** add file search result details to run steps ([#1681](https://github.com/openai/openai-python/issues/1681)) ([f5449c0](https://github.com/openai/openai-python/commit/f5449c07580ac9707f0387f86f4772fbf0a874b6))
+
 ## 1.42.0 (2024-08-20)
 
 Full Changelog: [v1.41.1...v1.42.0](https://github.com/openai/openai-python/compare/v1.41.1...v1.42.0)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 354d21b2d2..5a6639b8fc 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -31,13 +31,13 @@ $ pip install -r requirements-dev.lock
 
 ## Modifying/Adding code
 
-Most of the SDK is generated code, and any modified code will be overridden on the next generation. The
-`src/openai/lib/` and `examples/` directories are exceptions and will never be overridden.
+Most of the SDK is generated code. Modifications to code will be persisted between generations, but may
+result in merge conflicts between manual patches and changes from the generator. The generator will never
+modify the contents of the `src/openai/lib/` and `examples/` directories.
 
 ## Adding and running examples
 
-All files in the `examples/` directory are not modified by the Stainless generator and can be freely edited or
-added to.
+All files in the `examples/` directory are not modified by the generator and can be freely edited or added to.
 
 ```bash
 # add an example to examples/<your-example>.py
diff --git a/README.md b/README.md
index 525c1b5aaf..c47bdd54c5 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,48 @@ we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/)
 to add `OPENAI_API_KEY="My API Key"` to your `.env` file
 so that your API Key is not stored in source control.
 
+### Vision
+
+With a hosted image:
+
+```python
+response = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"{img_url}"},
+                },
+            ],
+        }
+    ],
+)
+```
+
+With the image as a base64 encoded string:
+
+```python
+response = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:{img_type};base64,{img_b64_str}"},
+                },
+            ],
+        }
+    ],
+)
+```
+
 ### Polling Helpers
 
 When interacting with the API some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. The SDK includes
@@ -375,6 +417,24 @@ Error codes are as followed:
 | >=500       | `InternalServerError`      |
 | N/A         | `APIConnectionError`       |
 
+## Request IDs
+
+> For more information on debugging requests, see [these docs](https://platform.openai.com/docs/api-reference/debugging-requests)
+
+All object responses in the SDK provide a `_request_id` property which is added from the `x-request-id` response header so that you can quickly log failing requests and report them back to OpenAI.
+
+```python
+completion = await client.chat.completions.create(
+    messages=[{"role": "user", "content": "Say this is a test"}], model="gpt-4"
+)
+print(completion._request_id)  # req_123
+```
+
+Note that unlike other properties that use an `_` prefix, the `_request_id` property
+*is* public. Unless documented otherwise, *all* other `_` prefix properties,
+methods and modules are *private*.
+
+
 ### Retries
 
 Certain errors are automatically retried 2 times by default, with a short exponential backoff.
@@ -567,7 +627,7 @@ from openai import OpenAI, DefaultHttpxClient
 
 client = OpenAI(
     # Or use the `OPENAI_BASE_URL` env var
-    base_url="http://my.test.server.example.com:8083",
+    base_url="http://my.test.server.example.com:8083/v1",
     http_client=DefaultHttpxClient(
         proxies="http://my.test.proxy.example.com",
         transport=httpx.HTTPTransport(local_address="0.0.0.0"),
@@ -639,6 +699,17 @@ We take backwards-compatibility seriously and work hard to ensure you can rely o
 
 We are keen for your feedback; please open an [issue](https://www.github.com/openai/openai-python/issues) with questions, bugs, or suggestions.
 
+### Determining the installed version
+
+If you've upgraded to the latest version but aren't seeing any new features you were expecting then your python environment is likely still using an older version.
+
+You can determine the version that is being used at runtime with:
+
+```py
+import openai
+print(openai.__version__)
+```
+
 ## Requirements
 
 Python 3.7 or higher.
diff --git a/api.md b/api.md
index 648d0f3708..32c0fb9efc 100644
--- a/api.md
+++ b/api.md
@@ -222,7 +222,17 @@ Methods:
 Types:
 
 ```python
-from openai.types.beta import VectorStore, VectorStoreDeleted
+from openai.types.beta import (
+    AutoFileChunkingStrategyParam,
+    FileChunkingStrategy,
+    FileChunkingStrategyParam,
+    OtherFileChunkingStrategyObject,
+    StaticFileChunkingStrategy,
+    StaticFileChunkingStrategyObject,
+    StaticFileChunkingStrategyParam,
+    VectorStore,
+    VectorStoreDeleted,
+)
 ```
 
 Methods:
@@ -365,6 +375,7 @@ from openai.types.beta.threads.runs import (
     RunStepDelta,
     RunStepDeltaEvent,
     RunStepDeltaMessageDelta,
+    RunStepInclude,
     ToolCall,
     ToolCallDelta,
     ToolCallDeltaObject,
@@ -374,7 +385,7 @@ from openai.types.beta.threads.runs import (
 
 Methods:
 
-- <code title="get /threads/{thread_id}/runs/{run_id}/steps/{step_id}">client.beta.threads.runs.steps.<a href="./src/openai/resources/beta/threads/runs/steps.py">retrieve</a>(step_id, \*, thread_id, run_id) -> <a href="./src/openai/types/beta/threads/runs/run_step.py">RunStep</a></code>
+- <code title="get /threads/{thread_id}/runs/{run_id}/steps/{step_id}">client.beta.threads.runs.steps.<a href="./src/openai/resources/beta/threads/runs/steps.py">retrieve</a>(step_id, \*, thread_id, run_id, \*\*<a href="src/openai/types/beta/threads/runs/step_retrieve_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/runs/run_step.py">RunStep</a></code>
 - <code title="get /threads/{thread_id}/runs/{run_id}/steps">client.beta.threads.runs.steps.<a href="./src/openai/resources/beta/threads/runs/steps.py">list</a>(run_id, \*, thread_id, \*\*<a href="src/openai/types/beta/threads/runs/step_list_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/runs/run_step.py">SyncCursorPage[RunStep]</a></code>
 
 ### Messages
diff --git a/pyproject.toml b/pyproject.toml
index 526e46e063..a3bd81cad5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.42.0"
+version = "1.46.0"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index f4797f432b..a47de9656a 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -83,7 +83,7 @@ msal==1.29.0
     # via msal-extensions
 msal-extensions==1.2.0
     # via azure-identity
-mypy==1.7.1
+mypy==1.11.2
 mypy-extensions==1.0.0
     # via black
     # via mypy
@@ -125,7 +125,7 @@ pygments==2.18.0
     # via rich
 pyjwt==2.8.0
     # via msal
-pyright==1.1.374
+pyright==1.1.380
 pytest==7.1.1
     # via pytest-asyncio
 pytest-asyncio==0.21.1
@@ -141,7 +141,7 @@ requests==2.31.0
 respx==0.20.2
 rich==13.7.1
     # via inline-snapshot
-ruff==0.5.6
+ruff==0.6.5
 setuptools==68.2.2
     # via nodeenv
 six==1.16.0
diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py
index f44f90b52f..e326ed9578 100644
--- a/src/openai/_exceptions.py
+++ b/src/openai/_exceptions.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Any, Optional, cast
+from typing import TYPE_CHECKING, Any, Optional, cast
 from typing_extensions import Literal
 
 import httpx
@@ -10,6 +10,9 @@
 from ._utils import is_dict
 from ._models import construct_type
 
+if TYPE_CHECKING:
+    from .types.chat import ChatCompletion
+
 __all__ = [
     "BadRequestError",
     "AuthenticationError",
@@ -130,10 +133,20 @@ class InternalServerError(APIStatusError):
 
 
 class LengthFinishReasonError(OpenAIError):
-    def __init__(self) -> None:
-        super().__init__(
-            f"Could not parse response content as the length limit was reached",
-        )
+    completion: ChatCompletion
+    """The completion that caused this error.
+
+    Note: this will *not* be a complete `ChatCompletion` object when streaming as `usage`
+          will not be included.
+    """
+
+    def __init__(self, *, completion: ChatCompletion) -> None:
+        msg = "Could not parse response content as the length limit was reached"
+        if completion.usage:
+            msg += f" - {completion.usage}"
+
+        super().__init__(msg)
+        self.completion = completion
 
 
 class ContentFilterFinishReasonError(OpenAIError):
diff --git a/src/openai/_legacy_response.py b/src/openai/_legacy_response.py
index c42fb8b83e..c7dbd54e23 100644
--- a/src/openai/_legacy_response.py
+++ b/src/openai/_legacy_response.py
@@ -25,7 +25,7 @@
 
 from ._types import NoneType
 from ._utils import is_given, extract_type_arg, is_annotated_type
-from ._models import BaseModel, is_basemodel
+from ._models import BaseModel, is_basemodel, add_request_id
 from ._constants import RAW_RESPONSE_HEADER
 from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
 from ._exceptions import APIResponseValidationError
@@ -138,8 +138,11 @@ class MyModel(BaseModel):
         if is_given(self._options.post_parser):
             parsed = self._options.post_parser(parsed)
 
+        if isinstance(parsed, BaseModel):
+            add_request_id(parsed, self.request_id)
+
         self._parsed_by_type[cache_key] = parsed
-        return parsed
+        return cast(R, parsed)
 
     @property
     def headers(self) -> httpx.Headers:
diff --git a/src/openai/_models.py b/src/openai/_models.py
index d386eaa3a4..d6f42d3d4d 100644
--- a/src/openai/_models.py
+++ b/src/openai/_models.py
@@ -2,7 +2,7 @@
 
 import os
 import inspect
-from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast
+from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast
 from datetime import date, datetime
 from typing_extensions import (
     Unpack,
@@ -94,6 +94,23 @@ def model_fields_set(self) -> set[str]:
         class Config(pydantic.BaseConfig):  # pyright: ignore[reportDeprecated]
             extra: Any = pydantic.Extra.allow  # type: ignore
 
+    if TYPE_CHECKING:
+        _request_id: Optional[str] = None
+        """The ID of the request, returned via the X-Request-ID header. Useful for debugging requests and reporting issues to OpenAI.
+
+        This will **only** be set for the top-level response object, it will not be defined for nested objects. For example:
+        
+        ```py
+        completion = await client.chat.completions.create(...)
+        completion._request_id  # req_id_xxx
+        completion.usage._request_id  # raises `AttributeError`
+        ```
+
+        Note: unlike other properties that use an `_` prefix, this property
+        *is* public. Unless documented otherwise, all other `_` prefix properties,
+        methods and modules are *private*.
+        """
+
     def to_dict(
         self,
         *,
@@ -662,6 +679,21 @@ def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None:
     setattr(typ, "__pydantic_config__", config)  # noqa: B010
 
 
+def add_request_id(obj: BaseModel, request_id: str | None) -> None:
+    obj._request_id = request_id
+
+    # in Pydantic v1, using setattr like we do above causes the attribute
+    # to be included when serializing the model which we don't want in this
+    # case so we need to explicitly exclude it
+    if not PYDANTIC_V2:
+        try:
+            exclude_fields = obj.__exclude_fields__  # type: ignore
+        except AttributeError:
+            cast(Any, obj).__exclude_fields__ = {"_request_id", "__exclude_fields__"}
+        else:
+            cast(Any, obj).__exclude_fields__ = {*(exclude_fields or {}), "_request_id", "__exclude_fields__"}
+
+
 # our use of subclasssing here causes weirdness for type checkers,
 # so we just pretend that we don't subclass
 if TYPE_CHECKING:
diff --git a/src/openai/_response.py b/src/openai/_response.py
index f9d91786f6..20ce69ac8a 100644
--- a/src/openai/_response.py
+++ b/src/openai/_response.py
@@ -26,7 +26,7 @@
 
 from ._types import NoneType
 from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base
-from ._models import BaseModel, is_basemodel
+from ._models import BaseModel, is_basemodel, add_request_id
 from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
 from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
 from ._exceptions import OpenAIError, APIResponseValidationError
@@ -315,8 +315,11 @@ class MyModel(BaseModel):
         if is_given(self._options.post_parser):
             parsed = self._options.post_parser(parsed)
 
+        if isinstance(parsed, BaseModel):
+            add_request_id(parsed, self.request_id)
+
         self._parsed_by_type[cache_key] = parsed
-        return parsed
+        return cast(R, parsed)
 
     def read(self) -> bytes:
         """Read and return the binary response content."""
@@ -419,8 +422,11 @@ class MyModel(BaseModel):
         if is_given(self._options.post_parser):
             parsed = self._options.post_parser(parsed)
 
+        if isinstance(parsed, BaseModel):
+            add_request_id(parsed, self.request_id)
+
         self._parsed_by_type[cache_key] = parsed
-        return parsed
+        return cast(R, parsed)
 
     async def read(self) -> bytes:
         """Read and return the binary response content."""
diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py
index 2fc5a1c65a..0bba17caad 100644
--- a/src/openai/_utils/_utils.py
+++ b/src/openai/_utils/_utils.py
@@ -363,12 +363,13 @@ def file_from_path(path: str) -> FileTypes:
 
 def get_required_header(headers: HeadersLike, header: str) -> str:
     lower_header = header.lower()
-    if isinstance(headers, Mapping):
-        for k, v in headers.items():
+    if is_mapping_t(headers):
+        # mypy doesn't understand the type narrowing here
+        for k, v in headers.items():  # type: ignore
             if k.lower() == lower_header and isinstance(v, str):
                 return v
 
-    """ to deal with the case where the header looks like Stainless-Event-Id """
+    # to deal with the case where the header looks like Stainless-Event-Id
     intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize())
 
     for normalized_header in [header, lower_header, header.upper(), intercaps_header]:
diff --git a/src/openai/_version.py b/src/openai/_version.py
index b89e3df342..ccd62dc230 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.42.0"  # x-release-please-version
+__version__ = "1.46.0"  # x-release-please-version
diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py
index 2ef1bf3553..f1fa9f2b55 100644
--- a/src/openai/lib/_parsing/_completions.py
+++ b/src/openai/lib/_parsing/_completions.py
@@ -69,7 +69,7 @@ def parse_chat_completion(
     choices: list[ParsedChoice[ResponseFormatT]] = []
     for choice in chat_completion.choices:
         if choice.finish_reason == "length":
-            raise LengthFinishReasonError()
+            raise LengthFinishReasonError(completion=chat_completion)
 
         if choice.finish_reason == "content_filter":
             raise ContentFilterFinishReasonError()
diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py
index 342a5e2b95..a4b0f856f7 100644
--- a/src/openai/lib/streaming/chat/_completions.py
+++ b/src/openai/lib/streaming/chat/_completions.py
@@ -394,7 +394,9 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS
 
                 if has_parseable_input(response_format=self._response_format, input_tools=self._input_tools):
                     if choice.finish_reason == "length":
-                        raise LengthFinishReasonError()
+                        # at the time of writing, `.usage` will always be `None` but
+                        # we include it here in case that is changed in the future
+                        raise LengthFinishReasonError(completion=completion_snapshot)
 
                     if choice.finish_reason == "content_filter":
                         raise ContentFilterFinishReasonError()
diff --git a/src/openai/resources/audio/audio.py b/src/openai/resources/audio/audio.py
index 537ad573d0..18bd7b812c 100644
--- a/src/openai/resources/audio/audio.py
+++ b/src/openai/resources/audio/audio.py
@@ -47,10 +47,21 @@ def speech(self) -> Speech:
 
     @cached_property
     def with_raw_response(self) -> AudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AudioWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AudioWithStreamingResponse(self)
 
 
@@ -69,10 +80,21 @@ def speech(self) -> AsyncSpeech:
 
     @cached_property
     def with_raw_response(self) -> AsyncAudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncAudioWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncAudioWithStreamingResponse(self)
 
 
diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py
index a0df9ec487..6085ae8afe 100644
--- a/src/openai/resources/audio/speech.py
+++ b/src/openai/resources/audio/speech.py
@@ -31,10 +31,21 @@
 class Speech(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> SpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return SpeechWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> SpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return SpeechWithStreamingResponse(self)
 
     def create(
@@ -104,10 +115,21 @@ def create(
 class AsyncSpeech(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncSpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncSpeechWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncSpeechWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py
index 1ee962411c..a6009143d4 100644
--- a/src/openai/resources/audio/transcriptions.py
+++ b/src/openai/resources/audio/transcriptions.py
@@ -29,10 +29,21 @@
 class Transcriptions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> TranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return TranscriptionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return TranscriptionsWithStreamingResponse(self)
 
     def create(
@@ -125,10 +136,21 @@ def create(
 class AsyncTranscriptions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncTranscriptionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncTranscriptionsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py
index ed97ccf840..7ec647fb6b 100644
--- a/src/openai/resources/audio/translations.py
+++ b/src/openai/resources/audio/translations.py
@@ -28,10 +28,21 @@
 class Translations(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> TranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return TranslationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return TranslationsWithStreamingResponse(self)
 
     def create(
@@ -109,10 +120,21 @@ def create(
 class AsyncTranslations(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncTranslationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncTranslationsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py
index 7152fac622..a8a0ba4bbc 100644
--- a/src/openai/resources/batches.py
+++ b/src/openai/resources/batches.py
@@ -30,10 +30,21 @@
 class Batches(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return BatchesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return BatchesWithStreamingResponse(self)
 
     def create(
@@ -224,10 +235,21 @@ def cancel(
 class AsyncBatches(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncBatchesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncBatchesWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py
index 441390d24b..5d8c6ec331 100644
--- a/src/openai/resources/beta/assistants.py
+++ b/src/openai/resources/beta/assistants.py
@@ -35,10 +35,21 @@
 class Assistants(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AssistantsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AssistantsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AssistantsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AssistantsWithStreamingResponse(self)
 
     def create(
@@ -89,11 +100,11 @@ def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -239,11 +250,11 @@ def update(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -410,10 +421,21 @@ def delete(
 class AsyncAssistants(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncAssistantsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncAssistantsWithStreamingResponse(self)
 
     async def create(
@@ -464,11 +486,11 @@ async def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -614,11 +636,11 @@ async def update(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py
index 479c97c471..a7d3e707c8 100644
--- a/src/openai/resources/beta/beta.py
+++ b/src/openai/resources/beta/beta.py
@@ -54,10 +54,21 @@ def threads(self) -> Threads:
 
     @cached_property
     def with_raw_response(self) -> BetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return BetaWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> BetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return BetaWithStreamingResponse(self)
 
 
@@ -80,10 +91,21 @@ def threads(self) -> AsyncThreads:
 
     @cached_property
     def with_raw_response(self) -> AsyncBetaWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncBetaWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncBetaWithStreamingResponse(self)
 
 
diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py
index 07eda27b76..ea3526778d 100644
--- a/src/openai/resources/beta/chat/completions.py
+++ b/src/openai/resources/beta/chat/completions.py
@@ -42,6 +42,7 @@ def parse(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -121,6 +122,7 @@ class MathResponse(BaseModel):
             functions=functions,
             logit_bias=logit_bias,
             logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
             max_tokens=max_tokens,
             n=n,
             parallel_tool_calls=parallel_tool_calls,
@@ -157,6 +159,7 @@ def stream(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -216,6 +219,7 @@ def stream(
             functions=functions,
             logit_bias=logit_bias,
             logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
             max_tokens=max_tokens,
             n=n,
             parallel_tool_calls=parallel_tool_calls,
@@ -254,6 +258,7 @@ async def parse(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -333,6 +338,7 @@ class MathResponse(BaseModel):
             functions=functions,
             logit_bias=logit_bias,
             logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
             max_tokens=max_tokens,
             n=n,
             parallel_tool_calls=parallel_tool_calls,
@@ -369,6 +375,7 @@ def stream(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -429,6 +436,7 @@ def stream(
             functions=functions,
             logit_bias=logit_bias,
             logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
             max_tokens=max_tokens,
             n=n,
             parallel_tool_calls=parallel_tool_calls,
diff --git a/src/openai/resources/beta/threads/messages.py b/src/openai/resources/beta/threads/messages.py
index f0832515ce..4901174329 100644
--- a/src/openai/resources/beta/threads/messages.py
+++ b/src/openai/resources/beta/threads/messages.py
@@ -32,10 +32,21 @@
 class Messages(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> MessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return MessagesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return MessagesWithStreamingResponse(self)
 
     def create(
@@ -295,10 +306,21 @@ def delete(
 class AsyncMessages(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncMessagesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncMessagesWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index cbfb9546f0..3fb1cc77aa 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import typing_extensions
-from typing import Union, Iterable, Optional, overload
+from typing import List, Union, Iterable, Optional, overload
 from functools import partial
 from typing_extensions import Literal
 
@@ -49,6 +49,7 @@
 from .....types.beta.threads.run import Run
 from .....types.beta.assistant_tool_param import AssistantToolParam
 from .....types.beta.assistant_stream_event import AssistantStreamEvent
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
 from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
 from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
 
@@ -62,10 +63,21 @@ def steps(self) -> Steps:
 
     @cached_property
     def with_raw_response(self) -> RunsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return RunsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> RunsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return RunsWithStreamingResponse(self)
 
     @overload
@@ -74,6 +86,7 @@ def create(
         thread_id: str,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -104,6 +117,14 @@ def create(
               [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
               execute this run.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           additional_instructions: Appends additional instructions at the end of the instructions for the run. This
               is useful for modifying the behavior on a per-run basis without overriding other
               instructions.
@@ -146,11 +167,11 @@ def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -206,6 +227,7 @@ def create(
         *,
         assistant_id: str,
         stream: Literal[True],
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -239,6 +261,14 @@ def create(
               events, terminating when the Run enters a terminal state with a `data: [DONE]`
               message.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           additional_instructions: Appends additional instructions at the end of the instructions for the run. This
               is useful for modifying the behavior on a per-run basis without overriding other
               instructions.
@@ -281,11 +311,11 @@ def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -337,6 +367,7 @@ def create(
         *,
         assistant_id: str,
         stream: bool,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -370,6 +401,14 @@ def create(
               events, terminating when the Run enters a terminal state with a `data: [DONE]`
               message.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           additional_instructions: Appends additional instructions at the end of the instructions for the run. This
               is useful for modifying the behavior on a per-run basis without overriding other
               instructions.
@@ -412,11 +451,11 @@ def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -467,6 +506,7 @@ def create(
         thread_id: str,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -516,7 +556,11 @@ def create(
                 run_create_params.RunCreateParams,
             ),
             options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, run_create_params.RunCreateParams),
             ),
             cast_to=Run,
             stream=stream or False,
@@ -712,6 +756,7 @@ def create_and_poll(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -743,6 +788,7 @@ def create_and_poll(
         run = self.create(
             thread_id=thread_id,
             assistant_id=assistant_id,
+            include=include,
             additional_instructions=additional_instructions,
             additional_messages=additional_messages,
             instructions=instructions,
@@ -958,6 +1004,7 @@ def stream(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -988,6 +1035,7 @@ def stream(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1018,6 +1066,7 @@ def stream(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1057,6 +1106,7 @@ def stream(
             body=maybe_transform(
                 {
                     "assistant_id": assistant_id,
+                    "include": include,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
@@ -1375,10 +1425,21 @@ def steps(self) -> AsyncSteps:
 
     @cached_property
     def with_raw_response(self) -> AsyncRunsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncRunsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncRunsWithStreamingResponse(self)
 
     @overload
@@ -1387,6 +1448,7 @@ async def create(
         thread_id: str,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1417,6 +1479,14 @@ async def create(
               [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
               execute this run.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           additional_instructions: Appends additional instructions at the end of the instructions for the run. This
               is useful for modifying the behavior on a per-run basis without overriding other
               instructions.
@@ -1459,11 +1529,11 @@ async def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1519,6 +1589,7 @@ async def create(
         *,
         assistant_id: str,
         stream: Literal[True],
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1552,6 +1623,14 @@ async def create(
               events, terminating when the Run enters a terminal state with a `data: [DONE]`
               message.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           additional_instructions: Appends additional instructions at the end of the instructions for the run. This
               is useful for modifying the behavior on a per-run basis without overriding other
               instructions.
@@ -1594,11 +1673,11 @@ async def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1650,6 +1729,7 @@ async def create(
         *,
         assistant_id: str,
         stream: bool,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1683,6 +1763,14 @@ async def create(
               events, terminating when the Run enters a terminal state with a `data: [DONE]`
               message.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           additional_instructions: Appends additional instructions at the end of the instructions for the run. This
               is useful for modifying the behavior on a per-run basis without overriding other
               instructions.
@@ -1725,11 +1813,11 @@ async def create(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1780,6 +1868,7 @@ async def create(
         thread_id: str,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1810,6 +1899,7 @@ async def create(
             body=await async_maybe_transform(
                 {
                     "assistant_id": assistant_id,
+                    "include": include,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
@@ -1829,7 +1919,11 @@ async def create(
                 run_create_params.RunCreateParams,
             ),
             options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams),
             ),
             cast_to=Run,
             stream=stream or False,
@@ -2025,6 +2119,7 @@ async def create_and_poll(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -2056,6 +2151,7 @@ async def create_and_poll(
         run = await self.create(
             thread_id=thread_id,
             assistant_id=assistant_id,
+            include=include,
             additional_instructions=additional_instructions,
             additional_messages=additional_messages,
             instructions=instructions,
@@ -2303,6 +2399,7 @@ def stream(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -2333,6 +2430,7 @@ def stream(
         self,
         *,
         assistant_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -2374,6 +2472,7 @@ def stream(
             body=maybe_transform(
                 {
                     "assistant_id": assistant_id,
+                    "include": include,
                     "additional_instructions": additional_instructions,
                     "additional_messages": additional_messages,
                     "instructions": instructions,
diff --git a/src/openai/resources/beta/threads/runs/steps.py b/src/openai/resources/beta/threads/runs/steps.py
index 512008939c..5d6d55f9d9 100644
--- a/src/openai/resources/beta/threads/runs/steps.py
+++ b/src/openai/resources/beta/threads/runs/steps.py
@@ -2,23 +2,25 @@
 
 from __future__ import annotations
 
+from typing import List
 from typing_extensions import Literal
 
 import httpx
 
 from ..... import _legacy_response
 from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ....._utils import maybe_transform
+from ....._utils import (
+    maybe_transform,
+    async_maybe_transform,
+)
 from ....._compat import cached_property
 from ....._resource import SyncAPIResource, AsyncAPIResource
 from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .....pagination import SyncCursorPage, AsyncCursorPage
-from ....._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from .....types.beta.threads.runs import step_list_params
+from ....._base_client import AsyncPaginator, make_request_options
+from .....types.beta.threads.runs import step_list_params, step_retrieve_params
 from .....types.beta.threads.runs.run_step import RunStep
+from .....types.beta.threads.runs.run_step_include import RunStepInclude
 
 __all__ = ["Steps", "AsyncSteps"]
 
@@ -26,10 +28,21 @@
 class Steps(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> StepsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return StepsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> StepsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return StepsWithStreamingResponse(self)
 
     def retrieve(
@@ -38,6 +51,7 @@ def retrieve(
         *,
         thread_id: str,
         run_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -49,6 +63,14 @@ def retrieve(
         Retrieves a run step.
 
         Args:
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -67,7 +89,11 @@ def retrieve(
         return self._get(
             f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
             options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
             ),
             cast_to=RunStep,
         )
@@ -79,6 +105,7 @@ def list(
         thread_id: str,
         after: str | NotGiven = NOT_GIVEN,
         before: str | NotGiven = NOT_GIVEN,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         limit: int | NotGiven = NOT_GIVEN,
         order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -102,6 +129,14 @@ def list(
               ending with obj_foo, your subsequent call can include before=obj_foo in order to
               fetch the previous page of the list.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           limit: A limit on the number of objects to be returned. Limit can range between 1 and
               100, and the default is 20.
 
@@ -133,6 +168,7 @@ def list(
                     {
                         "after": after,
                         "before": before,
+                        "include": include,
                         "limit": limit,
                         "order": order,
                     },
@@ -146,10 +182,21 @@ def list(
 class AsyncSteps(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncStepsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncStepsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncStepsWithStreamingResponse(self)
 
     async def retrieve(
@@ -158,6 +205,7 @@ async def retrieve(
         *,
         thread_id: str,
         run_id: str,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -169,6 +217,14 @@ async def retrieve(
         Retrieves a run step.
 
         Args:
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -187,7 +243,11 @@ async def retrieve(
         return await self._get(
             f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
             options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams),
             ),
             cast_to=RunStep,
         )
@@ -199,6 +259,7 @@ def list(
         thread_id: str,
         after: str | NotGiven = NOT_GIVEN,
         before: str | NotGiven = NOT_GIVEN,
+        include: List[RunStepInclude] | NotGiven = NOT_GIVEN,
         limit: int | NotGiven = NOT_GIVEN,
         order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -222,6 +283,14 @@ def list(
               ending with obj_foo, your subsequent call can include before=obj_foo in order to
               fetch the previous page of the list.
 
+          include: A list of additional fields to include in the response. Currently the only
+              supported value is `step_details.tool_calls[*].file_search.results[*].content`
+              to fetch the file search result content.
+
+              See the
+              [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+              for more information.
+
           limit: A limit on the number of objects to be returned. Limit can range between 1 and
               100, and the default is 20.
 
@@ -253,6 +322,7 @@ def list(
                     {
                         "after": after,
                         "before": before,
+                        "include": include,
                         "limit": limit,
                         "order": order,
                     },
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index 4c95c484cc..49b0e4b37e 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -72,10 +72,21 @@ def messages(self) -> Messages:
 
     @cached_property
     def with_raw_response(self) -> ThreadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return ThreadsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ThreadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return ThreadsWithStreamingResponse(self)
 
     def create(
@@ -324,11 +335,11 @@ def create_and_run(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -458,11 +469,11 @@ def create_and_run(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -588,11 +599,11 @@ def create_and_run(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -895,10 +906,21 @@ def messages(self) -> AsyncMessages:
 
     @cached_property
     def with_raw_response(self) -> AsyncThreadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncThreadsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncThreadsWithStreamingResponse(self)
 
     async def create(
@@ -1147,11 +1169,11 @@ async def create_and_run(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1281,11 +1303,11 @@ async def create_and_run(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1411,11 +1433,11 @@ async def create_and_run(
               and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
diff --git a/src/openai/resources/beta/vector_stores/file_batches.py b/src/openai/resources/beta/vector_stores/file_batches.py
index d6862c24ef..d1f9c872e4 100644
--- a/src/openai/resources/beta/vector_stores/file_batches.py
+++ b/src/openai/resources/beta/vector_stores/file_batches.py
@@ -22,11 +22,10 @@
 from ...._resource import SyncAPIResource, AsyncAPIResource
 from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ....pagination import SyncCursorPage, AsyncCursorPage
-from ...._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
+from ....types.beta import FileChunkingStrategyParam
+from ...._base_client import AsyncPaginator, make_request_options
 from ....types.beta.vector_stores import file_batch_create_params, file_batch_list_files_params
+from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
 from ....types.beta.vector_stores.vector_store_file import VectorStoreFile
 from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch
 
@@ -36,10 +35,21 @@
 class FileBatches(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> FileBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return FileBatchesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> FileBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return FileBatchesWithStreamingResponse(self)
 
     def create(
@@ -47,7 +57,7 @@ def create(
         vector_store_id: str,
         *,
         file_ids: List[str],
-        chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -64,7 +74,7 @@ def create(
               files.
 
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
-              strategy.
+              strategy. Only applicable if `file_ids` is non-empty.
 
           extra_headers: Send extra headers
 
@@ -174,7 +184,7 @@ def create_and_poll(
         *,
         file_ids: List[str],
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFileBatch:
         """Create a vector store batch and poll until all files have been processed."""
         batch = self.create(
@@ -308,7 +318,7 @@ def upload_and_poll(
         max_concurrency: int = 5,
         file_ids: List[str] = [],
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFileBatch:
         """Uploads the given files concurrently and then creates a vector store file batch.
 
@@ -354,10 +364,21 @@ def upload_and_poll(
 class AsyncFileBatches(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncFileBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncFileBatchesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncFileBatchesWithStreamingResponse(self)
 
     async def create(
@@ -365,7 +386,7 @@ async def create(
         vector_store_id: str,
         *,
         file_ids: List[str],
-        chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -382,7 +403,7 @@ async def create(
               files.
 
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
-              strategy.
+              strategy. Only applicable if `file_ids` is non-empty.
 
           extra_headers: Send extra headers
 
@@ -492,7 +513,7 @@ async def create_and_poll(
         *,
         file_ids: List[str],
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFileBatch:
         """Create a vector store batch and poll until all files have been processed."""
         batch = await self.create(
@@ -626,7 +647,7 @@ async def upload_and_poll(
         max_concurrency: int = 5,
         file_ids: List[str] = [],
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFileBatch:
         """Uploads the given files concurrently and then creates a vector store file batch.
 
diff --git a/src/openai/resources/beta/vector_stores/files.py b/src/openai/resources/beta/vector_stores/files.py
index 35ca331cc0..fe43bb3488 100644
--- a/src/openai/resources/beta/vector_stores/files.py
+++ b/src/openai/resources/beta/vector_stores/files.py
@@ -18,11 +18,10 @@
 from ...._resource import SyncAPIResource, AsyncAPIResource
 from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ....pagination import SyncCursorPage, AsyncCursorPage
-from ...._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
+from ....types.beta import FileChunkingStrategyParam
+from ...._base_client import AsyncPaginator, make_request_options
 from ....types.beta.vector_stores import file_list_params, file_create_params
+from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
 from ....types.beta.vector_stores.vector_store_file import VectorStoreFile
 from ....types.beta.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted
 
@@ -32,10 +31,21 @@
 class Files(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return FilesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return FilesWithStreamingResponse(self)
 
     def create(
@@ -43,7 +53,7 @@ def create(
         vector_store_id: str,
         *,
         file_id: str,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -62,7 +72,7 @@ def create(
               files.
 
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
-              strategy.
+              strategy. Only applicable if `file_ids` is non-empty.
 
           extra_headers: Send extra headers
 
@@ -245,7 +255,7 @@ def create_and_poll(
         *,
         vector_store_id: str,
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFile:
         """Attach a file to the given vector store and wait for it to be processed."""
         self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)
@@ -302,7 +312,7 @@ def upload(
         *,
         vector_store_id: str,
         file: FileTypes,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFile:
         """Upload a file to the `files` API and then attach it to the given vector store.
 
@@ -318,7 +328,7 @@ def upload_and_poll(
         vector_store_id: str,
         file: FileTypes,
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFile:
         """Add a file to a vector store and poll until processing is complete."""
         file_obj = self._client.files.create(file=file, purpose="assistants")
@@ -333,10 +343,21 @@ def upload_and_poll(
 class AsyncFiles(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncFilesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncFilesWithStreamingResponse(self)
 
     async def create(
@@ -344,7 +365,7 @@ async def create(
         vector_store_id: str,
         *,
         file_id: str,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -363,7 +384,7 @@ async def create(
               files.
 
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
-              strategy.
+              strategy. Only applicable if `file_ids` is non-empty.
 
           extra_headers: Send extra headers
 
@@ -546,7 +567,7 @@ async def create_and_poll(
         *,
         vector_store_id: str,
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFile:
         """Attach a file to the given vector store and wait for it to be processed."""
         await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)
@@ -603,7 +624,7 @@ async def upload(
         *,
         vector_store_id: str,
         file: FileTypes,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFile:
         """Upload a file to the `files` API and then attach it to the given vector store.
 
@@ -621,7 +642,7 @@ async def upload_and_poll(
         vector_store_id: str,
         file: FileTypes,
         poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
     ) -> VectorStoreFile:
         """Add a file to a vector store and poll until processing is complete."""
         file_obj = await self._client.files.create(file=file, purpose="assistants")
diff --git a/src/openai/resources/beta/vector_stores/vector_stores.py b/src/openai/resources/beta/vector_stores/vector_stores.py
index cbd56a0693..06e26852b4 100644
--- a/src/openai/resources/beta/vector_stores/vector_stores.py
+++ b/src/openai/resources/beta/vector_stores/vector_stores.py
@@ -33,13 +33,16 @@
     AsyncFileBatchesWithStreamingResponse,
 )
 from ....pagination import SyncCursorPage, AsyncCursorPage
-from ....types.beta import vector_store_list_params, vector_store_create_params, vector_store_update_params
-from ...._base_client import (
-    AsyncPaginator,
-    make_request_options,
+from ....types.beta import (
+    FileChunkingStrategyParam,
+    vector_store_list_params,
+    vector_store_create_params,
+    vector_store_update_params,
 )
+from ...._base_client import AsyncPaginator, make_request_options
 from ....types.beta.vector_store import VectorStore
 from ....types.beta.vector_store_deleted import VectorStoreDeleted
+from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
 
 __all__ = ["VectorStores", "AsyncVectorStores"]
 
@@ -55,16 +58,27 @@ def file_batches(self) -> FileBatches:
 
     @cached_property
     def with_raw_response(self) -> VectorStoresWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return VectorStoresWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> VectorStoresWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return VectorStoresWithStreamingResponse(self)
 
     def create(
         self,
         *,
-        chunking_strategy: vector_store_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
         expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
         file_ids: List[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
@@ -322,16 +336,27 @@ def file_batches(self) -> AsyncFileBatches:
 
     @cached_property
     def with_raw_response(self) -> AsyncVectorStoresWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncVectorStoresWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncVectorStoresWithStreamingResponse(self)
 
     async def create(
         self,
         *,
-        chunking_strategy: vector_store_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
+        chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
         expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN,
         file_ids: List[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
diff --git a/src/openai/resources/chat/chat.py b/src/openai/resources/chat/chat.py
index d14d055506..dc23a15a8e 100644
--- a/src/openai/resources/chat/chat.py
+++ b/src/openai/resources/chat/chat.py
@@ -23,10 +23,21 @@ def completions(self) -> Completions:
 
     @cached_property
     def with_raw_response(self) -> ChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return ChatWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return ChatWithStreamingResponse(self)
 
 
@@ -37,10 +48,21 @@ def completions(self) -> AsyncCompletions:
 
     @cached_property
     def with_raw_response(self) -> AsyncChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncChatWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncChatWithStreamingResponse(self)
 
 
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index dc577d6251..e9267b1f03 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -36,10 +36,21 @@
 class Completions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return CompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return CompletionsWithStreamingResponse(self)
 
     @overload
@@ -53,6 +64,7 @@ def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -121,13 +133,17 @@ def create(
               returns the log probabilities of each output token returned in the `content` of
               `message`.
 
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
           max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
 
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -150,11 +166,11 @@ def create(
               all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -174,8 +190,11 @@ def create(
           service_tier: Specifies the latency tier to use for processing the request. This parameter is
               relevant for customers subscribed to the scale tier service:
 
-              - If set to 'auto', the system will utilize scale tier credits until they are
-                exhausted.
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarentee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
@@ -250,6 +269,7 @@ def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -324,13 +344,17 @@ def create(
               returns the log probabilities of each output token returned in the `content` of
               `message`.
 
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
           max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
 
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -353,11 +377,11 @@ def create(
               all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -377,8 +401,11 @@ def create(
           service_tier: Specifies the latency tier to use for processing the request. This parameter is
               relevant for customers subscribed to the scale tier service:
 
-              - If set to 'auto', the system will utilize scale tier credits until they are
-                exhausted.
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarentee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
@@ -446,6 +473,7 @@ def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -520,13 +548,17 @@ def create(
               returns the log probabilities of each output token returned in the `content` of
               `message`.
 
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
           max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
 
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -549,11 +581,11 @@ def create(
               all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -573,8 +605,11 @@ def create(
           service_tier: Specifies the latency tier to use for processing the request. This parameter is
               relevant for customers subscribed to the scale tier service:
 
-              - If set to 'auto', the system will utilize scale tier credits until they are
-                exhausted.
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarentee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
@@ -641,6 +676,7 @@ def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -676,6 +712,7 @@ def create(
                     "functions": functions,
                     "logit_bias": logit_bias,
                     "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
@@ -707,10 +744,21 @@ def create(
 class AsyncCompletions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncCompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncCompletionsWithStreamingResponse(self)
 
     @overload
@@ -724,6 +772,7 @@ async def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -792,13 +841,17 @@ async def create(
               returns the log probabilities of each output token returned in the `content` of
               `message`.
 
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
           max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
 
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -821,11 +874,11 @@ async def create(
               all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -845,8 +898,11 @@ async def create(
           service_tier: Specifies the latency tier to use for processing the request. This parameter is
               relevant for customers subscribed to the scale tier service:
 
-              - If set to 'auto', the system will utilize scale tier credits until they are
-                exhausted.
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarentee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
@@ -921,6 +977,7 @@ async def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -995,13 +1052,17 @@ async def create(
               returns the log probabilities of each output token returned in the `content` of
               `message`.
 
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
           max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
 
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -1024,11 +1085,11 @@ async def create(
               all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1048,8 +1109,11 @@ async def create(
           service_tier: Specifies the latency tier to use for processing the request. This parameter is
               relevant for customers subscribed to the scale tier service:
 
-              - If set to 'auto', the system will utilize scale tier credits until they are
-                exhausted.
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarentee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
@@ -1117,6 +1181,7 @@ async def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -1191,13 +1256,17 @@ async def create(
               returns the log probabilities of each output token returned in the `content` of
               `message`.
 
+          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
           max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
+              completion. This value can be used to control
+              [costs](https://openai.com/api/pricing/) for text generated via API.
 
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              This value is now deprecated in favor of `max_completion_tokens`, and is not
+              compatible with
+              [o1 series models](https://platform.openai.com/docs/guides/reasoning).
 
           n: How many chat completion choices to generate for each input message. Note that
               you will be charged based on the number of generated tokens across all of the
@@ -1220,11 +1289,11 @@ async def create(
               all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-              Outputs which guarantees the model will match your supplied JSON schema. Learn
-              more in the
+              Outputs which ensures the model will match your supplied JSON schema. Learn more
+              in the
               [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
               message the model generates is valid JSON.
 
               **Important:** when using JSON mode, you **must** also instruct the model to
@@ -1244,8 +1313,11 @@ async def create(
           service_tier: Specifies the latency tier to use for processing the request. This parameter is
               relevant for customers subscribed to the scale tier service:
 
-              - If set to 'auto', the system will utilize scale tier credits until they are
-                exhausted.
+              - If set to 'auto', and the Project is Scale tier enabled, the system will
+                utilize scale tier credits until they are exhausted.
+              - If set to 'auto', and the Project is not Scale tier enabled, the request will
+                be processed using the default service tier with a lower uptime SLA and no
+                latency guarentee.
               - If set to 'default', the request will be processed using the default service
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
@@ -1312,6 +1384,7 @@ async def create(
         functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
         logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
@@ -1347,6 +1420,7 @@ async def create(
                     "functions": functions,
                     "logit_bias": logit_bias,
                     "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
                     "max_tokens": max_tokens,
                     "n": n,
                     "parallel_tool_calls": parallel_tool_calls,
diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py
index 0812000f78..091fb5657a 100644
--- a/src/openai/resources/completions.py
+++ b/src/openai/resources/completions.py
@@ -31,10 +31,21 @@
 class Completions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return CompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return CompletionsWithStreamingResponse(self)
 
     @overload
@@ -562,10 +573,21 @@ def create(
 class AsyncCompletions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncCompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncCompletionsWithStreamingResponse(self)
 
     @overload
diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py
index 773b6f0968..71c2a18a24 100644
--- a/src/openai/resources/embeddings.py
+++ b/src/openai/resources/embeddings.py
@@ -27,10 +27,21 @@
 class Embeddings(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> EmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return EmbeddingsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return EmbeddingsWithStreamingResponse(self)
 
     def create(
@@ -128,10 +139,21 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
 class AsyncEmbeddings(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncEmbeddingsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncEmbeddingsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/files.py b/src/openai/resources/files.py
index a240e1d886..e24eeec711 100644
--- a/src/openai/resources/files.py
+++ b/src/openai/resources/files.py
@@ -42,10 +42,21 @@
 class Files(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return FilesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return FilesWithStreamingResponse(self)
 
     def create(
@@ -324,10 +335,21 @@ def wait_for_processing(
 class AsyncFiles(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncFilesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncFilesWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index 0404fed6ec..c386de3c2a 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -24,10 +24,21 @@ def jobs(self) -> Jobs:
 
     @cached_property
     def with_raw_response(self) -> FineTuningWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return FineTuningWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> FineTuningWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return FineTuningWithStreamingResponse(self)
 
 
@@ -38,10 +49,21 @@ def jobs(self) -> AsyncJobs:
 
     @cached_property
     def with_raw_response(self) -> AsyncFineTuningWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncFineTuningWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncFineTuningWithStreamingResponse(self)
 
 
diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py
index 67f5739a02..8b5e905ea5 100644
--- a/src/openai/resources/fine_tuning/jobs/checkpoints.py
+++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py
@@ -24,10 +24,21 @@
 class Checkpoints(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> CheckpointsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return CheckpointsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return CheckpointsWithStreamingResponse(self)
 
     def list(
@@ -84,10 +95,21 @@ def list(
 class AsyncCheckpoints(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncCheckpointsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncCheckpointsWithStreamingResponse(self)
 
     def list(
diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
index 5cef7bcd22..44abf1cfe1 100644
--- a/src/openai/resources/fine_tuning/jobs/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -43,10 +43,21 @@ def checkpoints(self) -> Checkpoints:
 
     @cached_property
     def with_raw_response(self) -> JobsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return JobsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> JobsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return JobsWithStreamingResponse(self)
 
     def create(
@@ -103,7 +114,7 @@ def create(
               job parameters should produce the same results, but may differ in rare cases. If
               a seed is not specified, one will be generated for you.
 
-          suffix: A string of up to 18 characters that will be added to your fine-tuned model
+          suffix: A string of up to 64 characters that will be added to your fine-tuned model
               name.
 
               For example, a `suffix` of "custom-model-name" would produce a model name like
@@ -323,10 +334,21 @@ def checkpoints(self) -> AsyncCheckpoints:
 
     @cached_property
     def with_raw_response(self) -> AsyncJobsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncJobsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncJobsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncJobsWithStreamingResponse(self)
 
     async def create(
@@ -383,7 +405,7 @@ async def create(
               job parameters should produce the same results, but may differ in rare cases. If
               a seed is not specified, one will be generated for you.
 
-          suffix: A string of up to 18 characters that will be added to your fine-tuned model
+          suffix: A string of up to 64 characters that will be added to your fine-tuned model
               name.
 
               For example, a `suffix` of "custom-model-name" would produce a model name like
diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py
index 0913b572cb..e9629d48fd 100644
--- a/src/openai/resources/images.py
+++ b/src/openai/resources/images.py
@@ -29,10 +29,21 @@
 class Images(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> ImagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return ImagesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ImagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return ImagesWithStreamingResponse(self)
 
     def create_variation(
@@ -275,10 +286,21 @@ def generate(
 class AsyncImages(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncImagesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncImagesWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncImagesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncImagesWithStreamingResponse(self)
 
     async def create_variation(
diff --git a/src/openai/resources/models.py b/src/openai/resources/models.py
index e76c496ffa..d6062de230 100644
--- a/src/openai/resources/models.py
+++ b/src/openai/resources/models.py
@@ -23,10 +23,21 @@
 class Models(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return ModelsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return ModelsWithStreamingResponse(self)
 
     def retrieve(
@@ -125,10 +136,21 @@ def delete(
 class AsyncModels(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncModelsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncModelsWithStreamingResponse(self)
 
     async def retrieve(
diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py
index b9ad9972f0..5283554373 100644
--- a/src/openai/resources/moderations.py
+++ b/src/openai/resources/moderations.py
@@ -26,10 +26,21 @@
 class Moderations(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> ModerationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return ModerationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ModerationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return ModerationsWithStreamingResponse(self)
 
     def create(
@@ -86,10 +97,21 @@ def create(
 class AsyncModerations(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncModerationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncModerationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncModerationsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/uploads/parts.py b/src/openai/resources/uploads/parts.py
index 3ec2592b1e..d46e5ea1bb 100644
--- a/src/openai/resources/uploads/parts.py
+++ b/src/openai/resources/uploads/parts.py
@@ -27,10 +27,21 @@
 class Parts(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> PartsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return PartsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> PartsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return PartsWithStreamingResponse(self)
 
     def create(
@@ -91,10 +102,21 @@ def create(
 class AsyncParts(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncPartsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncPartsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncPartsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncPartsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py
index 5eecef4d4b..96a531a8e4 100644
--- a/src/openai/resources/uploads/uploads.py
+++ b/src/openai/resources/uploads/uploads.py
@@ -50,10 +50,21 @@ def parts(self) -> Parts:
 
     @cached_property
     def with_raw_response(self) -> UploadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return UploadsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> UploadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return UploadsWithStreamingResponse(self)
 
     @overload
@@ -332,10 +343,21 @@ def parts(self) -> AsyncParts:
 
     @cached_property
     def with_raw_response(self) -> AsyncUploadsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncUploadsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
         return AsyncUploadsWithStreamingResponse(self)
 
     @overload
diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py
index 9c5ddfdbe0..7f76fed0cd 100644
--- a/src/openai/types/beta/__init__.py
+++ b/src/openai/types/beta/__init__.py
@@ -19,6 +19,7 @@
 from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice
 from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
 from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
+from .file_chunking_strategy import FileChunkingStrategy as FileChunkingStrategy
 from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam
 from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
 from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
@@ -28,11 +29,17 @@
 from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam
 from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
 from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption
+from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam
 from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
+from .static_file_chunking_strategy import StaticFileChunkingStrategy as StaticFileChunkingStrategy
 from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction
 from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption
+from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam as AutoFileChunkingStrategyParam
 from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam
+from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject as OtherFileChunkingStrategyObject
+from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam as StaticFileChunkingStrategyParam
 from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam
+from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject as StaticFileChunkingStrategyObject
 from .assistant_response_format_option_param import (
     AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam,
 )
diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py
index c6a0a4cfcf..b4da08745d 100644
--- a/src/openai/types/beta/assistant.py
+++ b/src/openai/types/beta/assistant.py
@@ -90,11 +90,11 @@ class Assistant(BaseModel):
     and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 84cd4425d1..eca4da0a2b 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -3,10 +3,11 @@
 from __future__ import annotations
 
 from typing import List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
+from typing_extensions import Required, TypedDict
 
 from ..chat_model import ChatModel
 from .assistant_tool_param import AssistantToolParam
+from .file_chunking_strategy_param import FileChunkingStrategyParam
 from .assistant_response_format_option_param import AssistantResponseFormatOptionParam
 
 __all__ = [
@@ -15,10 +16,6 @@
     "ToolResourcesCodeInterpreter",
     "ToolResourcesFileSearch",
     "ToolResourcesFileSearchVectorStore",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategy",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic",
 ]
 
 
@@ -61,11 +58,11 @@ class AssistantCreateParams(TypedDict, total=False):
     and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
@@ -118,43 +115,12 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False):
     """
 
 
-class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False):
-    type: Required[Literal["auto"]]
-    """Always `auto`."""
-
-
-class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False):
-    chunk_overlap_tokens: Required[int]
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
-    """
-
-    max_chunk_size_tokens: Required[int]
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False):
-    static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic]
-
-    type: Required[Literal["static"]]
-    """Always `static`."""
-
-
-ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[
-    ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic
-]
-
-
 class ToolResourcesFileSearchVectorStore(TypedDict, total=False):
-    chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy
+    chunking_strategy: FileChunkingStrategyParam
     """The chunking strategy used to chunk the file(s).
 
-    If not set, will use the `auto` strategy.
+    If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+    non-empty.
     """
 
     file_ids: List[str]
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index ade565819f..5396233937 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -50,11 +50,11 @@ class AssistantUpdateParams(TypedDict, total=False):
     and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
diff --git a/src/openai/types/beta/auto_file_chunking_strategy_param.py b/src/openai/types/beta/auto_file_chunking_strategy_param.py
new file mode 100644
index 0000000000..6f17836bac
--- /dev/null
+++ b/src/openai/types/beta/auto_file_chunking_strategy_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["AutoFileChunkingStrategyParam"]
+
+
+class AutoFileChunkingStrategyParam(TypedDict, total=False):
+    type: Required[Literal["auto"]]
+    """Always `auto`."""
diff --git a/src/openai/types/beta/file_chunking_strategy.py b/src/openai/types/beta/file_chunking_strategy.py
new file mode 100644
index 0000000000..406d69dd0e
--- /dev/null
+++ b/src/openai/types/beta/file_chunking_strategy.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject
+from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject
+
+__all__ = ["FileChunkingStrategy"]
+
+FileChunkingStrategy: TypeAlias = Annotated[
+    Union[StaticFileChunkingStrategyObject, OtherFileChunkingStrategyObject], PropertyInfo(discriminator="type")
+]
diff --git a/src/openai/types/beta/file_chunking_strategy_param.py b/src/openai/types/beta/file_chunking_strategy_param.py
new file mode 100644
index 0000000000..46383358e5
--- /dev/null
+++ b/src/openai/types/beta/file_chunking_strategy_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import TypeAlias
+
+from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam
+from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam
+
+__all__ = ["FileChunkingStrategyParam"]
+
+FileChunkingStrategyParam: TypeAlias = Union[AutoFileChunkingStrategyParam, StaticFileChunkingStrategyParam]
diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py
index 26ab1cb83f..aee6593e89 100644
--- a/src/openai/types/beta/file_search_tool.py
+++ b/src/openai/types/beta/file_search_tool.py
@@ -5,7 +5,21 @@
 
 from ..._models import BaseModel
 
-__all__ = ["FileSearchTool", "FileSearch"]
+__all__ = ["FileSearchTool", "FileSearch", "FileSearchRankingOptions"]
+
+
+class FileSearchRankingOptions(BaseModel):
+    score_threshold: float
+    """The score threshold for the file search.
+
+    All values must be a floating point number between 0 and 1.
+    """
+
+    ranker: Optional[Literal["auto", "default_2024_08_21"]] = None
+    """The ranker to use for the file search.
+
+    If not specified will use the `auto` ranker.
+    """
 
 
 class FileSearch(BaseModel):
@@ -17,7 +31,18 @@ class FileSearch(BaseModel):
 
     Note that the file search tool may output fewer than `max_num_results` results.
     See the
-    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/number-of-chunks-returned)
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+    for more information.
+    """
+
+    ranking_options: Optional[FileSearchRankingOptions] = None
+    """The ranking options for the file search.
+
+    If not specified, the file search tool will use the `auto` ranker and a
+    score_threshold of 0.
+
+    See the
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
     for more information.
     """
 
diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py
index 666719f8cd..5ce91207ba 100644
--- a/src/openai/types/beta/file_search_tool_param.py
+++ b/src/openai/types/beta/file_search_tool_param.py
@@ -4,7 +4,21 @@
 
 from typing_extensions import Literal, Required, TypedDict
 
-__all__ = ["FileSearchToolParam", "FileSearch"]
+__all__ = ["FileSearchToolParam", "FileSearch", "FileSearchRankingOptions"]
+
+
+class FileSearchRankingOptions(TypedDict, total=False):
+    score_threshold: Required[float]
+    """The score threshold for the file search.
+
+    All values must be a floating point number between 0 and 1.
+    """
+
+    ranker: Literal["auto", "default_2024_08_21"]
+    """The ranker to use for the file search.
+
+    If not specified will use the `auto` ranker.
+    """
 
 
 class FileSearch(TypedDict, total=False):
@@ -16,7 +30,18 @@ class FileSearch(TypedDict, total=False):
 
     Note that the file search tool may output fewer than `max_num_results` results.
     See the
-    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/number-of-chunks-returned)
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+    for more information.
+    """
+
+    ranking_options: FileSearchRankingOptions
+    """The ranking options for the file search.
+
+    If not specified, the file search tool will use the `auto` ranker and a
+    score_threshold of 0.
+
+    See the
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
     for more information.
     """
 
diff --git a/src/openai/types/beta/other_file_chunking_strategy_object.py b/src/openai/types/beta/other_file_chunking_strategy_object.py
new file mode 100644
index 0000000000..89da560be4
--- /dev/null
+++ b/src/openai/types/beta/other_file_chunking_strategy_object.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["OtherFileChunkingStrategyObject"]
+
+
+class OtherFileChunkingStrategyObject(BaseModel):
+    type: Literal["other"]
+    """Always `other`."""
diff --git a/src/openai/types/beta/static_file_chunking_strategy.py b/src/openai/types/beta/static_file_chunking_strategy.py
new file mode 100644
index 0000000000..ba80e1a2b9
--- /dev/null
+++ b/src/openai/types/beta/static_file_chunking_strategy.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from ..._models import BaseModel
+
+__all__ = ["StaticFileChunkingStrategy"]
+
+
+class StaticFileChunkingStrategy(BaseModel):
+    chunk_overlap_tokens: int
+    """The number of tokens that overlap between chunks. The default value is `400`.
+
+    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
+    """
+
+    max_chunk_size_tokens: int
+    """The maximum number of tokens in each chunk.
+
+    The default value is `800`. The minimum value is `100` and the maximum value is
+    `4096`.
+    """
diff --git a/src/openai/types/beta/static_file_chunking_strategy_object.py b/src/openai/types/beta/static_file_chunking_strategy_object.py
new file mode 100644
index 0000000000..896c4b8320
--- /dev/null
+++ b/src/openai/types/beta/static_file_chunking_strategy_object.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .static_file_chunking_strategy import StaticFileChunkingStrategy
+
+__all__ = ["StaticFileChunkingStrategyObject"]
+
+
+class StaticFileChunkingStrategyObject(BaseModel):
+    static: StaticFileChunkingStrategy
+
+    type: Literal["static"]
+    """Always `static`."""
diff --git a/src/openai/types/beta/static_file_chunking_strategy_param.py b/src/openai/types/beta/static_file_chunking_strategy_param.py
new file mode 100644
index 0000000000..f917ac5647
--- /dev/null
+++ b/src/openai/types/beta/static_file_chunking_strategy_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["StaticFileChunkingStrategyParam"]
+
+
+class StaticFileChunkingStrategyParam(TypedDict, total=False):
+    chunk_overlap_tokens: Required[int]
+    """The number of tokens that overlap between chunks. The default value is `400`.
+
+    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
+    """
+
+    max_chunk_size_tokens: Required[int]
+    """The maximum number of tokens in each chunk.
+
+    The default value is `800`. The minimum value is `100` and the maximum value is
+    `4096`.
+    """
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index 7490b25ef3..20d525fa1a 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -9,6 +9,7 @@
 from .function_tool_param import FunctionToolParam
 from .file_search_tool_param import FileSearchToolParam
 from .code_interpreter_tool_param import CodeInterpreterToolParam
+from .file_chunking_strategy_param import FileChunkingStrategyParam
 from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
 from .threads.message_content_part_param import MessageContentPartParam
 from .assistant_response_format_option_param import AssistantResponseFormatOptionParam
@@ -24,10 +25,6 @@
     "ThreadToolResourcesCodeInterpreter",
     "ThreadToolResourcesFileSearch",
     "ThreadToolResourcesFileSearchVectorStore",
-    "ThreadToolResourcesFileSearchVectorStoreChunkingStrategy",
-    "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto",
-    "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic",
-    "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic",
     "ToolResources",
     "ToolResourcesCodeInterpreter",
     "ToolResourcesFileSearch",
@@ -101,11 +98,11 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
     and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
@@ -218,44 +215,12 @@ class ThreadToolResourcesCodeInterpreter(TypedDict, total=False):
     """
 
 
-class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False):
-    type: Required[Literal["auto"]]
-    """Always `auto`."""
-
-
-class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False):
-    chunk_overlap_tokens: Required[int]
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
-    """
-
-    max_chunk_size_tokens: Required[int]
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False):
-    static: Required[ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic]
-
-    type: Required[Literal["static"]]
-    """Always `static`."""
-
-
-ThreadToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[
-    ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto,
-    ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic,
-]
-
-
 class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False):
-    chunking_strategy: ThreadToolResourcesFileSearchVectorStoreChunkingStrategy
+    chunking_strategy: FileChunkingStrategyParam
     """The chunking strategy used to chunk the file(s).
 
-    If not set, will use the `auto` strategy.
+    If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+    non-empty.
     """
 
     file_ids: List[str]
@@ -367,7 +332,7 @@ class TruncationStrategy(TypedDict, total=False):
     """
 
 
-class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
+class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase, total=False):
     stream: Optional[Literal[False]]
     """
     If `true`, returns a stream of events that happen during the Run as server-sent
diff --git a/src/openai/types/beta/thread_create_params.py b/src/openai/types/beta/thread_create_params.py
index f9561aa48c..729164b481 100644
--- a/src/openai/types/beta/thread_create_params.py
+++ b/src/openai/types/beta/thread_create_params.py
@@ -6,6 +6,7 @@
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from .code_interpreter_tool_param import CodeInterpreterToolParam
+from .file_chunking_strategy_param import FileChunkingStrategyParam
 from .threads.message_content_part_param import MessageContentPartParam
 
 __all__ = [
@@ -18,10 +19,6 @@
     "ToolResourcesCodeInterpreter",
     "ToolResourcesFileSearch",
     "ToolResourcesFileSearchVectorStore",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategy",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic",
-    "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic",
 ]
 
 
@@ -99,43 +96,12 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False):
     """
 
 
-class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False):
-    type: Required[Literal["auto"]]
-    """Always `auto`."""
-
-
-class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False):
-    chunk_overlap_tokens: Required[int]
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
-    """
-
-    max_chunk_size_tokens: Required[int]
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False):
-    static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic]
-
-    type: Required[Literal["static"]]
-    """Always `static`."""
-
-
-ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[
-    ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic
-]
-
-
 class ToolResourcesFileSearchVectorStore(TypedDict, total=False):
-    chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy
+    chunking_strategy: FileChunkingStrategyParam
     """The chunking strategy used to chunk the file(s).
 
-    If not set, will use the `auto` strategy.
+    If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+    non-empty.
     """
 
     file_ids: List[str]
diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py
index 0579e229d8..5abc1de295 100644
--- a/src/openai/types/beta/threads/run.py
+++ b/src/openai/types/beta/threads/run.py
@@ -172,11 +172,11 @@ class Run(BaseModel):
     and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index d3e6d9c476..824cb1a041 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -2,11 +2,12 @@
 
 from __future__ import annotations
 
-from typing import Union, Iterable, Optional
+from typing import List, Union, Iterable, Optional
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from ...chat_model import ChatModel
 from ..assistant_tool_param import AssistantToolParam
+from .runs.run_step_include import RunStepInclude
 from .message_content_part_param import MessageContentPartParam
 from ..code_interpreter_tool_param import CodeInterpreterToolParam
 from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
@@ -32,6 +33,18 @@ class RunCreateParamsBase(TypedDict, total=False):
     execute this run.
     """
 
+    include: List[RunStepInclude]
+    """A list of additional fields to include in the response.
+
+    Currently the only supported value is
+    `step_details.tool_calls[*].file_search.results[*].content` to fetch the file
+    search result content.
+
+    See the
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+    for more information.
+    """
+
     additional_instructions: Optional[str]
     """Appends additional instructions at the end of the instructions for the run.
 
@@ -98,11 +111,11 @@ class RunCreateParamsBase(TypedDict, total=False):
     and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
@@ -212,7 +225,7 @@ class TruncationStrategy(TypedDict, total=False):
     """
 
 
-class RunCreateParamsNonStreaming(RunCreateParamsBase):
+class RunCreateParamsNonStreaming(RunCreateParamsBase, total=False):
     stream: Optional[Literal[False]]
     """
     If `true`, returns a stream of events that happen during the Run as server-sent
diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py
index ccb5e5e97e..147728603a 100644
--- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py
+++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py
@@ -31,7 +31,7 @@ class ToolOutput(TypedDict, total=False):
     """
 
 
-class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase):
+class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase, total=False):
     stream: Optional[Literal[False]]
     """
     If `true`, returns a stream of events that happen during the Run as server-sent
diff --git a/src/openai/types/beta/threads/runs/__init__.py b/src/openai/types/beta/threads/runs/__init__.py
index a312ce3df2..467d5d793d 100644
--- a/src/openai/types/beta/threads/runs/__init__.py
+++ b/src/openai/types/beta/threads/runs/__init__.py
@@ -6,9 +6,11 @@
 from .tool_call import ToolCall as ToolCall
 from .run_step_delta import RunStepDelta as RunStepDelta
 from .tool_call_delta import ToolCallDelta as ToolCallDelta
+from .run_step_include import RunStepInclude as RunStepInclude
 from .step_list_params import StepListParams as StepListParams
 from .function_tool_call import FunctionToolCall as FunctionToolCall
 from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent
+from .step_retrieve_params import StepRetrieveParams as StepRetrieveParams
 from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs
 from .file_search_tool_call import FileSearchToolCall as FileSearchToolCall
 from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject
diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call.py b/src/openai/types/beta/threads/runs/file_search_tool_call.py
index 57c0ca9a90..da4d58dc37 100644
--- a/src/openai/types/beta/threads/runs/file_search_tool_call.py
+++ b/src/openai/types/beta/threads/runs/file_search_tool_call.py
@@ -1,17 +1,71 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
+from typing import List, Optional
 from typing_extensions import Literal
 
 from ....._models import BaseModel
 
-__all__ = ["FileSearchToolCall"]
+__all__ = [
+    "FileSearchToolCall",
+    "FileSearch",
+    "FileSearchRankingOptions",
+    "FileSearchResult",
+    "FileSearchResultContent",
+]
+
+
+class FileSearchRankingOptions(BaseModel):
+    ranker: Literal["default_2024_08_21"]
+    """The ranker used for the file search."""
+
+    score_threshold: float
+    """The score threshold for the file search.
+
+    All values must be a floating point number between 0 and 1.
+    """
+
+
+class FileSearchResultContent(BaseModel):
+    text: Optional[str] = None
+    """The text content of the file."""
+
+    type: Optional[Literal["text"]] = None
+    """The type of the content."""
+
+
+class FileSearchResult(BaseModel):
+    file_id: str
+    """The ID of the file that result was found in."""
+
+    file_name: str
+    """The name of the file that result was found in."""
+
+    score: float
+    """The score of the result.
+
+    All values must be a floating point number between 0 and 1.
+    """
+
+    content: Optional[List[FileSearchResultContent]] = None
+    """The content of the result that was found.
+
+    The content is only included if requested via the include query parameter.
+    """
+
+
+class FileSearch(BaseModel):
+    ranking_options: Optional[FileSearchRankingOptions] = None
+    """The ranking options for the file search."""
+
+    results: Optional[List[FileSearchResult]] = None
+    """The results of the file search."""
 
 
 class FileSearchToolCall(BaseModel):
     id: str
     """The ID of the tool call object."""
 
-    file_search: object
+    file_search: FileSearch
     """For now, this is always going to be an empty object."""
 
     type: Literal["file_search"]
diff --git a/src/openai/types/beta/threads/runs/run_step_include.py b/src/openai/types/beta/threads/runs/run_step_include.py
new file mode 100644
index 0000000000..8e76c1b716
--- /dev/null
+++ b/src/openai/types/beta/threads/runs/run_step_include.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["RunStepInclude"]
+
+RunStepInclude: TypeAlias = Literal["step_details.tool_calls[*].file_search.results[*].content"]
diff --git a/src/openai/types/beta/threads/runs/step_list_params.py b/src/openai/types/beta/threads/runs/step_list_params.py
index 606d444539..3931bd7e0c 100644
--- a/src/openai/types/beta/threads/runs/step_list_params.py
+++ b/src/openai/types/beta/threads/runs/step_list_params.py
@@ -2,8 +2,11 @@
 
 from __future__ import annotations
 
+from typing import List
 from typing_extensions import Literal, Required, TypedDict
 
+from .run_step_include import RunStepInclude
+
 __all__ = ["StepListParams"]
 
 
@@ -28,6 +31,18 @@ class StepListParams(TypedDict, total=False):
     of the list.
     """
 
+    include: List[RunStepInclude]
+    """A list of additional fields to include in the response.
+
+    Currently the only supported value is
+    `step_details.tool_calls[*].file_search.results[*].content` to fetch the file
+    search result content.
+
+    See the
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+    for more information.
+    """
+
     limit: int
     """A limit on the number of objects to be returned.
 
diff --git a/src/openai/types/beta/threads/runs/step_retrieve_params.py b/src/openai/types/beta/threads/runs/step_retrieve_params.py
new file mode 100644
index 0000000000..22c1c049f4
--- /dev/null
+++ b/src/openai/types/beta/threads/runs/step_retrieve_params.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Required, TypedDict
+
+from .run_step_include import RunStepInclude
+
+__all__ = ["StepRetrieveParams"]
+
+
+class StepRetrieveParams(TypedDict, total=False):
+    thread_id: Required[str]
+
+    run_id: Required[str]
+
+    include: List[RunStepInclude]
+    """A list of additional fields to include in the response.
+
+    Currently the only supported value is
+    `step_details.tool_calls[*].file_search.results[*].content` to fetch the file
+    search result content.
+
+    See the
+    [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings)
+    for more information.
+    """
diff --git a/src/openai/types/beta/vector_store_create_params.py b/src/openai/types/beta/vector_store_create_params.py
index 4f74af49f8..a8f03a89b9 100644
--- a/src/openai/types/beta/vector_store_create_params.py
+++ b/src/openai/types/beta/vector_store_create_params.py
@@ -2,21 +2,16 @@
 
 from __future__ import annotations
 
-from typing import List, Union, Optional
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
+from typing import List, Optional
+from typing_extensions import Literal, Required, TypedDict
 
-__all__ = [
-    "VectorStoreCreateParams",
-    "ChunkingStrategy",
-    "ChunkingStrategyAuto",
-    "ChunkingStrategyStatic",
-    "ChunkingStrategyStaticStatic",
-    "ExpiresAfter",
-]
+from .file_chunking_strategy_param import FileChunkingStrategyParam
+
+__all__ = ["VectorStoreCreateParams", "ExpiresAfter"]
 
 
 class VectorStoreCreateParams(TypedDict, total=False):
-    chunking_strategy: ChunkingStrategy
+    chunking_strategy: FileChunkingStrategyParam
     """The chunking strategy used to chunk the file(s).
 
     If not set, will use the `auto` strategy. Only applicable if `file_ids` is
@@ -45,36 +40,6 @@ class VectorStoreCreateParams(TypedDict, total=False):
     """The name of the vector store."""
 
 
-class ChunkingStrategyAuto(TypedDict, total=False):
-    type: Required[Literal["auto"]]
-    """Always `auto`."""
-
-
-class ChunkingStrategyStaticStatic(TypedDict, total=False):
-    chunk_overlap_tokens: Required[int]
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
-    """
-
-    max_chunk_size_tokens: Required[int]
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ChunkingStrategyStatic(TypedDict, total=False):
-    static: Required[ChunkingStrategyStaticStatic]
-
-    type: Required[Literal["static"]]
-    """Always `static`."""
-
-
-ChunkingStrategy: TypeAlias = Union[ChunkingStrategyAuto, ChunkingStrategyStatic]
-
-
 class ExpiresAfter(TypedDict, total=False):
     anchor: Required[Literal["last_active_at"]]
     """Anchor timestamp after which the expiration policy applies.
diff --git a/src/openai/types/beta/vector_stores/file_batch_create_params.py b/src/openai/types/beta/vector_stores/file_batch_create_params.py
index e1c3303cf3..e42ea99cd1 100644
--- a/src/openai/types/beta/vector_stores/file_batch_create_params.py
+++ b/src/openai/types/beta/vector_stores/file_batch_create_params.py
@@ -2,16 +2,12 @@
 
 from __future__ import annotations
 
-from typing import List, Union
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
+from typing import List
+from typing_extensions import Required, TypedDict
 
-__all__ = [
-    "FileBatchCreateParams",
-    "ChunkingStrategy",
-    "ChunkingStrategyAutoChunkingStrategyRequestParam",
-    "ChunkingStrategyStaticChunkingStrategyRequestParam",
-    "ChunkingStrategyStaticChunkingStrategyRequestParamStatic",
-]
+from ..file_chunking_strategy_param import FileChunkingStrategyParam
+
+__all__ = ["FileBatchCreateParams"]
 
 
 class FileBatchCreateParams(TypedDict, total=False):
@@ -22,40 +18,9 @@ class FileBatchCreateParams(TypedDict, total=False):
     files.
     """
 
-    chunking_strategy: ChunkingStrategy
+    chunking_strategy: FileChunkingStrategyParam
     """The chunking strategy used to chunk the file(s).
 
-    If not set, will use the `auto` strategy.
-    """
-
-
-class ChunkingStrategyAutoChunkingStrategyRequestParam(TypedDict, total=False):
-    type: Required[Literal["auto"]]
-    """Always `auto`."""
-
-
-class ChunkingStrategyStaticChunkingStrategyRequestParamStatic(TypedDict, total=False):
-    chunk_overlap_tokens: Required[int]
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
+    If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+    non-empty.
     """
-
-    max_chunk_size_tokens: Required[int]
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ChunkingStrategyStaticChunkingStrategyRequestParam(TypedDict, total=False):
-    static: Required[ChunkingStrategyStaticChunkingStrategyRequestParamStatic]
-
-    type: Required[Literal["static"]]
-    """Always `static`."""
-
-
-ChunkingStrategy: TypeAlias = Union[
-    ChunkingStrategyAutoChunkingStrategyRequestParam, ChunkingStrategyStaticChunkingStrategyRequestParam
-]
diff --git a/src/openai/types/beta/vector_stores/file_create_params.py b/src/openai/types/beta/vector_stores/file_create_params.py
index cfb80657c6..d074d766e6 100644
--- a/src/openai/types/beta/vector_stores/file_create_params.py
+++ b/src/openai/types/beta/vector_stores/file_create_params.py
@@ -2,16 +2,11 @@
 
 from __future__ import annotations
 
-from typing import Union
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
+from typing_extensions import Required, TypedDict
 
-__all__ = [
-    "FileCreateParams",
-    "ChunkingStrategy",
-    "ChunkingStrategyAutoChunkingStrategyRequestParam",
-    "ChunkingStrategyStaticChunkingStrategyRequestParam",
-    "ChunkingStrategyStaticChunkingStrategyRequestParamStatic",
-]
+from ..file_chunking_strategy_param import FileChunkingStrategyParam
+
+__all__ = ["FileCreateParams"]
 
 
 class FileCreateParams(TypedDict, total=False):
@@ -22,40 +17,9 @@ class FileCreateParams(TypedDict, total=False):
     files.
     """
 
-    chunking_strategy: ChunkingStrategy
+    chunking_strategy: FileChunkingStrategyParam
     """The chunking strategy used to chunk the file(s).
 
-    If not set, will use the `auto` strategy.
-    """
-
-
-class ChunkingStrategyAutoChunkingStrategyRequestParam(TypedDict, total=False):
-    type: Required[Literal["auto"]]
-    """Always `auto`."""
-
-
-class ChunkingStrategyStaticChunkingStrategyRequestParamStatic(TypedDict, total=False):
-    chunk_overlap_tokens: Required[int]
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
+    If not set, will use the `auto` strategy. Only applicable if `file_ids` is
+    non-empty.
     """
-
-    max_chunk_size_tokens: Required[int]
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ChunkingStrategyStaticChunkingStrategyRequestParam(TypedDict, total=False):
-    static: Required[ChunkingStrategyStaticChunkingStrategyRequestParamStatic]
-
-    type: Required[Literal["static"]]
-    """Always `static`."""
-
-
-ChunkingStrategy: TypeAlias = Union[
-    ChunkingStrategyAutoChunkingStrategyRequestParam, ChunkingStrategyStaticChunkingStrategyRequestParam
-]
diff --git a/src/openai/types/beta/vector_stores/vector_store_file.py b/src/openai/types/beta/vector_stores/vector_store_file.py
index 65096e8dad..e4608e159c 100644
--- a/src/openai/types/beta/vector_stores/vector_store_file.py
+++ b/src/openai/types/beta/vector_stores/vector_store_file.py
@@ -1,19 +1,12 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
+from typing import Optional
+from typing_extensions import Literal
 
-from ...._utils import PropertyInfo
 from ...._models import BaseModel
+from ..file_chunking_strategy import FileChunkingStrategy
 
-__all__ = [
-    "VectorStoreFile",
-    "LastError",
-    "ChunkingStrategy",
-    "ChunkingStrategyStatic",
-    "ChunkingStrategyStaticStatic",
-    "ChunkingStrategyOther",
-]
+__all__ = ["VectorStoreFile", "LastError"]
 
 
 class LastError(BaseModel):
@@ -24,38 +17,6 @@ class LastError(BaseModel):
     """A human-readable description of the error."""
 
 
-class ChunkingStrategyStaticStatic(BaseModel):
-    chunk_overlap_tokens: int
-    """The number of tokens that overlap between chunks. The default value is `400`.
-
-    Note that the overlap must not exceed half of `max_chunk_size_tokens`.
-    """
-
-    max_chunk_size_tokens: int
-    """The maximum number of tokens in each chunk.
-
-    The default value is `800`. The minimum value is `100` and the maximum value is
-    `4096`.
-    """
-
-
-class ChunkingStrategyStatic(BaseModel):
-    static: ChunkingStrategyStaticStatic
-
-    type: Literal["static"]
-    """Always `static`."""
-
-
-class ChunkingStrategyOther(BaseModel):
-    type: Literal["other"]
-    """Always `other`."""
-
-
-ChunkingStrategy: TypeAlias = Annotated[
-    Union[ChunkingStrategyStatic, ChunkingStrategyOther], PropertyInfo(discriminator="type")
-]
-
-
 class VectorStoreFile(BaseModel):
     id: str
     """The identifier, which can be referenced in API endpoints."""
@@ -93,5 +54,5 @@ class VectorStoreFile(BaseModel):
     attached to.
     """
 
-    chunking_strategy: Optional[ChunkingStrategy] = None
+    chunking_strategy: Optional[FileChunkingStrategy] = None
     """The strategy used to chunk the file."""
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 91435dcedd..4ed89b00f5 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -87,15 +87,22 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     `content` of `message`.
     """
 
+    max_completion_tokens: Optional[int]
+    """
+    An upper bound for the number of tokens that can be generated for a completion,
+    including visible output tokens and
+    [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+    """
+
     max_tokens: Optional[int]
     """
     The maximum number of [tokens](/tokenizer) that can be generated in the chat
-    completion.
+    completion. This value can be used to control
+    [costs](https://openai.com/api/pricing/) for text generated via API.
 
-    The total length of input tokens and generated tokens is limited by the model's
-    context length.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens.
+    This value is now deprecated in favor of `max_completion_tokens`, and is not
+    compatible with
+    [o1 series models](https://platform.openai.com/docs/guides/reasoning).
     """
 
     n: Optional[int]
@@ -130,11 +137,11 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
-    Outputs which guarantees the model will match your supplied JSON schema. Learn
-    more in the
+    Outputs which ensures the model will match your supplied JSON schema. Learn more
+    in the
     [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
 
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
     message the model generates is valid JSON.
 
     **Important:** when using JSON mode, you **must** also instruct the model to
@@ -160,8 +167,11 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     This parameter is relevant for customers subscribed to the scale tier service:
 
-    - If set to 'auto', the system will utilize scale tier credits until they are
-      exhausted.
+    - If set to 'auto', and the Project is Scale tier enabled, the system will
+      utilize scale tier credits until they are exhausted.
+    - If set to 'auto', and the Project is not Scale tier enabled, the request will
+      be processed using the default service tier with a lower uptime SLA and no
+      latency guarentee.
     - If set to 'default', the request will be processed using the default service
       tier with a lower uptime SLA and no latency guarentee.
     - When not set, the default behavior is 'auto'.
@@ -262,7 +272,7 @@ class Function(TypedDict, total=False):
 ResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema]
 
 
-class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase):
+class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
     stream: Optional[Literal[False]]
     """If set, partial message deltas will be sent, like in ChatGPT.
 
diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py
index 09bc081f7a..f8438c75c8 100644
--- a/src/openai/types/chat_model.py
+++ b/src/openai/types/chat_model.py
@@ -5,9 +5,14 @@
 __all__ = ["ChatModel"]
 
 ChatModel: TypeAlias = Literal[
+    "o1-preview",
+    "o1-preview-2024-09-12",
+    "o1-mini",
+    "o1-mini-2024-09-12",
     "gpt-4o",
-    "gpt-4o-2024-05-13",
     "gpt-4o-2024-08-06",
+    "gpt-4o-2024-05-13",
+    "chatgpt-4o-latest",
     "gpt-4o-mini",
     "gpt-4o-mini-2024-07-18",
     "gpt-4-turbo",
diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py
index 9fe22fe3c9..6c112b3902 100644
--- a/src/openai/types/completion_create_params.py
+++ b/src/openai/types/completion_create_params.py
@@ -160,7 +160,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
 
-class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase):
+class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
     stream: Optional[Literal[False]]
     """Whether to stream back partial progress.
 
diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py
index ac09afd479..a4b9116e35 100644
--- a/src/openai/types/completion_usage.py
+++ b/src/openai/types/completion_usage.py
@@ -1,9 +1,15 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
+from typing import Optional
 
 from .._models import BaseModel
 
-__all__ = ["CompletionUsage"]
+__all__ = ["CompletionUsage", "CompletionTokensDetails"]
+
+
+class CompletionTokensDetails(BaseModel):
+    reasoning_tokens: Optional[int] = None
+    """Tokens generated by the model for reasoning."""
 
 
 class CompletionUsage(BaseModel):
@@ -15,3 +21,6 @@ class CompletionUsage(BaseModel):
 
     total_tokens: int
     """Total number of tokens used in the request (prompt + completion)."""
+
+    completion_tokens_details: Optional[CompletionTokensDetails] = None
+    """Breakdown of tokens used in a completion."""
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index e9be2ef1ca..8f5ea86274 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -50,7 +50,7 @@ class JobCreateParams(TypedDict, total=False):
 
     suffix: Optional[str]
     """
-    A string of up to 18 characters that will be added to your fine-tuned model
+    A string of up to 64 characters that will be added to your fine-tuned model
     name.
 
     For example, a `suffix` of "custom-model-name" would produce a model name like
diff --git a/tests/api_resources/beta/threads/runs/test_steps.py b/tests/api_resources/beta/threads/runs/test_steps.py
index e6108d8dad..f5dc17e0b5 100644
--- a/tests/api_resources/beta/threads/runs/test_steps.py
+++ b/tests/api_resources/beta/threads/runs/test_steps.py
@@ -27,6 +27,16 @@ def test_method_retrieve(self, client: OpenAI) -> None:
         )
         assert_matches_type(RunStep, step, path=["response"])
 
+    @parametrize
+    def test_method_retrieve_with_all_params(self, client: OpenAI) -> None:
+        step = client.beta.threads.runs.steps.retrieve(
+            step_id="step_id",
+            thread_id="thread_id",
+            run_id="run_id",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
+        )
+        assert_matches_type(RunStep, step, path=["response"])
+
     @parametrize
     def test_raw_response_retrieve(self, client: OpenAI) -> None:
         response = client.beta.threads.runs.steps.with_raw_response.retrieve(
@@ -89,10 +99,11 @@ def test_method_list(self, client: OpenAI) -> None:
     @parametrize
     def test_method_list_with_all_params(self, client: OpenAI) -> None:
         step = client.beta.threads.runs.steps.list(
-            "string",
-            thread_id="string",
-            after="string",
-            before="string",
+            run_id="run_id",
+            thread_id="thread_id",
+            after="after",
+            before="before",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
             limit=0,
             order="asc",
         )
@@ -151,6 +162,16 @@ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
         )
         assert_matches_type(RunStep, step, path=["response"])
 
+    @parametrize
+    async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        step = await async_client.beta.threads.runs.steps.retrieve(
+            step_id="step_id",
+            thread_id="thread_id",
+            run_id="run_id",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
+        )
+        assert_matches_type(RunStep, step, path=["response"])
+
     @parametrize
     async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.beta.threads.runs.steps.with_raw_response.retrieve(
@@ -213,10 +234,11 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
     @parametrize
     async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
         step = await async_client.beta.threads.runs.steps.list(
-            "string",
-            thread_id="string",
-            after="string",
-            before="string",
+            run_id="run_id",
+            thread_id="thread_id",
+            after="after",
+            before="before",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
             limit=0,
             order="asc",
         )
diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py
index 5d16bdb364..c8d70f5f89 100644
--- a/tests/api_resources/beta/threads/test_runs.py
+++ b/tests/api_resources/beta/threads/test_runs.py
@@ -33,9 +33,10 @@ def test_method_create_overload_1(self, client: OpenAI) -> None:
     @parametrize
     def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.create(
-            "string",
-            assistant_id="string",
-            additional_instructions="string",
+            thread_id="thread_id",
+            assistant_id="assistant_id",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
+            additional_instructions="additional_instructions",
             additional_messages=[
                 {
                     "content": "string",
@@ -199,7 +200,8 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             "string",
             assistant_id="string",
             stream=True,
-            additional_instructions="string",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
+            additional_instructions="additional_instructions",
             additional_messages=[
                 {
                     "content": "string",
@@ -699,9 +701,10 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None
     @parametrize
     async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
         run = await async_client.beta.threads.runs.create(
-            "string",
-            assistant_id="string",
-            additional_instructions="string",
+            thread_id="thread_id",
+            assistant_id="assistant_id",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
+            additional_instructions="additional_instructions",
             additional_messages=[
                 {
                     "content": "string",
@@ -865,7 +868,8 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             "string",
             assistant_id="string",
             stream=True,
-            additional_instructions="string",
+            include=["step_details.tool_calls[*].file_search.results[*].content"],
+            additional_instructions="additional_instructions",
             additional_messages=[
                 {
                     "content": "string",
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 0b89fbf9cd..c44703a434 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -55,6 +55,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -175,6 +176,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -314,6 +316,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
@@ -434,6 +437,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             ],
             logit_bias={"foo": 0},
             logprobs=True,
+            max_completion_tokens=0,
             max_tokens=0,
             n=1,
             parallel_tool_calls=True,
diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py
index d67d5129cd..e7b9c4f1fd 100644
--- a/tests/lib/chat/test_completions.py
+++ b/tests/lib/chat/test_completions.py
@@ -74,7 +74,7 @@ def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pyte
     object='chat.completion',
     service_tier=None,
     system_fingerprint='fp_845eaabc1f',
-    usage=CompletionUsage(completion_tokens=28, prompt_tokens=14, total_tokens=42)
+    usage=CompletionUsage(completion_tokens=28, completion_tokens_details=None, prompt_tokens=14, total_tokens=42)
 )
 """
     )
@@ -129,7 +129,7 @@ class Location(BaseModel):
     object='chat.completion',
     service_tier=None,
     system_fingerprint='fp_2a322c9ffc',
-    usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+    usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
 )
 """
     )
@@ -186,7 +186,7 @@ class Location(BaseModel):
     object='chat.completion',
     service_tier=None,
     system_fingerprint='fp_2a322c9ffc',
-    usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+    usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
 )
 """
     )
@@ -368,7 +368,7 @@ class CalendarEvent:
     object='chat.completion',
     service_tier=None,
     system_fingerprint='fp_2a322c9ffc',
-    usage=CompletionUsage(completion_tokens=17, prompt_tokens=32, total_tokens=49)
+    usage=CompletionUsage(completion_tokens=17, completion_tokens_details=None, prompt_tokens=32, total_tokens=49)
 )
 """
     )
diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py
index c3dd69ad57..5ad1f084d2 100644
--- a/tests/lib/chat/test_completions_streaming.py
+++ b/tests/lib/chat/test_completions_streaming.py
@@ -155,7 +155,7 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream
     object='chat.completion',
     service_tier=None,
     system_fingerprint='fp_845eaabc1f',
-    usage=CompletionUsage(completion_tokens=14, prompt_tokens=17, total_tokens=31)
+    usage=CompletionUsage(completion_tokens=14, completion_tokens_details=None, prompt_tokens=17, total_tokens=31)
 )
 """
     )
diff --git a/tests/test_legacy_response.py b/tests/test_legacy_response.py
index 3c2df53e58..a6fec9f2de 100644
--- a/tests/test_legacy_response.py
+++ b/tests/test_legacy_response.py
@@ -66,6 +66,27 @@ def test_response_parse_custom_model(client: OpenAI) -> None:
     assert obj.bar == 2
 
 
+def test_response_basemodel_request_id(client: OpenAI) -> None:
+    response = LegacyAPIResponse(
+        raw=httpx.Response(
+            200,
+            headers={"x-request-id": "my-req-id"},
+            content=json.dumps({"foo": "hello!", "bar": 2}),
+        ),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = response.parse(to=CustomModel)
+    assert obj._request_id == "my-req-id"
+    assert obj.foo == "hello!"
+    assert obj.bar == 2
+    assert obj.to_dict() == {"foo": "hello!", "bar": 2}
+
+
 def test_response_parse_annotated_type(client: OpenAI) -> None:
     response = LegacyAPIResponse(
         raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})),
diff --git a/tests/test_response.py b/tests/test_response.py
index b7d88bdbde..97c56e0035 100644
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -156,6 +156,49 @@ async def test_async_response_parse_custom_model(async_client: AsyncOpenAI) -> N
     assert obj.bar == 2
 
 
+def test_response_basemodel_request_id(client: OpenAI) -> None:
+    response = APIResponse(
+        raw=httpx.Response(
+            200,
+            headers={"x-request-id": "my-req-id"},
+            content=json.dumps({"foo": "hello!", "bar": 2}),
+        ),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = response.parse(to=CustomModel)
+    assert obj._request_id == "my-req-id"
+    assert obj.foo == "hello!"
+    assert obj.bar == 2
+    assert obj.to_dict() == {"foo": "hello!", "bar": 2}
+
+
+@pytest.mark.asyncio
+async def test_async_response_basemodel_request_id(client: OpenAI) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(
+            200,
+            headers={"x-request-id": "my-req-id"},
+            content=json.dumps({"foo": "hello!", "bar": 2}),
+        ),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = await response.parse(to=CustomModel)
+    assert obj._request_id == "my-req-id"
+    assert obj.foo == "hello!"
+    assert obj.bar == 2
+    assert obj.to_dict() == {"foo": "hello!", "bar": 2}
+
+
 def test_response_parse_annotated_type(client: OpenAI) -> None:
     response = APIResponse(
         raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})),