diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6eb007253c..83bca8f716 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.15.2" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c031d9a1d1..6fc5b36597 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,28 +14,40 @@ jobs: if: github.repository == 'openai/openai-python' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: 0.24.0 + RYE_INSTALL_OPTION: '--yes' - name: Install dependencies - run: | - rye sync --all-features + run: rye sync --all-features - - name: Run ruff - run: | - rye run check:ruff + - name: Run lints + run: ./scripts/lint + test: + name: test + runs-on: ubuntu-latest + if: github.repository == 'openai/openai-python' - - name: Run type checking - run: | - rye run typecheck + steps: + - uses: actions/checkout@v4 - - name: Ensure importable + - name: Install Rye run: | - rye run python -c 'import openai' + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: 0.24.0 + RYE_INSTALL_OPTION: '--yes' + + - name: Bootstrap + run: ./scripts/bootstrap + + - name: Run tests + run: ./scripts/test + diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml index c8c94db105..1ac03ede3f 100644 --- a/.github/workflows/create-releases.yml +++ b/.github/workflows/create-releases.yml @@ -14,7 +14,7 @@ jobs: environment: publish steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: stainless-api/trigger-release-please@v1 id: release @@ -25,10 +25,10 @@ jobs: - name: Install Rye if: ${{ steps.release.outputs.releases_created }} run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 + RYE_VERSION: 0.24.0 RYE_INSTALL_OPTION: "--yes" - name: Publish to PyPI diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 026ed29c22..aae985b27e 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -10,14 +10,14 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 + RYE_VERSION: 0.24.0 RYE_INSTALL_OPTION: "--yes" - name: Publish to PyPI diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index 108aa5973a..e078964a6f 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -13,7 +13,7 @@ jobs: if: github.repository == 'openai/openai-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || 
github.head_ref == 'next') steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check release environment run: | diff --git a/.gitignore b/.gitignore index a4b2f8c0bd..0f9a66a976 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ dist .env .envrc codegen.log +Brewfile.lock.json diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c3c95522a6..5d67d3563d 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.9.0" + ".": "1.30.4" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index c550abf3c6..2e5c705a0d 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1 +1,2 @@ -configured_endpoints: 51 +configured_endpoints: 64 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-363dd904e5d6e65b3a323fc88e6b502fb23a6aa319be219273e3ee47c7530993.yml diff --git a/Brewfile b/Brewfile new file mode 100644 index 0000000000..492ca37bb0 --- /dev/null +++ b/Brewfile @@ -0,0 +1,2 @@ +brew "rye" + diff --git a/CHANGELOG.md b/CHANGELOG.md index 14771f603b..b72dd9335d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,578 @@ # Changelog +## 1.30.4 (2024-05-28) + +Full Changelog: [v1.30.3...v1.30.4](https://github.com/openai/openai-python/compare/v1.30.3...v1.30.4) + +### Chores + +* add missing __all__ definitions ([7fba60f](https://github.com/openai/openai-python/commit/7fba60f2e8adc26e83080aaf3e436eb9891e1253)) +* **internal:** fix lint issue ([f423cd0](https://github.com/openai/openai-python/commit/f423cd05d33b3e734eda7c0c008faac14ae96bb7)) + +## 1.30.3 (2024-05-24) + +Full Changelog: [v1.30.2...v1.30.3](https://github.com/openai/openai-python/compare/v1.30.2...v1.30.3) + +### Chores + +* **ci:** update rye install location ([#1440](https://github.com/openai/openai-python/issues/1440)) ([8a0e5bf](https://github.com/openai/openai-python/commit/8a0e5bf4c03d9c714799fad43be68ac9c2b1f37a)) +* **internal:** bump pyright ([#1442](https://github.com/openai/openai-python/issues/1442)) ([64a151e](https://github.com/openai/openai-python/commit/64a151eae705d55484f870df461434c0a6961e2b)) +* **internal:** fix lint issue ([#1444](https://github.com/openai/openai-python/issues/1444)) ([b0eb458](https://github.com/openai/openai-python/commit/b0eb4582e050b0a25af3d80d2cb584bfc7cd11ab)) + + +### Documentation + +* **contributing:** update references to rye-up.com ([dcc34a2](https://github.com/openai/openai-python/commit/dcc34a26d1a6a0debf440724fad658c77547048c)) + +## 1.30.2 (2024-05-23) + +Full Changelog: [v1.30.1...v1.30.2](https://github.com/openai/openai-python/compare/v1.30.1...v1.30.2) + +### Chores + +* **ci:** update rye install location ([#1436](https://github.com/openai/openai-python/issues/1436)) ([f7cc4e7](https://github.com/openai/openai-python/commit/f7cc4e7d5d0964a4a5d53e602379770c2576e1aa)) + +## 1.30.1 (2024-05-14) + +Full Changelog: [v1.30.0...v1.30.1](https://github.com/openai/openai-python/compare/v1.30.0...v1.30.1) + +### Chores + +* **internal:** add slightly better logging to scripts ([#1422](https://github.com/openai/openai-python/issues/1422)) ([43dffab](https://github.com/openai/openai-python/commit/43dffabb3bed4edf8a6e523cbb289f733a5f9b24)) + +## 1.30.0 (2024-05-14) + +Full Changelog: [v1.29.0...v1.30.0](https://github.com/openai/openai-python/compare/v1.29.0...v1.30.0) + +### Features + +* **api:** add incomplete state ([#1420](https://github.com/openai/openai-python/issues/1420)) 
([6484984](https://github.com/openai/openai-python/commit/648498412d1c7740e6b67ed4d0a55b89ff29d3b1)) + +## 1.29.0 (2024-05-13) + +Full Changelog: [v1.28.2...v1.29.0](https://github.com/openai/openai-python/compare/v1.28.2...v1.29.0) + +### Features + +* **api:** add gpt-4o model ([#1417](https://github.com/openai/openai-python/issues/1417)) ([4f09f8c](https://github.com/openai/openai-python/commit/4f09f8c6cc4450f5e61f158f1bd54c513063a1a8)) + +## 1.28.2 (2024-05-13) + +Full Changelog: [v1.28.1...v1.28.2](https://github.com/openai/openai-python/compare/v1.28.1...v1.28.2) + +### Bug Fixes + +* **client:** accidental blocking sleep in async code ([#1415](https://github.com/openai/openai-python/issues/1415)) ([0ac6ecb](https://github.com/openai/openai-python/commit/0ac6ecb8d4e52f895bc3ae1f589f22ddaaef6204)) + + +### Chores + +* **internal:** bump pydantic dependency ([#1413](https://github.com/openai/openai-python/issues/1413)) ([ed73d1d](https://github.com/openai/openai-python/commit/ed73d1db540714e29a1ba30e3aa6429aae8b1dd8)) + +## 1.28.1 (2024-05-11) + +Full Changelog: [v1.28.0...v1.28.1](https://github.com/openai/openai-python/compare/v1.28.0...v1.28.1) + +### Chores + +* **docs:** add SECURITY.md ([#1408](https://github.com/openai/openai-python/issues/1408)) ([119970a](https://github.com/openai/openai-python/commit/119970a31b67e88c623d50855290ccf3847c10eb)) + +## 1.28.0 (2024-05-09) + +Full Changelog: [v1.27.0...v1.28.0](https://github.com/openai/openai-python/compare/v1.27.0...v1.28.0) + +### Features + +* **api:** add message image content ([#1405](https://github.com/openai/openai-python/issues/1405)) ([a115de6](https://github.com/openai/openai-python/commit/a115de60ce1ca503a7659bb9a19c18699d4d9bcb)) + +## 1.27.0 (2024-05-08) + +Full Changelog: [v1.26.0...v1.27.0](https://github.com/openai/openai-python/compare/v1.26.0...v1.27.0) + +### Features + +* **api:** adding file purposes ([#1401](https://github.com/openai/openai-python/issues/1401)) ([2e9d0bd](https://github.com/openai/openai-python/commit/2e9d0bd0e4bf677ed9b21c6448e804313e026441)) + +## 1.26.0 (2024-05-06) + +Full Changelog: [v1.25.2...v1.26.0](https://github.com/openai/openai-python/compare/v1.25.2...v1.26.0) + +### Features + +* **api:** add usage metadata when streaming ([#1395](https://github.com/openai/openai-python/issues/1395)) ([3cb064b](https://github.com/openai/openai-python/commit/3cb064b10d661dbcc74b6bc1ed7d8e635ab2876a)) + +## 1.25.2 (2024-05-05) + +Full Changelog: [v1.25.1...v1.25.2](https://github.com/openai/openai-python/compare/v1.25.1...v1.25.2) + +### Documentation + +* **readme:** fix misleading timeout example value ([#1393](https://github.com/openai/openai-python/issues/1393)) ([3eba8e7](https://github.com/openai/openai-python/commit/3eba8e7573ec1bf4231a304c8eabc8a8d077f46d)) + +## 1.25.1 (2024-05-02) + +Full Changelog: [v1.25.0...v1.25.1](https://github.com/openai/openai-python/compare/v1.25.0...v1.25.1) + +### Chores + +* **internal:** bump prism version ([#1390](https://github.com/openai/openai-python/issues/1390)) ([a5830fc](https://github.com/openai/openai-python/commit/a5830fc1c5ffd21e2010490905084ad5614212a3)) + +## 1.25.0 (2024-05-01) + +Full Changelog: [v1.24.1...v1.25.0](https://github.com/openai/openai-python/compare/v1.24.1...v1.25.0) + +### Features + +* **api:** delete messages ([#1388](https://github.com/openai/openai-python/issues/1388)) ([d0597cd](https://github.com/openai/openai-python/commit/d0597cdc1813cddffacbaa50565e86d2420d1873)) + +## 1.24.1 (2024-04-30) + +Full Changelog: 
[v1.24.0...v1.24.1](https://github.com/openai/openai-python/compare/v1.24.0...v1.24.1) + +### Chores + +* **internal:** add link to openapi spec ([#1385](https://github.com/openai/openai-python/issues/1385)) ([b315d04](https://github.com/openai/openai-python/commit/b315d04e9624ec3a841d7c51813bb553640c23ce)) + +## 1.24.0 (2024-04-29) + +Full Changelog: [v1.23.6...v1.24.0](https://github.com/openai/openai-python/compare/v1.23.6...v1.24.0) + +### Features + +* **api:** add required tool_choice ([#1382](https://github.com/openai/openai-python/issues/1382)) ([c558f65](https://github.com/openai/openai-python/commit/c558f651df39f61425cd4109318f78ed94cbf163)) + + +### Chores + +* **client:** log response headers in debug mode ([#1383](https://github.com/openai/openai-python/issues/1383)) ([f31a426](https://github.com/openai/openai-python/commit/f31a4261adc4ebd92582cee264e41eb6a6dafc57)) +* **internal:** minor reformatting ([#1377](https://github.com/openai/openai-python/issues/1377)) ([7003dbb](https://github.com/openai/openai-python/commit/7003dbb863b6e16381070b8b86ac24aa070a3799)) +* **internal:** reformat imports ([#1375](https://github.com/openai/openai-python/issues/1375)) ([2ad0c3b](https://github.com/openai/openai-python/commit/2ad0c3b8e0b746ed20db3c84a9c6a369aa10bf5d)) + +## 1.23.6 (2024-04-25) + +Full Changelog: [v1.23.5...v1.23.6](https://github.com/openai/openai-python/compare/v1.23.5...v1.23.6) + +### Chores + +* **internal:** update test helper function ([#1371](https://github.com/openai/openai-python/issues/1371)) ([6607c4a](https://github.com/openai/openai-python/commit/6607c4a491fd1912f9222d6fe464ccef6e865eac)) + +## 1.23.5 (2024-04-24) + +Full Changelog: [v1.23.4...v1.23.5](https://github.com/openai/openai-python/compare/v1.23.4...v1.23.5) + +### Chores + +* **internal:** use actions/checkout@v4 for codeflow ([#1368](https://github.com/openai/openai-python/issues/1368)) ([d1edf8b](https://github.com/openai/openai-python/commit/d1edf8beb806ebaefdcc2cb6e39f99e1811a2668)) + +## 1.23.4 (2024-04-24) + +Full Changelog: [v1.23.3...v1.23.4](https://github.com/openai/openai-python/compare/v1.23.3...v1.23.4) + +### Bug Fixes + +* **api:** change timestamps to unix integers ([#1367](https://github.com/openai/openai-python/issues/1367)) ([fbc0e15](https://github.com/openai/openai-python/commit/fbc0e15f422971bd15499d4ea5f42a1c885c7004)) +* **docs:** doc improvements ([#1364](https://github.com/openai/openai-python/issues/1364)) ([8c3a005](https://github.com/openai/openai-python/commit/8c3a005247ea045b9a95e7459eba2a90067daf71)) + + +### Chores + +* **tests:** rename test file ([#1366](https://github.com/openai/openai-python/issues/1366)) ([4204e63](https://github.com/openai/openai-python/commit/4204e63e27584c68ad27825261225603d7a87008)) + +## 1.23.3 (2024-04-23) + +Full Changelog: [v1.23.2...v1.23.3](https://github.com/openai/openai-python/compare/v1.23.2...v1.23.3) + +### Chores + +* **internal:** restructure imports ([#1359](https://github.com/openai/openai-python/issues/1359)) ([4e5eb37](https://github.com/openai/openai-python/commit/4e5eb374ea0545a6117db657bb05f6417bc62d18)) + +## 1.23.2 (2024-04-19) + +Full Changelog: [v1.23.1...v1.23.2](https://github.com/openai/openai-python/compare/v1.23.1...v1.23.2) + +### Bug Fixes + +* **api:** correct types for message attachment tools ([#1348](https://github.com/openai/openai-python/issues/1348)) ([78a6261](https://github.com/openai/openai-python/commit/78a6261eaad7839284903287d4f647d9cb4ced0b)) + +## 1.23.1 (2024-04-18) + +Full Changelog: 
[v1.23.0...v1.23.1](https://github.com/openai/openai-python/compare/v1.23.0...v1.23.1) + +### Bug Fixes + +* **api:** correct types for attachments ([#1342](https://github.com/openai/openai-python/issues/1342)) ([542d30c](https://github.com/openai/openai-python/commit/542d30c6dad4e139bf3eb443936d42b7b42dad54)) + +## 1.23.0 (2024-04-18) + +Full Changelog: [v1.22.0...v1.23.0](https://github.com/openai/openai-python/compare/v1.22.0...v1.23.0) + +### Features + +* **api:** add request id property to response classes ([#1341](https://github.com/openai/openai-python/issues/1341)) ([444d680](https://github.com/openai/openai-python/commit/444d680cbb3745adbc27788213ae3312567136a8)) + + +### Documentation + +* **helpers:** fix example snippets ([#1339](https://github.com/openai/openai-python/issues/1339)) ([8929088](https://github.com/openai/openai-python/commit/8929088b206a04b4c5b85fb69b0b983fb56f9b03)) + +## 1.22.0 (2024-04-18) + +Full Changelog: [v1.21.2...v1.22.0](https://github.com/openai/openai-python/compare/v1.21.2...v1.22.0) + +### Features + +* **api:** batch list endpoint ([#1338](https://github.com/openai/openai-python/issues/1338)) ([a776f38](https://github.com/openai/openai-python/commit/a776f387e3159f9a8f4dcaa7d0d3b78c2a884f91)) + + +### Chores + +* **internal:** ban usage of lru_cache ([#1331](https://github.com/openai/openai-python/issues/1331)) ([8f9223b](https://github.com/openai/openai-python/commit/8f9223bfe13200c685fc97c25ada3015a69c6df7)) +* **internal:** bump pyright to 1.1.359 ([#1337](https://github.com/openai/openai-python/issues/1337)) ([feec0dd](https://github.com/openai/openai-python/commit/feec0dd1dd243941a279c3224c5ca1d727d76676)) + +## 1.21.2 (2024-04-17) + +Full Changelog: [v1.21.1...v1.21.2](https://github.com/openai/openai-python/compare/v1.21.1...v1.21.2) + +### Chores + +* **internal:** add lru_cache helper function ([#1329](https://github.com/openai/openai-python/issues/1329)) ([cbeebfc](https://github.com/openai/openai-python/commit/cbeebfcca8bf1a3feb4462a79e10099bda5bed84)) + +## 1.21.1 (2024-04-17) + +Full Changelog: [v1.21.0...v1.21.1](https://github.com/openai/openai-python/compare/v1.21.0...v1.21.1) + +### Chores + +* **api:** docs and response_format response property ([#1327](https://github.com/openai/openai-python/issues/1327)) ([7a6d142](https://github.com/openai/openai-python/commit/7a6d142f013994c4eb9a4f55888464c885f8baf0)) + +## 1.21.0 (2024-04-17) + +Full Changelog: [v1.20.0...v1.21.0](https://github.com/openai/openai-python/compare/v1.20.0...v1.21.0) + +### Features + +* **api:** add vector stores ([#1325](https://github.com/openai/openai-python/issues/1325)) ([038a3c5](https://github.com/openai/openai-python/commit/038a3c50db7b6a88f54ff1cd1ff6cbaef2caf87f)) + +## 1.20.0 (2024-04-16) + +Full Changelog: [v1.19.0...v1.20.0](https://github.com/openai/openai-python/compare/v1.19.0...v1.20.0) + +### Features + +* **client:** add header OpenAI-Project ([#1320](https://github.com/openai/openai-python/issues/1320)) ([0c489f1](https://github.com/openai/openai-python/commit/0c489f16a7d9e5ac753da87273b223893edefa69)) +* extract chat models to a named enum ([#1322](https://github.com/openai/openai-python/issues/1322)) ([1ccd9b6](https://github.com/openai/openai-python/commit/1ccd9b67322736a4714e58c953d59585322c527d)) + +## 1.19.0 (2024-04-15) + +Full Changelog: [v1.18.0...v1.19.0](https://github.com/openai/openai-python/compare/v1.18.0...v1.19.0) + +### Features + +* **errors:** add request_id property 
([#1317](https://github.com/openai/openai-python/issues/1317)) ([f9eb77d](https://github.com/openai/openai-python/commit/f9eb77dca422b9456f4e3b31c7474046235eec1d)) + +## 1.18.0 (2024-04-15) + +Full Changelog: [v1.17.1...v1.18.0](https://github.com/openai/openai-python/compare/v1.17.1...v1.18.0) + +### Features + +* **api:** add batch API ([#1316](https://github.com/openai/openai-python/issues/1316)) ([3e6f19e](https://github.com/openai/openai-python/commit/3e6f19e6e7489bf1c94944a5f8f9b1d4535cdc43)) +* **api:** updates ([#1314](https://github.com/openai/openai-python/issues/1314)) ([8281dc9](https://github.com/openai/openai-python/commit/8281dc956178f5de345645660081f7d0c15a57a6)) + +## 1.17.1 (2024-04-12) + +Full Changelog: [v1.17.0...v1.17.1](https://github.com/openai/openai-python/compare/v1.17.0...v1.17.1) + +### Chores + +* fix typo ([#1304](https://github.com/openai/openai-python/issues/1304)) ([1129082](https://github.com/openai/openai-python/commit/1129082955f98d76c0927781ef9e7d0beeda2ec4)) +* **internal:** formatting ([#1311](https://github.com/openai/openai-python/issues/1311)) ([8fd411b](https://github.com/openai/openai-python/commit/8fd411b48b6b1eafaab2dac26201525c1ee0b942)) + +## 1.17.0 (2024-04-10) + +Full Changelog: [v1.16.2...v1.17.0](https://github.com/openai/openai-python/compare/v1.16.2...v1.17.0) + +### Features + +* **api:** add additional messages when creating thread run ([#1298](https://github.com/openai/openai-python/issues/1298)) ([70eb081](https://github.com/openai/openai-python/commit/70eb081804b14cc8c151ebd85458545a50a074fd)) +* **client:** add DefaultHttpxClient and DefaultAsyncHttpxClient ([#1302](https://github.com/openai/openai-python/issues/1302)) ([69cdfc3](https://github.com/openai/openai-python/commit/69cdfc319fff7ebf28cdd13cc6c1761b7d97811d)) +* **models:** add to_dict & to_json helper methods ([#1305](https://github.com/openai/openai-python/issues/1305)) ([40a881d](https://github.com/openai/openai-python/commit/40a881d10442af8b445ce030f8ab338710e1c4c8)) + +## 1.16.2 (2024-04-04) + +Full Changelog: [v1.16.1...v1.16.2](https://github.com/openai/openai-python/compare/v1.16.1...v1.16.2) + +### Bug Fixes + +* **client:** correct logic for line decoding in streaming ([#1293](https://github.com/openai/openai-python/issues/1293)) ([687caef](https://github.com/openai/openai-python/commit/687caefa4acf615bf404f16817bfd9a6f285ee5c)) + +## 1.16.1 (2024-04-02) + +Full Changelog: [v1.16.0...v1.16.1](https://github.com/openai/openai-python/compare/v1.16.0...v1.16.1) + +### Chores + +* **internal:** defer model build for import latency ([#1291](https://github.com/openai/openai-python/issues/1291)) ([bc6866e](https://github.com/openai/openai-python/commit/bc6866eb2335d01532190d0906cad7bf9af28621)) + +## 1.16.0 (2024-04-01) + +Full Changelog: [v1.15.0...v1.16.0](https://github.com/openai/openai-python/compare/v1.15.0...v1.16.0) + +### Features + +* **api:** add support for filtering messages by run_id ([#1288](https://github.com/openai/openai-python/issues/1288)) ([58d6b77](https://github.com/openai/openai-python/commit/58d6b773218ef1dd8dc6208124a16078e4ac11c1)) +* **api:** run polling helpers ([#1289](https://github.com/openai/openai-python/issues/1289)) ([6b427f3](https://github.com/openai/openai-python/commit/6b427f38610847bce3ce5334177f07917bd7c187)) + + +### Chores + +* **client:** validate that max_retries is not None ([#1286](https://github.com/openai/openai-python/issues/1286)) 
([aa5920a](https://github.com/openai/openai-python/commit/aa5920af6131c49a44352524154ee4a1684e76b2)) + + +### Refactors + +* rename createAndStream to stream ([6b427f3](https://github.com/openai/openai-python/commit/6b427f38610847bce3ce5334177f07917bd7c187)) + +## 1.15.0 (2024-03-31) + +Full Changelog: [v1.14.3...v1.15.0](https://github.com/openai/openai-python/compare/v1.14.3...v1.15.0) + +### Features + +* **api:** adding temperature parameter ([#1282](https://github.com/openai/openai-python/issues/1282)) ([0e68fd3](https://github.com/openai/openai-python/commit/0e68fd3690155785d1fb0ee9a8604f51e6701b1d)) +* **client:** increase default HTTP max_connections to 1000 and max_keepalive_connections to 100 ([#1281](https://github.com/openai/openai-python/issues/1281)) ([340d139](https://github.com/openai/openai-python/commit/340d1391e3071a265ed12c0a8d70d4d73a860bd8)) +* **package:** export default constants ([#1275](https://github.com/openai/openai-python/issues/1275)) ([fdc126e](https://github.com/openai/openai-python/commit/fdc126e428320f1bed5eabd3eed229f08ab9effa)) + + +### Bug Fixes + +* **project:** use absolute github links on PyPi ([#1280](https://github.com/openai/openai-python/issues/1280)) ([94cd528](https://github.com/openai/openai-python/commit/94cd52837650e5b7e115119d69e6b1c7ba1f6bf1)) + + +### Chores + +* **internal:** bump dependencies ([#1273](https://github.com/openai/openai-python/issues/1273)) ([18dcd65](https://github.com/openai/openai-python/commit/18dcd654d9f54628b5fe21a499d1fef500e15f7f)) + + +### Documentation + +* **readme:** change undocumented params wording ([#1284](https://github.com/openai/openai-python/issues/1284)) ([7498ef1](https://github.com/openai/openai-python/commit/7498ef1e9568200086ba3efb99ea100feb05e3f0)) + +## 1.14.3 (2024-03-25) + +Full Changelog: [v1.14.2...v1.14.3](https://github.com/openai/openai-python/compare/v1.14.2...v1.14.3) + +### Bug Fixes + +* revert regression with 3.7 support ([#1269](https://github.com/openai/openai-python/issues/1269)) ([37aed56](https://github.com/openai/openai-python/commit/37aed564143dc7281f1eaa6ab64ec5ca334cf25e)) + + +### Chores + +* **internal:** construct error properties instead of using the raw response ([#1257](https://github.com/openai/openai-python/issues/1257)) ([11dce5c](https://github.com/openai/openai-python/commit/11dce5c66395722b245f5d5461ce379ca7b939e4)) +* **internal:** formatting change ([#1258](https://github.com/openai/openai-python/issues/1258)) ([b907dd7](https://github.com/openai/openai-python/commit/b907dd7dcae895e4209559da061d0991a8d640a6)) +* **internal:** loosen input type for util function ([#1250](https://github.com/openai/openai-python/issues/1250)) ([fc8b4c3](https://github.com/openai/openai-python/commit/fc8b4c37dc91dfcc0535c19236092992171784a0)) + + +### Documentation + +* **contributing:** fix typo ([#1264](https://github.com/openai/openai-python/issues/1264)) ([835cb9b](https://github.com/openai/openai-python/commit/835cb9b2f92e2aa3329545b4677865dcd4fd00f0)) +* **readme:** consistent use of sentence case in headings ([#1255](https://github.com/openai/openai-python/issues/1255)) ([519f371](https://github.com/openai/openai-python/commit/519f371af779b5fa353292ff5a2d3332afe0987e)) +* **readme:** document how to make undocumented requests ([#1256](https://github.com/openai/openai-python/issues/1256)) ([5887858](https://github.com/openai/openai-python/commit/5887858a7b649dfde5b733ef01e5cffcf953b2a7)) + +## 1.14.2 (2024-03-19) + +Full Changelog: 
[v1.14.1...v1.14.2](https://github.com/openai/openai-python/compare/v1.14.1...v1.14.2) + +### Performance Improvements + +* cache TypeAdapters ([#1114](https://github.com/openai/openai-python/issues/1114)) ([41b6fee](https://github.com/openai/openai-python/commit/41b6feec70d3f203e36ba9a92205389bafce930c)) +* cache TypeAdapters ([#1243](https://github.com/openai/openai-python/issues/1243)) ([2005076](https://github.com/openai/openai-python/commit/2005076f500bef6e0a6cc8f935b9cc9fef65ab5b)) + + +### Chores + +* **internal:** update generated pragma comment ([#1247](https://github.com/openai/openai-python/issues/1247)) ([3eeb9b3](https://github.com/openai/openai-python/commit/3eeb9b3a71e01c2593be443a97a353371466d01a)) + + +### Documentation + +* assistant improvements ([#1249](https://github.com/openai/openai-python/issues/1249)) ([e7a3176](https://github.com/openai/openai-python/commit/e7a3176b7606822bd5ad8f7fece87de6aad1e5b6)) +* fix typo in CONTRIBUTING.md ([#1245](https://github.com/openai/openai-python/issues/1245)) ([adef57a](https://github.com/openai/openai-python/commit/adef57ae5c71734873ba49bccd92fa7f28068d28)) + +## 1.14.1 (2024-03-15) + +Full Changelog: [v1.14.0...v1.14.1](https://github.com/openai/openai-python/compare/v1.14.0...v1.14.1) + +### Documentation + +* **readme:** assistant streaming ([#1238](https://github.com/openai/openai-python/issues/1238)) ([0fc30a2](https://github.com/openai/openai-python/commit/0fc30a23030b4ff60f27cd2f472517926ed0f300)) + +## 1.14.0 (2024-03-13) + +Full Changelog: [v1.13.4...v1.14.0](https://github.com/openai/openai-python/compare/v1.13.4...v1.14.0) + +### Features + +* **assistants:** add support for streaming ([#1233](https://github.com/openai/openai-python/issues/1233)) ([17635dc](https://github.com/openai/openai-python/commit/17635dccbeddf153f8201dbca18b44e16a1799b2)) + +## 1.13.4 (2024-03-13) + +Full Changelog: [v1.13.3...v1.13.4](https://github.com/openai/openai-python/compare/v1.13.3...v1.13.4) + +### Bug Fixes + +* **streaming:** improve error messages ([#1218](https://github.com/openai/openai-python/issues/1218)) ([4f5ff29](https://github.com/openai/openai-python/commit/4f5ff298601b5a8bfbf0a9d0c0d1329d1502a205)) + + +### Chores + +* **api:** update docs ([#1212](https://github.com/openai/openai-python/issues/1212)) ([71236e0](https://github.com/openai/openai-python/commit/71236e0de4012a249af4c1ffd95973a8ba4fa61f)) +* **client:** improve error message for invalid http_client argument ([#1216](https://github.com/openai/openai-python/issues/1216)) ([d0c928a](https://github.com/openai/openai-python/commit/d0c928abbd99020fe828350f3adfd10c638a2eed)) +* **docs:** mention install from git repo ([#1203](https://github.com/openai/openai-python/issues/1203)) ([3ab6f44](https://github.com/openai/openai-python/commit/3ab6f447ffd8d2394e58416e401e545a99ec85af)) +* export NOT_GIVEN sentinel value ([#1223](https://github.com/openai/openai-python/issues/1223)) ([8a4f76f](https://github.com/openai/openai-python/commit/8a4f76f992c66f20cd6aa070c8dc4839e4cf9f3c)) +* **internal:** add core support for deserializing into number response ([#1219](https://github.com/openai/openai-python/issues/1219)) ([004bc92](https://github.com/openai/openai-python/commit/004bc924ea579852b9266ca11aea93463cf75104)) +* **internal:** bump pyright ([#1221](https://github.com/openai/openai-python/issues/1221)) ([3c2e815](https://github.com/openai/openai-python/commit/3c2e815311ace4ff81ccd446b23ff50a4e099485)) +* **internal:** improve deserialisation of discriminated unions 
([#1227](https://github.com/openai/openai-python/issues/1227)) ([4767259](https://github.com/openai/openai-python/commit/4767259d25ac135550b37b15e4c0497e5ff0330d)) +* **internal:** minor core client restructuring ([#1199](https://github.com/openai/openai-python/issues/1199)) ([4314cdc](https://github.com/openai/openai-python/commit/4314cdcd522537e6cbbd87206d5bb236f672ce05)) +* **internal:** split up transforms into sync / async ([#1210](https://github.com/openai/openai-python/issues/1210)) ([7853a83](https://github.com/openai/openai-python/commit/7853a8358864957cc183581bdf7c03810a7b2756)) +* **internal:** support more input types ([#1211](https://github.com/openai/openai-python/issues/1211)) ([d0e4baa](https://github.com/openai/openai-python/commit/d0e4baa40d32c2da0ce5ceef8e0c7193b98f2b5a)) +* **internal:** support parsing Annotated types ([#1222](https://github.com/openai/openai-python/issues/1222)) ([8598f81](https://github.com/openai/openai-python/commit/8598f81841eeab0ab00eb21fdec7e8756ffde909)) +* **types:** include discriminators in unions ([#1228](https://github.com/openai/openai-python/issues/1228)) ([3ba0dcc](https://github.com/openai/openai-python/commit/3ba0dcc19a2af0ef869c77da2805278f71ee96c2)) + + +### Documentation + +* **contributing:** improve wording ([#1201](https://github.com/openai/openai-python/issues/1201)) ([95a1e0e](https://github.com/openai/openai-python/commit/95a1e0ea8e5446c413606847ebf9e35afbc62bf9)) + +## 1.13.3 (2024-02-28) + +Full Changelog: [v1.13.2...v1.13.3](https://github.com/openai/openai-python/compare/v1.13.2...v1.13.3) + +### Features + +* **api:** add wav and pcm to response_format ([#1189](https://github.com/openai/openai-python/issues/1189)) ([dbd20fc](https://github.com/openai/openai-python/commit/dbd20fc42e93358261f71b9aa0e5f955053c3825)) + + +### Chores + +* **client:** use anyio.sleep instead of asyncio.sleep ([#1198](https://github.com/openai/openai-python/issues/1198)) ([b6d025b](https://github.com/openai/openai-python/commit/b6d025b54f091e79f5d4a0a8923f29574fd66027)) +* **internal:** bump pyright ([#1193](https://github.com/openai/openai-python/issues/1193)) ([9202e04](https://github.com/openai/openai-python/commit/9202e04d07a7c47232f39196346c734869b8f55a)) +* **types:** extract run status to a named type ([#1178](https://github.com/openai/openai-python/issues/1178)) ([249ecbd](https://github.com/openai/openai-python/commit/249ecbdeb6566a385ec46dfd5000b4eaa03965f0)) + + +### Documentation + +* add note in azure_deployment docstring ([#1188](https://github.com/openai/openai-python/issues/1188)) ([96fa995](https://github.com/openai/openai-python/commit/96fa99572dd76ee708f2bae04d11b659cdd698b2)) +* **examples:** add pyaudio streaming example ([#1194](https://github.com/openai/openai-python/issues/1194)) ([3683c5e](https://github.com/openai/openai-python/commit/3683c5e3c7f07e4b789a0c4cc417b2c59539cae2)) + +## 1.13.2 (2024-02-20) + +Full Changelog: [v1.13.1...v1.13.2](https://github.com/openai/openai-python/compare/v1.13.1...v1.13.2) + +### Bug Fixes + +* **ci:** revert "move github release logic to github app" ([#1170](https://github.com/openai/openai-python/issues/1170)) ([f1adc2e](https://github.com/openai/openai-python/commit/f1adc2e6f2f29acb4404e84137a9d3109714c585)) + +## 1.13.1 (2024-02-20) + +Full Changelog: [v1.13.0...v1.13.1](https://github.com/openai/openai-python/compare/v1.13.0...v1.13.1) + +### Chores + +* **internal:** bump rye to v0.24.0 ([#1168](https://github.com/openai/openai-python/issues/1168)) 
([84c4256](https://github.com/openai/openai-python/commit/84c4256316f2a79068ecadb852e5e69b6b104a1f)) + +## 1.13.0 (2024-02-19) + +Full Changelog: [v1.12.0...v1.13.0](https://github.com/openai/openai-python/compare/v1.12.0...v1.13.0) + +### Features + +* **api:** updates ([#1146](https://github.com/openai/openai-python/issues/1146)) ([79b7675](https://github.com/openai/openai-python/commit/79b7675e51fb7d269a6ea281a568bc7812ba2ace)) + + +### Bug Fixes + +* **api:** remove non-GA instance_id param ([#1164](https://github.com/openai/openai-python/issues/1164)) ([1abe139](https://github.com/openai/openai-python/commit/1abe139b1a5f5cc41263738fc12856056dce5697)) + + +### Chores + +* **ci:** move github release logic to github app ([#1155](https://github.com/openai/openai-python/issues/1155)) ([67cfac2](https://github.com/openai/openai-python/commit/67cfac2564dfb718da0465e34b90ac6928fa962a)) +* **client:** use correct accept headers for binary data ([#1161](https://github.com/openai/openai-python/issues/1161)) ([e536437](https://github.com/openai/openai-python/commit/e536437ae0b2cb0ddf2d74618722005d37403f32)) +* **internal:** refactor release environment script ([#1158](https://github.com/openai/openai-python/issues/1158)) ([7fe8ec3](https://github.com/openai/openai-python/commit/7fe8ec3bf04ecf85e3bd5adf0d9992c051f87b81)) + +## 1.12.0 (2024-02-08) + +Full Changelog: [v1.11.1...v1.12.0](https://github.com/openai/openai-python/compare/v1.11.1...v1.12.0) + +### Features + +* **api:** add `timestamp_granularities`, add `gpt-3.5-turbo-0125` model ([#1125](https://github.com/openai/openai-python/issues/1125)) ([1ecf8f6](https://github.com/openai/openai-python/commit/1ecf8f6b12323ed09fb6a2815c85b9533ee52a50)) +* **cli/images:** add support for `--model` arg ([#1132](https://github.com/openai/openai-python/issues/1132)) ([0d53866](https://github.com/openai/openai-python/commit/0d5386615cda7cd50d5db90de2119b84dba29519)) + + +### Bug Fixes + +* remove double brackets from timestamp_granularities param ([#1140](https://github.com/openai/openai-python/issues/1140)) ([3db0222](https://github.com/openai/openai-python/commit/3db022216a81fa86470b53ec1246669bc7b17897)) +* **types:** loosen most List params types to Iterable ([#1129](https://github.com/openai/openai-python/issues/1129)) ([bdb31a3](https://github.com/openai/openai-python/commit/bdb31a3b1db6ede4e02b3c951c4fd23f70260038)) + + +### Chores + +* **internal:** add lint command ([#1128](https://github.com/openai/openai-python/issues/1128)) ([4c021c0](https://github.com/openai/openai-python/commit/4c021c0ab0151c2ec092d860c9b60e22e658cd03)) +* **internal:** support serialising iterable types ([#1127](https://github.com/openai/openai-python/issues/1127)) ([98d4e59](https://github.com/openai/openai-python/commit/98d4e59afcf2d65d4e660d91eb9462240ef5cd63)) + + +### Documentation + +* add CONTRIBUTING.md ([#1138](https://github.com/openai/openai-python/issues/1138)) ([79c8f0e](https://github.com/openai/openai-python/commit/79c8f0e8bf5470e2e31e781e8d279331e89ddfbe)) + +## 1.11.1 (2024-02-04) + +Full Changelog: [v1.11.0...v1.11.1](https://github.com/openai/openai-python/compare/v1.11.0...v1.11.1) + +### Bug Fixes + +* prevent crash when platform.architecture() is not allowed ([#1120](https://github.com/openai/openai-python/issues/1120)) ([9490554](https://github.com/openai/openai-python/commit/949055488488e93597cbc6c2cdd81f14f203e53b)) + +## 1.11.0 (2024-02-03) + +Full Changelog: [v1.10.0...v1.11.0](https://github.com/openai/openai-python/compare/v1.10.0...v1.11.0) 
+ +### Features + +* **client:** support parsing custom response types ([#1111](https://github.com/openai/openai-python/issues/1111)) ([da00fc3](https://github.com/openai/openai-python/commit/da00fc3f8e0ff13c6c3ca970e4bb86846304bd06)) + + +### Chores + +* **interal:** make link to api.md relative ([#1117](https://github.com/openai/openai-python/issues/1117)) ([4a10879](https://github.com/openai/openai-python/commit/4a108797e46293357601ce933e21b557a5dc6954)) +* **internal:** cast type in mocked test ([#1112](https://github.com/openai/openai-python/issues/1112)) ([99b21e1](https://github.com/openai/openai-python/commit/99b21e1fc681eb10e01d479cc043ad3c89272b1c)) +* **internal:** enable ruff type checking misuse lint rule ([#1106](https://github.com/openai/openai-python/issues/1106)) ([fa63e60](https://github.com/openai/openai-python/commit/fa63e605c82ec78f4fc27469c434b421a08fb909)) +* **internal:** support multipart data with overlapping keys ([#1104](https://github.com/openai/openai-python/issues/1104)) ([455bc9f](https://github.com/openai/openai-python/commit/455bc9f1fd018a32cd604eb4b400e05aa8d71822)) +* **internal:** support pre-release versioning ([#1113](https://github.com/openai/openai-python/issues/1113)) ([dea5b08](https://github.com/openai/openai-python/commit/dea5b08c28d47b331fd44f6920cf9fe322b68e51)) + +## 1.10.0 (2024-01-25) + +Full Changelog: [v1.9.0...v1.10.0](https://github.com/openai/openai-python/compare/v1.9.0...v1.10.0) + +### Features + +* **api:** add text embeddings dimensions param ([#1103](https://github.com/openai/openai-python/issues/1103)) ([94abfa0](https://github.com/openai/openai-python/commit/94abfa0f988c199ea95a9c870c4ae9808823186d)) +* **azure:** proactively add audio/speech to deployment endpoints ([#1099](https://github.com/openai/openai-python/issues/1099)) ([fdf8742](https://github.com/openai/openai-python/commit/fdf87429b45ceb47ae6fd068ab70cc07bcb8da44)) +* **client:** enable follow redirects by default ([#1100](https://github.com/openai/openai-python/issues/1100)) ([d325b7c](https://github.com/openai/openai-python/commit/d325b7ca594c2abaada536249b5633b106943333)) + + +### Chores + +* **internal:** add internal helpers ([#1092](https://github.com/openai/openai-python/issues/1092)) ([629bde5](https://github.com/openai/openai-python/commit/629bde5800d84735e22d924db23109a141f48644)) + + +### Refactors + +* remove unnecessary builtin import ([#1094](https://github.com/openai/openai-python/issues/1094)) ([504b7d4](https://github.com/openai/openai-python/commit/504b7d4a0b4715bd49a1a076a8d4868e51fb3351)) + ## 1.9.0 (2024-01-21) Full Changelog: [v1.8.0...v1.9.0](https://github.com/openai/openai-python/compare/v1.8.0...v1.9.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..354d21b2d2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,125 @@ +## Setting up the environment + +### With Rye + +We use [Rye](https://rye-up.com/) to manage dependencies so we highly recommend [installing it](https://rye-up.com/guide/installation/) as it will automatically provision a Python environment with the expected Python version. 
+ +After installing Rye, you'll just have to run this command: + +```sh +$ rye sync --all-features +``` + +You can then run scripts using `rye run python script.py` or by activating the virtual environment: + +```sh +$ rye shell +# or manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work +$ source .venv/bin/activate + +# now you can omit the `rye run` prefix +$ python script.py +``` + +### Without Rye + +Alternatively, if you don't want to install `Rye`, you can stick with the standard `pip` setup: make sure you have the Python version specified in `.python-version`, create a virtual environment however you desire, and then install dependencies using this command: + +```sh +$ pip install -r requirements-dev.lock +``` + +## Modifying/Adding code + +Most of the SDK is generated code, and any modified code will be overridden on the next generation. The +`src/openai/lib/` and `examples/` directories are exceptions and will never be overridden. + +## Adding and running examples + +All files in the `examples/` directory are not modified by the Stainless generator and can be freely edited or +added to. + +```bash +# add an example to examples/<your-example>.py + +#!/usr/bin/env -S rye run python +… +``` + +``` +chmod +x examples/<your-example>.py +# run the example against your api +./examples/<your-example>.py +``` + +## Using the repository from source + +If you’d like to use the repository from source, you can either install from git or link to a cloned repository: + +To install via git: + +```bash +pip install git+ssh://git@github.com/openai/openai-python.git +``` + +Alternatively, you can build from source and install the wheel file: + +Building this package will create two files in the `dist/` directory, a `.tar.gz` containing the source files and a `.whl` that can be used to install the package efficiently. + +To create a distributable version of the library, all you have to do is run this command: + +```bash +rye build +# or +python -m build +``` + +Then to install: + +```sh +pip install ./path-to-wheel-file.whl +``` + +## Running tests + +Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. + +```bash +# you will need npm installed +npx prism mock path/to/your/openapi.yml +``` + +```bash +rye run pytest +``` + +## Linting and formatting + +This repository uses [ruff](https://github.com/astral-sh/ruff) and +[black](https://github.com/psf/black) to format the code in the repository. + +To lint: + +```bash +rye run lint +``` + +To format and fix all ruff issues automatically: + +```bash +rye run format +``` + +## Publishing and releases + +Changes made to this repository via the automated release PR pipeline should publish to PyPI automatically. If +the changes aren't made through the automated pipeline, you may want to make releases manually. + +### Publish with a GitHub workflow + +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/openai/openai-python/actions/workflows/publish-pypi.yml). This requires an organization or repository secret to be set up. + +### Publish manually + +If you need to manually release a package, you can run the `bin/publish-pypi` script with a `PYPI_TOKEN` set on +the environment.
diff --git a/README.md b/README.md index 22e7ac795f..e566a2f8d0 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ It is generated from our [OpenAPI specification](https://github.com/openai/opena ## Documentation -The REST API documentation can be found [on platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](https://www.github.com/openai/openai-python/blob/main/api.md). +The REST API documentation can be found [on platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](api.md). ## Installation @@ -18,12 +18,13 @@ The REST API documentation can be found [on platform.openai.com](https://platfor > The SDK was rewritten in v1, which was released November 6th 2023. See the [v1 migration guide](https://github.com/openai/openai-python/discussions/742), which includes scripts to automatically update your code. ```sh +# install from PyPI pip install openai ``` ## Usage -The full API of this library can be found in [api.md](https://www.github.com/openai/openai-python/blob/main/api.md). +The full API of this library can be found in [api.md](api.md). ```python import os @@ -50,6 +51,56 @@ we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `OPENAI_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control. +### Polling Helpers + +When interacting with the API, some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. The SDK includes +helper functions which will poll the status until it reaches a terminal state and then return the resulting object. +If an API method results in an action which could benefit from polling, there will be a corresponding version of the +method ending in '\_and_poll'. + +For instance, to create a Run and poll until it reaches a terminal state, you can run: + +```python +run = client.beta.threads.runs.create_and_poll( + thread_id=thread.id, + assistant_id=assistant.id, +) +``` + +More information on the lifecycle of a Run can be found in the [Run Lifecycle Documentation](https://platform.openai.com/docs/assistants/how-it-works/run-lifecycle). + +### Bulk Upload Helpers + +When creating and interacting with vector stores, you can use the polling helpers to monitor the status of operations. +For convenience, we also provide a bulk upload helper to allow you to upload several files at once. + +```python +sample_files = [Path("sample-paper.pdf"), ...] + +batch = await client.beta.vector_stores.file_batches.upload_and_poll( + store.id, + files=sample_files, +) +``` + +### Streaming Helpers + +The SDK also includes helpers to process streams and handle the incoming events. + +```python +with client.beta.threads.runs.stream( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account.", +) as stream: + for event in stream: + # Print the text from text delta events + if event.type == "thread.message.delta" and event.data.delta.content: + print(event.data.delta.content[0].text) +``` + +More information on streaming helpers can be found in the dedicated documentation: [helpers.md](helpers.md). + ## Async usage Simply import `AsyncOpenAI` instead of `OpenAI` and use `await` with each API call: @@ -82,7 +133,7 @@ asyncio.run(main()) Functionality between the synchronous and asynchronous clients is otherwise identical.
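The async usage referred to above follows the same shape as the synchronous examples, just awaited on an `AsyncOpenAI` client; a minimal sketch (the model name is only an illustrative choice):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()  # reads the OPENAI_API_KEY environment variable


async def main() -> None:
    # Same method and arguments as the synchronous client, just awaited.
    completion = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say this is a test"}],
    )
    print(completion.choices[0].message.content)


asyncio.run(main())
```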
-## Streaming Responses +## Streaming responses We provide support for streaming responses using Server-Sent Events (SSE). @@ -163,10 +214,10 @@ We recommend that you always instantiate a client (e.g., with `client = OpenAI() ## Using types -Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev), which provide helper methods for things like: +Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: -- Serializing back into JSON, `model.model_dump_json(indent=2, exclude_unset=True)` -- Converting to a dictionary, `model.model_dump(exclude_unset=True)` +- Serializing back into JSON, `model.to_json()` +- Converting to a dictionary, `model.to_dict()` Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. @@ -262,7 +313,7 @@ completion = client.chat.completions.create( ) ``` -## File Uploads +## File uploads Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. @@ -373,7 +424,7 @@ client = OpenAI( ) # Override per-request: -client.with_options(timeout=5 * 1000).chat.completions.create( +client.with_options(timeout=5.0).chat.completions.create( messages=[ { "role": "user", @@ -468,6 +519,41 @@ with client.chat.completions.with_streaming_response.create( The context manager is required so that the response will reliably be closed. +### Making custom/undocumented requests + +This library is typed for convenient access to the documented API. + +If you need to access undocumented endpoints, params, or response properties, the library can still be used. + +#### Undocumented endpoints + +To make requests to undocumented endpoints, you can use `client.get`, `client.post`, and other +HTTP verbs. Options on the client (such as retries) will be respected when making this +request. + +```py +import httpx + +response = client.post( + "/foo", + cast_to=httpx.Response, + body={"my_param": True}, +) + +print(response.headers.get("x-foo")) +``` + +#### Undocumented request params + +If you want to explicitly send an extra param, you can do so with the `extra_query`, `extra_body`, and `extra_headers` request +options. + +#### Undocumented response properties + +To access undocumented response properties, you can read the extra fields like `response.unknown_prop`. You +can also get all the extra fields on the Pydantic model as a dict with +[`response.model_extra`](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_extra).
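As a rough sketch of the `extra_query`, `extra_body`, and `extra_headers` options described above (the parameter and header names below are invented purely for illustration):

```python
from openai import OpenAI

client = OpenAI()

# `my_query_param`, `my_body_param`, and `X-My-Header` are hypothetical names,
# used only to show where each option ends up on the outgoing request.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say this is a test"}],
    extra_query={"my_query_param": "value"},  # appended to the URL query string
    extra_body={"my_body_param": True},  # merged into the JSON request body
    extra_headers={"X-My-Header": "value"},  # added to the HTTP headers
)

# Extra fields returned by the API but not declared on the model remain
# accessible, e.g. via Pydantic's `model_extra` dict.
print(completion.model_extra)
```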
+ ### Configuring the HTTP client You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: @@ -477,13 +563,12 @@ You can directly override the [httpx client](https://www.python-httpx.org/api/#c - Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality ```python -import httpx -from openai import OpenAI +from openai import OpenAI, DefaultHttpxClient client = OpenAI( # Or use the `OPENAI_BASE_URL` env var base_url="http://my.test.server.example.com:8083", - http_client=httpx.Client( + http_client=DefaultHttpxClient( proxies="http://my.test.proxy.example.com", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), @@ -523,7 +608,7 @@ completion = client.chat.completions.create( }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) ``` In addition to the options provided in the base `OpenAI` client, the following options are provided: diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..c54acaf331 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,29 @@ +# Security Policy + +## Reporting Security Issues + +This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. + +To report a security issue, please contact the Stainless team at security@stainlessapi.com. + +## Responsible Disclosure + +We appreciate the efforts of security researchers and individuals who help us maintain the security of +SDKs we generate. If you believe you have found a security vulnerability, please adhere to responsible +disclosure practices by allowing us a reasonable amount of time to investigate and address the issue +before making any information public. + +## Reporting Non-SDK Related Security Issues + +If you encounter security issues that are not directly related to SDKs but pertain to the services +or products provided by OpenAI please follow the respective company's security reporting guidelines. + +### OpenAI Terms and Policies + +Our Security Policy can be found at [Security Policy URL](https://openai.com/policies/coordinated-vulnerability-disclosure-policy). + +Please contact disclosure@openai.com for any questions or concerns regarding security of our services. + +--- + +Thank you for helping us keep the SDKs and systems they interact with secure. 
diff --git a/api.md b/api.md index 86b972d14e..de69f11dca 100644 --- a/api.md +++ b/api.md @@ -1,7 +1,7 @@ # Shared Types ```python -from openai.types import FunctionDefinition, FunctionParameters +from openai.types import ErrorObject, FunctionDefinition, FunctionParameters ``` # Completions @@ -18,6 +18,12 @@ Methods: # Chat +Types: + +```python +from openai.types import ChatModel +``` + ## Completions Types: @@ -37,6 +43,7 @@ from openai.types.chat import ( ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice, ChatCompletionRole, + ChatCompletionStreamOptions, ChatCompletionSystemMessageParam, ChatCompletionTokenLogprob, ChatCompletionTool, @@ -159,56 +166,132 @@ Methods: Types: ```python -from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent +from openai.types.fine_tuning import ( + FineTuningJob, + FineTuningJobEvent, + FineTuningJobIntegration, + FineTuningJobWandbIntegration, + FineTuningJobWandbIntegrationObject, +) ``` Methods: -- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob -- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob -- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob] -- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob -- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent] +- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob +- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob +- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob] +- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob +- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent] + +### Checkpoints + +Types: + +```python +from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint +``` + +Methods: + +- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint] # Beta -## Assistants +## VectorStores Types: ```python -from openai.types.beta import Assistant, AssistantDeleted +from openai.types.beta import VectorStore, VectorStoreDeleted ``` Methods: -- client.beta.assistants.create(\*\*params) -> Assistant -- client.beta.assistants.retrieve(assistant_id) -> Assistant -- client.beta.assistants.update(assistant_id, \*\*params) -> Assistant -- client.beta.assistants.list(\*\*params) -> SyncCursorPage[Assistant] -- client.beta.assistants.delete(assistant_id) -> AssistantDeleted +- client.beta.vector_stores.create(\*\*params) -> VectorStore +- client.beta.vector_stores.retrieve(vector_store_id) -> VectorStore +- client.beta.vector_stores.update(vector_store_id, \*\*params) -> VectorStore +- client.beta.vector_stores.list(\*\*params) -> SyncCursorPage[VectorStore] +- client.beta.vector_stores.delete(vector_store_id) -> VectorStoreDeleted ### Files Types: ```python -from openai.types.beta.assistants import AssistantFile, FileDeleteResponse +from openai.types.beta.vector_stores import VectorStoreFile, VectorStoreFileDeleted ``` Methods: -- client.beta.assistants.files.create(assistant_id, \*\*params) -> AssistantFile -- client.beta.assistants.files.retrieve(file_id, \*, assistant_id) -> AssistantFile -- client.beta.assistants.files.list(assistant_id, \*\*params) -> SyncCursorPage[AssistantFile] -- client.beta.assistants.files.delete(file_id, \*, assistant_id) -> FileDeleteResponse +- client.beta.vector_stores.files.create(vector_store_id, \*\*params) -> VectorStoreFile +- 
client.beta.vector_stores.files.retrieve(file_id, \*, vector_store_id) -> VectorStoreFile +- client.beta.vector_stores.files.list(vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] +- client.beta.vector_stores.files.delete(file_id, \*, vector_store_id) -> VectorStoreFileDeleted +- client.beta.vector_stores.files.create_and_poll(\*args) -> VectorStoreFile +- client.beta.vector_stores.files.poll(\*args) -> VectorStoreFile +- client.beta.vector_stores.files.upload(\*args) -> VectorStoreFile +- client.beta.vector_stores.files.upload_and_poll(\*args) -> VectorStoreFile + +### FileBatches + +Types: + +```python +from openai.types.beta.vector_stores import VectorStoreFileBatch +``` + +Methods: + +- client.beta.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatch +- client.beta.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.beta.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.beta.vector_stores.file_batches.list_files(batch_id, \*, vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] +- client.beta.vector_stores.file_batches.create_and_poll(\*args) -> VectorStoreFileBatch +- client.beta.vector_stores.file_batches.poll(\*args) -> VectorStoreFileBatch +- client.beta.vector_stores.file_batches.upload_and_poll(\*args) -> VectorStoreFileBatch + +## Assistants + +Types: + +```python +from openai.types.beta import ( + Assistant, + AssistantDeleted, + AssistantStreamEvent, + AssistantTool, + CodeInterpreterTool, + FileSearchTool, + FunctionTool, + MessageStreamEvent, + RunStepStreamEvent, + RunStreamEvent, + ThreadStreamEvent, +) +``` + +Methods: + +- client.beta.assistants.create(\*\*params) -> Assistant +- client.beta.assistants.retrieve(assistant_id) -> Assistant +- client.beta.assistants.update(assistant_id, \*\*params) -> Assistant +- client.beta.assistants.list(\*\*params) -> SyncCursorPage[Assistant] +- client.beta.assistants.delete(assistant_id) -> AssistantDeleted ## Threads Types: ```python -from openai.types.beta import Thread, ThreadDeleted +from openai.types.beta import ( + AssistantResponseFormat, + AssistantResponseFormatOption, + AssistantToolChoice, + AssistantToolChoiceFunction, + AssistantToolChoiceOption, + Thread, + ThreadDeleted, +) ``` Methods: @@ -218,13 +301,15 @@ Methods: - client.beta.threads.update(thread_id, \*\*params) -> Thread - client.beta.threads.delete(thread_id) -> ThreadDeleted - client.beta.threads.create_and_run(\*\*params) -> Run +- client.beta.threads.create_and_run_poll(\*args) -> Run +- client.beta.threads.create_and_run_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] ### Runs Types: ```python -from openai.types.beta.threads import RequiredActionFunctionToolCall, Run +from openai.types.beta.threads import RequiredActionFunctionToolCall, Run, RunStatus ``` Methods: @@ -235,6 +320,12 @@ Methods: - client.beta.threads.runs.list(thread_id, \*\*params) -> SyncCursorPage[Run] - client.beta.threads.runs.cancel(run_id, \*, thread_id) -> Run - client.beta.threads.runs.submit_tool_outputs(run_id, \*, thread_id, \*\*params) -> Run +- client.beta.threads.runs.create_and_poll(\*args) -> Run +- client.beta.threads.runs.create_and_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] +- client.beta.threads.runs.poll(\*args) -> Run +- client.beta.threads.runs.stream(\*args) -> 
AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] +- client.beta.threads.runs.submit_tool_outputs_and_poll(\*args) -> Run +- client.beta.threads.runs.submit_tool_outputs_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] #### Steps @@ -242,11 +333,22 @@ Types: ```python from openai.types.beta.threads.runs import ( - CodeToolCall, + CodeInterpreterLogs, + CodeInterpreterOutputImage, + CodeInterpreterToolCall, + CodeInterpreterToolCallDelta, + FileSearchToolCall, + FileSearchToolCallDelta, FunctionToolCall, + FunctionToolCallDelta, MessageCreationStepDetails, - RetrievalToolCall, RunStep, + RunStepDelta, + RunStepDeltaEvent, + RunStepDeltaMessageDelta, + ToolCall, + ToolCallDelta, + ToolCallDeltaObject, ToolCallsStepDetails, ) ``` @@ -262,29 +364,54 @@ Types: ```python from openai.types.beta.threads import ( - MessageContentImageFile, - MessageContentText, - ThreadMessage, - ThreadMessageDeleted, + Annotation, + AnnotationDelta, + FileCitationAnnotation, + FileCitationDeltaAnnotation, + FilePathAnnotation, + FilePathDeltaAnnotation, + ImageFile, + ImageFileContentBlock, + ImageFileDelta, + ImageFileDeltaBlock, + ImageURL, + ImageURLContentBlock, + ImageURLDelta, + ImageURLDeltaBlock, + Message, + MessageContent, + MessageContentDelta, + MessageContentPartParam, + MessageDeleted, + MessageDelta, + MessageDeltaEvent, + Text, + TextContentBlock, + TextContentBlockParam, + TextDelta, + TextDeltaBlock, ) ``` Methods: -- client.beta.threads.messages.create(thread_id, \*\*params) -> ThreadMessage -- client.beta.threads.messages.retrieve(message_id, \*, thread_id) -> ThreadMessage -- client.beta.threads.messages.update(message_id, \*, thread_id, \*\*params) -> ThreadMessage -- client.beta.threads.messages.list(thread_id, \*\*params) -> SyncCursorPage[ThreadMessage] +- client.beta.threads.messages.create(thread_id, \*\*params) -> Message +- client.beta.threads.messages.retrieve(message_id, \*, thread_id) -> Message +- client.beta.threads.messages.update(message_id, \*, thread_id, \*\*params) -> Message +- client.beta.threads.messages.list(thread_id, \*\*params) -> SyncCursorPage[Message] +- client.beta.threads.messages.delete(message_id, \*, thread_id) -> MessageDeleted -#### Files +# Batches Types: ```python -from openai.types.beta.threads.messages import MessageFile +from openai.types import Batch, BatchError, BatchRequestCounts ``` Methods: -- client.beta.threads.messages.files.retrieve(file_id, \*, thread_id, message_id) -> MessageFile -- client.beta.threads.messages.files.list(message_id, \*, thread_id, \*\*params) -> SyncCursorPage[MessageFile] +- client.batches.create(\*\*params) -> Batch +- client.batches.retrieve(batch_id) -> Batch +- client.batches.list(\*\*params) -> SyncCursorPage[Batch] +- client.batches.cancel(batch_id) -> Batch diff --git a/bin/check-env-state.py b/bin/check-env-state.py deleted file mode 100644 index e1b8b6cb39..0000000000 --- a/bin/check-env-state.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Script that exits 1 if the current environment is not -in sync with the `requirements-dev.lock` file. 
-""" - -from pathlib import Path - -import importlib_metadata - - -def should_run_sync() -> bool: - dev_lock = Path(__file__).parent.parent.joinpath("requirements-dev.lock") - - for line in dev_lock.read_text().splitlines(): - if not line or line.startswith("#") or line.startswith("-e"): - continue - - dep, lock_version = line.split("==") - - try: - version = importlib_metadata.version(dep) - - if lock_version != version: - print(f"mismatch for {dep} current={version} lock={lock_version}") - return True - except Exception: - print(f"could not import {dep}") - return True - - return False - - -def main() -> None: - if should_run_sync(): - exit(1) - else: - exit(0) - - -if __name__ == "__main__": - main() diff --git a/bin/check-release-environment b/bin/check-release-environment index b0c8d34f0c..2cc5ad6352 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -10,9 +10,9 @@ if [ -z "${PYPI_TOKEN}" ]; then errors+=("The OPENAI_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi -len=${#errors[@]} +lenErrors=${#errors[@]} -if [[ len -gt 0 ]]; then +if [[ lenErrors -gt 0 ]]; then echo -e "Found the following errors in the release environment:\n" for error in "${errors[@]}"; do diff --git a/bin/check-test-server b/bin/check-test-server deleted file mode 100755 index a6fa34950d..0000000000 --- a/bin/check-test-server +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -NC='\033[0m' # No Color - -function prism_is_running() { - curl --silent "http://localhost:4010" >/dev/null 2>&1 -} - -function is_overriding_api_base_url() { - [ -n "$TEST_API_BASE_URL" ] -} - -if is_overriding_api_base_url ; then - # If someone is running the tests against the live API, we can trust they know - # what they're doing and exit early. - echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" - - exit 0 -elif prism_is_running ; then - echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" - echo - - exit 0 -else - echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" - echo -e "running against your OpenAPI spec." - echo - echo -e "${YELLOW}To fix:${NC}" - echo - echo -e "1. Install Prism (requires Node 16+):" - echo - echo -e " With npm:" - echo -e " \$ ${YELLOW}npm install -g @stoplight/prism-cli${NC}" - echo - echo -e " With yarn:" - echo -e " \$ ${YELLOW}yarn global add @stoplight/prism-cli${NC}" - echo - echo -e "2. Run the mock server" - echo - echo -e " To run the server, pass in the path of your OpenAPI" - echo -e " spec to the prism command:" - echo - echo -e " \$ ${YELLOW}prism mock path/to/your.openapi.yml${NC}" - echo - - exit 1 -fi diff --git a/bin/test b/bin/test deleted file mode 100755 index 60ede7a842..0000000000 --- a/bin/test +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -bin/check-test-server && rye run pytest "$@" diff --git a/examples/assistant.py b/examples/assistant.py index c5fbb82a3a..0631494ecc 100644 --- a/examples/assistant.py +++ b/examples/assistant.py @@ -1,4 +1,3 @@ -import time import openai @@ -20,28 +19,20 @@ content="I need to solve the equation `3x + 11 = 14`. Can you help me?", ) -run = client.beta.threads.runs.create( +run = client.beta.threads.runs.create_and_poll( thread_id=thread.id, assistant_id=assistant.id, instructions="Please address the user as Jane Doe. The user has a premium account.", ) -print("checking assistant status. 
") -while True: - run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id) +print("Run completed with status: " + run.status) - if run.status == "completed": - print("done!") - messages = client.beta.threads.messages.list(thread_id=thread.id) +if run.status == "completed": + messages = client.beta.threads.messages.list(thread_id=thread.id) - print("messages: ") - for message in messages: - assert message.content[0].type == "text" - print({"role": message.role, "message": message.content[0].text.value}) + print("messages: ") + for message in messages: + assert message.content[0].type == "text" + print({"role": message.role, "message": message.content[0].text.value}) - client.beta.assistants.delete(assistant.id) - - break - else: - print("in progress...") - time.sleep(5) + client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream.py b/examples/assistant_stream.py new file mode 100644 index 0000000000..0465d3930f --- /dev/null +++ b/examples/assistant_stream.py @@ -0,0 +1,33 @@ +import openai + +# gets API Key from environment variable OPENAI_API_KEY +client = openai.OpenAI() + +assistant = client.beta.assistants.create( + name="Math Tutor", + instructions="You are a personal math tutor. Write and run code to answer math questions.", + tools=[{"type": "code_interpreter"}], + model="gpt-4-1106-preview", +) + +thread = client.beta.threads.create() + +message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content="I need to solve the equation `3x + 11 = 14`. Can you help me?", +) + +print("starting run stream") + +stream = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account.", + stream=True, +) + +for event in stream: + print(event.model_dump_json(indent=2, exclude_unset=True)) + +client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream_helpers.py b/examples/assistant_stream_helpers.py new file mode 100644 index 0000000000..7baec77c72 --- /dev/null +++ b/examples/assistant_stream_helpers.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from typing_extensions import override + +import openai +from openai import AssistantEventHandler +from openai.types.beta import AssistantStreamEvent +from openai.types.beta.threads import Text, TextDelta +from openai.types.beta.threads.runs import RunStep, RunStepDelta + + +class EventHandler(AssistantEventHandler): + @override + def on_event(self, event: AssistantStreamEvent) -> None: + if event.event == "thread.run.step.created": + details = event.data.step_details + if details.type == "tool_calls": + print("Generating code to interpret:\n\n```py") + elif event.event == "thread.message.created": + print("\nResponse:\n") + + @override + def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: + print(delta.value, end="", flush=True) + + @override + def on_run_step_done(self, run_step: RunStep) -> None: + details = run_step.step_details + if details.type == "tool_calls": + for tool in details.tool_calls: + if tool.type == "code_interpreter": + print("\n```\nExecuting code...") + + @override + def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: + details = delta.step_details + if details is not None and details.type == "tool_calls": + for tool in details.tool_calls or []: + if tool.type == "code_interpreter" and tool.code_interpreter and tool.code_interpreter.input: + print(tool.code_interpreter.input, 
end="", flush=True) + + +def main() -> None: + client = openai.OpenAI() + + assistant = client.beta.assistants.create( + name="Math Tutor", + instructions="You are a personal math tutor. Write and run code to answer math questions.", + tools=[{"type": "code_interpreter"}], + model="gpt-4-1106-preview", + ) + + try: + question = "I need to solve the equation `3x + 11 = 14`. Can you help me?" + + thread = client.beta.threads.create( + messages=[ + { + "role": "user", + "content": question, + }, + ] + ) + print(f"Question: {question}\n") + + with client.beta.threads.runs.stream( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account.", + event_handler=EventHandler(), + ) as stream: + stream.until_done() + print() + finally: + client.beta.assistants.delete(assistant.id) + + +main() diff --git a/examples/audio.py b/examples/audio.py index 73491090f5..85f47bfb06 100755 --- a/examples/audio.py +++ b/examples/audio.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env rye run python +import time from pathlib import Path from openai import OpenAI @@ -11,6 +12,8 @@ def main() -> None: + stream_to_speakers() + # Create text-to-speech audio file with openai.audio.speech.with_streaming_response.create( model="tts-1", @@ -34,5 +37,28 @@ def main() -> None: print(translation.text) +def stream_to_speakers() -> None: + import pyaudio + + player_stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1, rate=24000, output=True) + + start_time = time.time() + + with openai.audio.speech.with_streaming_response.create( + model="tts-1", + voice="alloy", + response_format="pcm", # similar to WAV, but without a header chunk at the start. + input="""I see skies of blue and clouds of white + The bright blessed days, the dark sacred nights + And I think to myself + What a wonderful world""", + ) as response: + print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms") + for chunk in response.iter_bytes(chunk_size=1024): + player_stream.write(chunk) + + print(f"Done in {int((time.time() - start_time) * 1000)}ms.") + + if __name__ == "__main__": main() diff --git a/examples/azure.py b/examples/azure.py index a28b8cc433..6936c4cb0e 100755 --- a/examples/azure.py +++ b/examples/azure.py @@ -20,7 +20,7 @@ }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) deployment_client = AzureOpenAI( @@ -40,4 +40,4 @@ }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) diff --git a/examples/azure_ad.py b/examples/azure_ad.py index f13079dd04..1b0d81863d 100755 --- a/examples/azure_ad.py +++ b/examples/azure_ad.py @@ -27,4 +27,4 @@ }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) diff --git a/examples/demo.py b/examples/demo.py index 37830e3e97..ac1710f3e0 100755 --- a/examples/demo.py +++ b/examples/demo.py @@ -36,3 +36,18 @@ print(chunk.choices[0].delta.content, end="") print() + +# Response headers: +print("----- custom response headers test -----") +response = client.chat.completions.with_raw_response.create( + model="gpt-4", + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], +) +completion = response.parse() +print(response.request_id) +print(completion.choices[0].message.content) diff --git a/examples/streaming.py b/examples/streaming.py index 368fa5f911..9a84891a83 100755 --- a/examples/streaming.py +++ b/examples/streaming.py @@ -22,12 +22,12 @@ def sync_main() -> None: # You can manually control 
iteration over the response first = next(response) - print(f"got response data: {first.model_dump_json(indent=2)}") + print(f"got response data: {first.to_json()}") # Or you could automatically iterate through all of data. # Note that the for loop will not exit until *all* of the data has been processed. for data in response: - print(data.model_dump_json()) + print(data.to_json()) async def async_main() -> None: @@ -43,12 +43,12 @@ async def async_main() -> None: # You can manually control iteration over the response. # In Python 3.10+ you can also use the `await anext(response)` builtin instead first = await response.__anext__() - print(f"got response data: {first.model_dump_json(indent=2)}") + print(f"got response data: {first.to_json()}") # Or you could automatically iterate through all of data. # Note that the for loop will not exit until *all* of the data has been processed. async for data in response: - print(data.model_dump_json()) + print(data.to_json()) sync_main() diff --git a/helpers.md b/helpers.md new file mode 100644 index 0000000000..3508b59a33 --- /dev/null +++ b/helpers.md @@ -0,0 +1,238 @@ +# Streaming Helpers + +OpenAI supports streaming responses when interacting with the [Assistant](#assistant-streaming-api) APIs. + +## Assistant Streaming API + +OpenAI supports streaming responses from Assistants. The SDK provides convenience wrappers around the API +so you can subscribe to the types of events you are interested in as well as receive accumulated responses. + +More information can be found in the documentation: [Assistant Streaming](https://platform.openai.com/docs/assistants/overview?lang=python) + +#### An example of creating a run and subscribing to some events + +You can subscribe to events by creating an event handler class and overriding the relevant event handlers. + +```python +from typing_extensions import override +from openai import AssistantEventHandler, OpenAI +from openai.types.beta.threads import Text, TextDelta +from openai.types.beta.threads.runs import ToolCall, ToolCallDelta + +client = OpenAI() + +# First, we create an EventHandler class to define +# how we want to handle the events in the response stream. + +class EventHandler(AssistantEventHandler): + @override + def on_text_created(self, text: Text) -> None: + print(f"\nassistant > ", end="", flush=True) + + @override + def on_text_delta(self, delta: TextDelta, snapshot: Text): + print(delta.value, end="", flush=True) + + @override + def on_tool_call_created(self, tool_call: ToolCall): + print(f"\nassistant > {tool_call.type}\n", flush=True) + + @override + def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall): + if delta.type == "code_interpreter" and delta.code_interpreter: + if delta.code_interpreter.input: + print(delta.code_interpreter.input, end="", flush=True) + if delta.code_interpreter.outputs: + print(f"\n\noutput >", flush=True) + for output in delta.code_interpreter.outputs: + if output.type == "logs": + print(f"\n{output.logs}", flush=True) + +# Then, we use the `stream` SDK helper +# with the `EventHandler` class to create the Run +# and stream the response. + +with client.beta.threads.runs.stream( + thread_id="thread_id", + assistant_id="assistant_id", + event_handler=EventHandler(), +) as stream: + stream.until_done() +``` + +#### An example of iterating over events + +You can also iterate over all the streamed events.
+ +```python +with client.beta.threads.runs.stream( + thread_id=thread.id, + assistant_id=assistant.id +) as stream: + for event in stream: + # Print the text from text delta events + if event.event == "thread.message.delta" and event.data.delta.content: + print(event.data.delta.content[0].text) +``` + +#### An example of iterating over text + +You can also iterate over just the text deltas received. + +```python +with client.beta.threads.runs.stream( + thread_id=thread.id, + assistant_id=assistant.id +) as stream: + for text in stream.text_deltas: + print(text) +``` + +### Creating Streams + +There are three helper methods for creating streams: + +```python +client.beta.threads.runs.stream() +``` + +This method can be used to start and stream the response to an existing run with an associated thread +that is already populated with messages. + +```python +client.beta.threads.create_and_run_stream() +``` + +This method can be used to add a message to a thread, start a run and then stream the response. + +```python +client.beta.threads.runs.submit_tool_outputs_stream() +``` + +This method can be used to submit a tool output to a run waiting on the output and start a stream. + +### Assistant Events + +The assistant API provides the following events you can subscribe to. + +```python +def on_event(self, event: AssistantStreamEvent) +``` + +This allows you to subscribe to all the possible raw events sent by the OpenAI streaming API. +In many cases it will be more convenient to subscribe to a more specific set of events for your use case. + +More information on the types of events can be found here: [Events](https://platform.openai.com/docs/api-reference/assistants-streaming/events) + +```python +def on_run_step_created(self, run_step: RunStep) +def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) +def on_run_step_done(self, run_step: RunStep) +``` + +These events allow you to subscribe to the creation, delta and completion of a RunStep. + +For more information on how Runs and RunSteps work see the documentation [Runs and RunSteps](https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps) + +```python +def on_message_created(self, message: Message) +def on_message_delta(self, delta: MessageDelta, snapshot: Message) +def on_message_done(self, message: Message) +``` + +This allows you to subscribe to Message creation, delta and completion events. Messages can contain +different types of content that can be sent from a model (and events are available for specific content types). +For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content. + +More information on messages can be found +in the documentation page [Message](https://platform.openai.com/docs/api-reference/messages/object). + +```python +def on_text_created(self, text: Text) +def on_text_delta(self, delta: TextDelta, snapshot: Text) +def on_text_done(self, text: Text) +``` + +These events allow you to subscribe to the creation, delta and completion of Text content (a specific type of message). +For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content. + +```python +def on_image_file_done(self, image_file: ImageFile) +``` + +Image files are not sent incrementally so an event is provided for when an image file is available.
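+
+As a brief illustration (a sketch only; the `file_id` attribute and the `client.files.content(...)` call mentioned in the comment are assumptions, not taken from this page), a handler could override `on_image_file_done` to react once a generated image is ready:
+
+```python
+from typing_extensions import override
+
+from openai import AssistantEventHandler
+from openai.types.beta.threads import ImageFile
+
+
+class ImageFileHandler(AssistantEventHandler):
+    @override
+    def on_image_file_done(self, image_file: ImageFile) -> None:
+        # The event only carries a reference to the generated file; the image
+        # bytes are assumed to be fetched separately, e.g. with
+        # `client.files.content(image_file.file_id)`.
+        print("image file ready:", image_file.file_id)
+```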
+ +```python +def on_tool_call_created(self, tool_call: ToolCall) +def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) +def on_tool_call_done(self, tool_call: ToolCall) +``` + +These events allow you to subscribe to the creation, delta and completion of a ToolCall. + +More information on tools can be found here: [Tools](https://platform.openai.com/docs/assistants/tools) + +```python +def on_end(self) +``` + +The last event sent when a stream ends. + +```python +def on_timeout(self) +``` + +This event is triggered if the request times out. + +```python +def on_exception(self, exception: Exception) +``` + +This event is triggered if an exception occurs during streaming. + +### Assistant Methods + +The assistant streaming object also provides a few methods for convenience: + +```python +def current_event() -> AssistantStreamEvent | None +def current_run() -> Run | None +def current_message_snapshot() -> Message | None +def current_run_step_snapshot() -> RunStep | None +``` + +These methods are provided to allow you to access additional context from within event handlers. In many cases +the handlers should include all the information you need for processing, but if additional context is required it +can be accessed. + +Note: There is not always a relevant context in certain situations (these will be `None` in those cases). + +```python +def get_final_run(self) -> Run +def get_final_run_steps(self) -> List[RunStep] +def get_final_messages(self) -> List[Message] +``` + +These methods are provided for convenience to collect information at the end of a stream. Calling these methods +will trigger consumption of the stream until completion and then return the relevant accumulated objects. + +# Polling Helpers + +When interacting with the API, some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. +The SDK includes helper functions which will poll the status until it reaches a terminal state and then return the resulting object. +If an API method results in an action which could benefit from polling there will be a corresponding version of the +method ending in `_and_poll`. + +All methods also allow you to set the polling frequency, how often the API is checked for an update, via a function argument (`poll_interval_ms`). + +The polling methods are: + +```python +client.beta.threads.create_and_run_poll(...) +client.beta.threads.runs.create_and_poll(...) +client.beta.threads.runs.submit_tool_outputs_and_poll(...) +client.beta.vector_stores.files.upload_and_poll(...) +client.beta.vector_stores.files.create_and_poll(...) +client.beta.vector_stores.file_batches.create_and_poll(...) +client.beta.vector_stores.file_batches.upload_and_poll(...)
+``` diff --git a/pyproject.toml b/pyproject.toml index 82f4c7e068..3b6888a64d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [project] name = "openai" -version = "1.9.0" +version = "1.30.4" description = "The official Python library for the openai API" -readme = "README.md" +dynamic = ["readme"] license = "Apache-2.0" authors = [ { name = "OpenAI", email = "support@openai.com" }, @@ -50,7 +50,7 @@ openai = "openai.cli:main" managed = true # version pins are in requirements-dev.lock dev-dependencies = [ - "pyright", + "pyright>=1.1.359", "mypy", "respx", "pytest", @@ -60,8 +60,11 @@ dev-dependencies = [ "nox", "dirty-equals>=0.6.0", "importlib-metadata>=6.7.0", + "inline-snapshot >=0.7.0", "azure-identity >=1.14.1", - "types-tqdm > 4" + "types-tqdm > 4", + "types-pyaudio > 0", + "trio >=0.22.2" ] [tool.rye.scripts] @@ -71,10 +74,14 @@ format = { chain = [ "fix:ruff", ]} "format:black" = "black ." -"format:docs" = "python bin/ruffen-docs.py README.md api.md" +"format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md" "format:ruff" = "ruff format" "format:isort" = "isort ." +"lint" = { chain = [ + "check:ruff", + "typecheck", +]} "check:ruff" = "ruff ." "fix:ruff" = "ruff --fix ." @@ -87,7 +94,7 @@ typecheck = { chain = [ "typecheck:mypy" = "mypy ." [build-system] -requires = ["hatchling"] +requires = ["hatchling", "hatch-fancy-pypi-readme"] build-backend = "hatchling.build" [tool.hatch.build] @@ -98,6 +105,17 @@ include = [ [tool.hatch.build.targets.wheel] packages = ["src/openai"] +[tool.hatch.metadata.hooks.fancy-pypi-readme] +content-type = "text/markdown" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +path = "README.md" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] +# replace relative links with absolute links +pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' +replacement = '[\1](https://github.com/openai/openai-python/tree/main/\g<2>)' + [tool.black] line-length = 120 target-version = ["py37"] @@ -129,6 +147,7 @@ reportImplicitOverride = true reportImportCycles = false reportPrivateUsage = false + [tool.ruff] line-length = 120 output-format = "grouped" @@ -147,6 +166,10 @@ select = [ # print statements "T201", "T203", + # misuse of typing.TYPE_CHECKING + "TCH004", + # import rules + "TID251", ] ignore = [ # mutable defaults @@ -162,6 +185,9 @@ ignore-init-module-imports = true [tool.ruff.format] docstring-code-format = true +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"functools.lru_cache".msg = "This function does not retain type information for the wrapped function's arguments; The `lru_cache` function from `_utils` should be used instead" + [tool.ruff.lint.isort] length-sort = true length-sort-straight = true @@ -171,5 +197,6 @@ known-first-party = ["openai", "tests"] [tool.ruff.per-file-ignores] "bin/**.py" = ["T201", "T203"] +"scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] "examples/**.py" = ["T201", "T203"] diff --git a/release-please-config.json b/release-please-config.json index 5c66d801f5..745ef5fd54 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -5,6 +5,8 @@ "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", "include-v-in-tag": true, "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, "bump-minor-pre-major": true, "bump-patch-for-minor-pre-major": false, "pull-request-header": "Automated Release PR", diff --git a/requirements-dev.lock b/requirements-dev.lock index 
088cb2bd98..c5416cd4db 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -5,67 +5,174 @@ # pre: false # features: [] # all-features: true +# with-sources: false -e file:. annotated-types==0.6.0 + # via pydantic anyio==4.1.0 + # via httpx + # via openai argcomplete==3.1.2 + # via nox +asttokens==2.4.1 + # via inline-snapshot attrs==23.1.0 -azure-core==1.29.6 + # via outcome + # via pytest + # via trio +azure-core==1.30.1 + # via azure-identity azure-identity==1.15.0 +black==24.4.2 + # via inline-snapshot certifi==2023.7.22 + # via httpcore + # via httpx + # via requests cffi==1.16.0 + # via cryptography charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via black + # via inline-snapshot colorlog==6.7.0 -cryptography==41.0.7 + # via nox +cryptography==42.0.7 + # via azure-identity + # via msal + # via pyjwt dirty-equals==0.6.0 distlib==0.3.7 + # via virtualenv distro==1.8.0 + # via openai exceptiongroup==1.1.3 + # via anyio + # via trio +executing==2.0.1 + # via inline-snapshot filelock==3.12.4 + # via virtualenv h11==0.14.0 + # via httpcore httpcore==1.0.2 + # via httpx httpx==0.25.2 + # via openai + # via respx idna==3.4 + # via anyio + # via httpx + # via requests + # via trio importlib-metadata==7.0.0 iniconfig==2.0.0 -msal==1.26.0 + # via pytest +inline-snapshot==0.7.0 +msal==1.28.0 + # via azure-identity + # via msal-extensions msal-extensions==1.1.0 + # via azure-identity mypy==1.7.1 mypy-extensions==1.0.0 + # via black + # via mypy nodeenv==1.8.0 + # via pyright nox==2023.4.22 numpy==1.26.3 + # via openai + # via pandas + # via pandas-stubs +outcome==1.3.0.post0 + # via trio packaging==23.2 + # via black + # via msal-extensions + # via nox + # via pytest pandas==2.1.4 + # via openai pandas-stubs==2.1.4.231227 + # via openai +pathspec==0.12.1 + # via black platformdirs==3.11.0 + # via black + # via virtualenv pluggy==1.3.0 + # via pytest portalocker==2.8.2 + # via msal-extensions py==1.11.0 -pycparser==2.21 -pydantic==2.4.2 -pydantic-core==2.10.1 + # via pytest +pycparser==2.22 + # via cffi +pydantic==2.7.1 + # via openai +pydantic-core==2.18.2 + # via pydantic pyjwt==2.8.0 -pyright==1.1.332 + # via msal +pyright==1.1.364 pytest==7.1.1 + # via pytest-asyncio pytest-asyncio==0.21.1 python-dateutil==2.8.2 + # via pandas + # via time-machine pytz==2023.3.post1 + # via dirty-equals + # via pandas requests==2.31.0 + # via azure-core + # via msal respx==0.20.2 ruff==0.1.9 +setuptools==68.2.2 + # via nodeenv six==1.16.0 + # via asttokens + # via azure-core + # via python-dateutil sniffio==1.3.0 + # via anyio + # via httpx + # via openai + # via trio +sortedcontainers==2.4.0 + # via trio time-machine==2.9.0 +toml==0.10.2 + # via inline-snapshot tomli==2.0.1 + # via black + # via mypy + # via pytest tqdm==4.66.1 -types-pytz==2023.3.1.1 + # via openai +trio==0.22.2 +types-pyaudio==0.2.16.20240106 +types-pytz==2024.1.0.20240417 + # via pandas-stubs +types-toml==0.10.8.20240310 + # via inline-snapshot types-tqdm==4.66.0.2 typing-extensions==4.8.0 -tzdata==2023.4 -urllib3==2.1.0 + # via azure-core + # via black + # via mypy + # via openai + # via pydantic + # via pydantic-core +tzdata==2024.1 + # via pandas +urllib3==2.2.1 + # via requests virtualenv==20.24.5 + # via nox zipp==3.17.0 -# The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 + # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index c178f26a88..47cf8a40e9 100644 --- a/requirements.lock +++ b/requirements.lock @@ -5,27 +5,59 @@ # pre: false # 
features: [] # all-features: true +# with-sources: false -e file:. annotated-types==0.6.0 + # via pydantic anyio==4.1.0 + # via httpx + # via openai certifi==2023.7.22 + # via httpcore + # via httpx distro==1.8.0 + # via openai exceptiongroup==1.1.3 + # via anyio h11==0.14.0 + # via httpcore httpcore==1.0.2 + # via httpx httpx==0.25.2 + # via openai idna==3.4 -numpy==1.26.2 -pandas==2.1.3 -pandas-stubs==2.1.1.230928 -pydantic==2.4.2 -pydantic-core==2.10.1 -python-dateutil==2.8.2 -pytz==2023.3.post1 + # via anyio + # via httpx +numpy==1.26.4 + # via openai + # via pandas + # via pandas-stubs +pandas==2.2.2 + # via openai +pandas-stubs==2.2.1.240316 + # via openai +pydantic==2.7.1 + # via openai +pydantic-core==2.18.2 + # via pydantic +python-dateutil==2.9.0.post0 + # via pandas +pytz==2024.1 + # via pandas six==1.16.0 + # via python-dateutil sniffio==1.3.0 + # via anyio + # via httpx + # via openai tqdm==4.66.1 -types-pytz==2023.3.1.1 + # via openai +types-pytz==2024.1.0.20240417 + # via pandas-stubs typing-extensions==4.8.0 -tzdata==2023.3 + # via openai + # via pydantic + # via pydantic-core +tzdata==2024.1 + # via pandas diff --git a/scripts/bootstrap b/scripts/bootstrap new file mode 100755 index 0000000000..29df07e77b --- /dev/null +++ b/scripts/bootstrap @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then + brew bundle check >/dev/null 2>&1 || { + echo "==> Installing Homebrew dependencies…" + brew bundle + } +fi + +echo "==> Installing Python dependencies…" + +# experimental uv support makes installations significantly faster +rye config --set-bool behavior.use-uv=true + +rye sync diff --git a/scripts/format b/scripts/format new file mode 100755 index 0000000000..667ec2d7af --- /dev/null +++ b/scripts/format @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +echo "==> Running formatters" +rye run format diff --git a/scripts/lint b/scripts/lint new file mode 100755 index 0000000000..64495ee345 --- /dev/null +++ b/scripts/lint @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +echo "==> Running lints" +rye run lint + +echo "==> Making sure it imports" +rye run python -c 'import openai' + diff --git a/scripts/mock b/scripts/mock new file mode 100755 index 0000000000..fe89a1d084 --- /dev/null +++ b/scripts/mock @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +if [[ -n "$1" && "$1" != '--'* ]]; then + URL="$1" + shift +else + URL="$(grep 'openapi_spec_url' .stats.yml | cut -d' ' -f2)" +fi + +# Check if the URL is empty +if [ -z "$URL" ]; then + echo "Error: No OpenAPI spec path/url provided or found in .stats.yml" + exit 1 +fi + +echo "==> Starting mock server with URL ${URL}" + +# Run prism mock on the given spec +if [ "$1" == "--daemon" ]; then + npm exec --package=@stoplight/prism-cli@~5.8 -- prism mock "$URL" &> .prism.log & + + # Wait for server to come online + echo -n "Waiting for server" + while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do + echo -n "." + sleep 0.1 + done + + if grep -q "✖ fatal" ".prism.log"; then + cat .prism.log + exit 1 + fi + + echo +else + npm exec --package=@stoplight/prism-cli@~5.8 -- prism mock "$URL" +fi diff --git a/scripts/test b/scripts/test new file mode 100755 index 0000000000..b3ace9013b --- /dev/null +++ b/scripts/test @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." 
+ +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color + +function prism_is_running() { + curl --silent "http://localhost:4010" >/dev/null 2>&1 +} + +kill_server_on_port() { + pids=$(lsof -t -i tcp:"$1" || echo "") + if [ "$pids" != "" ]; then + kill "$pids" + echo "Stopped $pids." + fi +} + +function is_overriding_api_base_url() { + [ -n "$TEST_API_BASE_URL" ] +} + +if ! is_overriding_api_base_url && ! prism_is_running ; then + # When we exit this script, make sure to kill the background mock server process + trap 'kill_server_on_port 4010' EXIT + + # Start the dev server + ./scripts/mock --daemon +fi + +if is_overriding_api_base_url ; then + echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" + echo +elif ! prism_is_running ; then + echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" + echo -e "running against your OpenAPI spec." + echo + echo -e "To run the server, pass in the path or url of your OpenAPI" + echo -e "spec to the prism command:" + echo + echo -e " \$ ${YELLOW}npm exec --package=@stoplight/prism-cli@~5.3.2 -- prism mock path/to/your.openapi.yml${NC}" + echo + + exit 1 +else + echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" + echo +fi + +echo "==> Running tests" +rye run pytest "$@" diff --git a/bin/ruffen-docs.py b/scripts/utils/ruffen-docs.py similarity index 100% rename from bin/ruffen-docs.py rename to scripts/utils/ruffen-docs.py diff --git a/src/openai/__init__.py b/src/openai/__init__.py index 0de58b3327..0e87ae9259 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -6,11 +6,13 @@ from typing_extensions import override from . 
import types -from ._types import NoneType, Transport, ProxiesTypes +from ._types import NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path from ._client import Client, OpenAI, Stream, Timeout, Transport, AsyncClient, AsyncOpenAI, AsyncStream, RequestOptions +from ._models import BaseModel from ._version import __title__, __version__ from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse +from ._constants import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES, DEFAULT_CONNECTION_LIMITS from ._exceptions import ( APIError, OpenAIError, @@ -27,6 +29,7 @@ UnprocessableEntityError, APIResponseValidationError, ) +from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient from ._utils._logs import setup_logging as _setup_logging __all__ = [ @@ -36,6 +39,8 @@ "NoneType", "Transport", "ProxiesTypes", + "NotGiven", + "NOT_GIVEN", "OpenAIError", "APIError", "APIStatusError", @@ -59,12 +64,22 @@ "OpenAI", "AsyncOpenAI", "file_from_path", + "BaseModel", + "DEFAULT_TIMEOUT", + "DEFAULT_MAX_RETRIES", + "DEFAULT_CONNECTION_LIMITS", + "DefaultHttpxClient", + "DefaultAsyncHttpxClient", ] from .lib import azure as _azure from .version import VERSION as VERSION from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI from .lib._old_api import * +from .lib.streaming import ( + AssistantEventHandler as AssistantEventHandler, + AsyncAssistantEventHandler as AsyncAssistantEventHandler, +) _setup_logging() @@ -93,6 +108,8 @@ organization: str | None = None +project: str | None = None + base_url: str | _httpx.URL | None = None timeout: float | Timeout | None = DEFAULT_TIMEOUT @@ -144,6 +161,17 @@ def organization(self, value: str | None) -> None: # type: ignore organization = value + @property # type: ignore + @override + def project(self) -> str | None: + return project + + @project.setter # type: ignore + def project(self, value: str | None) -> None: # type: ignore + global project + + project = value + @property @override def base_url(self) -> _httpx.URL: @@ -295,6 +323,7 @@ def _load_client() -> OpenAI: # type: ignore[reportUnusedFunction] _client = _ModuleClient( api_key=api_key, organization=organization, + project=project, base_url=base_url, timeout=timeout, max_retries=max_retries, @@ -320,6 +349,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction] files as files, images as images, models as models, + batches as batches, embeddings as embeddings, completions as completions, fine_tuning as fine_tuning, diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 43fad0603d..5d5d25fca9 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -29,7 +29,6 @@ cast, overload, ) -from functools import lru_cache from typing_extensions import Literal, override, get_origin import anyio @@ -61,7 +60,7 @@ RequestOptions, ModelBuilderProtocol, ) -from ._utils import is_dict, is_given, is_mapping +from ._utils import is_dict, is_list, is_given, lru_cache, is_mapping from ._compat import model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( @@ -71,15 +70,15 @@ extract_response_type, ) from ._constants import ( - DEFAULT_LIMITS, DEFAULT_TIMEOUT, MAX_RETRY_DELAY, DEFAULT_MAX_RETRIES, INITIAL_RETRY_DELAY, RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER, + DEFAULT_CONNECTION_LIMITS, ) -from ._streaming import Stream, AsyncStream +from ._streaming import Stream, SSEDecoder, AsyncStream, 
SSEBytesDecoder from ._exceptions import ( APIStatusError, APITimeoutError, @@ -361,6 +360,11 @@ def __init__( self._strict_response_validation = _strict_response_validation self._idempotency_header = None + if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] + raise TypeError( + "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `openai.DEFAULT_MAX_RETRIES`" + ) + def _enforce_trailing_slash(self, url: URL) -> URL: if url.raw_path.endswith(b"/"): return url @@ -431,6 +435,9 @@ def _prepare_url(self, url: str) -> URL: return merge_url + def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder: + return SSEDecoder() + def _build_request( self, options: FinalRequestOptions, @@ -451,14 +458,18 @@ def _build_request( headers = self._build_headers(options) params = _merge_mappings(self._custom_query, options.params) + content_type = headers.get("Content-Type") # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with # additional information for us as it has to be in this form # for the server to be able to correctly parse the request: # multipart/form-data; boundary=---abc-- - if headers.get("Content-Type") == "multipart/form-data": - headers.pop("Content-Type") + if content_type is not None and content_type.startswith("multipart/form-data"): + if "boundary" not in content_type: + # only remove the header if the boundary hasn't been explicitly set + # as the caller doesn't want httpx to come up with their own boundary + headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding @@ -494,9 +505,25 @@ def _serialize_multipartform(self, data: Mapping[object, object]) -> dict[str, o ) serialized: dict[str, object] = {} for key, value in items: - if key in serialized: - raise ValueError(f"Duplicate key encountered: {key}; This behaviour is not supported") - serialized[key] = value + existing = serialized.get(key) + + if not existing: + serialized[key] = value + continue + + # If a value has already been set for this key then that + # means we're sending data like `array[]=[1, 2, 3]` and we + # need to tell httpx that we want to send multiple values with + # the same key which is done by using a list or a tuple. + # + # Note: 2d arrays should never result in the same key at both + # levels so it's safe to assume that if the value is a list, + # it was because we changed it to be a list. + if is_list(existing): + existing.append(value) + else: + serialized[key] = [existing, value] + return serialized def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]: @@ -688,7 +715,27 @@ def _idempotency_key(self) -> str: return f"stainless-python-retry-{uuid.uuid4()}" -class SyncHttpxClientWrapper(httpx.Client): +class _DefaultHttpxClient(httpx.Client): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +if TYPE_CHECKING: + DefaultHttpxClient = httpx.Client + """An alias to `httpx.Client` that provides the same defaults that this SDK + uses internally. 
+ + This is useful because overriding the `http_client` with your own instance of + `httpx.Client` will result in httpx's defaults being used, not ours. + """ +else: + DefaultHttpxClient = _DefaultHttpxClient + + +class SyncHttpxClientWrapper(DefaultHttpxClient): def __del__(self) -> None: try: self.close() @@ -724,7 +771,7 @@ def __init__( if http_client is not None: raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") else: - limits = DEFAULT_LIMITS + limits = DEFAULT_CONNECTION_LIMITS if transport is not None: warnings.warn( @@ -757,6 +804,11 @@ def __init__( else: timeout = DEFAULT_TIMEOUT + if http_client is not None and not isinstance(http_client, httpx.Client): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + f"Invalid `http_client` argument; Expected an instance of `httpx.Client` but got {type(http_client)}" + ) + super().__init__( version=version, limits=limits, @@ -777,6 +829,7 @@ def __init__( proxies=proxies, transport=transport, limits=limits, + follow_redirects=True, ) def is_closed(self) -> bool: @@ -893,6 +946,8 @@ def _request( if self.custom_auth is not None: kwargs["auth"] = self.custom_auth + log.debug("Sending HTTP Request: %s %s", request.method, request.url) + try: response = self._client.send( request, @@ -931,8 +986,14 @@ def _request( raise APIConnectionError(request=request) from err log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, ) + log.debug("request_id: %s", response.headers.get("x-request-id")) try: response.raise_for_status() @@ -1228,7 +1289,27 @@ def get_api_list( return self._request_api_list(model, page, opts) -class AsyncHttpxClientWrapper(httpx.AsyncClient): +class _DefaultAsyncHttpxClient(httpx.AsyncClient): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +if TYPE_CHECKING: + DefaultAsyncHttpxClient = httpx.AsyncClient + """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK + uses internally. + + This is useful because overriding the `http_client` with your own instance of + `httpx.AsyncClient` will result in httpx's defaults being used, not ours. 
+ """ +else: + DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient + + +class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient): def __del__(self) -> None: try: # TODO(someday): support non asyncio runtimes here @@ -1265,7 +1346,7 @@ def __init__( if http_client is not None: raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") else: - limits = DEFAULT_LIMITS + limits = DEFAULT_CONNECTION_LIMITS if transport is not None: warnings.warn( @@ -1298,6 +1379,11 @@ def __init__( else: timeout = DEFAULT_TIMEOUT + if http_client is not None and not isinstance(http_client, httpx.AsyncClient): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + f"Invalid `http_client` argument; Expected an instance of `httpx.AsyncClient` but got {type(http_client)}" + ) + super().__init__( version=version, base_url=base_url, @@ -1318,6 +1404,7 @@ def __init__( proxies=proxies, transport=transport, limits=limits, + follow_redirects=True, ) def is_closed(self) -> bool: @@ -1814,8 +1901,12 @@ def __str__(self) -> str: def get_platform() -> Platform: - system = platform.system().lower() - platform_name = platform.platform().lower() + try: + system = platform.system().lower() + platform_name = platform.platform().lower() + except Exception: + return "Unknown" + if "iphone" in platform_name or "ipad" in platform_name: # Tested using Python3IDE on an iPhone 11 and Pythonista on an iPad 7 # system is Darwin and platform_name is a string like: @@ -1858,8 +1949,8 @@ def platform_headers(version: str) -> Dict[str, str]: "X-Stainless-Package-Version": version, "X-Stainless-OS": str(get_platform()), "X-Stainless-Arch": str(get_architecture()), - "X-Stainless-Runtime": platform.python_implementation(), - "X-Stainless-Runtime-Version": platform.python_version(), + "X-Stainless-Runtime": get_python_runtime(), + "X-Stainless-Runtime-Version": get_python_version(), } @@ -1875,9 +1966,27 @@ def __str__(self) -> str: Arch = Union[OtherArch, Literal["x32", "x64", "arm", "arm64", "unknown"]] +def get_python_runtime() -> str: + try: + return platform.python_implementation() + except Exception: + return "unknown" + + +def get_python_version() -> str: + try: + return platform.python_version() + except Exception: + return "unknown" + + def get_architecture() -> Arch: - python_bitness, _ = platform.architecture() - machine = platform.machine().lower() + try: + python_bitness, _ = platform.architecture() + machine = platform.machine().lower() + except Exception: + return "unknown" + if machine in ("arm64", "aarch64"): return "arm64" diff --git a/src/openai/_client.py b/src/openai/_client.py index 5043d60e2a..8f3060c6f6 100644 --- a/src/openai/_client.py +++ b/src/openai/_client.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -57,24 +57,29 @@ class OpenAI(SyncAPIClient): models: resources.Models fine_tuning: resources.FineTuning beta: resources.Beta + batches: resources.Batches with_raw_response: OpenAIWithRawResponse with_streaming_response: OpenAIWithStreamedResponse # client options api_key: str organization: str | None + project: str | None def __init__( self, *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, - # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. + # Configure a custom httpx client. + # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. http_client: httpx.Client | None = None, # Enable or disable schema validation for data returned by the API. # When enabled an error APIResponseValidationError is raised @@ -91,6 +96,7 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` """ if api_key is None: api_key = os.environ.get("OPENAI_API_KEY") @@ -104,6 +110,10 @@ def __init__( organization = os.environ.get("OPENAI_ORG_ID") self.organization = organization + if project is None: + project = os.environ.get("OPENAI_PROJECT_ID") + self.project = project + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -132,6 +142,7 @@ def __init__( self.models = resources.Models(self) self.fine_tuning = resources.FineTuning(self) self.beta = resources.Beta(self) + self.batches = resources.Batches(self) self.with_raw_response = OpenAIWithRawResponse(self) self.with_streaming_response = OpenAIWithStreamedResponse(self) @@ -153,6 +164,7 @@ def default_headers(self) -> dict[str, str | Omit]: **super().default_headers, "X-Stainless-Async": "false", "OpenAI-Organization": self.organization if self.organization is not None else Omit(), + "OpenAI-Project": self.project if self.project is not None else Omit(), **self._custom_headers, } @@ -161,6 +173,7 @@ def copy( *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, @@ -196,6 +209,7 @@ def copy( return self.__class__( api_key=api_key or self.api_key, organization=organization or self.organization, + project=project or self.project, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -255,24 +269,29 @@ class AsyncOpenAI(AsyncAPIClient): models: resources.AsyncModels fine_tuning: resources.AsyncFineTuning beta: resources.AsyncBeta + batches: resources.AsyncBatches with_raw_response: AsyncOpenAIWithRawResponse with_streaming_response: AsyncOpenAIWithStreamedResponse # client options api_key: str organization: str | None + project: str | None def __init__( self, *, api_key: str | None = None, organization: str | None = 
None, + project: str | None = None, base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, - # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. + # Configure a custom httpx client. + # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. http_client: httpx.AsyncClient | None = None, # Enable or disable schema validation for data returned by the API. # When enabled an error APIResponseValidationError is raised @@ -289,6 +308,7 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` """ if api_key is None: api_key = os.environ.get("OPENAI_API_KEY") @@ -302,6 +322,10 @@ def __init__( organization = os.environ.get("OPENAI_ORG_ID") self.organization = organization + if project is None: + project = os.environ.get("OPENAI_PROJECT_ID") + self.project = project + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -330,6 +354,7 @@ def __init__( self.models = resources.AsyncModels(self) self.fine_tuning = resources.AsyncFineTuning(self) self.beta = resources.AsyncBeta(self) + self.batches = resources.AsyncBatches(self) self.with_raw_response = AsyncOpenAIWithRawResponse(self) self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self) @@ -351,6 +376,7 @@ def default_headers(self) -> dict[str, str | Omit]: **super().default_headers, "X-Stainless-Async": f"async:{get_async_library()}", "OpenAI-Organization": self.organization if self.organization is not None else Omit(), + "OpenAI-Project": self.project if self.project is not None else Omit(), **self._custom_headers, } @@ -359,6 +385,7 @@ def copy( *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, @@ -394,6 +421,7 @@ def copy( return self.__class__( api_key=api_key or self.api_key, organization=organization or self.organization, + project=project or self.project, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -454,6 +482,7 @@ def __init__(self, client: OpenAI) -> None: self.models = resources.ModelsWithRawResponse(client.models) self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning) self.beta = resources.BetaWithRawResponse(client.beta) + self.batches = resources.BatchesWithRawResponse(client.batches) class AsyncOpenAIWithRawResponse: @@ -468,6 +497,7 @@ def __init__(self, client: AsyncOpenAI) -> None: self.models = resources.AsyncModelsWithRawResponse(client.models) self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning) self.beta = resources.AsyncBetaWithRawResponse(client.beta) + self.batches = resources.AsyncBatchesWithRawResponse(client.batches) class OpenAIWithStreamedResponse: @@ -482,6 +512,7 @@ def __init__(self, client: OpenAI) -> None: 
self.models = resources.ModelsWithStreamingResponse(client.models) self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning) self.beta = resources.BetaWithStreamingResponse(client.beta) + self.batches = resources.BatchesWithStreamingResponse(client.batches) class AsyncOpenAIWithStreamedResponse: @@ -496,6 +527,7 @@ def __init__(self, client: AsyncOpenAI) -> None: self.models = resources.AsyncModelsWithStreamingResponse(client.models) self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning) self.beta = resources.AsyncBetaWithStreamingResponse(client.beta) + self.batches = resources.AsyncBatchesWithStreamingResponse(client.batches) Client = OpenAI diff --git a/src/openai/_compat.py b/src/openai/_compat.py index 3cda39909b..74c7639b4c 100644 --- a/src/openai/_compat.py +++ b/src/openai/_compat.py @@ -1,13 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Union, TypeVar, cast +from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload from datetime import date, datetime +from typing_extensions import Self import pydantic from pydantic.fields import FieldInfo from ._types import StrBytesIntFloat +_T = TypeVar("_T") _ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel) # --------------- Pydantic v2 compatibility --------------- @@ -178,8 +180,43 @@ class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): # cached properties if TYPE_CHECKING: cached_property = property + + # we define a separate type (copied from typeshed) + # that represents that `cached_property` is `set`able + # at runtime, which differs from `@property`. + # + # this is a separate type as editors likely special case + # `@property` and we don't want to cause issues just to have + # more helpful internal types. + + class typed_cached_property(Generic[_T]): + func: Callable[[Any], _T] + attrname: str | None + + def __init__(self, func: Callable[[Any], _T]) -> None: + ... + + @overload + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: + ... + + @overload + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: + ... + + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self: + raise NotImplementedError() + + def __set_name__(self, owner: type[Any], name: str) -> None: + ... + + # __set__ is not defined at runtime, but @cached_property is designed to be settable + def __set__(self, instance: object, value: _T) -> None: + ... else: try: from functools import cached_property as cached_property except ImportError: from cached_property import cached_property as cached_property + + typed_cached_property = cached_property diff --git a/src/openai/_constants.py b/src/openai/_constants.py index dffb8ecfb6..3f82bed037 100644 --- a/src/openai/_constants.py +++ b/src/openai/_constants.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
import httpx @@ -8,7 +8,7 @@ # default timeout is 10 minutes DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0) DEFAULT_MAX_RETRIES = 2 -DEFAULT_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) +DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100) INITIAL_RETRY_DELAY = 0.5 MAX_RETRY_DELAY = 8.0 diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py index d7ded1248f..f6731cfac5 100644 --- a/src/openai/_exceptions.py +++ b/src/openai/_exceptions.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -8,6 +8,7 @@ import httpx from ._utils import is_dict +from ._models import construct_type __all__ = [ "BadRequestError", @@ -51,9 +52,9 @@ def __init__(self, message: str, request: httpx.Request, *, body: object | None) self.body = body if is_dict(body): - self.code = cast(Any, body.get("code")) - self.param = cast(Any, body.get("param")) - self.type = cast(Any, body.get("type")) + self.code = cast(Any, construct_type(type_=Optional[str], value=body.get("code"))) + self.param = cast(Any, construct_type(type_=Optional[str], value=body.get("param"))) + self.type = cast(Any, construct_type(type_=str, value=body.get("type"))) else: self.code = None self.param = None @@ -75,11 +76,13 @@ class APIStatusError(APIError): response: httpx.Response status_code: int + request_id: str | None def __init__(self, message: str, *, response: httpx.Response, body: object | None) -> None: super().__init__(message, response.request, body=body) self.response = response self.status_code = response.status_code + self.request_id = response.headers.get("x-request-id") class APIConnectionError(APIError): diff --git a/src/openai/_files.py b/src/openai/_files.py index bebfb19501..ad7b668b4b 100644 --- a/src/openai/_files.py +++ b/src/openai/_files.py @@ -13,12 +13,17 @@ FileContent, RequestFiles, HttpxFileTypes, + Base64FileInput, HttpxFileContent, HttpxRequestFiles, ) from ._utils import is_tuple_t, is_mapping_t, is_sequence_t +def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]: + return isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike) + + def is_file_content(obj: object) -> TypeGuard[FileContent]: return ( isinstance(obj, bytes) or isinstance(obj, tuple) or isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike) diff --git a/src/openai/_legacy_response.py b/src/openai/_legacy_response.py index c36c94f165..1de906b167 100644 --- a/src/openai/_legacy_response.py +++ b/src/openai/_legacy_response.py @@ -5,25 +5,28 @@ import logging import datetime import functools -from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, Iterator, AsyncIterator, cast -from typing_extensions import Awaitable, ParamSpec, get_args, override, deprecated, get_origin +from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, Iterator, AsyncIterator, cast, overload +from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin import anyio import httpx +import pydantic from ._types import NoneType -from ._utils import is_given +from ._utils import is_given, extract_type_arg, is_annotated_type from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER +from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type from ._exceptions import APIResponseValidationError if TYPE_CHECKING: 
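A hedged illustration of the new `request_id` attribute on `APIStatusError` (read from the `x-request-id` response header), assuming an API key is configured and using a deliberately invalid model ID:

```py
import openai
from openai import OpenAI

client = OpenAI()

try:
    client.models.retrieve("no-such-model")  # placeholder, expected to fail
except openai.APIStatusError as err:
    # request_id may be None if the header was not returned
    print(err.status_code, err.request_id, err.code)
```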
from ._models import FinalRequestOptions - from ._base_client import Stream, BaseClient, AsyncStream + from ._base_client import BaseClient P = ParamSpec("P") R = TypeVar("R") +_T = TypeVar("_T") log: logging.Logger = logging.getLogger(__name__) @@ -43,7 +46,7 @@ class LegacyAPIResponse(Generic[R]): _cast_to: type[R] _client: BaseClient[Any, Any] - _parsed: R | None + _parsed_by_type: dict[type[Any], Any] _stream: bool _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None _options: FinalRequestOptions @@ -62,27 +65,66 @@ def __init__( ) -> None: self._cast_to = cast_to self._client = client - self._parsed = None + self._parsed_by_type = {} self._stream = stream self._stream_cls = stream_cls self._options = options self.http_response = raw + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: + ... + + @overload def parse(self) -> R: + ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. + NOTE: For the async client: this will become a coroutine in the next major version. + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. - NOTE: For the async client: this will become a coroutine in the next major version. + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` """ - if self._parsed is not None: - return self._parsed + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] - parsed = self._parse() + parsed = self._parse(to=to) if is_given(self._options.post_parser): parsed = self._options.post_parser(parsed) - self._parsed = parsed + self._parsed_by_type[cache_key] = parsed return parsed @property @@ -135,13 +177,33 @@ def elapsed(self) -> datetime.timedelta: """The time taken for the complete request/response cycle to complete.""" return self.http_response.elapsed - def _parse(self) -> R: + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) + if self._stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}") + + return cast( + _T, + to( + cast_to=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. 
Stream[ChunkType]", + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + if self._stream_cls: return cast( R, self._stream_cls( - cast_to=_extract_stream_chunk_type(self._stream_cls), + cast_to=extract_stream_chunk_type(self._stream_cls), response=self.http_response, client=cast(Any, self._client), ), @@ -160,7 +222,12 @@ def _parse(self) -> R: ), ) - cast_to = self._cast_to + cast_to = to if to is not None else self._cast_to + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + if cast_to is NoneType: return cast(R, None) @@ -168,6 +235,12 @@ def _parse(self) -> R: if cast_to == str: return cast(R, response.text) + if cast_to == int: + return cast(R, int(response.text)) + + if cast_to == float: + return cast(R, float(response.text)) + origin = get_origin(cast_to) or cast_to if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent): @@ -186,14 +259,9 @@ def _parse(self) -> R: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - # The check here is necessary as we are subverting the the type system - # with casts as the relationship between TypeVars and Types are very strict - # which means we must return *exactly* what was input or transform it in a - # way that retains the TypeVar state. As we cannot do that in this function - # then we have to resort to using `cast`. At the time of writing, we know this - # to be safe as we have handled all the types that could be bound to the - # `ResponseT` TypeVar, however if that TypeVar is ever updated in the future, then - # this function would become unsafe but a type checker would not report an error. + if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") + if ( cast_to is not object and not origin is list @@ -202,12 +270,12 @@ def _parse(self) -> R: and not issubclass(origin, BaseModel) ): raise RuntimeError( - f"Invalid state, expected {cast_to} to be a subclass type of {BaseModel}, {dict}, {list} or {Union}." + f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." ) # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 - content_type, *_ = response.headers.get("content-type").split(";") + content_type, *_ = response.headers.get("content-type", "*").split(";") if content_type != "application/json": if is_basemodel(cast_to): try: @@ -253,15 +321,6 @@ def __init__(self) -> None: ) -def _extract_stream_chunk_type(stream_cls: type) -> type: - args = get_args(stream_cls) - if not args: - raise TypeError( - f"Expected stream_cls to have been given a generic type argument, e.g. Stream[Foo] but received {stream_cls}", - ) - return cast(type, args[0]) - - def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]: """Higher order function that takes one of our bound API methods and wraps it to support returning the raw `APIResponse` object directly. 
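As a sketch of the reworked `parse()` above (per-type caching in `_parsed_by_type` plus the new `to=` argument), assuming an API key is configured:

```py
from openai import OpenAI

client = OpenAI()

response = client.models.with_raw_response.list()

page = response.parse()        # default generated type
raw = response.parse(to=dict)  # re-parse into a plain dict; results are cached per target type
print(response.request_id, type(page), type(raw))
```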
@@ -269,7 +328,7 @@ def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIRespon @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "true" kwargs["extra_headers"] = extra_headers @@ -286,7 +345,7 @@ def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P @functools.wraps(func) async def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "true" kwargs["extra_headers"] = extra_headers diff --git a/src/openai/_models.py b/src/openai/_models.py index 48d5624f64..75c68cc730 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import inspect from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast from datetime import date, datetime @@ -10,6 +11,7 @@ Protocol, Required, TypedDict, + TypeGuard, final, override, runtime_checkable, @@ -30,7 +32,20 @@ AnyMapping, HttpxRequestFiles, ) -from ._utils import is_list, is_given, is_mapping, parse_date, parse_datetime, strip_not_given +from ._utils import ( + PropertyInfo, + is_list, + is_given, + lru_cache, + is_mapping, + parse_date, + coerce_boolean, + parse_datetime, + strip_not_given, + extract_type_arg, + is_annotated_type, + strip_annotated_type, +) from ._compat import ( PYDANTIC_V2, ConfigDict, @@ -46,6 +61,9 @@ ) from ._constants import RAW_RESPONSE_HEADER +if TYPE_CHECKING: + from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema + __all__ = ["BaseModel", "GenericModel"] _T = TypeVar("_T") @@ -58,7 +76,9 @@ class _ConfigProtocol(Protocol): class BaseModel(pydantic.BaseModel): if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow") + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")) + ) else: @property @@ -70,6 +90,79 @@ def model_fields_set(self) -> set[str]: class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] extra: Any = pydantic.Extra.allow # type: ignore + def to_dict( + self, + *, + mode: Literal["json", "python"] = "python", + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> dict[str, object]: + """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + mode: + If mode is 'json', the dictionary will only contain JSON serializable types. e.g. `datetime` will be turned into a string, `"2024-3-22T18:11:19.117000Z"`. + If mode is 'python', the dictionary may contain any Python objects. e.g. 
`datetime(2024, 3, 22)` + + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2. + """ + return self.model_dump( + mode=mode, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + def to_json( + self, + *, + indent: int | None = 2, + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> str: + """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation). + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2` + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2. 
+ """ + return self.model_dump_json( + indent=indent, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + @override def __str__(self) -> str: # mypy complains about an invalid self arg @@ -158,7 +251,9 @@ def model_dump( exclude_defaults: bool = False, exclude_none: bool = False, round_trip: bool = False, - warnings: bool = True, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, ) -> dict[str, Any]: """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump @@ -186,6 +281,10 @@ def model_dump( raise ValueError("round_trip is only supported in Pydantic v2") if warnings != True: raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") return super().dict( # pyright: ignore[reportDeprecated] include=include, exclude=exclude, @@ -207,7 +306,9 @@ def model_dump_json( exclude_defaults: bool = False, exclude_none: bool = False, round_trip: bool = False, - warnings: bool = True, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, ) -> str: """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json @@ -231,6 +332,10 @@ def model_dump_json( raise ValueError("round_trip is only supported in Pydantic v2") if warnings != True: raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") return super().json( # type: ignore[reportDeprecated] indent=indent, include=include, @@ -259,7 +364,6 @@ def _construct_field(value: object, field: FieldInfo, key: str) -> object: def is_basemodel(type_: type) -> bool: """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`""" - origin = get_origin(type_) or type_ if is_union(type_): for variant in get_args(type_): if is_basemodel(variant): @@ -267,14 +371,29 @@ def is_basemodel(type_: type) -> bool: return False + return is_basemodel_type(type_) + + +def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: + origin = get_origin(type_) or type_ return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) -def construct_type(*, value: object, type_: type) -> object: +def construct_type(*, value: object, type_: object) -> object: """Loose coercion to the expected type with construction of nested values. If the given value does not match the expected type then it is returned as-is. """ + # we allow `object` as the input type because otherwise, passing things like + # `Literal['value']` will be reported as a type error by type checkers + type_ = cast("type[object]", type_) + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(type_): + meta: tuple[Any, ...] = get_args(type_)[1:] + type_ = extract_type_arg(type_, 0) + else: + meta = tuple() # we need to use the origin class for any types that are subscripted generics # e.g. 
Dict[str, object] @@ -283,10 +402,32 @@ def construct_type(*, value: object, type_: type) -> object: if is_union(origin): try: - return validate_type(type_=type_, value=value) + return validate_type(type_=cast("type[object]", type_), value=value) except Exception: pass + # if the type is a discriminated union then we want to construct the right variant + # in the union, even if the data doesn't match exactly, otherwise we'd break code + # that relies on the constructed class types, e.g. + # + # class FooType: + # kind: Literal['foo'] + # value: str + # + # class BarType: + # kind: Literal['bar'] + # value: int + # + # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then + # we'd end up constructing `FooType` when it should be `BarType`. + discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta) + if discriminator and is_mapping(value): + variant_value = value.get(discriminator.field_alias_from or discriminator.field_name) + if variant_value and isinstance(variant_value, str): + variant_type = discriminator.mapping.get(variant_value) + if variant_type: + return construct_type(type_=variant_type, value=value) + # if the data is not valid, use the first variant that doesn't fail while deserializing for variant in args: try: @@ -344,6 +485,129 @@ def construct_type(*, value: object, type_: type) -> object: return value +@runtime_checkable +class CachedDiscriminatorType(Protocol): + __discriminator__: DiscriminatorDetails + + +class DiscriminatorDetails: + field_name: str + """The name of the discriminator field in the variant class, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] + ``` + + Will result in field_name='type' + """ + + field_alias_from: str | None + """The name of the discriminator field in the API response, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] = Field(alias='type_from_api') + ``` + + Will result in field_alias_from='type_from_api' + """ + + mapping: dict[str, type] + """Mapping of discriminator value to variant type, e.g. 
+ + {'foo': FooVariant, 'bar': BarVariant} + """ + + def __init__( + self, + *, + mapping: dict[str, type], + discriminator_field: str, + discriminator_alias: str | None, + ) -> None: + self.mapping = mapping + self.field_name = discriminator_field + self.field_alias_from = discriminator_alias + + +def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None: + if isinstance(union, CachedDiscriminatorType): + return union.__discriminator__ + + discriminator_field_name: str | None = None + + for annotation in meta_annotations: + if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None: + discriminator_field_name = annotation.discriminator + break + + if not discriminator_field_name: + return None + + mapping: dict[str, type] = {} + discriminator_alias: str | None = None + + for variant in get_args(union): + variant = strip_annotated_type(variant) + if is_basemodel_type(variant): + if PYDANTIC_V2: + field = _extract_field_schema_pv2(variant, discriminator_field_name) + if not field: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field.get("serialization_alias") + + field_schema = field["schema"] + + if field_schema["type"] == "literal": + for entry in cast("LiteralSchema", field_schema)["expected"]: + if isinstance(entry, str): + mapping[entry] = variant + else: + field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + if not field_info: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field_info.alias + + if field_info.annotation and is_literal_type(field_info.annotation): + for entry in get_args(field_info.annotation): + if isinstance(entry, str): + mapping[entry] = variant + + if not mapping: + return None + + details = DiscriminatorDetails( + mapping=mapping, + discriminator_field=discriminator_field_name, + discriminator_alias=discriminator_alias, + ) + cast(CachedDiscriminatorType, union).__discriminator__ = details + return details + + +def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: + schema = model.__pydantic_core_schema__ + if schema["type"] != "model": + return None + + fields_schema = schema["schema"] + if fields_schema["type"] != "model-fields": + return None + + fields_schema = cast("ModelFieldsSchema", fields_schema) + + field = fields_schema["fields"].get(field_name) + if not field: + return None + + return cast("ModelField", field) # pyright: ignore[reportUnnecessaryCast] + + def validate_type(*, type_: type[_T], value: object) -> _T: """Strict validation that the given value matches the expected type""" if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): @@ -363,7 +627,14 @@ class GenericModel(BaseGenericModel, BaseModel): if PYDANTIC_V2: - from pydantic import TypeAdapter + from pydantic import TypeAdapter as _TypeAdapter + + _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter)) + + if TYPE_CHECKING: + from pydantic import TypeAdapter + else: + TypeAdapter = _CachedTypeAdapter def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: return TypeAdapter(type_).validate_python(value) diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py index d66e137ecd..6f7356eb3c 100644 --- a/src/openai/_module_client.py +++ b/src/openai/_module_client.py @@ -1,4 +1,4 @@ -# File 
generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import override @@ -42,6 +42,12 @@ def __load__(self) -> resources.Models: return _load_client().models +class BatchesProxy(LazyProxy[resources.Batches]): + @override + def __load__(self) -> resources.Batches: + return _load_client().batches + + class EmbeddingsProxy(LazyProxy[resources.Embeddings]): @override def __load__(self) -> resources.Embeddings: @@ -72,6 +78,7 @@ def __load__(self) -> resources.FineTuning: audio: resources.Audio = AudioProxy().__as_proxied__() images: resources.Images = ImagesProxy().__as_proxied__() models: resources.Models = ModelsProxy().__as_proxied__() +batches: resources.Batches = BatchesProxy().__as_proxied__() embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__() completions: resources.Completions = CompletionsProxy().__as_proxied__() moderations: resources.Moderations = ModerationsProxy().__as_proxied__() diff --git a/src/openai/_resource.py b/src/openai/_resource.py index db1b0fa45a..fff9ba19c3 100644 --- a/src/openai/_resource.py +++ b/src/openai/_resource.py @@ -1,11 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import time -import asyncio from typing import TYPE_CHECKING +import anyio + if TYPE_CHECKING: from ._client import OpenAI, AsyncOpenAI @@ -39,4 +40,4 @@ def __init__(self, client: AsyncOpenAI) -> None: self._get_api_list = client.get_api_list async def _sleep(self, seconds: float) -> None: - await asyncio.sleep(seconds) + await anyio.sleep(seconds) diff --git a/src/openai/_response.py b/src/openai/_response.py index 15a323afa4..4ba2ae681c 100644 --- a/src/openai/_response.py +++ b/src/openai/_response.py @@ -16,25 +16,29 @@ Iterator, AsyncIterator, cast, + overload, ) from typing_extensions import Awaitable, ParamSpec, override, get_origin import anyio import httpx +import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_var_from_base +from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER +from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type from ._exceptions import OpenAIError, APIResponseValidationError if TYPE_CHECKING: from ._models import FinalRequestOptions - from ._base_client import Stream, BaseClient, AsyncStream + from ._base_client import BaseClient P = ParamSpec("P") R = TypeVar("R") +_T = TypeVar("_T") _APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]") _AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]") @@ -44,7 +48,7 @@ class BaseAPIResponse(Generic[R]): _cast_to: type[R] _client: BaseClient[Any, Any] - _parsed: R | None + _parsed_by_type: dict[type[Any], Any] _is_sse_stream: bool _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None _options: FinalRequestOptions @@ -63,7 +67,7 @@ def __init__( ) -> None: self._cast_to = cast_to self._client = client - self._parsed = None + self._parsed_by_type = {} self._is_sse_stream = stream self._stream_cls = stream_cls self._options = options @@ -116,8 +120,28 @@ def __repr__(self) -> str: f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>" ) - def 
_parse(self) -> R: + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) + if self._is_sse_stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}") + + return cast( + _T, + to( + cast_to=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]", + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + if self._stream_cls: return cast( R, @@ -141,7 +165,12 @@ def _parse(self) -> R: ), ) - cast_to = self._cast_to + cast_to = to if to is not None else self._cast_to + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + if cast_to is NoneType: return cast(R, None) @@ -152,6 +181,12 @@ def _parse(self) -> R: if cast_to == bytes: return cast(R, response.content) + if cast_to == int: + return cast(R, int(response.text)) + + if cast_to == float: + return cast(R, float(response.text)) + origin = get_origin(cast_to) or cast_to # handle the legacy binary response case @@ -171,14 +206,9 @@ def _parse(self) -> R: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - # The check here is necessary as we are subverting the the type system - # with casts as the relationship between TypeVars and Types are very strict - # which means we must return *exactly* what was input or transform it in a - # way that retains the TypeVar state. As we cannot do that in this function - # then we have to resort to using `cast`. At the time of writing, we know this - # to be safe as we have handled all the types that could be bound to the - # `ResponseT` TypeVar, however if that TypeVar is ever updated in the future, then - # this function would become unsafe but a type checker would not report an error. + if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") + if ( cast_to is not object and not origin is list @@ -187,12 +217,12 @@ def _parse(self) -> R: and not issubclass(origin, BaseModel) ): raise RuntimeError( - f"Invalid state, expected {cast_to} to be a subclass type of {BaseModel}, {dict}, {list} or {Union}." + f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." ) # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 - content_type, *_ = response.headers.get("content-type").split(";") + content_type, *_ = response.headers.get("content-type", "*").split(";") if content_type != "application/json": if is_basemodel(cast_to): try: @@ -228,22 +258,61 @@ def _parse(self) -> R: class APIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: + ... + + @overload def parse(self) -> R: + ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. 
+ + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` """ - if self._parsed is not None: - return self._parsed + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] if not self._is_sse_stream: self.read() - parsed = self._parse() + parsed = self._parse(to=to) if is_given(self._options.post_parser): parsed = self._options.post_parser(parsed) - self._parsed = parsed + self._parsed_by_type[cache_key] = parsed return parsed def read(self) -> bytes: @@ -297,22 +366,59 @@ def iter_lines(self) -> Iterator[str]: class AsyncAPIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + async def parse(self, *, to: type[_T]) -> _T: + ... + + @overload async def parse(self) -> R: + ... + + async def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `httpx.Response` """ - if self._parsed is not None: - return self._parsed + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] if not self._is_sse_stream: await self.read() - parsed = self._parse() + parsed = self._parse(to=to) if is_given(self._options.post_parser): parsed = self._options.post_parser(parsed) - self._parsed = parsed + self._parsed_by_type[cache_key] = parsed return parsed async def read(self) -> bytes: @@ -545,7 +651,7 @@ def to_streamed_response_wrapper(func: Callable[P, R]) -> Callable[P, ResponseCo @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[APIResponse[R]]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "stream" kwargs["extra_headers"] = extra_headers @@ -566,7 +672,7 @@ def async_to_streamed_response_wrapper( @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[AsyncAPIResponse[R]]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "stream" kwargs["extra_headers"] = extra_headers @@ -590,7 +696,7 @@ def to_custom_streamed_response_wrapper( @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[_APIResponseT]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} 
extra_headers[RAW_RESPONSE_HEADER] = "stream" extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls @@ -615,7 +721,7 @@ def async_to_custom_streamed_response_wrapper( @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[_AsyncAPIResponseT]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "stream" extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls @@ -635,7 +741,7 @@ def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]] @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "raw" kwargs["extra_headers"] = extra_headers @@ -652,7 +758,7 @@ def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P @functools.wraps(func) async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "raw" kwargs["extra_headers"] = extra_headers @@ -674,7 +780,7 @@ def to_custom_raw_response_wrapper( @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "raw" extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls @@ -697,7 +803,7 @@ def async_to_custom_raw_response_wrapper( @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} extra_headers[RAW_RESPONSE_HEADER] = "raw" extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls @@ -708,26 +814,6 @@ def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]: return wrapped -def extract_stream_chunk_type(stream_cls: type) -> type: - """Given a type like `Stream[T]`, returns the generic type variable `T`. - - This also handles the case where a concrete subclass is given, e.g. - ```py - class MyStream(Stream[bytes]): - ... - - extract_stream_chunk_type(MyStream) -> bytes - ``` - """ - from ._base_client import Stream, AsyncStream - - return extract_type_var_from_base( - stream_cls, - index=0, - generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)), - ) - - def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type: """Given a type like `APIResponse[T]`, returns the generic type variable `T`. 
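For context on the streamed-response wrappers being touched here, a minimal sketch of how `.with_streaming_response` is typically consumed (assuming an API key is configured); `request_id` is the property added to `APIResponse` in this diff:

```py
from openai import OpenAI

client = OpenAI()

# the wrapped method returns a context manager; the body is only read on demand
with client.models.with_streaming_response.list() as response:
    print(response.request_id)
    for line in response.iter_lines():
        print(line)
```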
diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index 85cec70c11..0fda992cff 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -2,13 +2,14 @@ from __future__ import annotations import json +import inspect from types import TracebackType from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast -from typing_extensions import Self, override +from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, runtime_checkable import httpx -from ._utils import is_mapping +from ._utils import is_mapping, extract_type_var_from_base from ._exceptions import APIError if TYPE_CHECKING: @@ -23,6 +24,8 @@ class Stream(Generic[_T]): response: httpx.Response + _decoder: SSEBytesDecoder + def __init__( self, *, @@ -33,7 +36,7 @@ def __init__( self.response = response self._cast_to = cast_to self._client = client - self._decoder = SSEDecoder() + self._decoder = client._make_sse_decoder() self._iterator = self.__stream__() def __next__(self) -> _T: @@ -44,7 +47,7 @@ def __iter__(self) -> Iterator[_T]: yield item def _iter_events(self) -> Iterator[ServerSentEvent]: - yield from self._decoder.iter(self.response.iter_lines()) + yield from self._decoder.iter_bytes(self.response.iter_bytes()) def __stream__(self) -> Iterator[_T]: cast_to = cast(Any, self._cast_to) @@ -59,14 +62,40 @@ def __stream__(self) -> Iterator[_T]: if sse.event is None: data = sse.json() if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + raise APIError( - message="An error occurred during streaming", + message=message, request=self.response.request, body=data["error"], ) yield process_data(data=data, cast_to=cast_to, response=response) + else: + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + # Ensure the entire stream is consumed for _sse in iterator: ... 
@@ -96,6 +125,8 @@ class AsyncStream(Generic[_T]): response: httpx.Response + _decoder: SSEDecoder | SSEBytesDecoder + def __init__( self, *, @@ -106,7 +137,7 @@ def __init__( self.response = response self._cast_to = cast_to self._client = client - self._decoder = SSEDecoder() + self._decoder = client._make_sse_decoder() self._iterator = self.__stream__() async def __anext__(self) -> _T: @@ -117,7 +148,7 @@ async def __aiter__(self) -> AsyncIterator[_T]: yield item async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: - async for sse in self._decoder.aiter(self.response.aiter_lines()): + async for sse in self._decoder.aiter_bytes(self.response.aiter_bytes()): yield sse async def __stream__(self) -> AsyncIterator[_T]: @@ -133,14 +164,40 @@ async def __stream__(self) -> AsyncIterator[_T]: if sse.event is None: data = sse.json() if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + raise APIError( - message="An error occurred during streaming", + message=message, request=self.response.request, body=data["error"], ) yield process_data(data=data, cast_to=cast_to, response=response) + else: + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + # Ensure the entire stream is consumed async for _sse in iterator: ... 
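With the changes above, an `error` payload received mid-stream now raises `APIError` carrying the server-provided message rather than the generic "An error occurred during streaming". A hedged sketch of handling this from user code, assuming an API key is configured and using a placeholder model name:

```py
from openai import OpenAI, APIError

client = OpenAI()

try:
    stream = client.chat.completions.create(
        model="gpt-4o",  # placeholder model name
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")
except APIError as err:
    # the message now comes from the error payload when the server provides one
    print(f"stream failed: {err.message}")
```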
@@ -218,21 +275,49 @@ def __init__(self) -> None: self._last_event_id = None self._retry = None - def iter(self, iterator: Iterator[str]) -> Iterator[ServerSentEvent]: - """Given an iterator that yields lines, iterate over it & yield every event encountered""" - for line in iterator: - line = line.rstrip("\n") - sse = self.decode(line) - if sse is not None: - yield sse - - async def aiter(self, iterator: AsyncIterator[str]) -> AsyncIterator[ServerSentEvent]: - """Given an async iterator that yields lines, iterate over it & yield every event encountered""" - async for line in iterator: - line = line.rstrip("\n") - sse = self.decode(line) - if sse is not None: - yield sse + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + for chunk in self._iter_chunks(iterator): + # Split before decoding so splitlines() only uses \r and \n + for raw_line in chunk.splitlines(): + line = raw_line.decode("utf-8") + sse = self.decode(line) + if sse: + yield sse + + def _iter_chunks(self, iterator: Iterator[bytes]) -> Iterator[bytes]: + """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks""" + data = b"" + for chunk in iterator: + for line in chunk.splitlines(keepends=True): + data += line + if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")): + yield data + data = b"" + if data: + yield data + + async def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + async for chunk in self._aiter_chunks(iterator): + # Split before decoding so splitlines() only uses \r and \n + for raw_line in chunk.splitlines(): + line = raw_line.decode("utf-8") + sse = self.decode(line) + if sse: + yield sse + + async def _aiter_chunks(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[bytes]: + """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks""" + data = b"" + async for chunk in iterator: + for line in chunk.splitlines(keepends=True): + data += line + if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")): + yield data + data = b"" + if data: + yield data def decode(self, line: str) -> ServerSentEvent | None: # See: https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation # noqa: E501 @@ -281,3 +366,45 @@ def decode(self, line: str) -> ServerSentEvent | None: pass # Field is ignored. return None + + +@runtime_checkable +class SSEBytesDecoder(Protocol): + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + ... + + def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]: + """Given an async iterator that yields raw binary data, iterate over it & yield every event encountered""" + ... + + +def is_stream_class_type(typ: type) -> TypeGuard[type[Stream[object]] | type[AsyncStream[object]]]: + """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`""" + origin = get_origin(typ) or typ + return inspect.isclass(origin) and issubclass(origin, (Stream, AsyncStream)) + + +def extract_stream_chunk_type( + stream_cls: type, + *, + failure_message: str | None = None, +) -> type: + """Given a type like `Stream[T]`, returns the generic type variable `T`. 
+ + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyStream(Stream[bytes]): + ... + + extract_stream_chunk_type(MyStream) -> bytes + ``` + """ + from ._base_client import Stream, AsyncStream + + return extract_type_var_from_base( + stream_cls, + index=0, + generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)), + failure_message=failure_message, + ) diff --git a/src/openai/_types.py b/src/openai/_types.py index b5bf8f8af0..de9b1dd48b 100644 --- a/src/openai/_types.py +++ b/src/openai/_types.py @@ -41,8 +41,10 @@ ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]] ProxiesTypes = Union[str, Proxy, ProxiesDict] if TYPE_CHECKING: + Base64FileInput = Union[IO[bytes], PathLike[str]] FileContent = Union[IO[bytes], bytes, PathLike[str]] else: + Base64FileInput = Union[IO[bytes], PathLike] FileContent = Union[IO[bytes], bytes, PathLike] # PathLike is not subscriptable in Python 3.8. FileTypes = Union[ # file (or bytes) diff --git a/src/openai/_utils/__init__.py b/src/openai/_utils/__init__.py index 2dcfc122f1..31b5b22799 100644 --- a/src/openai/_utils/__init__.py +++ b/src/openai/_utils/__init__.py @@ -1,3 +1,4 @@ +from ._sync import asyncify as asyncify from ._proxy import LazyProxy as LazyProxy from ._utils import ( flatten as flatten, @@ -5,9 +6,11 @@ is_list as is_list, is_given as is_given, is_tuple as is_tuple, + lru_cache as lru_cache, is_mapping as is_mapping, is_tuple_t as is_tuple_t, parse_date as parse_date, + is_iterable as is_iterable, is_sequence as is_sequence, coerce_float as coerce_float, is_mapping_t as is_mapping_t, @@ -32,6 +35,7 @@ is_list_type as is_list_type, is_union_type as is_union_type, extract_type_arg as extract_type_arg, + is_iterable_type as is_iterable_type, is_required_type as is_required_type, is_annotated_type as is_annotated_type, strip_annotated_type as strip_annotated_type, @@ -41,5 +45,7 @@ from ._transform import ( PropertyInfo as PropertyInfo, transform as transform, + async_transform as async_transform, maybe_transform as maybe_transform, + async_maybe_transform as async_maybe_transform, ) diff --git a/src/openai/_utils/_proxy.py b/src/openai/_utils/_proxy.py index 6f05efcd21..c46a62a698 100644 --- a/src/openai/_utils/_proxy.py +++ b/src/openai/_utils/_proxy.py @@ -10,7 +10,7 @@ class LazyProxy(Generic[T], ABC): """Implements data methods to pretend that an instance is another instance. - This includes forwarding attribute access and othe methods. + This includes forwarding attribute access and other methods. 
""" # Note: we have to special case proxies that themselves return proxies @@ -45,7 +45,7 @@ def __dir__(self) -> Iterable[str]: @property # type: ignore @override - def __class__(self) -> type: + def __class__(self) -> type: # pyright: ignore proxied = self.__get_proxied__() if issubclass(type(proxied), LazyProxy): return type(proxied) diff --git a/src/openai/_utils/_sync.py b/src/openai/_utils/_sync.py new file mode 100644 index 0000000000..595924e5b1 --- /dev/null +++ b/src/openai/_utils/_sync.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import functools +from typing import TypeVar, Callable, Awaitable +from typing_extensions import ParamSpec + +import anyio +import anyio.to_thread + +T_Retval = TypeVar("T_Retval") +T_ParamSpec = ParamSpec("T_ParamSpec") + + +# copied from `asyncer`, https://github.com/tiangolo/asyncer +def asyncify( + function: Callable[T_ParamSpec, T_Retval], + *, + cancellable: bool = False, + limiter: anyio.CapacityLimiter | None = None, +) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: + """ + Take a blocking function and create an async one that receives the same + positional and keyword arguments, and that when called, calls the original function + in a worker thread using `anyio.to_thread.run_sync()`. Internally, + `asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports + keyword arguments additional to positional arguments and it adds better support for + autocompletion and inline errors for the arguments of the function called and the + return value. + + If the `cancellable` option is enabled and the task waiting for its completion is + cancelled, the thread will still run its course but its return value (or any raised + exception) will be ignored. + + Use it like this: + + ```Python + def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: + # Do work + return "Some result" + + + result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b") + print(result) + ``` + + ## Arguments + + `function`: a blocking regular callable (e.g. a function) + `cancellable`: `True` to allow cancellation of the operation + `limiter`: capacity limiter to use to limit the total amount of threads running + (if omitted, the default limiter is used) + + ## Return + + An async function that takes the same positional and keyword arguments as the + original one, that when called runs the same original function in a thread worker + and returns the result. 
+ """ + + async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: + partial_f = functools.partial(function, *args, **kwargs) + return await anyio.to_thread.run_sync(partial_f, cancellable=cancellable, limiter=limiter) + + return wrapper diff --git a/src/openai/_utils/_transform.py b/src/openai/_utils/_transform.py index 3a1c14969b..47e262a515 100644 --- a/src/openai/_utils/_transform.py +++ b/src/openai/_utils/_transform.py @@ -1,19 +1,26 @@ from __future__ import annotations +import io +import base64 +import pathlib from typing import Any, Mapping, TypeVar, cast from datetime import date, datetime from typing_extensions import Literal, get_args, override, get_type_hints +import anyio import pydantic from ._utils import ( is_list, is_mapping, + is_iterable, ) +from .._files import is_base64_file_input from ._typing import ( is_list_type, is_union_type, extract_type_arg, + is_iterable_type, is_required_type, is_annotated_type, strip_annotated_type, @@ -27,7 +34,7 @@ # TODO: ensure works correctly with forward references in all cases -PropertyFormat = Literal["iso8601", "custom"] +PropertyFormat = Literal["iso8601", "base64", "custom"] class PropertyInfo: @@ -44,6 +51,7 @@ class MyParams(TypedDict): alias: str | None format: PropertyFormat | None format_template: str | None + discriminator: str | None def __init__( self, @@ -51,14 +59,16 @@ def __init__( alias: str | None = None, format: PropertyFormat | None = None, format_template: str | None = None, + discriminator: str | None = None, ) -> None: self.alias = alias self.format = format self.format_template = format_template + self.discriminator = discriminator @override def __repr__(self) -> str: - return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}')" + return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')" def maybe_transform( @@ -157,7 +167,12 @@ def _transform_recursive( if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) - if is_list_type(stripped_type) and is_list(data): + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): inner_type = extract_type_arg(stripped_type, 0) return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] @@ -173,11 +188,7 @@ def _transform_recursive( if isinstance(data, pydantic.BaseModel): return model_dump(data, exclude_unset=True) - return _transform_value(data, annotation) - - -def _transform_value(data: object, type_: type) -> object: - annotated_type = _get_annotated_type(type_) + annotated_type = _get_annotated_type(annotation) if annotated_type is None: return data @@ -198,6 +209,22 @@ def _format_data(data: object, format_: PropertyFormat, format_template: str | N if format_ == "custom" and format_template is not None: return data.strftime(format_template) + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = data.read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return 
base64.b64encode(binary).decode("ascii") + return data @@ -215,3 +242,141 @@ def _transform_typeddict( else: result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_) return result + + +async def async_maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `async_transform()` that allows `None` to be passed. + + See `async_transform()` for more details. + """ + if data is None: + return None + return await async_transform(data, expected_type) + + +async def async_transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. + """ + transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +async def _async_transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return await _async_transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. 
+ for subtype in get_args(stripped_type): + data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return await _async_format_data(data, annotation.format, annotation.format_template) + + return data + + +async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object: + if isinstance(data, (date, datetime)): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = await anyio.Path(data).read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + + return data + + +async def _async_transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_) + return result diff --git a/src/openai/_utils/_typing.py b/src/openai/_utils/_typing.py index a020822bc0..c036991f04 100644 --- a/src/openai/_utils/_typing.py +++ b/src/openai/_utils/_typing.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import Any, TypeVar, cast +from typing import Any, TypeVar, Iterable, cast +from collections import abc as _c_abc from typing_extensions import Required, Annotated, get_args, get_origin from .._types import InheritsGeneric @@ -15,6 +16,12 @@ def is_list_type(typ: type) -> bool: return (get_origin(typ) or typ) == list +def is_iterable_type(typ: type) -> bool: + """If the given type is `typing.Iterable[T]`""" + origin = get_origin(typ) or typ + return origin == Iterable or origin == _c_abc.Iterable + + def is_union_type(typ: type) -> bool: return _is_union(get_origin(typ)) @@ -45,7 +52,13 @@ def extract_type_arg(typ: type, index: int) -> type: raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err -def extract_type_var_from_base(typ: type, *, generic_bases: tuple[type, ...], index: int) -> type: +def extract_type_var_from_base( + typ: type, + *, + generic_bases: tuple[type, ...], + index: int, + failure_message: str | None = None, +) -> type: """Given a type like `Foo[T]`, returns the generic type variable `T`. This also handles the case where a concrete subclass is given, e.g. 
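A rough sketch of how the transform changes above behave. `maybe_transform`, `async_maybe_transform` and `PropertyInfo` live in the private `openai._utils` package, and the field names, alias and file path below are made up for illustration:

```py
import pathlib
from typing import Iterable
from typing_extensions import Annotated, TypedDict

from openai._utils import PropertyInfo, maybe_transform


class Attachment(TypedDict, total=False):
    file_id: Annotated[str, PropertyInfo(alias="fileID")]  # hypothetical alias


class Params(TypedDict, total=False):
    # a Path / file-like value marked format="base64" is read and base64-encoded
    document: Annotated[pathlib.Path, PropertyInfo(format="base64")]
    # Iterable[T] values (e.g. generators) are now walked like List[T]
    attachments: Iterable[Attachment]


body = maybe_transform(
    {
        "document": pathlib.Path("report.pdf"),  # placeholder file; its bytes get encoded
        "attachments": ({"file_id": str(i)} for i in range(2)),
    },
    Params,
)
# body == {"document": "<base64>", "attachments": [{"fileID": "0"}, {"fileID": "1"}]}
# `await async_maybe_transform(...)` does the same without blocking the event loop.
```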
@@ -104,4 +117,4 @@ class MyResponse(Foo[_T]): return extracted - raise RuntimeError(f"Could not resolve inner type variable at index {index} for {typ}") + raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}") diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py index 1c5c21a8ea..34797c2905 100644 --- a/src/openai/_utils/_utils.py +++ b/src/openai/_utils/_utils.py @@ -20,7 +20,7 @@ import sniffio -from .._types import Headers, NotGiven, FileTypes, NotGivenOr, HeadersLike +from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike from .._compat import parse_date as parse_date, parse_datetime as parse_datetime _T = TypeVar("_T") @@ -164,6 +164,10 @@ def is_list(obj: object) -> TypeGuard[list[object]]: return isinstance(obj, list) +def is_iterable(obj: object) -> TypeGuard[Iterable[object]]: + return isinstance(obj, Iterable) + + def deepcopy_minimal(item: _T) -> _T: """Minimal reimplementation of copy.deepcopy() that will only copy certain object types: @@ -261,6 +265,8 @@ def wrapper(*args: object, **kwargs: object) -> object: ) msg = f"Missing required arguments; Expected either {variations} arguments to be given" else: + assert len(variants) > 0 + # TODO: this error message is not deterministic missing = list(set(variants[0]) - given_params) if len(missing) > 1: @@ -364,7 +370,6 @@ def file_from_path(path: str) -> FileTypes: def get_required_header(headers: HeadersLike, header: str) -> str: lower_header = header.lower() if isinstance(headers, Mapping): - headers = cast(Headers, headers) for k, v in headers.items(): if k.lower() == lower_header and isinstance(v, str): return v @@ -385,3 +390,13 @@ def get_async_library() -> str: return sniffio.current_async_library() except Exception: return "false" + + +def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]: + """A version of functools.lru_cache that retains the type signature + for the wrapped function arguments. + """ + wrapper = functools.lru_cache( # noqa: TID251 + maxsize=maxsize, + ) + return cast(Any, wrapper) # type: ignore[no-any-return] diff --git a/src/openai/_version.py b/src/openai/_version.py index b4e6d226ea..de2bb78a79 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "openai" -__version__ = "1.9.0" # x-release-please-version +__version__ = "1.30.4" # x-release-please-version diff --git a/src/openai/cli/_api/image.py b/src/openai/cli/_api/image.py index e6149eeac4..3e2a0a90f1 100644 --- a/src/openai/cli/_api/image.py +++ b/src/openai/cli/_api/image.py @@ -14,6 +14,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: sub = subparser.add_parser("images.generate") + sub.add_argument("-m", "--model", type=str) sub.add_argument("-p", "--prompt", type=str, required=True) sub.add_argument("-n", "--num-images", type=int, default=1) sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") @@ -21,6 +22,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: sub.set_defaults(func=CLIImage.create, args_model=CLIImageCreateArgs) sub = subparser.add_parser("images.edit") + sub.add_argument("-m", "--model", type=str) sub.add_argument("-p", "--prompt", type=str, required=True) sub.add_argument("-n", "--num-images", type=int, default=1) sub.add_argument( @@ -42,6 +44,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: sub.set_defaults(func=CLIImage.edit, args_model=CLIImageEditArgs) sub = subparser.add_parser("images.create_variation") + sub.add_argument("-m", "--model", type=str) sub.add_argument("-n", "--num-images", type=int, default=1) sub.add_argument( "-I", @@ -60,6 +63,7 @@ class CLIImageCreateArgs(BaseModel): num_images: int size: str response_format: str + model: NotGivenOr[str] = NOT_GIVEN class CLIImageCreateVariationArgs(BaseModel): @@ -67,6 +71,7 @@ class CLIImageCreateVariationArgs(BaseModel): num_images: int size: str response_format: str + model: NotGivenOr[str] = NOT_GIVEN class CLIImageEditArgs(BaseModel): @@ -76,12 +81,14 @@ class CLIImageEditArgs(BaseModel): response_format: str prompt: str mask: NotGivenOr[str] = NOT_GIVEN + model: NotGivenOr[str] = NOT_GIVEN class CLIImage: @staticmethod def create(args: CLIImageCreateArgs) -> None: image = get_client().images.generate( + model=args.model, prompt=args.prompt, n=args.num_images, # casts required because the API is typed for enums @@ -97,6 +104,7 @@ def create_variation(args: CLIImageCreateVariationArgs) -> None: buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") image = get_client().images.create_variation( + model=args.model, image=("image", buffer_reader), n=args.num_images, # casts required because the API is typed for enums @@ -118,6 +126,7 @@ def edit(args: CLIImageEditArgs) -> None: mask = BufferReader(file_reader.read(), desc="Mask progress") image = get_client().images.edit( + model=args.model, prompt=args.prompt, image=("image", buffer_reader), n=args.num_images, diff --git a/src/openai/cli/_tools/migrate.py b/src/openai/cli/_tools/migrate.py index 14773302e1..53073b866f 100644 --- a/src/openai/cli/_tools/migrate.py +++ b/src/openai/cli/_tools/migrate.py @@ -138,7 +138,7 @@ def install() -> Path: unpacked_dir.mkdir(parents=True, exist_ok=True) with tarfile.open(temp_file, "r:gz") as archive: - archive.extractall(unpacked_dir) + archive.extractall(unpacked_dir, filter="data") for item in unpacked_dir.iterdir(): item.rename(target_dir / item.name) diff --git a/src/openai/lib/_validators.py b/src/openai/lib/_validators.py index e36f0e95fb..cf24cd2294 100644 --- a/src/openai/lib/_validators.py +++ b/src/openai/lib/_validators.py @@ -678,9 +678,11 @@ def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_ac df_train = df.sample(n=n_train, 
random_state=42) df_valid = df.drop(df_train.index) df_train[["prompt", "completion"]].to_json( # type: ignore - fnames[0], lines=True, orient="records", force_ascii=False + fnames[0], lines=True, orient="records", force_ascii=False, indent=None + ) + df_valid[["prompt", "completion"]].to_json( + fnames[1], lines=True, orient="records", force_ascii=False, indent=None ) - df_valid[["prompt", "completion"]].to_json(fnames[1], lines=True, orient="records", force_ascii=False) n_classes, pos_class = get_classification_hyperparams(df) additional_params += " --compute_classification_metrics" @@ -690,7 +692,9 @@ def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_ac additional_params += f" --classification_n_classes {n_classes}" else: assert len(fnames) == 1 - df[["prompt", "completion"]].to_json(fnames[0], lines=True, orient="records", force_ascii=False) + df[["prompt", "completion"]].to_json( + fnames[0], lines=True, orient="records", force_ascii=False, indent=None + ) # Add -v VALID_FILE if we split the file into train / valid files_string = ("s" if split else "") + " to `" + ("` and `".join(fnames)) diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py index 27bebd8cab..b76b83c61c 100644 --- a/src/openai/lib/azure.py +++ b/src/openai/lib/azure.py @@ -22,6 +22,7 @@ "/embeddings", "/audio/transcriptions", "/audio/translations", + "/audio/speech", "/images/generations", ] ) @@ -129,6 +130,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AzureADTokenProvider | None = None, organization: str | None = None, + project: str | None = None, base_url: str | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -142,6 +144,7 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `AZURE_OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN` - `api_version` from `OPENAI_API_VERSION` - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT` @@ -154,7 +157,7 @@ def __init__( azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request. azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`. - Note: this means you won't be able to use non-deployment endpoints. + Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs. 
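A minimal sketch of the Azure changes above: the client now accepts a `project` argument, and `/audio/speech` is routed through the deployment endpoints. The endpoint, API version and deployment name are placeholders.

```py
import os

from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-02-01",                     # placeholder API version
    project=os.environ.get("OPENAI_PROJECT_ID"),  # newly supported argument
)

speech = client.audio.speech.create(
    model="my-tts-deployment",  # placeholder deployment name
    voice="alloy",
    input="Hello from Azure",
)
```

The async client (`AsyncAzureOpenAI`) gains the same `project` parameter, as shown in the overloads below.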
""" if api_key is None: api_key = os.environ.get("AZURE_OPENAI_API_KEY") @@ -204,6 +207,7 @@ def __init__( super().__init__( api_key=api_key, organization=organization, + project=project, base_url=base_url, timeout=timeout, max_retries=max_retries, @@ -222,6 +226,7 @@ def copy( *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, api_version: str | None = None, azure_ad_token: str | None = None, azure_ad_token_provider: AzureADTokenProvider | None = None, @@ -241,6 +246,7 @@ def copy( return super().copy( api_key=api_key, organization=organization, + project=project, base_url=base_url, timeout=timeout, http_client=http_client, @@ -305,6 +311,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, + project: str | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -324,6 +331,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, + project: str | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -343,6 +351,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, + project: str | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -362,6 +371,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, + project: str | None = None, base_url: str | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -375,6 +385,7 @@ def __init__( This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `AZURE_OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN` - `api_version` from `OPENAI_API_VERSION` - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT` @@ -387,7 +398,7 @@ def __init__( azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request. azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`. - Note: this means you won't be able to use non-deployment endpoints. + Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs. 
""" if api_key is None: api_key = os.environ.get("AZURE_OPENAI_API_KEY") @@ -437,6 +448,7 @@ def __init__( super().__init__( api_key=api_key, organization=organization, + project=project, base_url=base_url, timeout=timeout, max_retries=max_retries, @@ -455,6 +467,7 @@ def copy( *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, api_version: str | None = None, azure_ad_token: str | None = None, azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, @@ -474,6 +487,7 @@ def copy( return super().copy( api_key=api_key, organization=organization, + project=project, base_url=base_url, timeout=timeout, http_client=http_client, diff --git a/src/openai/lib/streaming/__init__.py b/src/openai/lib/streaming/__init__.py new file mode 100644 index 0000000000..eb378d2561 --- /dev/null +++ b/src/openai/lib/streaming/__init__.py @@ -0,0 +1,8 @@ +from ._assistants import ( + AssistantEventHandler as AssistantEventHandler, + AssistantEventHandlerT as AssistantEventHandlerT, + AssistantStreamManager as AssistantStreamManager, + AsyncAssistantEventHandler as AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager as AsyncAssistantStreamManager, +) diff --git a/src/openai/lib/streaming/_assistants.py b/src/openai/lib/streaming/_assistants.py new file mode 100644 index 0000000000..03d97ec2eb --- /dev/null +++ b/src/openai/lib/streaming/_assistants.py @@ -0,0 +1,1035 @@ +from __future__ import annotations + +import asyncio +from types import TracebackType +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast +from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never + +import httpx + +from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator +from ..._models import construct_type +from ..._streaming import Stream, AsyncStream +from ...types.beta import AssistantStreamEvent +from ...types.beta.threads import ( + Run, + Text, + Message, + ImageFile, + TextDelta, + MessageDelta, + MessageContent, + MessageContentDelta, +) +from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta + + +class AssistantEventHandler: + text_deltas: Iterable[str] + """Iterator over just the text deltas in the stream. + + This corresponds to the `thread.message.delta` event + in the API. 
+ + ```py + for text in stream.text_deltas: + print(text, end="", flush=True) + print() + ``` + """ + + def __init__(self) -> None: + self._current_event: AssistantStreamEvent | None = None + self._current_message_content_index: int | None = None + self._current_message_content: MessageContent | None = None + self._current_tool_call_index: int | None = None + self._current_tool_call: ToolCall | None = None + self.__current_run_step_id: str | None = None + self.__current_run: Run | None = None + self.__run_step_snapshots: dict[str, RunStep] = {} + self.__message_snapshots: dict[str, Message] = {} + self.__current_message_snapshot: Message | None = None + + self.text_deltas = self.__text_deltas__() + self._iterator = self.__stream__() + self.__stream: Stream[AssistantStreamEvent] | None = None + + def _init(self, stream: Stream[AssistantStreamEvent]) -> None: + if self.__stream: + raise RuntimeError( + "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" + ) + + self.__stream = stream + + def __next__(self) -> AssistantStreamEvent: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[AssistantStreamEvent]: + for item in self._iterator: + yield item + + @property + def current_event(self) -> AssistantStreamEvent | None: + return self._current_event + + @property + def current_run(self) -> Run | None: + return self.__current_run + + @property + def current_run_step_snapshot(self) -> RunStep | None: + if not self.__current_run_step_id: + return None + + return self.__run_step_snapshots[self.__current_run_step_id] + + @property + def current_message_snapshot(self) -> Message | None: + return self.__current_message_snapshot + + def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called when the context manager exits. + """ + if self.__stream: + self.__stream.close() + + def until_done(self) -> None: + """Waits until the stream has been consumed""" + consume_sync_iterator(self) + + def get_final_run(self) -> Run: + """Wait for the stream to finish and returns the completed Run object""" + self.until_done() + + if not self.__current_run: + raise RuntimeError("No final run object found") + + return self.__current_run + + def get_final_run_steps(self) -> list[RunStep]: + """Wait for the stream to finish and returns the steps taken in this run""" + self.until_done() + + if not self.__run_step_snapshots: + raise RuntimeError("No run steps found") + + return [step for step in self.__run_step_snapshots.values()] + + def get_final_messages(self) -> list[Message]: + """Wait for the stream to finish and returns the messages emitted in this run""" + self.until_done() + + if not self.__message_snapshots: + raise RuntimeError("No messages found") + + return [message for message in self.__message_snapshots.values()] + + def __text_deltas__(self) -> Iterator[str]: + for event in self: + if event.event != "thread.message.delta": + continue + + for content_delta in event.data.delta.content or []: + if content_delta.type == "text" and content_delta.text and content_delta.text.value: + yield content_delta.text.value + + # event handlers + + def on_end(self) -> None: + """Fires when the stream has finished. + + This happens if the stream is read to completion + or if an exception occurs during iteration. 
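A short usage sketch for the event handler introduced above; the assistant ID and message are placeholders, and only one callback is overridden:

```py
from openai import OpenAI
from openai.lib.streaming import AssistantEventHandler

client = OpenAI()


class Printer(AssistantEventHandler):
    def on_text_delta(self, delta, snapshot) -> None:
        # print each text fragment as it streams in
        print(delta.value or "", end="", flush=True)


with client.beta.threads.create_and_run_stream(
    assistant_id="asst_...",  # placeholder
    thread={"messages": [{"role": "user", "content": "Say hello"}]},
    event_handler=Printer(),
) as stream:
    stream.until_done()
```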
+ """ + + def on_event(self, event: AssistantStreamEvent) -> None: + """Callback that is fired for every Server-Sent-Event""" + + def on_run_step_created(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is created""" + + def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: + """Callback that is fired whenever a run step delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the run step. For example, a tool calls event may + look like this: + + # delta + tool_calls=[ + RunStepDeltaToolCallsCodeInterpreter( + index=0, + type='code_interpreter', + id=None, + code_interpreter=CodeInterpreter(input=' sympy', outputs=None) + ) + ] + # snapshot + tool_calls=[ + CodeToolCall( + id='call_wKayJlcYV12NiadiZuJXxcfx', + code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), + type='code_interpreter', + index=0 + ) + ], + """ + + def on_run_step_done(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is completed""" + + def on_tool_call_created(self, tool_call: ToolCall) -> None: + """Callback that is fired when a tool call is created""" + + def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: + """Callback that is fired when a tool call delta is encountered""" + + def on_tool_call_done(self, tool_call: ToolCall) -> None: + """Callback that is fired when a tool call delta is encountered""" + + def on_exception(self, exception: Exception) -> None: + """Fired whenever an exception happens during streaming""" + + def on_timeout(self) -> None: + """Fires if the request times out""" + + def on_message_created(self, message: Message) -> None: + """Callback that is fired when a message is created""" + + def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: + """Callback that is fired whenever a message delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the message. For example, a text content event may + look like this: + + # delta + MessageDeltaText( + index=0, + type='text', + text=Text( + value=' Jane' + ), + ) + # snapshot + MessageContentText( + index=0, + type='text', + text=Text( + value='Certainly, Jane' + ), + ) + """ + + def on_message_done(self, message: Message) -> None: + """Callback that is fired when a message is completed""" + + def on_text_created(self, text: Text) -> None: + """Callback that is fired when a text content block is created""" + + def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: + """Callback that is fired whenever a text content delta is returned + by the API. + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the text. 
For example: + + on_text_delta(TextDelta(value="The"), Text(value="The")), + on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), + on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), + on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), + on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")), + """ + + def on_text_done(self, text: Text) -> None: + """Callback that is fired when a text content block is finished""" + + def on_image_file_done(self, image_file: ImageFile) -> None: + """Callback that is fired when an image file block is finished""" + + def _emit_sse_event(self, event: AssistantStreamEvent) -> None: + self._current_event = event + self.on_event(event) + + self.__current_message_snapshot, new_content = accumulate_event( + event=event, + current_message_snapshot=self.__current_message_snapshot, + ) + if self.__current_message_snapshot is not None: + self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot + + accumulate_run_step( + event=event, + run_step_snapshots=self.__run_step_snapshots, + ) + + for content_delta in new_content: + assert self.__current_message_snapshot is not None + + block = self.__current_message_snapshot.content[content_delta.index] + if block.type == "text": + self.on_text_created(block.text) + + if ( + event.event == "thread.run.completed" + or event.event == "thread.run.cancelled" + or event.event == "thread.run.expired" + or event.event == "thread.run.failed" + or event.event == "thread.run.requires_action" + ): + self.__current_run = event.data + if self._current_tool_call: + self.on_tool_call_done(self._current_tool_call) + elif ( + event.event == "thread.run.created" + or event.event == "thread.run.in_progress" + or event.event == "thread.run.cancelling" + or event.event == "thread.run.queued" + ): + self.__current_run = event.data + elif event.event == "thread.message.created": + self.on_message_created(event.data) + elif event.event == "thread.message.delta": + snapshot = self.__current_message_snapshot + assert snapshot is not None + + message_delta = event.data.delta + if message_delta.content is not None: + for content_delta in message_delta.content: + if content_delta.type == "text" and content_delta.text: + snapshot_content = snapshot.content[content_delta.index] + assert snapshot_content.type == "text" + self.on_text_delta(content_delta.text, snapshot_content.text) + + # If the delta is for a new message content: + # - emit on_text_done/on_image_file_done for the previous message content + # - emit on_text_created/on_image_created for the new message content + if content_delta.index != self._current_message_content_index: + if self._current_message_content is not None: + if self._current_message_content.type == "text": + self.on_text_done(self._current_message_content.text) + elif self._current_message_content.type == "image_file": + self.on_image_file_done(self._current_message_content.image_file) + + self._current_message_content_index = content_delta.index + self._current_message_content = snapshot.content[content_delta.index] + + # Update the current_message_content (delta event is correctly emitted already) + self._current_message_content = snapshot.content[content_delta.index] + + self.on_message_delta(event.data.delta, snapshot) + elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": + self.__current_message_snapshot = event.data + 
self.__message_snapshots[event.data.id] = event.data + + if self._current_message_content_index is not None: + content = event.data.content[self._current_message_content_index] + if content.type == "text": + self.on_text_done(content.text) + elif content.type == "image_file": + self.on_image_file_done(content.image_file) + + self.on_message_done(event.data) + elif event.event == "thread.run.step.created": + self.__current_run_step_id = event.data.id + self.on_run_step_created(event.data) + elif event.event == "thread.run.step.in_progress": + self.__current_run_step_id = event.data.id + elif event.event == "thread.run.step.delta": + step_snapshot = self.__run_step_snapshots[event.data.id] + + run_step_delta = event.data.delta + if ( + run_step_delta.step_details + and run_step_delta.step_details.type == "tool_calls" + and run_step_delta.step_details.tool_calls is not None + ): + assert step_snapshot.step_details.type == "tool_calls" + for tool_call_delta in run_step_delta.step_details.tool_calls: + if tool_call_delta.index == self._current_tool_call_index: + self.on_tool_call_delta( + tool_call_delta, + step_snapshot.step_details.tool_calls[tool_call_delta.index], + ) + + # If the delta is for a new tool call: + # - emit on_tool_call_done for the previous tool_call + # - emit on_tool_call_created for the new tool_call + if tool_call_delta.index != self._current_tool_call_index: + if self._current_tool_call is not None: + self.on_tool_call_done(self._current_tool_call) + + self._current_tool_call_index = tool_call_delta.index + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + self.on_tool_call_created(self._current_tool_call) + + # Update the current_tool_call (delta event is correctly emitted already) + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + + self.on_run_step_delta( + event.data.delta, + step_snapshot, + ) + elif ( + event.event == "thread.run.step.completed" + or event.event == "thread.run.step.cancelled" + or event.event == "thread.run.step.expired" + or event.event == "thread.run.step.failed" + ): + if self._current_tool_call: + self.on_tool_call_done(self._current_tool_call) + + self.on_run_step_done(event.data) + self.__current_run_step_id = None + elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": + # currently no special handling + ... + else: + # we only want to error at build-time + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(event) + + self._current_event = None + + def __stream__(self) -> Iterator[AssistantStreamEvent]: + stream = self.__stream + if not stream: + raise RuntimeError("Stream has not been started yet") + + try: + for event in stream: + self._emit_sse_event(event) + + yield event + except (httpx.TimeoutException, asyncio.TimeoutError) as exc: + self.on_timeout() + self.on_exception(exc) + raise + except Exception as exc: + self.on_exception(exc) + raise + finally: + self.on_end() + + +AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler) + + +class AssistantStreamManager(Generic[AssistantEventHandlerT]): + """Wrapper over AssistantStreamEventHandler that is returned by `.stream()` + so that a context manager can be used. + + ```py + with client.threads.create_and_run_stream(...) as stream: + for event in stream: + ... 
+ ``` + """ + + def __init__( + self, + api_request: Callable[[], Stream[AssistantStreamEvent]], + *, + event_handler: AssistantEventHandlerT, + ) -> None: + self.__stream: Stream[AssistantStreamEvent] | None = None + self.__event_handler = event_handler + self.__api_request = api_request + + def __enter__(self) -> AssistantEventHandlerT: + self.__stream = self.__api_request() + self.__event_handler._init(self.__stream) + return self.__event_handler + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + self.__stream.close() + + +class AsyncAssistantEventHandler: + text_deltas: AsyncIterable[str] + """Iterator over just the text deltas in the stream. + + This corresponds to the `thread.message.delta` event + in the API. + + ```py + async for text in stream.text_deltas: + print(text, end="", flush=True) + print() + ``` + """ + + def __init__(self) -> None: + self._current_event: AssistantStreamEvent | None = None + self._current_message_content_index: int | None = None + self._current_message_content: MessageContent | None = None + self._current_tool_call_index: int | None = None + self._current_tool_call: ToolCall | None = None + self.__current_run_step_id: str | None = None + self.__current_run: Run | None = None + self.__run_step_snapshots: dict[str, RunStep] = {} + self.__message_snapshots: dict[str, Message] = {} + self.__current_message_snapshot: Message | None = None + + self.text_deltas = self.__text_deltas__() + self._iterator = self.__stream__() + self.__stream: AsyncStream[AssistantStreamEvent] | None = None + + def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None: + if self.__stream: + raise RuntimeError( + "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" + ) + + self.__stream = stream + + async def __anext__(self) -> AssistantStreamEvent: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]: + async for item in self._iterator: + yield item + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called when the context manager exits. 
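An async sketch matching the handler above; the `async with` block awaits the underlying request itself, so the client call is not awaited directly. The IDs are placeholders.

```py
import asyncio

from openai import AsyncOpenAI
from openai.lib.streaming import AsyncAssistantEventHandler

client = AsyncOpenAI()


class Printer(AsyncAssistantEventHandler):
    async def on_text_delta(self, delta, snapshot) -> None:
        print(delta.value or "", end="", flush=True)


async def main() -> None:
    async with client.beta.threads.create_and_run_stream(
        assistant_id="asst_...",  # placeholder
        thread={"messages": [{"role": "user", "content": "Say hello"}]},
        event_handler=Printer(),
    ) as stream:
        await stream.until_done()


asyncio.run(main())
```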
+ """ + if self.__stream: + await self.__stream.close() + + @property + def current_event(self) -> AssistantStreamEvent | None: + return self._current_event + + @property + def current_run(self) -> Run | None: + return self.__current_run + + @property + def current_run_step_snapshot(self) -> RunStep | None: + if not self.__current_run_step_id: + return None + + return self.__run_step_snapshots[self.__current_run_step_id] + + @property + def current_message_snapshot(self) -> Message | None: + return self.__current_message_snapshot + + async def until_done(self) -> None: + """Waits until the stream has been consumed""" + await consume_async_iterator(self) + + async def get_final_run(self) -> Run: + """Wait for the stream to finish and returns the completed Run object""" + await self.until_done() + + if not self.__current_run: + raise RuntimeError("No final run object found") + + return self.__current_run + + async def get_final_run_steps(self) -> list[RunStep]: + """Wait for the stream to finish and returns the steps taken in this run""" + await self.until_done() + + if not self.__run_step_snapshots: + raise RuntimeError("No run steps found") + + return [step for step in self.__run_step_snapshots.values()] + + async def get_final_messages(self) -> list[Message]: + """Wait for the stream to finish and returns the messages emitted in this run""" + await self.until_done() + + if not self.__message_snapshots: + raise RuntimeError("No messages found") + + return [message for message in self.__message_snapshots.values()] + + async def __text_deltas__(self) -> AsyncIterator[str]: + async for event in self: + if event.event != "thread.message.delta": + continue + + for content_delta in event.data.delta.content or []: + if content_delta.type == "text" and content_delta.text and content_delta.text.value: + yield content_delta.text.value + + # event handlers + + async def on_end(self) -> None: + """Fires when the stream has finished. + + This happens if the stream is read to completion + or if an exception occurs during iteration. + """ + + async def on_event(self, event: AssistantStreamEvent) -> None: + """Callback that is fired for every Server-Sent-Event""" + + async def on_run_step_created(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is created""" + + async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: + """Callback that is fired whenever a run step delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the run step. 
For example, a tool calls event may + look like this: + + # delta + tool_calls=[ + RunStepDeltaToolCallsCodeInterpreter( + index=0, + type='code_interpreter', + id=None, + code_interpreter=CodeInterpreter(input=' sympy', outputs=None) + ) + ] + # snapshot + tool_calls=[ + CodeToolCall( + id='call_wKayJlcYV12NiadiZuJXxcfx', + code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), + type='code_interpreter', + index=0 + ) + ], + """ + + async def on_run_step_done(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is completed""" + + async def on_tool_call_created(self, tool_call: ToolCall) -> None: + """Callback that is fired when a tool call is created""" + + async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: + """Callback that is fired when a tool call delta is encountered""" + + async def on_tool_call_done(self, tool_call: ToolCall) -> None: + """Callback that is fired when a tool call delta is encountered""" + + async def on_exception(self, exception: Exception) -> None: + """Fired whenever an exception happens during streaming""" + + async def on_timeout(self) -> None: + """Fires if the request times out""" + + async def on_message_created(self, message: Message) -> None: + """Callback that is fired when a message is created""" + + async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: + """Callback that is fired whenever a message delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the message. For example, a text content event may + look like this: + + # delta + MessageDeltaText( + index=0, + type='text', + text=Text( + value=' Jane' + ), + ) + # snapshot + MessageContentText( + index=0, + type='text', + text=Text( + value='Certainly, Jane' + ), + ) + """ + + async def on_message_done(self, message: Message) -> None: + """Callback that is fired when a message is completed""" + + async def on_text_created(self, text: Text) -> None: + """Callback that is fired when a text content block is created""" + + async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: + """Callback that is fired whenever a text content delta is returned + by the API. + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the text. 
For example: + + on_text_delta(TextDelta(value="The"), Text(value="The")), + on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), + on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), + on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), + on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")), + """ + + async def on_text_done(self, text: Text) -> None: + """Callback that is fired when a text content block is finished""" + + async def on_image_file_done(self, image_file: ImageFile) -> None: + """Callback that is fired when an image file block is finished""" + + async def _emit_sse_event(self, event: AssistantStreamEvent) -> None: + self._current_event = event + await self.on_event(event) + + self.__current_message_snapshot, new_content = accumulate_event( + event=event, + current_message_snapshot=self.__current_message_snapshot, + ) + if self.__current_message_snapshot is not None: + self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot + + accumulate_run_step( + event=event, + run_step_snapshots=self.__run_step_snapshots, + ) + + for content_delta in new_content: + assert self.__current_message_snapshot is not None + + block = self.__current_message_snapshot.content[content_delta.index] + if block.type == "text": + await self.on_text_created(block.text) + + if ( + event.event == "thread.run.completed" + or event.event == "thread.run.cancelled" + or event.event == "thread.run.expired" + or event.event == "thread.run.failed" + or event.event == "thread.run.requires_action" + ): + self.__current_run = event.data + if self._current_tool_call: + await self.on_tool_call_done(self._current_tool_call) + elif ( + event.event == "thread.run.created" + or event.event == "thread.run.in_progress" + or event.event == "thread.run.cancelling" + or event.event == "thread.run.queued" + ): + self.__current_run = event.data + elif event.event == "thread.message.created": + await self.on_message_created(event.data) + elif event.event == "thread.message.delta": + snapshot = self.__current_message_snapshot + assert snapshot is not None + + message_delta = event.data.delta + if message_delta.content is not None: + for content_delta in message_delta.content: + if content_delta.type == "text" and content_delta.text: + snapshot_content = snapshot.content[content_delta.index] + assert snapshot_content.type == "text" + await self.on_text_delta(content_delta.text, snapshot_content.text) + + # If the delta is for a new message content: + # - emit on_text_done/on_image_file_done for the previous message content + # - emit on_text_created/on_image_created for the new message content + if content_delta.index != self._current_message_content_index: + if self._current_message_content is not None: + if self._current_message_content.type == "text": + await self.on_text_done(self._current_message_content.text) + elif self._current_message_content.type == "image_file": + await self.on_image_file_done(self._current_message_content.image_file) + + self._current_message_content_index = content_delta.index + self._current_message_content = snapshot.content[content_delta.index] + + # Update the current_message_content (delta event is correctly emitted already) + self._current_message_content = snapshot.content[content_delta.index] + + await self.on_message_delta(event.data.delta, snapshot) + elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": + 
self.__current_message_snapshot = event.data + self.__message_snapshots[event.data.id] = event.data + + if self._current_message_content_index is not None: + content = event.data.content[self._current_message_content_index] + if content.type == "text": + await self.on_text_done(content.text) + elif content.type == "image_file": + await self.on_image_file_done(content.image_file) + + await self.on_message_done(event.data) + elif event.event == "thread.run.step.created": + self.__current_run_step_id = event.data.id + await self.on_run_step_created(event.data) + elif event.event == "thread.run.step.in_progress": + self.__current_run_step_id = event.data.id + elif event.event == "thread.run.step.delta": + step_snapshot = self.__run_step_snapshots[event.data.id] + + run_step_delta = event.data.delta + if ( + run_step_delta.step_details + and run_step_delta.step_details.type == "tool_calls" + and run_step_delta.step_details.tool_calls is not None + ): + assert step_snapshot.step_details.type == "tool_calls" + for tool_call_delta in run_step_delta.step_details.tool_calls: + if tool_call_delta.index == self._current_tool_call_index: + await self.on_tool_call_delta( + tool_call_delta, + step_snapshot.step_details.tool_calls[tool_call_delta.index], + ) + + # If the delta is for a new tool call: + # - emit on_tool_call_done for the previous tool_call + # - emit on_tool_call_created for the new tool_call + if tool_call_delta.index != self._current_tool_call_index: + if self._current_tool_call is not None: + await self.on_tool_call_done(self._current_tool_call) + + self._current_tool_call_index = tool_call_delta.index + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + await self.on_tool_call_created(self._current_tool_call) + + # Update the current_tool_call (delta event is correctly emitted already) + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + + await self.on_run_step_delta( + event.data.delta, + step_snapshot, + ) + elif ( + event.event == "thread.run.step.completed" + or event.event == "thread.run.step.cancelled" + or event.event == "thread.run.step.expired" + or event.event == "thread.run.step.failed" + ): + if self._current_tool_call: + await self.on_tool_call_done(self._current_tool_call) + + await self.on_run_step_done(event.data) + self.__current_run_step_id = None + elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": + # currently no special handling + ... + else: + # we only want to error at build-time + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(event) + + self._current_event = None + + async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]: + stream = self.__stream + if not stream: + raise RuntimeError("Stream has not been started yet") + + try: + async for event in stream: + await self._emit_sse_event(event) + + yield event + except (httpx.TimeoutException, asyncio.TimeoutError) as exc: + await self.on_timeout() + await self.on_exception(exc) + raise + except Exception as exc: + await self.on_exception(exc) + raise + finally: + await self.on_end() + + +AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler) + + +class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]): + """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()` + so that an async context manager can be used without `await`ing the + original client call. 
+ + ```py + async with client.threads.create_and_run_stream(...) as stream: + async for event in stream: + ... + ``` + """ + + def __init__( + self, + api_request: Awaitable[AsyncStream[AssistantStreamEvent]], + *, + event_handler: AsyncAssistantEventHandlerT, + ) -> None: + self.__stream: AsyncStream[AssistantStreamEvent] | None = None + self.__event_handler = event_handler + self.__api_request = api_request + + async def __aenter__(self) -> AsyncAssistantEventHandlerT: + self.__stream = await self.__api_request + self.__event_handler._init(self.__stream) + return self.__event_handler + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + await self.__stream.close() + + +def accumulate_run_step( + *, + event: AssistantStreamEvent, + run_step_snapshots: dict[str, RunStep], +) -> None: + if event.event == "thread.run.step.created": + run_step_snapshots[event.data.id] = event.data + return + + if event.event == "thread.run.step.delta": + data = event.data + snapshot = run_step_snapshots[data.id] + + if data.delta: + merged = accumulate_delta( + cast( + "dict[object, object]", + snapshot.model_dump(exclude_unset=True), + ), + cast( + "dict[object, object]", + data.delta.model_dump(exclude_unset=True), + ), + ) + run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged)) + + return None + + +def accumulate_event( + *, + event: AssistantStreamEvent, + current_message_snapshot: Message | None, +) -> tuple[Message | None, list[MessageContentDelta]]: + """Returns a tuple of message snapshot and newly created text message deltas""" + if event.event == "thread.message.created": + return event.data, [] + + new_content: list[MessageContentDelta] = [] + + if event.event != "thread.message.delta": + return current_message_snapshot, [] + + if not current_message_snapshot: + raise RuntimeError("Encountered a message delta with no previous snapshot") + + data = event.data + if data.delta.content: + for content_delta in data.delta.content: + try: + block = current_message_snapshot.content[content_delta.index] + except IndexError: + current_message_snapshot.content.insert( + content_delta.index, + cast( + MessageContent, + construct_type( + # mypy doesn't allow Content for some reason + type_=cast(Any, MessageContent), + value=content_delta.model_dump(exclude_unset=True), + ), + ), + ) + new_content.append(content_delta) + else: + merged = accumulate_delta( + cast( + "dict[object, object]", + block.model_dump(exclude_unset=True), + ), + cast( + "dict[object, object]", + content_delta.model_dump(exclude_unset=True), + ), + ) + current_message_snapshot.content[content_delta.index] = cast( + MessageContent, + construct_type( + # mypy doesn't allow Content for some reason + type_=cast(Any, MessageContent), + value=merged, + ), + ) + + return current_message_snapshot, new_content + + +def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]: + for key, delta_value in delta.items(): + if key not in acc: + acc[key] = delta_value + continue + + acc_value = acc[key] + if acc_value is None: + acc[key] = delta_value + continue + + # the `index` property is used in arrays of objects so it should + # not be accumulated like other values e.g. 
+ # [{'foo': 'bar', 'index': 0}] + # + # the same applies to `type` properties as they're used for + # discriminated unions + if key == "index" or key == "type": + acc[key] = delta_value + continue + + if isinstance(acc_value, str) and isinstance(delta_value, str): + acc_value += delta_value + elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)): + acc_value += delta_value + elif is_dict(acc_value) and is_dict(delta_value): + acc_value = accumulate_delta(acc_value, delta_value) + elif is_list(acc_value) and is_list(delta_value): + # for lists of non-dictionary items we'll only ever get new entries + # in the array, existing entries will never be changed + if all(isinstance(x, (str, int, float)) for x in acc_value): + acc_value.extend(delta_value) + continue + + for delta_entry in delta_value: + if not is_dict(delta_entry): + raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}") + + try: + index = delta_entry["index"] + except KeyError as exc: + raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc + + if not isinstance(index, int): + raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}") + + try: + acc_entry = acc_value[index] + except IndexError: + acc_value.insert(index, delta_entry) + else: + if not is_dict(acc_entry): + raise TypeError("not handled yet") + + acc_value[index] = accumulate_delta(acc_entry, delta_entry) + + acc[key] = acc_value + + return acc diff --git a/src/openai/pagination.py b/src/openai/pagination.py index f7527753e1..8293638269 100644 --- a/src/openai/pagination.py +++ b/src/openai/pagination.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Any, List, Generic, TypeVar, Optional, cast from typing_extensions import Protocol, override, runtime_checkable diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py index 1fb4aa62ec..ecae4243fc 100644 --- a/src/openai/resources/__init__.py +++ b/src/openai/resources/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from .beta import ( Beta, @@ -48,6 +48,14 @@ ModelsWithStreamingResponse, AsyncModelsWithStreamingResponse, ) +from .batches import ( + Batches, + AsyncBatches, + BatchesWithRawResponse, + AsyncBatchesWithRawResponse, + BatchesWithStreamingResponse, + AsyncBatchesWithStreamingResponse, +) from .embeddings import ( Embeddings, AsyncEmbeddings, @@ -142,4 +150,10 @@ "AsyncBetaWithRawResponse", "BetaWithStreamingResponse", "AsyncBetaWithStreamingResponse", + "Batches", + "AsyncBatches", + "BatchesWithRawResponse", + "AsyncBatchesWithRawResponse", + "BatchesWithStreamingResponse", + "AsyncBatchesWithStreamingResponse", ] diff --git a/src/openai/resources/audio/__init__.py b/src/openai/resources/audio/__init__.py index 63d06494b8..7da1d2dbde 100644 --- a/src/openai/resources/audio/__init__.py +++ b/src/openai/resources/audio/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
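Roughly how the `accumulate_delta` helper above merges a snapshot with a delta; it is a private function and these dictionaries are hand-built for illustration:

```py
from openai.lib.streaming._assistants import accumulate_delta

snapshot = {
    "text": {"value": "The answer"},
    "tool_calls": [{"index": 0, "type": "code_interpreter"}],
}
delta = {
    "text": {"value": " is 42"},
    "tool_calls": [{"index": 0, "input": "print(6 * 7)"}],
}

merged = accumulate_delta(snapshot, delta)
# strings concatenate, dicts merge recursively, and list entries are matched
# up by their `index` key instead of being appended:
# merged["text"]["value"] == "The answer is 42"
# merged["tool_calls"][0] == {"index": 0, "type": "code_interpreter", "input": "print(6 * 7)"}
```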
from .audio import ( Audio, diff --git a/src/openai/resources/audio/audio.py b/src/openai/resources/audio/audio.py index bafacf4422..537ad573d0 100644 --- a/src/openai/resources/audio/audio.py +++ b/src/openai/resources/audio/audio.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py index 4e94d4aaef..e26c58051e 100644 --- a/src/openai/resources/audio/speech.py +++ b/src/openai/resources/audio/speech.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -9,7 +9,10 @@ from ... import _legacy_response from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -41,7 +44,7 @@ def create( input: str, model: Union[str, Literal["tts-1", "tts-1-hd"]], voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - response_format: Literal["mp3", "opus", "aac", "flac"] | NotGiven = NOT_GIVEN, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, speed: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -65,7 +68,8 @@ def create( available in the [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). - response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`. + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default. @@ -78,6 +82,7 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} return self._post( "/audio/speech", body=maybe_transform( @@ -112,7 +117,7 @@ async def create( input: str, model: Union[str, Literal["tts-1", "tts-1-hd"]], voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - response_format: Literal["mp3", "opus", "aac", "flac"] | NotGiven = NOT_GIVEN, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, speed: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -136,7 +141,8 @@ async def create( available in the [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). - response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`. + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default. 
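A small sketch exercising the widened `response_format` above; `wav` and `pcm` are now accepted alongside `mp3`, `opus`, `aac` and `flac`. The output path is arbitrary.

```py
from pathlib import Path

from openai import OpenAI

client = OpenAI()

speech = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="The quick brown fox jumped over the lazy dog.",
    response_format="wav",  # newly supported, along with "pcm"
)
Path("speech.wav").write_bytes(speech.read())
```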
@@ -149,9 +155,10 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} return await self._post( "/audio/speech", - body=maybe_transform( + body=await async_maybe_transform( { "input": input, "model": model, diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py index 2c167be395..995680186b 100644 --- a/src/openai/resources/audio/transcriptions.py +++ b/src/openai/resources/audio/transcriptions.py @@ -1,22 +1,28 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union, Mapping, cast +from typing import List, Union, Mapping, cast from typing_extensions import Literal import httpx from ... import _legacy_response from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ..._utils import extract_files, maybe_transform, deepcopy_minimal +from ..._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ...types.audio import Transcription, transcription_create_params +from ...types.audio import transcription_create_params from ..._base_client import ( make_request_options, ) +from ...types.audio.transcription import Transcription __all__ = ["Transcriptions", "AsyncTranscriptions"] @@ -39,6 +45,7 @@ def create( prompt: str | NotGiven = NOT_GIVEN, response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -54,7 +61,8 @@ def create( The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. language: The language of the input audio. Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will @@ -74,6 +82,12 @@ def create( [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. 
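A sketch of the new `timestamp_granularities` option on transcriptions; the audio file name is a placeholder, and `verbose_json` is required for the timestamps to be returned.

```py
from openai import OpenAI

client = OpenAI()

with open("meeting.mp3", "rb") as audio:
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio,
        response_format="verbose_json",    # required for timestamp granularities
        timestamp_granularities=["word"],  # or ["segment"], or both
    )

print(transcript.text)  # word-level timing details ride along in the verbose JSON payload
```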
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -90,6 +104,7 @@ def create( "prompt": prompt, "response_format": response_format, "temperature": temperature, + "timestamp_granularities": timestamp_granularities, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) @@ -127,6 +142,7 @@ async def create( prompt: str | NotGiven = NOT_GIVEN, response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -142,7 +158,8 @@ async def create( The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. language: The language of the input audio. Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will @@ -162,6 +179,12 @@ async def create( [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -178,6 +201,7 @@ async def create( "prompt": prompt, "response_format": response_format, "temperature": temperature, + "timestamp_granularities": timestamp_granularities, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) @@ -188,7 +212,7 @@ async def create( extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/audio/transcriptions", - body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py index d6cbc75886..d711ee2fbd 100644 --- a/src/openai/resources/audio/translations.py +++ b/src/openai/resources/audio/translations.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -9,14 +9,20 @@ from ... 
import _legacy_response from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ..._utils import extract_files, maybe_transform, deepcopy_minimal +from ..._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ...types.audio import Translation, translation_create_params +from ...types.audio import translation_create_params from ..._base_client import ( make_request_options, ) +from ...types.audio.translation import Translation __all__ = ["Translations", "AsyncTranslations"] @@ -52,7 +58,8 @@ def create( file: The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. prompt: An optional text to guide the model's style or continue a previous audio segment. The @@ -133,7 +140,8 @@ async def create( file: The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. prompt: An optional text to guide the model's style or continue a previous audio segment. The @@ -174,7 +182,7 @@ async def create( extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/audio/translations", - body=maybe_transform(body, translation_create_params.TranslationCreateParams), + body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py new file mode 100644 index 0000000000..db4c4da235 --- /dev/null +++ b/src/openai/resources/batches.py @@ -0,0 +1,481 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import batch_list_params, batch_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..pagination import SyncCursorPage, AsyncCursorPage +from ..types.batch import Batch +from .._base_client import ( + AsyncPaginator, + make_request_options, +) + +__all__ = ["Batches", "AsyncBatches"] + + +class Batches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> BatchesWithRawResponse: + return BatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BatchesWithStreamingResponse: + return BatchesWithStreamingResponse(self) + + def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + input_file_id: str, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. + Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 + embedding inputs across all requests in the batch. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 100 MB in size. + + metadata: Optional custom metadata for the batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/batches", + body=maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Batch]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=SyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Cancels an in-progress batch. 
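The new `Batches` resource consumes a JSONL file uploaded with purpose `batch`. A hedged end-to-end sketch of creating a batch against the chat completions endpoint (the file name and metadata values are illustrative):

```python
from openai import OpenAI

client = OpenAI()

# The input file must be JSONL, at most 50,000 requests / 100 MB,
# and uploaded with purpose="batch".
batch_input = client.files.create(
    file=open("requests.jsonl", "rb"),
    purpose="batch",
)

batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # currently the only supported window
    metadata={"project": "nightly-eval"},
)
print(batch.id, batch.status)
```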
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + +class AsyncBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBatchesWithRawResponse: + return AsyncBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + return AsyncBatchesWithStreamingResponse(self) + + async def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + input_file_id: str, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. + Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 + embedding inputs across all requests in the batch. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 100 MB in size. + + metadata: Optional custom metadata for the batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/batches", + body=await async_maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + async def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return await self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=AsyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + async def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Cancels an in-progress batch. 
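The asynchronous resource mirrors the synchronous surface: `list` returns an `AsyncCursorPage[Batch]` that auto-paginates under `async for`, and `cancel` stops an in-progress batch. A sketch (the batch ID is a placeholder):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # Auto-pagination: iterate every batch, fetching further pages as needed.
    async for batch in client.batches.list(limit=20):
        print(batch.id, batch.status)

    # Cancel a batch that is still running (placeholder ID).
    cancelled = await client.batches.cancel("batch_abc123")
    print(cancelled.status)


asyncio.run(main())
```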
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return await self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + +class BatchesWithRawResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = _legacy_response.to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithRawResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + batches.cancel, + ) + + +class BatchesWithStreamingResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = to_streamed_response_wrapper( + batches.list, + ) + self.cancel = to_streamed_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithStreamingResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = async_to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + batches.list, + ) + self.cancel = async_to_streamed_response_wrapper( + batches.cancel, + ) diff --git a/src/openai/resources/beta/__init__.py b/src/openai/resources/beta/__init__.py index 973c6ba54e..01f5338757 100644 --- a/src/openai/resources/beta/__init__.py +++ b/src/openai/resources/beta/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
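The `BatchesWithRawResponse` and `BatchesWithStreamingResponse` wrappers generated above expose the same methods but hand back the underlying HTTP response first. A sketch of reading a response header through the raw wrapper (the batch ID and header name are illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Same call as client.batches.retrieve(...), but the HTTP response is returned.
response = client.batches.with_raw_response.retrieve("batch_abc123")
print(response.headers.get("x-request-id"))

batch = response.parse()  # then deserialize into the usual Batch model
print(batch.status)
```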
from .beta import ( Beta, @@ -24,8 +24,22 @@ AssistantsWithStreamingResponse, AsyncAssistantsWithStreamingResponse, ) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) __all__ = [ + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", "Assistants", "AsyncAssistants", "AssistantsWithRawResponse", diff --git a/src/openai/resources/beta/assistants/assistants.py b/src/openai/resources/beta/assistants.py similarity index 65% rename from src/openai/resources/beta/assistants/assistants.py rename to src/openai/resources/beta/assistants.py index 3a2418ad90..5912aff77a 100644 --- a/src/openai/resources/beta/assistants/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -1,47 +1,40 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal import httpx -from .... import _legacy_response -from .files import ( - Files, - AsyncFiles, - FilesWithRawResponse, - AsyncFilesWithRawResponse, - FilesWithStreamingResponse, - AsyncFilesWithStreamingResponse, +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, ) -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import maybe_transform -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ....pagination import SyncCursorPage, AsyncCursorPage -from ....types.beta import ( - Assistant, - AssistantDeleted, +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ...types.beta import ( assistant_list_params, assistant_create_params, assistant_update_params, ) -from ...._base_client import ( +from ..._base_client import ( AsyncPaginator, make_request_options, ) +from ...types.beta.assistant import Assistant +from ...types.beta.assistant_deleted import AssistantDeleted +from ...types.beta.assistant_tool_param import AssistantToolParam +from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["Assistants", "AsyncAssistants"] class Assistants(SyncAPIResource): - @cached_property - def files(self) -> Files: - return Files(self._client) - @cached_property def with_raw_response(self) -> AssistantsWithRawResponse: return AssistantsWithRawResponse(self) @@ -53,13 +46,40 @@ def with_streaming_response(self) -> AssistantsWithStreamingResponse: def create( self, *, - model: str, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + 
"gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ], description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_create_params.Tool] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -79,11 +99,7 @@ def create( description: The description of the assistant. The maximum length is 512 characters. - file_ids: A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. - - instructions: The system instructions that the assistant uses. The maximum length is 32768 + instructions: The system instructions that the assistant uses. The maximum length is 256,000 characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -93,8 +109,40 @@ def create( name: The name of the assistant. The maximum length is 256 characters. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. 
+ + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. extra_headers: Send extra headers @@ -104,18 +152,21 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( "/assistants", body=maybe_transform( { "model": model, "description": description, - "file_ids": file_ids, "instructions": instructions, "metadata": metadata, "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, "tools": tools, + "top_p": top_p, }, assistant_create_params.AssistantCreateParams, ), @@ -150,7 +201,7 @@ def retrieve( """ if not assistant_id: raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/assistants/{assistant_id}", options=make_request_options( @@ -164,12 +215,15 @@ def update( assistant_id: str, *, description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, model: str | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_update_params.Tool] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -184,13 +238,7 @@ def update( The maximum length is 512 characters. - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. If a - file was previously attached to the list but does not show up in the list, it - will be deleted from the assistant. - - instructions: The system instructions that the assistant uses. The maximum length is 32768 + instructions: The system instructions that the assistant uses. The maximum length is 256,000 characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -206,8 +254,40 @@ def update( name: The name of the assistant. The maximum length is 256 characters. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
+ + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. extra_headers: Send extra headers @@ -219,18 +299,21 @@ def update( """ if not assistant_id: raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/assistants/{assistant_id}", body=maybe_transform( { "description": description, - "file_ids": file_ids, "instructions": instructions, "metadata": metadata, "model": model, "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, "tools": tools, + "top_p": top_p, }, assistant_update_params.AssistantUpdateParams, ), @@ -283,7 +366,7 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( "/assistants", page=SyncCursorPage[Assistant], @@ -330,7 +413,7 @@ def delete( """ if not assistant_id: raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._delete( f"/assistants/{assistant_id}", options=make_request_options( @@ -341,10 +424,6 @@ def delete( class AsyncAssistants(AsyncAPIResource): - @cached_property - def files(self) -> AsyncFiles: - return AsyncFiles(self._client) - @cached_property def with_raw_response(self) -> AsyncAssistantsWithRawResponse: return AsyncAssistantsWithRawResponse(self) @@ -356,13 +435,40 @@ def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse: async 
def create( self, *, - model: str, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ], description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_create_params.Tool] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -382,11 +488,7 @@ async def create( description: The description of the assistant. The maximum length is 512 characters. - file_ids: A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. - - instructions: The system instructions that the assistant uses. The maximum length is 32768 + instructions: The system instructions that the assistant uses. The maximum length is 256,000 characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -396,8 +498,40 @@ async def create( name: The name of the assistant. The maximum length is 256 characters. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. 
For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. extra_headers: Send extra headers @@ -407,18 +541,21 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( "/assistants", - body=maybe_transform( + body=await async_maybe_transform( { "model": model, "description": description, - "file_ids": file_ids, "instructions": instructions, "metadata": metadata, "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, "tools": tools, + "top_p": top_p, }, assistant_create_params.AssistantCreateParams, ), @@ -453,7 +590,7 @@ async def retrieve( """ if not assistant_id: raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( f"/assistants/{assistant_id}", options=make_request_options( @@ -467,12 +604,15 @@ async def update( assistant_id: str, *, description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, model: str | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_update_params.Tool] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -487,13 +627,7 @@ async def update( The maximum length is 512 characters. - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. If a - file was previously attached to the list but does not show up in the list, it - will be deleted from the assistant. - - instructions: The system instructions that the assistant uses. The maximum length is 32768 + instructions: The system instructions that the assistant uses. The maximum length is 256,000 characters. 
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -509,8 +643,40 @@ async def update( name: The name of the assistant. The maximum length is 256 characters. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
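Putting the new `response_format`, `temperature`, and `top_p` parameters together, an update call that switches an existing assistant into JSON mode might look like this (the assistant ID is a placeholder; note the docstring's warning that the prompt itself must also ask for JSON):

```python
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.update(
    "asst_abc123",  # placeholder ID
    # JSON mode: the instructions must also tell the model to emit JSON.
    instructions="Extract the invoice fields and respond with a JSON object only.",
    response_format={"type": "json_object"},
    temperature=0.2,  # adjust temperature or top_p, not both
)
print(assistant.id)
```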
extra_headers: Send extra headers @@ -522,18 +688,21 @@ async def update( """ if not assistant_id: raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/assistants/{assistant_id}", - body=maybe_transform( + body=await async_maybe_transform( { "description": description, - "file_ids": file_ids, "instructions": instructions, "metadata": metadata, "model": model, "name": name, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, "tools": tools, + "top_p": top_p, }, assistant_update_params.AssistantUpdateParams, ), @@ -586,7 +755,7 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( "/assistants", page=AsyncCursorPage[Assistant], @@ -633,7 +802,7 @@ async def delete( """ if not assistant_id: raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._delete( f"/assistants/{assistant_id}", options=make_request_options( @@ -663,10 +832,6 @@ def __init__(self, assistants: Assistants) -> None: assistants.delete, ) - @cached_property - def files(self) -> FilesWithRawResponse: - return FilesWithRawResponse(self._assistants.files) - class AsyncAssistantsWithRawResponse: def __init__(self, assistants: AsyncAssistants) -> None: @@ -688,10 +853,6 @@ def __init__(self, assistants: AsyncAssistants) -> None: assistants.delete, ) - @cached_property - def files(self) -> AsyncFilesWithRawResponse: - return AsyncFilesWithRawResponse(self._assistants.files) - class AssistantsWithStreamingResponse: def __init__(self, assistants: Assistants) -> None: @@ -713,10 +874,6 @@ def __init__(self, assistants: Assistants) -> None: assistants.delete, ) - @cached_property - def files(self) -> FilesWithStreamingResponse: - return FilesWithStreamingResponse(self._assistants.files) - class AsyncAssistantsWithStreamingResponse: def __init__(self, assistants: AsyncAssistants) -> None: @@ -737,7 +894,3 @@ def __init__(self, assistants: AsyncAssistants) -> None: self.delete = async_to_streamed_response_wrapper( assistants.delete, ) - - @cached_property - def files(self) -> AsyncFilesWithStreamingResponse: - return AsyncFilesWithStreamingResponse(self._assistants.files) diff --git a/src/openai/resources/beta/assistants/__init__.py b/src/openai/resources/beta/assistants/__init__.py deleted file mode 100644 index ad04a71572..0000000000 --- a/src/openai/resources/beta/assistants/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
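The `vector_store_ids` used with `tool_resources` above come from the new vector stores resource that this diff wires into `client.beta`. Its method bodies are not part of these hunks, so the following is only an assumed sketch of the usual resource shape (the file path and store name are placeholders):

```python
from openai import OpenAI

client = OpenAI()

# Assumed usage: create a vector store, then attach an uploaded file to it.
vector_store = client.beta.vector_stores.create(name="Project docs")

file = client.files.create(file=open("handbook.pdf", "rb"), purpose="assistants")
client.beta.vector_stores.files.create(vector_store.id, file_id=file.id)

# The store ID can then go into tool_resources["file_search"]["vector_store_ids"].
print(vector_store.id)
```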
- -from .files import ( - Files, - AsyncFiles, - FilesWithRawResponse, - AsyncFilesWithRawResponse, - FilesWithStreamingResponse, - AsyncFilesWithStreamingResponse, -) -from .assistants import ( - Assistants, - AsyncAssistants, - AssistantsWithRawResponse, - AsyncAssistantsWithRawResponse, - AssistantsWithStreamingResponse, - AsyncAssistantsWithStreamingResponse, -) - -__all__ = [ - "Files", - "AsyncFiles", - "FilesWithRawResponse", - "AsyncFilesWithRawResponse", - "FilesWithStreamingResponse", - "AsyncFilesWithStreamingResponse", - "Assistants", - "AsyncAssistants", - "AssistantsWithRawResponse", - "AsyncAssistantsWithRawResponse", - "AssistantsWithStreamingResponse", - "AsyncAssistantsWithStreamingResponse", -] diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py index 7081cff305..0d9806678f 100644 --- a/src/openai/resources/beta/beta.py +++ b/src/openai/resources/beta/beta.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -20,13 +20,25 @@ AsyncAssistantsWithStreamingResponse, ) from ..._resource import SyncAPIResource, AsyncAPIResource +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) from .threads.threads import Threads, AsyncThreads -from .assistants.assistants import Assistants, AsyncAssistants +from .vector_stores.vector_stores import VectorStores, AsyncVectorStores __all__ = ["Beta", "AsyncBeta"] class Beta(SyncAPIResource): + @cached_property + def vector_stores(self) -> VectorStores: + return VectorStores(self._client) + @cached_property def assistants(self) -> Assistants: return Assistants(self._client) @@ -45,6 +57,10 @@ def with_streaming_response(self) -> BetaWithStreamingResponse: class AsyncBeta(AsyncAPIResource): + @cached_property + def vector_stores(self) -> AsyncVectorStores: + return AsyncVectorStores(self._client) + @cached_property def assistants(self) -> AsyncAssistants: return AsyncAssistants(self._client) @@ -66,6 +82,10 @@ class BetaWithRawResponse: def __init__(self, beta: Beta) -> None: self._beta = beta + @cached_property + def vector_stores(self) -> VectorStoresWithRawResponse: + return VectorStoresWithRawResponse(self._beta.vector_stores) + @cached_property def assistants(self) -> AssistantsWithRawResponse: return AssistantsWithRawResponse(self._beta.assistants) @@ -79,6 +99,10 @@ class AsyncBetaWithRawResponse: def __init__(self, beta: AsyncBeta) -> None: self._beta = beta + @cached_property + def vector_stores(self) -> AsyncVectorStoresWithRawResponse: + return AsyncVectorStoresWithRawResponse(self._beta.vector_stores) + @cached_property def assistants(self) -> AsyncAssistantsWithRawResponse: return AsyncAssistantsWithRawResponse(self._beta.assistants) @@ -92,6 +116,10 @@ class BetaWithStreamingResponse: def __init__(self, beta: Beta) -> None: self._beta = beta + @cached_property + def vector_stores(self) -> VectorStoresWithStreamingResponse: + return VectorStoresWithStreamingResponse(self._beta.vector_stores) + @cached_property def assistants(self) -> AssistantsWithStreamingResponse: return AssistantsWithStreamingResponse(self._beta.assistants) @@ -105,6 +133,10 @@ class AsyncBetaWithStreamingResponse: def __init__(self, beta: AsyncBeta) -> None: self._beta = beta + @cached_property + def vector_stores(self) -> 
AsyncVectorStoresWithStreamingResponse: + return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores) + @cached_property def assistants(self) -> AsyncAssistantsWithStreamingResponse: return AsyncAssistantsWithStreamingResponse(self._beta.assistants) diff --git a/src/openai/resources/beta/threads/__init__.py b/src/openai/resources/beta/threads/__init__.py index 886574b327..a66e445b1f 100644 --- a/src/openai/resources/beta/threads/__init__.py +++ b/src/openai/resources/beta/threads/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from .runs import ( Runs, diff --git a/src/openai/resources/beta/threads/messages/messages.py b/src/openai/resources/beta/threads/messages.py similarity index 71% rename from src/openai/resources/beta/threads/messages/messages.py rename to src/openai/resources/beta/threads/messages.py index c95cdd5d00..f0832515ce 100644 --- a/src/openai/resources/beta/threads/messages/messages.py +++ b/src/openai/resources/beta/threads/messages.py @@ -1,41 +1,35 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal import httpx -from ..... import _legacy_response -from .files import ( - Files, - AsyncFiles, - FilesWithRawResponse, - AsyncFilesWithRawResponse, - FilesWithStreamingResponse, - AsyncFilesWithStreamingResponse, +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, ) -from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform -from ....._compat import cached_property -from ....._resource import SyncAPIResource, AsyncAPIResource -from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import ( +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( AsyncPaginator, make_request_options, ) -from .....types.beta.threads import ThreadMessage, message_list_params, message_create_params, message_update_params +from ....types.beta.threads import message_list_params, message_create_params, message_update_params +from ....types.beta.threads.message import Message +from ....types.beta.threads.message_deleted import MessageDeleted +from ....types.beta.threads.message_content_part_param import MessageContentPartParam __all__ = ["Messages", "AsyncMessages"] class Messages(SyncAPIResource): - @cached_property - def files(self) -> Files: - return Files(self._client) - @cached_property def with_raw_response(self) -> MessagesWithRawResponse: return MessagesWithRawResponse(self) @@ -48,9 +42,9 @@ def create( self, thread_id: str, *, - content: str, - role: Literal["user"], - file_ids: List[str] | NotGiven = NOT_GIVEN, + content: Union[str, Iterable[MessageContentPartParam]], + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, metadata: 
Optional[object] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -58,20 +52,22 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: + ) -> Message: """ Create a message. Args: - content: The content of the message. + content: The text contents of the message. + + role: + The role of the entity that is creating the message. Allowed values include: - role: The role of the entity that is creating the message. Currently only `user` is - supported. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. + attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys @@ -88,14 +84,14 @@ def create( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/messages", body=maybe_transform( { "content": content, "role": role, - "file_ids": file_ids, + "attachments": attachments, "metadata": metadata, }, message_create_params.MessageCreateParams, @@ -103,7 +99,7 @@ def create( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ThreadMessage, + cast_to=Message, ) def retrieve( @@ -117,7 +113,7 @@ def retrieve( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: + ) -> Message: """ Retrieve a message. @@ -134,13 +130,13 @@ def retrieve( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not message_id: raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}/messages/{message_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ThreadMessage, + cast_to=Message, ) def update( @@ -155,7 +151,7 @@ def update( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: + ) -> Message: """ Modifies a message. 
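Messages now accept an `assistant` role, rich `content` parts, and per-file `attachments` in place of `file_ids`. A sketch of creating a message with an attachment routed to the `file_search` tool (the file ID is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Summarize the attached report in three bullet points.",
    # v2: attachments replace file_ids and say which tools may read the file.
    attachments=[{"file_id": "file_abc123", "tools": [{"type": "file_search"}]}],
)
print(message.id)
```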
@@ -177,14 +173,14 @@ def update( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not message_id: raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/messages/{message_id}", body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ThreadMessage, + cast_to=Message, ) def list( @@ -195,13 +191,14 @@ def list( before: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[ThreadMessage]: + ) -> SyncCursorPage[Message]: """ Returns a list of messages for a given thread. @@ -222,6 +219,8 @@ def list( order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. + run_id: Filter messages by the run ID that generated them. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -232,10 +231,10 @@ def list( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/messages", - page=SyncCursorPage[ThreadMessage], + page=SyncCursorPage[Message], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -247,19 +246,53 @@ def list( "before": before, "limit": limit, "order": order, + "run_id": run_id, }, message_list_params.MessageListParams, ), ), - model=ThreadMessage, + model=Message, ) + def delete( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageDeleted: + """ + Deletes a message. 
-class AsyncMessages(AsyncAPIResource): - @cached_property - def files(self) -> AsyncFiles: - return AsyncFiles(self._client) + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageDeleted, + ) + + +class AsyncMessages(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncMessagesWithRawResponse: return AsyncMessagesWithRawResponse(self) @@ -272,9 +305,9 @@ async def create( self, thread_id: str, *, - content: str, - role: Literal["user"], - file_ids: List[str] | NotGiven = NOT_GIVEN, + content: Union[str, Iterable[MessageContentPartParam]], + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -282,20 +315,22 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: + ) -> Message: """ Create a message. Args: - content: The content of the message. + content: The text contents of the message. - role: The role of the entity that is creating the message. Currently only `user` is - supported. + role: + The role of the entity that is creating the message. Allowed values include: - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + + attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. 
Keys @@ -312,14 +347,14 @@ async def create( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/threads/{thread_id}/messages", - body=maybe_transform( + body=await async_maybe_transform( { "content": content, "role": role, - "file_ids": file_ids, + "attachments": attachments, "metadata": metadata, }, message_create_params.MessageCreateParams, @@ -327,7 +362,7 @@ async def create( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ThreadMessage, + cast_to=Message, ) async def retrieve( @@ -341,7 +376,7 @@ async def retrieve( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: + ) -> Message: """ Retrieve a message. @@ -358,13 +393,13 @@ async def retrieve( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not message_id: raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( f"/threads/{thread_id}/messages/{message_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ThreadMessage, + cast_to=Message, ) async def update( @@ -379,7 +414,7 @@ async def update( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: + ) -> Message: """ Modifies a message. @@ -401,14 +436,14 @@ async def update( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not message_id: raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/threads/{thread_id}/messages/{message_id}", - body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), + body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ThreadMessage, + cast_to=Message, ) def list( @@ -419,13 +454,14 @@ def list( before: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[ThreadMessage, AsyncCursorPage[ThreadMessage]]: + ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]: """ Returns a list of messages for a given thread. 
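# Editor's note: illustrative sketch, not part of the diff, covering the two additions
# above -- the new `run_id` filter on list() and the new delete() endpoint. All IDs are
# hypothetical placeholders.
from openai import OpenAI

client = OpenAI()

# Only messages produced by a specific run:
for message in client.beta.threads.messages.list(
    thread_id="thread_abc123",
    run_id="run_abc123",
    order="asc",
):
    print(message.id, message.role)

# Remove a message from the thread; returns a MessageDeleted object.
deleted = client.beta.threads.messages.delete(
    message_id="msg_abc123",
    thread_id="thread_abc123",
)
print(deleted.deleted)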
@@ -446,6 +482,8 @@ def list( order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. + run_id: Filter messages by the run ID that generated them. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -456,10 +494,10 @@ def list( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/messages", - page=AsyncCursorPage[ThreadMessage], + page=AsyncCursorPage[Message], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -471,11 +509,49 @@ def list( "before": before, "limit": limit, "order": order, + "run_id": run_id, }, message_list_params.MessageListParams, ), ), - model=ThreadMessage, + model=Message, + ) + + async def delete( + self, + message_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageDeleted: + """ + Deletes a message. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/threads/{thread_id}/messages/{message_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageDeleted, ) @@ -495,10 +571,9 @@ def __init__(self, messages: Messages) -> None: self.list = _legacy_response.to_raw_response_wrapper( messages.list, ) - - @cached_property - def files(self) -> FilesWithRawResponse: - return FilesWithRawResponse(self._messages.files) + self.delete = _legacy_response.to_raw_response_wrapper( + messages.delete, + ) class AsyncMessagesWithRawResponse: @@ -517,10 +592,9 @@ def __init__(self, messages: AsyncMessages) -> None: self.list = _legacy_response.async_to_raw_response_wrapper( messages.list, ) - - @cached_property - def files(self) -> AsyncFilesWithRawResponse: - return AsyncFilesWithRawResponse(self._messages.files) + self.delete = _legacy_response.async_to_raw_response_wrapper( + messages.delete, + ) class MessagesWithStreamingResponse: @@ -539,10 +613,9 @@ def __init__(self, messages: Messages) -> None: self.list = to_streamed_response_wrapper( messages.list, ) - - @cached_property - def files(self) -> FilesWithStreamingResponse: - return FilesWithStreamingResponse(self._messages.files) + self.delete = to_streamed_response_wrapper( + messages.delete, + ) class AsyncMessagesWithStreamingResponse: @@ -561,7 +634,6 @@ def __init__(self, messages: AsyncMessages) -> 
None: self.list = async_to_streamed_response_wrapper( messages.list, ) - - @cached_property - def files(self) -> AsyncFilesWithStreamingResponse: - return AsyncFilesWithStreamingResponse(self._messages.files) + self.delete = async_to_streamed_response_wrapper( + messages.delete, + ) diff --git a/src/openai/resources/beta/threads/messages/__init__.py b/src/openai/resources/beta/threads/messages/__init__.py deleted file mode 100644 index 0acb0ab201..0000000000 --- a/src/openai/resources/beta/threads/messages/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from .files import ( - Files, - AsyncFiles, - FilesWithRawResponse, - AsyncFilesWithRawResponse, - FilesWithStreamingResponse, - AsyncFilesWithStreamingResponse, -) -from .messages import ( - Messages, - AsyncMessages, - MessagesWithRawResponse, - AsyncMessagesWithRawResponse, - MessagesWithStreamingResponse, - AsyncMessagesWithStreamingResponse, -) - -__all__ = [ - "Files", - "AsyncFiles", - "FilesWithRawResponse", - "AsyncFilesWithRawResponse", - "FilesWithStreamingResponse", - "AsyncFilesWithStreamingResponse", - "Messages", - "AsyncMessages", - "MessagesWithRawResponse", - "AsyncMessagesWithRawResponse", - "MessagesWithStreamingResponse", - "AsyncMessagesWithStreamingResponse", -] diff --git a/src/openai/resources/beta/threads/messages/files.py b/src/openai/resources/beta/threads/messages/files.py deleted file mode 100644 index fc8b894d72..0000000000 --- a/src/openai/resources/beta/threads/messages/files.py +++ /dev/null @@ -1,312 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing_extensions import Literal - -import httpx - -from ..... import _legacy_response -from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform -from ....._compat import cached_property -from ....._resource import SyncAPIResource, AsyncAPIResource -from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import ( - AsyncPaginator, - make_request_options, -) -from .....types.beta.threads.messages import MessageFile, file_list_params - -__all__ = ["Files", "AsyncFiles"] - - -class Files(SyncAPIResource): - @cached_property - def with_raw_response(self) -> FilesWithRawResponse: - return FilesWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> FilesWithStreamingResponse: - return FilesWithStreamingResponse(self) - - def retrieve( - self, - file_id: str, - *, - thread_id: str, - message_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> MessageFile: - """ - Retrieves a message file. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not message_id: - raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get( - f"/threads/{thread_id}/messages/{message_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=MessageFile, - ) - - def list( - self, - message_id: str, - *, - thread_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[MessageFile]: - """Returns a list of message files. - - Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not message_id: - raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/messages/{message_id}/files", - page=SyncCursorPage[MessageFile], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - file_list_params.FileListParams, - ), - ), - model=MessageFile, - ) - - -class AsyncFiles(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncFilesWithRawResponse: - return AsyncFilesWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: - return AsyncFilesWithStreamingResponse(self) - - async def retrieve( - self, - file_id: str, - *, - thread_id: str, - message_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> MessageFile: - """ - Retrieves a message file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not message_id: - raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._get( - f"/threads/{thread_id}/messages/{message_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=MessageFile, - ) - - def list( - self, - message_id: str, - *, - thread_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[MessageFile, AsyncCursorPage[MessageFile]]: - """Returns a list of message files. - - Args: - after: A cursor for use in pagination. 
- - `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not message_id: - raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/messages/{message_id}/files", - page=AsyncCursorPage[MessageFile], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - file_list_params.FileListParams, - ), - ), - model=MessageFile, - ) - - -class FilesWithRawResponse: - def __init__(self, files: Files) -> None: - self._files = files - - self.retrieve = _legacy_response.to_raw_response_wrapper( - files.retrieve, - ) - self.list = _legacy_response.to_raw_response_wrapper( - files.list, - ) - - -class AsyncFilesWithRawResponse: - def __init__(self, files: AsyncFiles) -> None: - self._files = files - - self.retrieve = _legacy_response.async_to_raw_response_wrapper( - files.retrieve, - ) - self.list = _legacy_response.async_to_raw_response_wrapper( - files.list, - ) - - -class FilesWithStreamingResponse: - def __init__(self, files: Files) -> None: - self._files = files - - self.retrieve = to_streamed_response_wrapper( - files.retrieve, - ) - self.list = to_streamed_response_wrapper( - files.list, - ) - - -class AsyncFilesWithStreamingResponse: - def __init__(self, files: AsyncFiles) -> None: - self._files = files - - self.retrieve = async_to_streamed_response_wrapper( - files.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - files.list, - ) diff --git a/src/openai/resources/beta/threads/runs/__init__.py b/src/openai/resources/beta/threads/runs/__init__.py index 659c96acfb..50aa9fae60 100644 --- a/src/openai/resources/beta/threads/runs/__init__.py +++ b/src/openai/resources/beta/threads/runs/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
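# Editor's note: every endpoint touched above now sends `OpenAI-Beta: assistants=v2` by
# default, but the caller-supplied extra_headers are merged last, so an explicit value
# still wins. A minimal sketch of that merge order (the caller value is hypothetical
# and only shown to demonstrate precedence):
default_headers = {"OpenAI-Beta": "assistants=v2"}
caller_headers = {"OpenAI-Beta": "assistants=v1"}

merged = {**default_headers, **(caller_headers or {})}
assert merged["OpenAI-Beta"] == "assistants=v1"  # caller-supplied header takes precedence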
from .runs import ( Runs, diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index 0ed48b4792..c37071529c 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -1,8 +1,10 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional +import typing_extensions +from typing import Union, Iterable, Optional, overload +from functools import partial from typing_extensions import Literal import httpx @@ -17,22 +19,40 @@ AsyncStepsWithStreamingResponse, ) from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform +from ....._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) from ....._compat import cached_property from ....._resource import SyncAPIResource, AsyncAPIResource from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....._streaming import Stream, AsyncStream from .....pagination import SyncCursorPage, AsyncCursorPage from ....._base_client import ( AsyncPaginator, make_request_options, ) +from .....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) from .....types.beta.threads import ( - Run, run_list_params, run_create_params, run_update_params, run_submit_tool_outputs_params, ) +from .....types.beta.threads.run import Run +from .....types.beta.assistant_tool_param import AssistantToolParam +from .....types.beta.assistant_stream_event import AssistantStreamEvent +from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["Runs", "AsyncRuns"] @@ -50,16 +70,52 @@ def with_raw_response(self) -> RunsWithRawResponse: def with_streaming_response(self) -> RunsWithStreamingResponse: return RunsWithStreamingResponse(self) + @overload def create( self, thread_id: str, *, assistant_id: str, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, - tools: Optional[List[run_create_params.Tool]] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: 
Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -79,10 +135,322 @@ def create( is useful for modifying the behavior on a per-run basis without overriding other instructions. + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. 
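# Editor's note: illustrative sketch, not part of the diff, of the streaming overload
# shown above -- passing stream=True makes create() return a Stream[AssistantStreamEvent]
# of server-sent events instead of a Run. IDs are hypothetical placeholders.
from openai import OpenAI

client = OpenAI()

events = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    stream=True,
)
for event in events:
    # Each item is an AssistantStreamEvent; `event.event` names the SSE event type.
    print(event.event)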
+ + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. 
Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: bool, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. 
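# Editor's note: illustrative sketch, not part of the diff, of the new `tool_choice`
# and `response_format` parameters documented above. The assistant ID and function
# name are hypothetical; the JSON-mode caveat (you must also ask for JSON in the
# instructions or a message) applies as described.
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    # Force a specific function tool rather than letting the model choose:
    tool_choice={"type": "function", "function": {"name": "my_function"}},
    # Or request JSON mode instead:
    # response_format={"type": "json_object"},
)
print(run.status)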
+ instructions: Overrides the [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) of the assistant. This is useful for modifying the behavior on a per-run basis. + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 @@ -93,9 +461,46 @@ def create( model associated with the assistant. If not, the model associated with the assistant will be used. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. 
Use this to + control the intial context window of the run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -104,19 +509,83 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/runs", body=maybe_transform( { "assistant_id": assistant_id, "additional_instructions": additional_instructions, + "additional_messages": additional_messages, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "tool_choice": tool_choice, "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, }, run_create_params.RunCreateParams, ), @@ -124,6 +593,8 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], ) def retrieve( @@ -154,7 +625,7 @@ def retrieve( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}/runs/{run_id}", options=make_request_options( @@ -197,7 +668,7 @@ def update( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/runs/{run_id}", body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), @@ -252,7 +723,7 @@ def list( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/runs", page=SyncCursorPage[Run], @@ -302,7 +773,7 @@ def cancel( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/runs/{run_id}/cancel", options=make_request_options( @@ -311,12 +782,51 @@ def cancel( cast_to=Run, ) - def submit_tool_outputs( + def create_and_poll( self, - run_id: str, *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = 
NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, thread_id: str, - tool_outputs: List[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -325,98 +835,1489 @@ def submit_tool_outputs( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Run: """ - When a run has the `status: "requires_action"` and `required_action.type` is - `submit_tool_outputs`, this endpoint can be used to submit the outputs from the - tool calls once they're all completed. All outputs must be submitted in a single - request. - - Args: - tool_outputs: A list of tools for which the outputs are being submitted. - - extra_headers: Send extra headers + A helper to create a run an poll for a terminal state. 
More information on Run + lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.create( + thread_id=thread_id, + assistant_id=assistant_id, + additional_instructions=additional_instructions, + additional_messages=additional_messages, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + tool_choice=tool_choice, + # We assume we are not streaming when polling + stream=False, + tools=tools, + truncation_strategy=truncation_strategy, + top_p=top_p, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.poll( + run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + poll_interval_ms=poll_interval_ms, + timeout=timeout, + ) - extra_query: Add additional query parameters to the request + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a Run stream""" + ... 
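# Editor's note: illustrative sketch, not part of the diff, of the new create_and_poll()
# helper implemented above -- it creates a run and then polls until the run reaches a
# terminal state. IDs and the poll interval are hypothetical placeholders.
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create_and_poll(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    instructions="Answer using the attached files only.",
    poll_interval_ms=500,
)
print(run.status)  # e.g. "completed", "requires_action", "failed"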
- extra_body: Add additional JSON properties to the request + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + ... 
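These `create_and_stream` overloads are kept only for backwards compatibility; the deprecation notice points callers at `stream`, which takes the same arguments. The `event_handler` parameter in the second overload expects an `AssistantEventHandler` subclass. A sketch of such a handler passed to the non-deprecated `stream` helper, with placeholder thread/assistant IDs:

```python
from typing_extensions import override

from openai import OpenAI, AssistantEventHandler

client = OpenAI()


class EventHandler(AssistantEventHandler):
    """Receives typed callbacks as assistant stream events arrive."""

    @override
    def on_text_created(self, text) -> None:
        print("\nassistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        print(delta.value, end="", flush=True)


with client.beta.threads.runs.stream(
    thread_id="thread_abc123",     # placeholder
    assistant_id="asst_abc123",    # placeholder
    event_handler=EventHandler(),  # sets X-Stainless-Custom-Event-Handler: true
) as stream:
    stream.until_done()
```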
- timeout: Override the client-level default timeout for this request, in seconds - """ + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._post( - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + f"/threads/{thread_id}/runs", body=maybe_transform( - {"tool_outputs": tool_outputs}, run_submit_tool_outputs_params.RunSubmitToolOutputsParams + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], ) + return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) + + def poll( + self, + run_id: str, + thread_id: str, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to poll a run status until it reaches a terminal state. 
More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} + if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) -class AsyncRuns(AsyncAPIResource): - @cached_property - def steps(self) -> AsyncSteps: - return AsyncSteps(self._client) + terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} + while True: + response = self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) - @cached_property - def with_raw_response(self) -> AsyncRunsWithRawResponse: - return AsyncRunsWithRawResponse(self) + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run - @cached_property - def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: - return AsyncRunsWithStreamingResponse(self) + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 - async def create( + self._sleep(poll_interval_ms / 1000) + + @overload + def stream( self, - thread_id: str, *, assistant_id: str, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, - tools: Optional[List[run_create_params.Tool]] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - Create a run. + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a Run stream""" + ... 
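`poll` re-fetches the run via `with_raw_response.retrieve` so it can honour the server's `openai-poll-after-ms` hint (falling back to 1000 ms) unless the caller passes `poll_interval_ms`. A sketch of driving a tool-call loop with it, using the `submit_tool_outputs_and_poll` helper that appears later in this hunk; the IDs and the tool result are placeholders:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123", assistant_id="asst_abc123"
)

# Block until the run leaves the in-progress states; poll every 500 ms
# instead of honouring the server's openai-poll-after-ms hint.
run = client.beta.threads.runs.poll(
    run.id, thread_id="thread_abc123", poll_interval_ms=500
)

while run.status == "requires_action":
    tool_outputs = [
        {"tool_call_id": call.id, "output": "42"}  # placeholder tool result
        for call in run.required_action.submit_tool_outputs.tool_calls
    ]
    # Submits the outputs, then polls again until the next terminal state.
    run = client.beta.threads.runs.submit_tool_outputs_and_poll(
        run_id=run.id, thread_id="thread_abc123", tool_outputs=tool_outputs
    )

print(run.status)
```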
- Args: - assistant_id: The ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to - execute this run. + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + ... 
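The concrete `stream` implementation that follows forces `stream=True` on the request and wraps it in an `AssistantStreamManager`, so events only start flowing once the manager is entered. A sketch of consuming the event stream without a custom handler, mirroring the SDK's streaming-helper iteration pattern (IDs are placeholders):

```python
from openai import OpenAI

client = OpenAI()

with client.beta.threads.runs.stream(
    thread_id="thread_abc123",   # placeholder
    assistant_id="asst_abc123",  # placeholder
) as stream:
    for event in stream:
        # Every AssistantStreamEvent carries its SSE event name and payload.
        if event.event == "thread.message.delta" and event.data.delta.content:
            print(event.data.delta.content[0].text)
        elif event.event == "thread.run.completed":
            print("run completed")
```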
+ + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. 
+ + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def submit_tool_outputs_and_poll( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to submit a tool output to a run and poll for a terminal run state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. 
More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = partial( + self._post, + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler()) + + +class AsyncRuns(AsyncAPIResource): + @cached_property + def steps(self) -> AsyncSteps: + return AsyncSteps(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self) + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a run. 
+ + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. 
`required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. 
+ + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. 
So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: bool, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create( + self, + thread_id: str, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) - additional_instructions: Appends additional instructions at the end of the instructions for the run. This - is useful for modifying the behavior on a per-run basis without overriding other - instructions. 
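The async `create` mirrors the sync version: with `stream=True` it returns an `AsyncStream[AssistantStreamEvent]` rather than a `Run`. A sketch that combines this with the JSON-mode `response_format` described in the docstrings above; per those docs the model still has to be told to produce JSON, done here via the run-level `instructions` override (IDs are placeholders):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    stream = await client.beta.threads.runs.create(
        thread_id="thread_abc123",   # placeholder
        assistant_id="asst_abc123",  # placeholder
        stream=True,
        response_format={"type": "json_object"},
        # JSON mode requires explicitly instructing the model to emit JSON.
        instructions="Reply with a single JSON object.",
    )
    async for event in stream:
        print(event.event)


asyncio.run(main())
```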
+ async def retrieve( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Retrieves a run. - instructions: Overrides the - [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) - of the assistant. This is useful for modifying the behavior on a per-run basis. + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + async def update( + self, + run_id: str, + *, + thread_id: str, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Modifies a run. + Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. - model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to - be used to execute this run. If a value is provided here, it will override the - model associated with the assistant. If not, the model associated with the - assistant will be used. + extra_headers: Send extra headers - tools: Override the tools the assistant can use for this run. This is useful for - modifying the behavior on a per-run basis. 
+ extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}", + body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: + """ + Returns a list of runs belonging to a thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs", + page=AsyncCursorPage[Run], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + run_list_params.RunListParams, + ), + ), + model=Run, + ) + + async def cancel( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Cancels a run that is `in_progress`. + Args: extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -427,17 +2328,305 @@ async def create( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + async def create_and_poll( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a run an poll for a terminal state. 
More information on Run + lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.create( + thread_id=thread_id, + assistant_id=assistant_id, + additional_instructions=additional_instructions, + additional_messages=additional_messages, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + tool_choice=tool_choice, + # We assume we are not streaming when polling + stream=False, + tools=tools, + truncation_strategy=truncation_strategy, + top_p=top_p, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.poll( + run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + poll_interval_ms=poll_interval_ms, + timeout=timeout, + ) + + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a Run stream""" + ... 
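# --- Usage sketch: the `create_and_poll` helper implemented above ---
# A minimal sketch, assuming an `AsyncOpenAI` client and pre-existing thread and
# assistant IDs (the "thread_..."/"asst_..." strings below are placeholders).
# `create_and_poll` creates the run with `stream=False` and then delegates to `poll`,
# returning once the run reaches a terminal state.
import asyncio

from openai import AsyncOpenAI


async def run_until_terminal() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    run = await client.beta.threads.runs.create_and_poll(
        thread_id="thread_abc123",   # placeholder thread ID
        assistant_id="asst_abc123",  # placeholder assistant ID
        poll_interval_ms=500,        # optional override of the polling interval
    )
    print(run.status)  # e.g. "completed" or "requires_action"


asyncio.run(run_until_terminal())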
+ + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... 
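# --- Sketch: a custom event handler for the streaming helpers above ---
# The `event_handler` parameter in the overload above accepts a subclass of
# `AsyncAssistantEventHandler` (imported here from the library's streaming helpers,
# matching the import path used elsewhere in this diff). The hook names below mirror
# the documented handler API; the print formatting is purely illustrative.
from openai.lib.streaming import AsyncAssistantEventHandler


class PrintingHandler(AsyncAssistantEventHandler):
    async def on_text_created(self, text) -> None:
        # Called once when the assistant starts a new text block.
        print("\nassistant > ", end="", flush=True)

    async def on_text_delta(self, delta, snapshot) -> None:
        # Called for each streamed text fragment; `snapshot` is the accumulated text.
        print(delta.value, end="", flush=True)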
+ + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( f"/threads/{thread_id}/runs", body=maybe_transform( { "assistant_id": assistant_id, "additional_instructions": additional_instructions, + "additional_messages": additional_messages, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, }, run_create_params.RunCreateParams, ), @@ -445,51 +2634,271 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - async def retrieve( + async def poll( self, run_id: str, - *, thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, ) -> Run: """ - Retrieves a run. + A helper to poll a run status until it reaches a terminal state. More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} - Args: - extra_headers: Send extra headers + if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - extra_query: Add additional query parameters to the request + terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} + while True: + response = await self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) - extra_body: Add additional JSON properties to the request + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run - timeout: Override the client-level default timeout for this request, in seconds - """ + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a Run stream""" + ... + + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... 
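# --- Usage sketch: the `stream` helper declared above ---
# A minimal sketch, assuming an `AsyncOpenAI` client and placeholder IDs. The manager
# returned by `stream(...)` is used as an async context manager; events can either be
# iterated directly, as below, or routed through a custom `event_handler` like the one
# sketched earlier.
import asyncio

from openai import AsyncOpenAI


async def stream_run() -> None:
    client = AsyncOpenAI()
    async with client.beta.threads.runs.stream(
        thread_id="thread_abc123",   # placeholder
        assistant_id="asst_abc123",  # placeholder
    ) as stream:
        async for event in stream:
            # Each item is an AssistantStreamEvent; dispatch on `event.event` as needed.
            print(event.event)


asyncio.run(stream_run())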
+ + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._get( - f"/threads/{thread_id}/runs/{run_id}", + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - async def 
update( + @overload + async def submit_tool_outputs( self, run_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -498,13 +2907,17 @@ async def update( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Run: """ - Modifies a run. + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. Args: - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. extra_headers: Send extra headers @@ -514,54 +2927,35 @@ async def update( timeout: Override the client-level default timeout for this request, in seconds """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/runs/{run_id}", - body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - ) + ... - def list( + @overload + async def submit_tool_outputs( self, - thread_id: str, + run_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: + ) -> AsyncStream[AssistantStreamEvent]: """ - Returns a list of runs belonging to a thread. + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. 
For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. + tool_outputs: A list of tools for which the outputs are being submitted. extra_headers: Send extra headers @@ -571,46 +2965,36 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/runs", - page=AsyncCursorPage[Run], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - run_list_params.RunListParams, - ), - ), - model=Run, - ) + ... - async def cancel( + @overload + async def submit_tool_outputs( self, run_id: str, *, thread_id: str, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: + ) -> Run | AsyncStream[AssistantStreamEvent]: """ - Cancels a run that is `in_progress`. + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -619,25 +3003,52 @@ async def cancel( timeout: Override the client-level default timeout for this request, in seconds """ + ... 
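# --- Usage sketch: `submit_tool_outputs` (overloads above) ---
# A minimal sketch, assuming a run that has reported `status == "requires_action"`.
# The tool call ID and output string are placeholders; the default non-streaming form
# returns a `Run`, while `stream=True` returns an event stream instead.
import asyncio

from openai import AsyncOpenAI


async def answer_tool_call(thread_id: str, run_id: str) -> None:
    client = AsyncOpenAI()
    run = await client.beta.threads.runs.submit_tool_outputs(
        run_id,
        thread_id=thread_id,
        tool_outputs=[
            {"tool_call_id": "call_abc123", "output": "70 degrees and sunny"},  # placeholders
        ],
    )
    print(run.status)


asyncio.run(answer_tool_call("thread_abc123", "run_abc123"))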
+ + @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( - f"/threads/{thread_id}/runs/{run_id}/cancel", + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=await async_maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], ) - async def submit_tool_outputs( + async def submit_tool_outputs_and_poll( self, - run_id: str, *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, thread_id: str, - tool_outputs: List[run_submit_tool_outputs_params.ToolOutput], + poll_interval_ms: int | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -646,37 +3057,124 @@ async def submit_tool_outputs( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Run: """ - When a run has the `status: "requires_action"` and `required_action.type` is - `submit_tool_outputs`, this endpoint can be used to submit the outputs from the - tool calls once they're all completed. All outputs must be submitted in a single - request. - - Args: - tool_outputs: A list of tools for which the outputs are being submitted. - - extra_headers: Send extra headers + A helper to submit a tool output to a run and poll for a terminal run state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) - extra_query: Add additional query parameters to the request + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... - extra_body: Add additional JSON properties to the request + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... - timeout: Override the client-level default timeout for this request, in seconds + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. 
More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( + + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", body=maybe_transform( - {"tool_outputs": tool_outputs}, run_submit_tool_outputs_params.RunSubmitToolOutputsParams + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) class RunsWithRawResponse: diff --git a/src/openai/resources/beta/threads/runs/steps.py b/src/openai/resources/beta/threads/runs/steps.py index 539745a594..512008939c 100644 --- a/src/openai/resources/beta/threads/runs/steps.py +++ b/src/openai/resources/beta/threads/runs/steps.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -17,7 +17,8 @@ AsyncPaginator, make_request_options, ) -from .....types.beta.threads.runs import RunStep, step_list_params +from .....types.beta.threads.runs import step_list_params +from .....types.beta.threads.runs.run_step import RunStep __all__ = ["Steps", "AsyncSteps"] @@ -62,7 +63,7 @@ def retrieve( raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") if not step_id: raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( @@ -119,7 +120,7 @@ def list( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/runs/{run_id}/steps", page=SyncCursorPage[RunStep], @@ -182,7 +183,7 @@ async def retrieve( raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") if not step_id: raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( @@ -239,7 +240,7 @@ def list( raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") if not run_id: raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/runs/{run_id}/steps", page=AsyncCursorPage[RunStep], diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 0372ae2f66..36cdd03f91 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -1,8 +1,10 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import List, Optional +from typing import Union, Iterable, Optional, overload +from functools import partial +from typing_extensions import Literal import httpx @@ -24,14 +26,17 @@ AsyncMessagesWithStreamingResponse, ) from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import maybe_transform +from ...._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) from .runs.runs import Runs, AsyncRuns from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream from ....types.beta import ( - Thread, - ThreadDeleted, thread_create_params, thread_update_params, thread_create_and_run_params, @@ -39,8 +44,20 @@ from ...._base_client import ( make_request_options, ) -from .messages.messages import Messages, AsyncMessages -from ....types.beta.threads import Run +from ....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) +from ....types.beta.thread import Thread +from ....types.beta.threads.run import Run +from ....types.beta.thread_deleted import ThreadDeleted +from ....types.beta.assistant_stream_event import AssistantStreamEvent +from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["Threads", "AsyncThreads"] @@ -65,8 +82,9 @@ def with_streaming_response(self) -> ThreadsWithStreamingResponse: def create( self, *, - messages: List[thread_create_params.Message] | NotGiven = NOT_GIVEN, + messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -86,6 +104,11 @@ def create( can be a maximum of 64 characters long and values can be a maxium of 512 characters long. + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -94,13 +117,14 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( "/threads", body=maybe_transform( { "messages": messages, "metadata": metadata, + "tool_resources": tool_resources, }, thread_create_params.ThreadCreateParams, ), @@ -135,7 +159,7 @@ def retrieve( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}", options=make_request_options( @@ -149,6 +173,7 @@ def update( thread_id: str, *, metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -165,6 +190,11 @@ def update( can be a maximum of 64 characters long and values can be a maxium of 512 characters long. + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -175,10 +205,16 @@ def update( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}", - body=maybe_transform({"metadata": metadata}, thread_update_params.ThreadUpdateParams), + body=maybe_transform( + { + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_update_params.ThreadUpdateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -210,7 +246,7 @@ def delete( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._delete( f"/threads/{thread_id}", options=make_request_options( @@ -219,15 +255,51 @@ def delete( cast_to=ThreadDeleted, ) + @overload def create_and_run( self, *, assistant_id: str, instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tools: Optional[List[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -246,6 +318,168 @@ def create_and_run( instructions: Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis. + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. 
So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create_and_run( + self, + *, + assistant_id: str, + stream: Literal[True], + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. 
+ The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 @@ -256,11 +490,53 @@ def create_and_run( model associated with the assistant. If not, the model associated with the assistant will be used. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + thread: If no thread is provided, an empty thread will be created. + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -269,17 +545,503 @@ def create_and_run( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + ... 
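# --- Usage sketch: streaming `create_and_run` (overload ending above) ---
# A minimal sketch on the synchronous client, assuming a placeholder assistant ID and
# prompt. With `stream=True` the method returns a `Stream[AssistantStreamEvent]` that
# is consumed by plain iteration.
from openai import OpenAI

client = OpenAI()
events = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # placeholder
    stream=True,
    thread={
        "messages": [
            {"role": "user", "content": "Summarize the notes I uploaded."},  # placeholder prompt
        ]
    },
)
for event in events:
    # Server-sent events arrive until the run reaches a terminal state.
    print(event.event)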
+ + @overload + def create_and_run( + self, + *, + assistant_id: str, + stream: bool, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. 
If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
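A sketch of the streaming path documented in the overload above: with `stream=True` the call returns a `Stream[AssistantStreamEvent]` that is iterated until the run reaches a terminal state. The client and assistant ID are the same placeholders as in the earlier sketch.

stream = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "Write a one-line status update."}]},
    stream=True,
)
for event in stream:
    # Each item is a server-sent AssistantStreamEvent such as "thread.run.created"
    # or "thread.message.delta"; the stream ends once the run hits a terminal state.
    print(event.event)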
+ + @required_args(["assistant_id"], ["assistant_id", "stream"]) + def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( "/threads/runs", body=maybe_transform( { "assistant_id": assistant_id, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + 
"gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. + More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if 
you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a thread and stream the run back""" + ... + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... 
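The polling and streaming helpers declared above wrap this pattern: `create_and_run_poll` blocks until the run reaches a terminal state, while `create_and_run_stream` returns a context manager that feeds events to an `AssistantEventHandler`. A sketch under the same placeholder assumptions (client, assistant ID, prompts):

from typing_extensions import override

from openai import AssistantEventHandler

# Block until the run finishes, checking status roughly every 500ms.
run = client.beta.threads.create_and_run_poll(
    assistant_id="asst_abc123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "List three follow-up tasks."}]},
    poll_interval_ms=500,
)
print(run.status)


class TextPrinter(AssistantEventHandler):
    @override
    def on_text_delta(self, delta, snapshot) -> None:
        # Print streamed text fragments as they arrive.
        print(delta.value, end="", flush=True)


with client.beta.threads.create_and_run_stream(
    assistant_id="asst_abc123",
    thread={"messages": [{"role": "user", "content": "Draft a short reply."}]},
    event_handler=TextPrinter(),
) as stream:
    stream.until_done()  # drain the event stream through the handler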
+ + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + "/threads/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, "thread": thread, "tools": tools, + "tool": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, }, thread_create_and_run_params.ThreadCreateAndRunParams, ), @@ -287,7 +1049,10 @@ def create_and_run( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], ) + return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) class AsyncThreads(AsyncAPIResource): @@ -310,8 +1075,9 @@ def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse: async def create( self, *, - messages: List[thread_create_params.Message] | NotGiven = NOT_GIVEN, + messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: 
Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -331,6 +1097,11 @@ async def create( can be a maximum of 64 characters long and values can be a maxium of 512 characters long. + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -339,13 +1110,14 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( "/threads", - body=maybe_transform( + body=await async_maybe_transform( { "messages": messages, "metadata": metadata, + "tool_resources": tool_resources, }, thread_create_params.ThreadCreateParams, ), @@ -380,7 +1152,7 @@ async def retrieve( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( f"/threads/{thread_id}", options=make_request_options( @@ -394,6 +1166,7 @@ async def update( thread_id: str, *, metadata: Optional[object] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -410,6 +1183,11 @@ async def update( can be a maximum of 64 characters long and values can be a maxium of 512 characters long. + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -420,10 +1198,16 @@ async def update( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/threads/{thread_id}", - body=maybe_transform({"metadata": metadata}, thread_update_params.ThreadUpdateParams), + body=await async_maybe_transform( + { + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_update_params.ThreadUpdateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -455,7 +1239,7 @@ async def delete( """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._delete( f"/threads/{thread_id}", options=make_request_options( @@ -464,15 +1248,51 @@ async def delete( cast_to=ThreadDeleted, ) + @overload async def create_and_run( self, *, assistant_id: str, instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tools: Optional[List[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -491,6 +1311,168 @@ async def create_and_run( instructions: Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis. + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. 
So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create_and_run( + self, + *, + assistant_id: str, + stream: Literal[True], + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. 
+ The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 @@ -501,11 +1483,53 @@ async def create_and_run( model associated with the assistant. If not, the model associated with the assistant will be used. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + thread: If no thread is provided, an empty thread will be created. + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -514,17 +1538,507 @@ async def create_and_run( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + ... 
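The async client mirrors the sync one: with `stream=True`, `create_and_run` returns an `AsyncStream[AssistantStreamEvent]` consumed with `async for`. A sketch assuming an `AsyncOpenAI` client and the same placeholder assistant ID:

import asyncio

from openai import AsyncOpenAI

async_client = AsyncOpenAI()  # picks up OPENAI_API_KEY from the environment


async def stream_events() -> None:
    stream = await async_client.beta.threads.create_and_run(
        assistant_id="asst_abc123",  # placeholder assistant ID
        thread={"messages": [{"role": "user", "content": "Write a haiku about the sea."}]},
        stream=True,
    )
    async for event in stream:
        print(event.event)


asyncio.run(stream_events())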
+ + @overload + async def create_and_run( + self, + *, + assistant_id: str, + stream: bool, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. 
+ + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: If no thread is provided, an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
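For the common non-streaming case the async call is a single await; a sketch reusing the `async_client` and placeholder values from the previous example:

async def run_once() -> None:
    run = await async_client.beta.threads.create_and_run(
        assistant_id="asst_abc123",  # placeholder assistant ID
        thread={"messages": [{"role": "user", "content": "Summarize the attached notes."}]},
    )
    # The run is not awaited to completion here; use create_and_run_poll to block until done.
    print(run.status)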
+ + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( + "/threads/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.runs.poll( + run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms + ) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a thread and stream the run back""" + ... 
+ + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... 
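A sketch of the async streaming helper declared above, using a custom `AsyncAssistantEventHandler` whose hooks are coroutines; the manager is entered with `async with` (same placeholder client and IDs as before):

from typing_extensions import override

from openai import AsyncAssistantEventHandler


class AsyncTextPrinter(AsyncAssistantEventHandler):
    @override
    async def on_text_delta(self, delta, snapshot) -> None:
        # Print streamed text fragments as they arrive.
        print(delta.value, end="", flush=True)


async def stream_with_handler() -> None:
    async with async_client.beta.threads.create_and_run_stream(
        assistant_id="asst_abc123",  # placeholder assistant ID
        thread={"messages": [{"role": "user", "content": "Draft a short reply."}]},
        event_handler=AsyncTextPrinter(),
    ) as stream:
        await stream.until_done()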
+ + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] + | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( "/threads/runs", body=maybe_transform( { "assistant_id": assistant_id, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, "thread": thread, "tools": tools, + "tool": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, }, thread_create_and_run_params.ThreadCreateAndRunParams, ), @@ -532,7 +2046,10 @@ async def create_and_run( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) class ThreadsWithRawResponse: diff --git a/src/openai/resources/beta/vector_stores/__init__.py b/src/openai/resources/beta/vector_stores/__init__.py new file mode 100644 index 0000000000..96ae16c302 --- /dev/null +++ b/src/openai/resources/beta/vector_stores/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) + +__all__ = [ + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "FileBatches", + "AsyncFileBatches", + "FileBatchesWithRawResponse", + "AsyncFileBatchesWithRawResponse", + "FileBatchesWithStreamingResponse", + "AsyncFileBatchesWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", +] diff --git a/src/openai/resources/beta/vector_stores/file_batches.py b/src/openai/resources/beta/vector_stores/file_batches.py new file mode 100644 index 0000000000..f1ced51700 --- /dev/null +++ b/src/openai/resources/beta/vector_stores/file_batches.py @@ -0,0 +1,736 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import asyncio +from typing import List, Iterable +from typing_extensions import Literal +from concurrent.futures import Future, ThreadPoolExecutor, as_completed + +import httpx +import sniffio + +from .... import _legacy_response +from ....types import FileObject +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ...._utils import ( + is_given, + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.vector_stores import file_batch_create_params, file_batch_list_files_params +from ....types.beta.vector_stores.vector_store_file import VectorStoreFile +from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch + +__all__ = ["FileBatches", "AsyncFileBatches"] + + +class FileBatches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FileBatchesWithRawResponse: + return FileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FileBatchesWithStreamingResponse: + return FileBatchesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. 
+ + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=maybe_transform({"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + ) + # TODO: don't poll unless necessary?? + return self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
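# --- Editor's sketch (not part of the diff): using the `create_and_poll` and
# `list_files` helpers defined above. Assumes the resource is exposed as
# `client.beta.vector_stores.file_batches`; all IDs are placeholders.
from openai import OpenAI

client = OpenAI()

# Create a batch from already-uploaded file IDs and block until every file
# has finished processing (or failed).
batch = client.beta.vector_stores.file_batches.create_and_poll(
    "vs_123",
    file_ids=["file_abc", "file_def"],
)

# List only the files in this batch that failed to process.
failed = client.beta.vector_stores.file_batches.list_files(
    batch.id,
    vector_store_id="vs_123",
    filter="failed",
)
for vs_file in failed:
    print(vs_file.id, vs_file.last_error)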
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + continue + + return batch + + def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. 
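# --- Editor's sketch (not part of the diff): the sync `upload_and_poll` helper
# described above uploads local files concurrently (with `purpose="assistants"`)
# and then creates and polls a batch. Assumes the
# `client.beta.vector_stores.file_batches` accessor; paths and IDs are placeholders.
from openai import OpenAI

client = OpenAI()

paths = ["docs/handbook.pdf", "docs/faq.md"]
batch = client.beta.vector_stores.file_batches.upload_and_poll(
    vector_store_id="vs_123",
    files=[open(p, "rb") for p in paths],  # uploaded concurrently, up to max_concurrency at a time
    max_concurrency=5,
)
print(batch.file_counts)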
+ """ + results: list[FileObject] = [] + + with ThreadPoolExecutor(max_workers=max_concurrency) as executor: + futures: list[Future[FileObject]] = [ + executor.submit( + self._client.files.create, + file=file, + purpose="assistants", + ) + for file in files + ] + + for future in as_completed(futures): + exc = future.exception() + if exc: + raise exc + + results.append(future.result()) + + batch = self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in results)], + poll_interval_ms=poll_interval_ms, + ) + return batch + + +class AsyncFileBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFileBatchesWithRawResponse: + return AsyncFileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse: + return AsyncFileBatchesWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. + + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=await async_maybe_transform({"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = await self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + ) + # TODO: don't poll unless necessary?? + return await self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + async def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. 
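# --- Editor's sketch (not part of the diff): polling an existing batch with the
# async client, matching the `poll` signature above. Note that `poll` returns
# once nothing is in progress, even if some files failed, so callers should
# inspect `file_counts` afterwards. IDs are placeholders.
import asyncio
from openai import AsyncOpenAI

async def wait_for_batch() -> None:
    client = AsyncOpenAI()
    batch = await client.beta.vector_stores.file_batches.poll(
        "vsfb_123",
        vector_store_id="vs_123",
        poll_interval_ms=500,  # otherwise the server-suggested interval (or 1s) is used
    )
    print(batch.file_counts)

asyncio.run(wait_for_batch())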
+ """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + continue + + return batch + + async def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. + """ + uploaded_files: list[FileObject] = [] + + async_library = sniffio.current_async_library() + + if async_library == "asyncio": + + async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None: + async with semaphore: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + semaphore = asyncio.Semaphore(max_concurrency) + + tasks = [asyncio_upload_file(semaphore, file) for file in files] + + await asyncio.gather(*tasks) + elif async_library == "trio": + # We only import if the library is being used. + # We support Python 3.7 so are using an older version of trio that does not have type information + import trio # type: ignore # pyright: ignore[reportMissingTypeStubs] + + async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None: + async with limiter: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + limiter = trio.CapacityLimiter(max_concurrency) + + async with trio.open_nursery() as nursery: + for file in files: + nursery.start_soon(trio_upload_file, limiter, file) # pyright: ignore [reportUnknownMemberType] + else: + raise RuntimeError( + f"Async runtime {async_library} is not supported yet. 
Only asyncio or trio is supported", + ) + + batch = await self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in uploaded_files)], + poll_interval_ms=poll_interval_ms, + ) + return batch + + +class FileBatchesWithRawResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.to_raw_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithRawResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.async_to_raw_response_wrapper( + file_batches.list_files, + ) + + +class FileBatchesWithStreamingResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = to_streamed_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithStreamingResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = async_to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = async_to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = async_to_streamed_response_wrapper( + file_batches.list_files, + ) diff --git a/src/openai/resources/beta/assistants/files.py b/src/openai/resources/beta/vector_stores/files.py similarity index 56% rename from src/openai/resources/beta/assistants/files.py rename to src/openai/resources/beta/vector_stores/files.py index c21465036a..5c3db27619 100644 --- a/src/openai/resources/beta/assistants/files.py +++ b/src/openai/resources/beta/vector_stores/files.py @@ -1,14 +1,19 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing_extensions import Literal +from typing import TYPE_CHECKING +from typing_extensions import Literal, assert_never import httpx from .... 
import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import maybe_transform +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ...._utils import ( + is_given, + maybe_transform, + async_maybe_transform, +) from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper @@ -17,7 +22,9 @@ AsyncPaginator, make_request_options, ) -from ....types.beta.assistants import AssistantFile, FileDeleteResponse, file_list_params, file_create_params +from ....types.beta.vector_stores import file_list_params, file_create_params +from ....types.beta.vector_stores.vector_store_file import VectorStoreFile +from ....types.beta.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted __all__ = ["Files", "AsyncFiles"] @@ -33,7 +40,7 @@ def with_streaming_response(self) -> FilesWithStreamingResponse: def create( self, - assistant_id: str, + vector_store_id: str, *, file_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -42,16 +49,16 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: + ) -> VectorStoreFile: """ - Create an assistant file by attaching a - [File](https://platform.openai.com/docs/api-reference/files) to an - [assistant](https://platform.openai.com/docs/api-reference/assistants). + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). Args: - file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID (with - `purpose="assistants"`) that the assistant should use. Useful for tools like - `retrieval` and `code_interpreter` that can access files. + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. extra_headers: Send extra headers @@ -61,32 +68,32 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( - f"/assistants/{assistant_id}/files", + f"/vector_stores/{vector_store_id}/files", body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=AssistantFile, + cast_to=VectorStoreFile, ) def retrieve( self, file_id: str, *, - assistant_id: str, + vector_store_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: + ) -> VectorStoreFile: """ - Retrieves an AssistantFile. + Retrieves a vector store file. Args: extra_headers: Send extra headers @@ -97,25 +104,26 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( - f"/assistants/{assistant_id}/files/{file_id}", + f"/vector_stores/{vector_store_id}/files/{file_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=AssistantFile, + cast_to=VectorStoreFile, ) def list( self, - assistant_id: str, + vector_store_id: str, *, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -124,9 +132,9 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[AssistantFile]: + ) -> SyncCursorPage[VectorStoreFile]: """ - Returns a list of assistant files. + Returns a list of vector store files. Args: after: A cursor for use in pagination. `after` is an object ID that defines your place @@ -139,6 +147,8 @@ def list( ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. 
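# --- Editor's sketch (not part of the diff): cursor pagination over vector
# store files using the `filter`, `limit`, and `order` parameters documented
# above. Iterating the returned page is assumed to auto-fetch subsequent pages,
# as cursor pages do elsewhere in this SDK; IDs are placeholders.
from openai import OpenAI

client = OpenAI()

page = client.beta.vector_stores.files.list(
    "vs_123",
    filter="completed",
    limit=50,
    order="desc",
)
for vs_file in page:
    print(vs_file.id, vs_file.status)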
@@ -153,12 +163,12 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( - f"/assistants/{assistant_id}/files", - page=SyncCursorPage[AssistantFile], + f"/vector_stores/{vector_store_id}/files", + page=SyncCursorPage[VectorStoreFile], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -168,29 +178,34 @@ def list( { "after": after, "before": before, + "filter": filter, "limit": limit, "order": order, }, file_list_params.FileListParams, ), ), - model=AssistantFile, + model=VectorStoreFile, ) def delete( self, file_id: str, *, - assistant_id: str, + vector_store_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FileDeleteResponse: - """ - Delete an assistant file. + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. Args: extra_headers: Send extra headers @@ -201,17 +216,103 @@ def delete( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._delete( - f"/assistants/{assistant_id}/files/{file_id}", + f"/vector_stores/{vector_store_id}/files/{file_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=FileDeleteResponse, + cast_to=VectorStoreFileDeleted, + ) + + def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + self.create(vector_store_id=vector_store_id, file_id=file_id) + + return self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. 
+ + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). + """ + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create(vector_store_id=vector_store_id, file_id=file_obj.id) + + def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + poll_interval_ms=poll_interval_ms, ) @@ -226,7 +327,7 @@ def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: async def create( self, - assistant_id: str, + vector_store_id: str, *, file_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -235,16 +336,16 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: + ) -> VectorStoreFile: """ - Create an assistant file by attaching a - [File](https://platform.openai.com/docs/api-reference/files) to an - [assistant](https://platform.openai.com/docs/api-reference/assistants). + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). Args: - file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID (with - `purpose="assistants"`) that the assistant should use. Useful for tools like - `retrieval` and `code_interpreter` that can access files. + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. 
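# --- Editor's sketch (not part of the diff): attaching a single local file with
# the sync `upload_and_poll` helper added above; the path and vector store ID
# are placeholders.
from openai import OpenAI

client = OpenAI()

with open("notes.txt", "rb") as fh:
    vs_file = client.beta.vector_stores.files.upload_and_poll(
        vector_store_id="vs_123",
        file=fh,
    )
# Returns even if processing failed; check status/last_error as noted above.
print(vs_file.status, vs_file.last_error)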
extra_headers: Send extra headers @@ -254,32 +355,32 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( - f"/assistants/{assistant_id}/files", - body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), + f"/vector_stores/{vector_store_id}/files", + body=await async_maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=AssistantFile, + cast_to=VectorStoreFile, ) async def retrieve( self, file_id: str, *, - assistant_id: str, + vector_store_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: + ) -> VectorStoreFile: """ - Retrieves an AssistantFile. + Retrieves a vector store file. Args: extra_headers: Send extra headers @@ -290,25 +391,26 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( - f"/assistants/{assistant_id}/files/{file_id}", + f"/vector_stores/{vector_store_id}/files/{file_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=AssistantFile, + cast_to=VectorStoreFile, ) def list( self, - assistant_id: str, + vector_store_id: str, *, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -317,9 +419,9 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[AssistantFile, AsyncCursorPage[AssistantFile]]: + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: """ - Returns a list of assistant files. + Returns a list of vector store files. Args: after: A cursor for use in pagination. 
`after` is an object ID that defines your place @@ -332,6 +434,8 @@ def list( ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -346,12 +450,12 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( - f"/assistants/{assistant_id}/files", - page=AsyncCursorPage[AssistantFile], + f"/vector_stores/{vector_store_id}/files", + page=AsyncCursorPage[VectorStoreFile], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -361,29 +465,34 @@ def list( { "after": after, "before": before, + "filter": filter, "limit": limit, "order": order, }, file_list_params.FileListParams, ), ), - model=AssistantFile, + model=VectorStoreFile, ) async def delete( self, file_id: str, *, - assistant_id: str, + vector_store_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FileDeleteResponse: - """ - Delete an assistant file. + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. 
Args: extra_headers: Send extra headers @@ -394,17 +503,103 @@ async def delete( timeout: Override the client-level default timeout for this request, in seconds """ - if not assistant_id: - raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._delete( - f"/assistants/{assistant_id}/files/{file_id}", + f"/vector_stores/{vector_store_id}/files/{file_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=FileDeleteResponse, + cast_to=VectorStoreFileDeleted, + ) + + async def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + await self.create(vector_store_id=vector_store_id, file_id=file_id) + + return await self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + async def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. + + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + async def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). 
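# --- Editor's sketch (not part of the diff): the async helpers mirror the sync
# ones; here a previously uploaded file ID is attached to a vector store and
# polled until processing finishes. IDs are placeholders.
import asyncio
from openai import AsyncOpenAI

async def attach_file() -> None:
    client = AsyncOpenAI()
    vs_file = await client.beta.vector_stores.files.create_and_poll(
        "file_abc",
        vector_store_id="vs_123",
    )
    print(vs_file.status)

asyncio.run(attach_file())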
+ """ + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id) + + async def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + poll_interval_ms=poll_interval_ms, ) diff --git a/src/openai/resources/beta/vector_stores/vector_stores.py b/src/openai/resources/beta/vector_stores/vector_stores.py new file mode 100644 index 0000000000..8a177c2864 --- /dev/null +++ b/src/openai/resources/beta/vector_stores/vector_stores.py @@ -0,0 +1,684 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.beta import vector_store_list_params, vector_store_create_params, vector_store_update_params +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.beta.vector_store import VectorStore +from ....types.beta.vector_store_deleted import VectorStoreDeleted + +__all__ = ["VectorStores", "AsyncVectorStores"] + + +class VectorStores(SyncAPIResource): + @cached_property + def files(self) -> Files: + return Files(self._client) + + @cached_property + def file_batches(self) -> FileBatches: + return FileBatches(self._client) + + @cached_property + def with_raw_response(self) -> VectorStoresWithRawResponse: + return VectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VectorStoresWithStreamingResponse: + return VectorStoresWithStreamingResponse(self) + + def create( + self, + *, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + expires_after: The expiration policy for a vector store. 
+ + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/vector_stores", + body=maybe_transform( + { + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
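# --- Editor's sketch (not part of the diff): creating a vector store with the
# optional parameters documented above. The shape of `expires_after` (anchor +
# days) is an assumption about the API, not taken from this diff; names, IDs,
# and metadata values are placeholders.
from openai import OpenAI

client = OpenAI()

vector_store = client.beta.vector_stores.create(
    name="support-articles",
    file_ids=["file_abc"],
    expires_after={"anchor": "last_active_at", "days": 7},  # assumed expiration policy shape
    metadata={"team": "support"},
)
print(vector_store.id)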
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}", + body=maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStore]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=SyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreDeleted, + ) + + +class AsyncVectorStores(AsyncAPIResource): + @cached_property + def files(self) -> AsyncFiles: + return AsyncFiles(self._client) + + @cached_property + def file_batches(self) -> AsyncFileBatches: + return AsyncFileBatches(self._client) + + @cached_property + def with_raw_response(self) -> AsyncVectorStoresWithRawResponse: + return AsyncVectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse: + return AsyncVectorStoresWithStreamingResponse(self) + + async def create( + self, + *, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/vector_stores", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + async def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + async def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, + metadata: Optional[object] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format. Keys + can be a maximum of 64 characters long and values can be a maxium of 512 + characters long. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include before=obj_foo in order to + fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=AsyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + async def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreDeleted, + ) + + +class VectorStoresWithRawResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithRawResponse: + return FileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithRawResponse: + def __init__(self, vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.async_to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithRawResponse: + return AsyncFileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class VectorStoresWithStreamingResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = to_streamed_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> FilesWithStreamingResponse: + return FilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithStreamingResponse: + return FileBatchesWithStreamingResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithStreamingResponse: + def __init__(self, 
vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = async_to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = async_to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = async_to_streamed_response_wrapper( + vector_stores.delete, + ) + + @cached_property + def files(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithStreamingResponse: + return AsyncFileBatchesWithStreamingResponse(self._vector_stores.file_batches) diff --git a/src/openai/resources/chat/__init__.py b/src/openai/resources/chat/__init__.py index a9668053c0..52dfdceacc 100644 --- a/src/openai/resources/chat/__init__.py +++ b/src/openai/resources/chat/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from .chat import ( Chat, diff --git a/src/openai/resources/chat/chat.py b/src/openai/resources/chat/chat.py index b6effa4e63..d14d055506 100644 --- a/src/openai/resources/chat/chat.py +++ b/src/openai/resources/chat/chat.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index f461161ab7..aa25bc1858 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -1,30 +1,34 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Dict, List, Union, Optional, overload +from typing import Dict, List, Union, Iterable, Optional, overload from typing_extensions import Literal import httpx from ... 
import _legacy_response from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import required_args, maybe_transform +from ..._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..._streaming import Stream, AsyncStream -from ...types.chat import ( - ChatCompletion, - ChatCompletionChunk, - ChatCompletionToolParam, - ChatCompletionMessageParam, - ChatCompletionToolChoiceOptionParam, - completion_create_params, -) +from ...types.chat import completion_create_params from ..._base_client import ( make_request_options, ) +from ...types.chat_model import ChatModel +from ...types.chat.chat_completion import ChatCompletion +from ...types.chat.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam __all__ = ["Completions", "AsyncCompletions"] @@ -42,29 +46,11 @@ def with_streaming_response(self) -> CompletionsWithStreamingResponse: def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -74,9 +60,10 @@ def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -130,8 +117,7 @@ def create( logprobs: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of - `message`. This option is currently not available on the `gpt-4-vision-preview` - model. + `message`. max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. 
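# A minimal usage sketch (not part of this patch) of the updated `create` signature above:
# streaming a chat completion and opting into the new `stream_options` parameter. The model
# name and prompt are placeholders, `include_usage` is assumed to be the field exposed by
# `ChatCompletionStreamOptionsParam`, and an `OPENAI_API_KEY` environment variable is assumed.
from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},  # the final chunk then reports token usage
)
for chunk in stream:
    if chunk.choices:  # the usage-only chunk has an empty `choices` list
        print(chunk.choices[0].delta.content or "", end="")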
@@ -152,7 +138,8 @@ def create( [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) response_format: An object specifying the format that the model must output. Compatible with - `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -180,29 +167,31 @@ def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + stream_options: Options for streaming response. Only set this when you set `stream: true`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs - for. + for. A max of 128 functions are supported. - top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 @@ -228,30 +217,12 @@ def create( def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], stream: Literal[True], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -260,9 +231,10 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -323,8 +295,7 @@ def create( logprobs: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of - `message`. This option is currently not available on the `gpt-4-vision-preview` - model. + `message`. max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. @@ -345,7 +316,8 @@ def create( [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) response_format: An object specifying the format that the model must output. Compatible with - `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -366,29 +338,31 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs - for. + for. A max of 128 functions are supported. - top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 @@ -414,30 +388,12 @@ def create( def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], stream: bool, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -446,9 +402,10 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -509,8 +466,7 @@ def create( logprobs: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of - `message`. This option is currently not available on the `gpt-4-vision-preview` - model. + `message`. max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. 
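# A minimal usage sketch (not part of this patch) of the `tools`/`tool_choice` behaviour
# described above. The `get_weather` function schema is a made-up placeholder; `model` also
# accepts plain strings, so any chat model id supported by the API can be substituted.
from openai import OpenAI

client = OpenAI()
completion = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    tool_choice="required",  # per the updated docs, the model must call one or more tools
)
print(completion.choices[0].message.tool_calls)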
@@ -531,7 +487,8 @@ def create( [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) response_format: An object specifying the format that the model must output. Compatible with - `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -552,29 +509,31 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs - for. + for. A max of 128 functions are supported. - top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 @@ -600,29 +559,11 @@ def create( def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -632,9 +573,10 @@ def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -663,6 +605,7 @@ def create( "seed": seed, "stop": stop, "stream": stream, + "stream_options": stream_options, "temperature": temperature, "tool_choice": tool_choice, "tools": tools, @@ -694,29 +637,11 @@ def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: async def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -726,9 +651,10 @@ async def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven 
= NOT_GIVEN, @@ -782,8 +708,7 @@ async def create( logprobs: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of - `message`. This option is currently not available on the `gpt-4-vision-preview` - model. + `message`. max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. @@ -804,7 +729,8 @@ async def create( [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) response_format: An object specifying the format that the model must output. Compatible with - `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -832,29 +758,31 @@ async def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + stream_options: Options for streaming response. Only set this when you set `stream: true`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs - for. + for. A max of 128 functions are supported. - top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 @@ -880,30 +808,12 @@ async def create( async def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], stream: Literal[True], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -912,9 +822,10 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -975,8 +886,7 @@ async def create( logprobs: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of - `message`. This option is currently not available on the `gpt-4-vision-preview` - model. + `message`. max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. @@ -997,7 +907,8 @@ async def create( [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) response_format: An object specifying the format that the model must output. Compatible with - `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -1018,29 +929,31 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs - for. + for. A max of 128 functions are supported. - top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 @@ -1066,30 +979,12 @@ async def create( async def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], stream: bool, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -1098,9 +993,10 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -1161,8 +1057,7 @@ async def create( logprobs: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of - `message`. This option is currently not available on the `gpt-4-vision-preview` - model. + `message`. max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. 
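# A minimal usage sketch (not part of this patch) of the async variant documented above:
# `AsyncOpenAI` mirrors the sync client, and a streamed response is consumed with
# `async for`. Model name and prompt are placeholders.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Count to three"}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())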
@@ -1183,7 +1078,8 @@ async def create( [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) response_format: An object specifying the format that the model must output. Compatible with - `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -1204,29 +1100,31 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs - for. + for. A max of 128 functions are supported. - top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 @@ -1252,29 +1150,11 @@ async def create( async def create( self, *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ], + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, @@ -1284,9 +1164,10 @@ async def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -1299,7 +1180,7 @@ async def create( ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: return await self._post( "/chat/completions", - body=maybe_transform( + body=await async_maybe_transform( { "messages": messages, "model": model, @@ -1315,6 +1196,7 @@ async def create( "seed": seed, "stop": stop, "stream": stream, + "stream_options": stream_options, "temperature": temperature, "tool_choice": tool_choice, "tools": tools, diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py index 3d2e10230a..0812000f78 100644 --- a/src/openai/resources/completions.py +++ b/src/openai/resources/completions.py @@ -1,16 +1,20 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Dict, List, Union, Optional, overload +from typing import Dict, List, Union, Iterable, Optional, overload from typing_extensions import Literal import httpx from .. 
import _legacy_response -from ..types import Completion, completion_create_params +from ..types import completion_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import required_args, maybe_transform +from .._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper @@ -18,6 +22,8 @@ from .._base_client import ( make_request_options, ) +from ..types.completion import Completion +from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam __all__ = ["Completions", "AsyncCompletions"] @@ -36,7 +42,7 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -48,6 +54,7 @@ def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -151,8 +158,12 @@ def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -184,7 +195,7 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: Literal[True], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -196,6 +207,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -299,8 +311,12 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -332,7 +348,7 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: bool, best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -344,6 +360,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -447,8 +464,12 @@ def create( stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -480,7 +501,7 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -492,6 +513,7 @@ def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -520,6 +542,7 @@ def create( "seed": seed, "stop": stop, "stream": stream, + "stream_options": stream_options, "suffix": suffix, "temperature": temperature, "top_p": top_p, @@ -550,7 +573,7 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -562,6 +585,7 @@ async def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -665,8 +689,12 @@ async def create( message. 
[Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -698,7 +726,7 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: Literal[True], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -710,6 +738,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -813,8 +842,12 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -846,7 +879,7 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: bool, best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -858,6 +891,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -961,8 +995,12 @@ async def create( stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. 
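# A minimal usage sketch (not part of this patch) of the legacy completions endpoint with
# the `suffix` parameter, which the updated docstrings note is only supported for
# `gpt-3.5-turbo-instruct`. Prompt and suffix values are placeholders.
from openai import OpenAI

client = OpenAI()
completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="def add(a: int, b: int) -> int:",
    suffix="# end of arithmetic helpers",
    max_tokens=64,
)
print(completion.choices[0].text)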
@@ -994,7 +1032,7 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], List[int], List[List[int]], None], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -1006,6 +1044,7 @@ async def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -1019,7 +1058,7 @@ async def create( ) -> Completion | AsyncStream[Completion]: return await self._post( "/completions", - body=maybe_transform( + body=await async_maybe_transform( { "model": model, "prompt": prompt, @@ -1034,6 +1073,7 @@ async def create( "seed": seed, "stop": stop, "stream": stream, + "stream_options": stream_options, "suffix": suffix, "temperature": temperature, "top_p": top_p, diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index 5bc7ed855e..773b6f0968 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -1,15 +1,15 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import base64 -from typing import List, Union, cast +from typing import List, Union, Iterable, cast from typing_extensions import Literal import httpx from .. import _legacy_response -from ..types import CreateEmbeddingResponse, embedding_create_params +from ..types import embedding_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven from .._utils import is_given, maybe_transform from .._compat import cached_property @@ -19,6 +19,7 @@ from .._base_client import ( make_request_options, ) +from ..types.create_embedding_response import CreateEmbeddingResponse __all__ = ["Embeddings", "AsyncEmbeddings"] @@ -35,8 +36,9 @@ def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: def create( self, *, - input: Union[str, List[str], List[int], List[List[int]]], - model: Union[str, Literal["text-embedding-ada-002"]], + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]], + dimensions: int | NotGiven = NOT_GIVEN, encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -64,6 +66,9 @@ def create( [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them. + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + encoding_format: The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). 
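# A minimal usage sketch (not part of this patch) of the new `dimensions` parameter, which
# the updated docstrings say is only supported on `text-embedding-3` and later models.
# The input string is a placeholder.
from openai import OpenAI

client = OpenAI()
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="The quick brown fox jumped over the lazy dog",
    dimensions=256,
)
print(len(response.data[0].embedding))  # -> 256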
@@ -83,6 +88,7 @@ def create( "input": input, "model": model, "user": user, + "dimensions": dimensions, "encoding_format": encoding_format, } if not is_given(encoding_format) and has_numpy(): @@ -131,8 +137,9 @@ def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: async def create( self, *, - input: Union[str, List[str], List[int], List[List[int]]], - model: Union[str, Literal["text-embedding-ada-002"]], + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]], + dimensions: int | NotGiven = NOT_GIVEN, encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -160,6 +167,9 @@ async def create( [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them. + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. + encoding_format: The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). @@ -179,6 +189,7 @@ async def create( "input": input, "model": model, "user": user, + "dimensions": dimensions, "encoding_format": encoding_format, } if not is_given(encoding_format) and has_numpy(): diff --git a/src/openai/resources/files.py b/src/openai/resources/files.py index 58a2a217c7..aed0829dfe 100644 --- a/src/openai/resources/files.py +++ b/src/openai/resources/files.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -10,9 +10,14 @@ import httpx from .. import _legacy_response -from ..types import FileObject, FileDeleted, file_list_params, file_create_params +from ..types import file_list_params, file_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import extract_files, maybe_transform, deepcopy_minimal +from .._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -28,6 +33,8 @@ AsyncPaginator, make_request_options, ) +from ..types.file_object import FileObject +from ..types.file_deleted import FileDeleted __all__ = ["Files", "AsyncFiles"] @@ -45,7 +52,7 @@ def create( self, *, file: FileTypes, - purpose: Literal["fine-tune", "assistants"], + purpose: Literal["assistants", "batch", "fine-tune"], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -55,14 +62,18 @@ def create( ) -> FileObject: """Upload a file that can be used across various endpoints. - The size of all the - files uploaded by one organization can be up to 100 GB. + Individual files can be + up to 512 MB, and the size of all files uploaded by one organization can be up + to 100 GB. + + The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for + details. 
+ + The Fine-tuning API only supports `.jsonl` files. - The size of individual files can be a maximum of 512 MB or 2 million tokens for - Assistants. See the - [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to - learn more about the types of files supported. The Fine-tuning API only supports - `.jsonl` files. + The Batch API only supports `.jsonl` files up to 100 MB in size. Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -72,12 +83,12 @@ def create( purpose: The intended purpose of the uploaded file. - Use "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and - "assistants" for + Use "assistants" for [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Messages](https://platform.openai.com/docs/api-reference/messages). This allows - us to validate the format of the uploaded file is correct for fine-tuning. + [Message](https://platform.openai.com/docs/api-reference/messages) files, + "vision" for Assistants image file inputs, "batch" for + [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). extra_headers: Send extra headers @@ -238,6 +249,7 @@ def content( """ if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} return self._get( f"/files/{file_id}/content", options=make_request_options( @@ -272,7 +284,6 @@ def retrieve_content( """ if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"Accept": "application/json", **(extra_headers or {})} return self._get( f"/files/{file_id}/content", options=make_request_options( @@ -318,7 +329,7 @@ async def create( self, *, file: FileTypes, - purpose: Literal["fine-tune", "assistants"], + purpose: Literal["assistants", "batch", "fine-tune"], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -328,14 +339,18 @@ async def create( ) -> FileObject: """Upload a file that can be used across various endpoints. - The size of all the - files uploaded by one organization can be up to 100 GB. + Individual files can be + up to 512 MB, and the size of all files uploaded by one organization can be up + to 100 GB. - The size of individual files can be a maximum of 512 MB or 2 million tokens for - Assistants. See the - [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to - learn more about the types of files supported. The Fine-tuning API only supports - `.jsonl` files. + The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for + details. + + The Fine-tuning API only supports `.jsonl` files. + + The Batch API only supports `.jsonl` files up to 100 MB in size. Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -345,12 +360,12 @@ async def create( purpose: The intended purpose of the uploaded file. 
- Use "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and - "assistants" for + Use "assistants" for [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Messages](https://platform.openai.com/docs/api-reference/messages). This allows - us to validate the format of the uploaded file is correct for fine-tuning. + [Message](https://platform.openai.com/docs/api-reference/messages) files, + "vision" for Assistants image file inputs, "batch" for + [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). extra_headers: Send extra headers @@ -374,7 +389,7 @@ async def create( extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/files", - body=maybe_transform(body, file_create_params.FileCreateParams), + body=await async_maybe_transform(body, file_create_params.FileCreateParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -511,6 +526,7 @@ async def content( """ if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} return await self._get( f"/files/{file_id}/content", options=make_request_options( @@ -545,7 +561,6 @@ async def retrieve_content( """ if not file_id: raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - extra_headers = {"Accept": "application/json", **(extra_headers or {})} return await self._get( f"/files/{file_id}/content", options=make_request_options( diff --git a/src/openai/resources/fine_tuning/__init__.py b/src/openai/resources/fine_tuning/__init__.py index ab0c28ef4b..7765231fee 100644 --- a/src/openai/resources/fine_tuning/__init__.py +++ b/src/openai/resources/fine_tuning/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from .jobs import ( Jobs, diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py index 33b25baec9..0404fed6ec 100644 --- a/src/openai/resources/fine_tuning/fine_tuning.py +++ b/src/openai/resources/fine_tuning/fine_tuning.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -11,6 +11,7 @@ AsyncJobsWithStreamingResponse, ) from ..._compat import cached_property +from .jobs.jobs import Jobs, AsyncJobs from ..._resource import SyncAPIResource, AsyncAPIResource __all__ = ["FineTuning", "AsyncFineTuning"] diff --git a/src/openai/resources/fine_tuning/jobs/__init__.py b/src/openai/resources/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..94cd1fb7e7 --- /dev/null +++ b/src/openai/resources/fine_tuning/jobs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) + +__all__ = [ + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", + "Jobs", + "AsyncJobs", + "JobsWithRawResponse", + "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", +] diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py new file mode 100644 index 0000000000..67f5739a02 --- /dev/null +++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py @@ -0,0 +1,177 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning.jobs import checkpoint_list_params +from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import FineTuningJobCheckpoint + +__all__ = ["Checkpoints", "AsyncCheckpoints"] + + +class Checkpoints(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJobCheckpoint]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=SyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class AsyncCheckpoints(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=AsyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class CheckpointsWithRawResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.to_raw_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithRawResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.async_to_raw_response_wrapper( + checkpoints.list, + ) + + +class CheckpointsWithStreamingResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = to_streamed_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithStreamingResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = async_to_streamed_response_wrapper( + checkpoints.list, + ) diff --git a/src/openai/resources/fine_tuning/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py similarity index 88% rename from src/openai/resources/fine_tuning/jobs.py rename to src/openai/resources/fine_tuning/jobs/jobs.py index 6b59932982..f38956e6be 100644 --- a/src/openai/resources/fine_tuning/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -1,35 +1,46 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal import httpx -from ... import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ...pagination import SyncCursorPage, AsyncCursorPage -from ..._base_client import ( +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( AsyncPaginator, make_request_options, ) -from ...types.fine_tuning import ( - FineTuningJob, - FineTuningJobEvent, - job_list_params, - job_create_params, - job_list_events_params, -) +from ....types.fine_tuning import job_list_params, job_create_params, job_list_events_params +from ....types.fine_tuning.fine_tuning_job import FineTuningJob +from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent __all__ = ["Jobs", "AsyncJobs"] class Jobs(SyncAPIResource): + @cached_property + def checkpoints(self) -> Checkpoints: + return Checkpoints(self._client) + @cached_property def with_raw_response(self) -> JobsWithRawResponse: return JobsWithRawResponse(self) @@ -44,6 +55,8 @@ def create( model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -68,7 +81,7 @@ def create( training_file: The ID of an uploaded file that contains training data. - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. Your dataset must be formatted as a JSONL file. Additionally, you must upload @@ -79,6 +92,12 @@ def create( hyperparameters: The hyperparameters used for the fine-tuning job. + integrations: A list of integrations to enable for your fine-tuning job. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + suffix: A string of up to 18 characters that will be added to your fine-tuned model name. 
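The new `seed` and `integrations` arguments documented above pass straight through `fine_tuning.jobs.create`. A sketch under the assumption that `job_create_params.Integration` follows the Weights & Biases shape used by the API (the TypedDict itself is not shown in this excerpt); the file ID and project name are placeholders:

from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file="file-abc123",  # an uploaded .jsonl training file
    seed=42,                      # same seed + params should reproduce results
    integrations=[
        # Assumed wandb payload; see job_create_params.Integration for the real shape.
        {"type": "wandb", "wandb": {"project": "my-finetunes"}}
    ],
)
print(job.id, job.status)
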
@@ -113,6 +132,8 @@ def create( "model": model, "training_file": training_file, "hyperparameters": hyperparameters, + "integrations": integrations, + "seed": seed, "suffix": suffix, "validation_file": validation_file, }, @@ -291,6 +312,10 @@ def list_events( class AsyncJobs(AsyncAPIResource): + @cached_property + def checkpoints(self) -> AsyncCheckpoints: + return AsyncCheckpoints(self._client) + @cached_property def with_raw_response(self) -> AsyncJobsWithRawResponse: return AsyncJobsWithRawResponse(self) @@ -305,6 +330,8 @@ async def create( model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -329,7 +356,7 @@ async def create( training_file: The ID of an uploaded file that contains training data. - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. Your dataset must be formatted as a JSONL file. Additionally, you must upload @@ -340,6 +367,12 @@ async def create( hyperparameters: The hyperparameters used for the fine-tuning job. + integrations: A list of integrations to enable for your fine-tuning job. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + suffix: A string of up to 18 characters that will be added to your fine-tuned model name. 
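The `checkpoints` sub-resource wired onto both the sync and async job resources above is paginated with the same cursor scheme as the other list endpoints. A minimal listing sketch; the job ID is a placeholder and the `id` field on each checkpoint is assumed from the `FineTuningJobCheckpoint` model, which is not shown in this excerpt:

from openai import OpenAI

client = OpenAI()

# Iterating the returned SyncCursorPage pages through
# /fine_tuning/jobs/{id}/checkpoints automatically.
for checkpoint in client.fine_tuning.jobs.checkpoints.list(
    "ftjob-abc123",
    limit=10,
):
    print(checkpoint.id)
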
@@ -369,11 +402,13 @@ async def create( """ return await self._post( "/fine_tuning/jobs", - body=maybe_transform( + body=await async_maybe_transform( { "model": model, "training_file": training_file, "hyperparameters": hyperparameters, + "integrations": integrations, + "seed": seed, "suffix": suffix, "validation_file": validation_file, }, @@ -571,6 +606,10 @@ def __init__(self, jobs: Jobs) -> None: jobs.list_events, ) + @cached_property + def checkpoints(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self._jobs.checkpoints) + class AsyncJobsWithRawResponse: def __init__(self, jobs: AsyncJobs) -> None: @@ -592,6 +631,10 @@ def __init__(self, jobs: AsyncJobs) -> None: jobs.list_events, ) + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints) + class JobsWithStreamingResponse: def __init__(self, jobs: Jobs) -> None: @@ -613,6 +656,10 @@ def __init__(self, jobs: Jobs) -> None: jobs.list_events, ) + @cached_property + def checkpoints(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self._jobs.checkpoints) + class AsyncJobsWithStreamingResponse: def __init__(self, jobs: AsyncJobs) -> None: @@ -633,3 +680,7 @@ def __init__(self, jobs: AsyncJobs) -> None: self.list_events = async_to_streamed_response_wrapper( jobs.list_events, ) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints) diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py index 91530e47ca..74b2a46a3f 100644 --- a/src/openai/resources/images.py +++ b/src/openai/resources/images.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -8,20 +8,21 @@ import httpx from .. import _legacy_response -from ..types import ( - ImagesResponse, - image_edit_params, - image_generate_params, - image_create_variation_params, -) +from ..types import image_edit_params, image_generate_params, image_create_variation_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import extract_files, maybe_transform, deepcopy_minimal +from .._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._base_client import ( make_request_options, ) +from ..types.images_response import ImagesResponse __all__ = ["Images", "AsyncImages"] @@ -65,7 +66,8 @@ def create_variation( `n=1` is supported. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. @@ -146,7 +148,8 @@ def edit( n: The number of images to generate. Must be between 1 and 10. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. 
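The repeated doc change in the images resource notes that `url` results expire after 60 minutes. When an image needs to be kept, requesting `b64_json` and decoding it locally sidesteps the expiry; a minimal sketch, with the model, prompt, and output path chosen only for illustration:

import base64
from openai import OpenAI

client = OpenAI()

resp = client.images.generate(
    model="dall-e-3",
    prompt="a watercolor fox",
    response_format="b64_json",  # URL responses are only valid for ~60 minutes
)
with open("fox.png", "wb") as f:
    f.write(base64.b64decode(resp.data[0].b64_json))
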
@@ -226,7 +229,8 @@ def generate( for `dall-e-3`. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or @@ -310,7 +314,8 @@ async def create_variation( `n=1` is supported. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. @@ -345,7 +350,7 @@ async def create_variation( extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/variations", - body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), + body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -391,7 +396,8 @@ async def edit( n: The number of images to generate. Must be between 1 and 10. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. @@ -428,7 +434,7 @@ async def edit( extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/edits", - body=maybe_transform(body, image_edit_params.ImageEditParams), + body=await async_maybe_transform(body, image_edit_params.ImageEditParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -471,7 +477,8 @@ async def generate( for `dall-e-3`. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or @@ -496,7 +503,7 @@ async def generate( """ return await self._post( "/images/generations", - body=maybe_transform( + body=await async_maybe_transform( { "prompt": prompt, "model": model, diff --git a/src/openai/resources/models.py b/src/openai/resources/models.py index 3536f083d2..e76c496ffa 100644 --- a/src/openai/resources/models.py +++ b/src/openai/resources/models.py @@ -1,20 +1,21 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import httpx from .. 
import _legacy_response -from ..types import Model, ModelDeleted from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..pagination import SyncPage, AsyncPage +from ..types.model import Model from .._base_client import ( AsyncPaginator, make_request_options, ) +from ..types.model_deleted import ModelDeleted __all__ = ["Models", "AsyncModels"] diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py index 540d089071..9386e50dae 100644 --- a/src/openai/resources/moderations.py +++ b/src/openai/resources/moderations.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -8,15 +8,19 @@ import httpx from .. import _legacy_response -from ..types import ModerationCreateResponse, moderation_create_params +from ..types import moderation_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform +from .._utils import ( + maybe_transform, + async_maybe_transform, +) from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._base_client import ( make_request_options, ) +from ..types.moderation_create_response import ModerationCreateResponse __all__ = ["Moderations", "AsyncModerations"] @@ -43,7 +47,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModerationCreateResponse: """ - Classifies if text violates OpenAI's Content Policy + Classifies if text is potentially harmful. Args: input: The input text to classify @@ -103,7 +107,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModerationCreateResponse: """ - Classifies if text violates OpenAI's Content Policy + Classifies if text is potentially harmful. Args: input: The input text to classify @@ -127,7 +131,7 @@ async def create( """ return await self._post( "/moderations", - body=maybe_transform( + body=await async_maybe_transform( { "input": input, "model": model, diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py index d6108e1eed..7873efb34f 100644 --- a/src/openai/types/__init__.py +++ b/src/openai/types/__init__.py @@ -1,13 +1,20 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations +from .batch import Batch as Batch from .image import Image as Image from .model import Model as Model -from .shared import FunctionDefinition as FunctionDefinition, FunctionParameters as FunctionParameters +from .shared import ( + ErrorObject as ErrorObject, + FunctionDefinition as FunctionDefinition, + FunctionParameters as FunctionParameters, +) from .embedding import Embedding as Embedding +from .chat_model import ChatModel as ChatModel from .completion import Completion as Completion from .moderation import Moderation as Moderation +from .batch_error import BatchError as BatchError from .file_object import FileObject as FileObject from .file_content import FileContent as FileContent from .file_deleted import FileDeleted as FileDeleted @@ -15,9 +22,12 @@ from .images_response import ImagesResponse as ImagesResponse from .completion_usage import CompletionUsage as CompletionUsage from .file_list_params import FileListParams as FileListParams +from .batch_list_params import BatchListParams as BatchListParams from .completion_choice import CompletionChoice as CompletionChoice from .image_edit_params import ImageEditParams as ImageEditParams from .file_create_params import FileCreateParams as FileCreateParams +from .batch_create_params import BatchCreateParams as BatchCreateParams +from .batch_request_counts import BatchRequestCounts as BatchRequestCounts from .image_generate_params import ImageGenerateParams as ImageGenerateParams from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams from .completion_create_params import CompletionCreateParams as CompletionCreateParams diff --git a/src/openai/types/audio/__init__.py b/src/openai/types/audio/__init__.py index ba5f7fd8e0..8d2c44c86a 100644 --- a/src/openai/types/audio/__init__.py +++ b/src/openai/types/audio/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/audio/speech_create_params.py b/src/openai/types/audio/speech_create_params.py index 6a302dd3c8..8d75ec4ccc 100644 --- a/src/openai/types/audio/speech_create_params.py +++ b/src/openai/types/audio/speech_create_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -26,8 +26,11 @@ class SpeechCreateParams(TypedDict, total=False): [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). """ - response_format: Literal["mp3", "opus", "aac", "flac"] - """The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.""" + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] + """The format to audio in. + + Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. + """ speed: float """The speed of the generated audio. diff --git a/src/openai/types/audio/transcription.py b/src/openai/types/audio/transcription.py index d2274faa0e..0b6ab39e78 100644 --- a/src/openai/types/audio/transcription.py +++ b/src/openai/types/audio/transcription.py @@ -1,4 +1,6 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ + from ..._models import BaseModel @@ -7,3 +9,4 @@ class Transcription(BaseModel): text: str + """The transcribed text.""" diff --git a/src/openai/types/audio/transcription_create_params.py b/src/openai/types/audio/transcription_create_params.py index 7bd70d7b48..6b2d5bae79 100644 --- a/src/openai/types/audio/transcription_create_params.py +++ b/src/openai/types/audio/transcription_create_params.py @@ -1,8 +1,8 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union +from typing import List, Union from typing_extensions import Literal, Required, TypedDict from ..._types import FileTypes @@ -18,7 +18,11 @@ class TranscriptionCreateParams(TypedDict, total=False): """ model: Required[Union[str, Literal["whisper-1"]]] - """ID of the model to use. Only `whisper-1` is currently available.""" + """ID of the model to use. + + Only `whisper-1` (which is powered by our open source Whisper V2 model) is + currently available. + """ language: str """The language of the input audio. @@ -50,3 +54,12 @@ class TranscriptionCreateParams(TypedDict, total=False): [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. """ + + timestamp_granularities: List[Literal["word", "segment"]] + """The timestamp granularities to populate for this transcription. + + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + """ diff --git a/src/openai/types/audio/translation.py b/src/openai/types/audio/translation.py index a01d622abc..3d9ede2939 100644 --- a/src/openai/types/audio/translation.py +++ b/src/openai/types/audio/translation.py @@ -1,4 +1,6 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + from ..._models import BaseModel diff --git a/src/openai/types/audio/translation_create_params.py b/src/openai/types/audio/translation_create_params.py index d3cb4b9e63..f23a41ed5c 100644 --- a/src/openai/types/audio/translation_create_params.py +++ b/src/openai/types/audio/translation_create_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -18,7 +18,11 @@ class TranslationCreateParams(TypedDict, total=False): """ model: Required[Union[str, Literal["whisper-1"]]] - """ID of the model to use. Only `whisper-1` is currently available.""" + """ID of the model to use. + + Only `whisper-1` (which is powered by our open source Whisper V2 model) is + currently available. + """ prompt: str """An optional text to guide the model's style or continue a previous audio diff --git a/src/openai/types/batch.py b/src/openai/types/batch.py new file mode 100644 index 0000000000..90f6d79572 --- /dev/null +++ b/src/openai/types/batch.py @@ -0,0 +1,85 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +import builtins +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .batch_error import BatchError +from .batch_request_counts import BatchRequestCounts + +__all__ = ["Batch", "Errors"] + + +class Errors(BaseModel): + data: Optional[List[BatchError]] = None + + object: Optional[str] = None + """The object type, which is always `list`.""" + + +class Batch(BaseModel): + id: str + + completion_window: str + """The time frame within which the batch should be processed.""" + + created_at: int + """The Unix timestamp (in seconds) for when the batch was created.""" + + endpoint: str + """The OpenAI API endpoint used by the batch.""" + + input_file_id: str + """The ID of the input file for the batch.""" + + object: Literal["batch"] + """The object type, which is always `batch`.""" + + status: Literal[ + "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled" + ] + """The current status of the batch.""" + + cancelled_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch was cancelled.""" + + cancelling_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started cancelling.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch was completed.""" + + error_file_id: Optional[str] = None + """The ID of the file containing the outputs of requests with errors.""" + + errors: Optional[Errors] = None + + expired_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch expired.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch will expire.""" + + failed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch failed.""" + + finalizing_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started finalizing.""" + + in_progress_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started processing.""" + + metadata: Optional[builtins.object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + output_file_id: Optional[str] = None + """The ID of the file containing the outputs of successfully executed requests.""" + + request_counts: Optional[BatchRequestCounts] = None + """The request counts for different statuses within the batch.""" diff --git a/src/openai/types/batch_create_params.py b/src/openai/types/batch_create_params.py new file mode 100644 index 0000000000..140380d417 --- /dev/null +++ b/src/openai/types/batch_create_params.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["BatchCreateParams"] + + +class BatchCreateParams(TypedDict, total=False): + completion_window: Required[Literal["24h"]] + """The time frame within which the batch should be processed. + + Currently only `24h` is supported. + """ + + endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"]] + """The endpoint to be used for all requests in the batch. 
+ + Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are + supported. Note that `/v1/embeddings` batches are also restricted to a maximum + of 50,000 embedding inputs across all requests in the batch. + """ + + input_file_id: Required[str] + """The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 100 MB in size. + """ + + metadata: Optional[Dict[str, str]] + """Optional custom metadata for the batch.""" diff --git a/src/openai/types/batch_error.py b/src/openai/types/batch_error.py new file mode 100644 index 0000000000..1cdd808dbd --- /dev/null +++ b/src/openai/types/batch_error.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["BatchError"] + + +class BatchError(BaseModel): + code: Optional[str] = None + """An error code identifying the error type.""" + + line: Optional[int] = None + """The line number of the input file where the error occurred, if applicable.""" + + message: Optional[str] = None + """A human-readable message providing more details about the error.""" + + param: Optional[str] = None + """The name of the parameter that caused the error, if applicable.""" diff --git a/src/openai/types/batch_list_params.py b/src/openai/types/batch_list_params.py new file mode 100644 index 0000000000..ef5e966b79 --- /dev/null +++ b/src/openai/types/batch_list_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["BatchListParams"] + + +class BatchListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py new file mode 100644 index 0000000000..ef6c84a0a1 --- /dev/null +++ b/src/openai/types/batch_request_counts.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + + +from .._models import BaseModel + +__all__ = ["BatchRequestCounts"] + + +class BatchRequestCounts(BaseModel): + completed: int + """Number of requests that have been completed successfully.""" + + failed: int + """Number of requests that have failed.""" + + total: int + """Total number of requests in the batch.""" diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py index e6742521e9..d851a3619c 100644 --- a/src/openai/types/beta/__init__.py +++ b/src/openai/types/beta/__init__.py @@ -1,14 +1,40 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
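The new batch types above pair with the `purpose="batch"` literal added to the files resource earlier in this diff. A sketch of the intended flow, assuming a `client.batches` resource that accepts `BatchCreateParams` (the resource module itself is outside this excerpt); the file name and metadata are placeholders:

from openai import OpenAI

client = OpenAI()

# 1. Upload a JSONL file of requests with the new "batch" purpose.
batch_file = client.files.create(
    file=open("requests.jsonl", "rb"),
    purpose="batch",
)

# 2. Create the batch; only the 24h completion window is currently supported.
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"project": "nightly-eval"},
)
print(batch.id, batch.status)
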
from __future__ import annotations from .thread import Thread as Thread from .assistant import Assistant as Assistant +from .vector_store import VectorStore as VectorStore +from .function_tool import FunctionTool as FunctionTool +from .assistant_tool import AssistantTool as AssistantTool from .thread_deleted import ThreadDeleted as ThreadDeleted +from .file_search_tool import FileSearchTool as FileSearchTool from .assistant_deleted import AssistantDeleted as AssistantDeleted +from .function_tool_param import FunctionToolParam as FunctionToolParam +from .assistant_tool_param import AssistantToolParam as AssistantToolParam from .thread_create_params import ThreadCreateParams as ThreadCreateParams from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams +from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted from .assistant_list_params import AssistantListParams as AssistantListParams +from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice +from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool +from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent +from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam from .assistant_create_params import AssistantCreateParams as AssistantCreateParams from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams +from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams +from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat +from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams +from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams +from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam +from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam +from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams +from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction +from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam +from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption +from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam +from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam +from .assistant_response_format_option_param import ( + AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam, +) diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index 89e45d4806..4e5adc766e 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -1,33 +1,38 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-import builtins -from typing import List, Union, Optional +from typing import List, Optional from typing_extensions import Literal -from ..shared import FunctionDefinition from ..._models import BaseModel +from .assistant_tool import AssistantTool +from .assistant_response_format_option import AssistantResponseFormatOption -__all__ = ["Assistant", "Tool", "ToolCodeInterpreter", "ToolRetrieval", "ToolFunction"] +__all__ = ["Assistant", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] -class ToolCodeInterpreter(BaseModel): - type: Literal["code_interpreter"] - """The type of tool being defined: `code_interpreter`""" - - -class ToolRetrieval(BaseModel): - type: Literal["retrieval"] - """The type of tool being defined: `retrieval`""" +class ToolResourcesCodeInterpreter(BaseModel): + file_ids: Optional[List[str]] = None + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter`` tool. There can be a maximum of 20 files + associated with the tool. + """ -class ToolFunction(BaseModel): - function: FunctionDefinition +class ToolResourcesFileSearch(BaseModel): + vector_store_ids: Optional[List[str]] = None + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ - type: Literal["function"] - """The type of tool being defined: `function`""" +class ToolResources(BaseModel): + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None -Tool = Union[ToolCodeInterpreter, ToolRetrieval, ToolFunction] + file_search: Optional[ToolResourcesFileSearch] = None class Assistant(BaseModel): @@ -40,20 +45,13 @@ class Assistant(BaseModel): description: Optional[str] = None """The description of the assistant. The maximum length is 512 characters.""" - file_ids: List[str] - """ - A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. - """ - instructions: Optional[str] = None """The system instructions that the assistant uses. - The maximum length is 32768 characters. + The maximum length is 256,000 characters. """ - metadata: Optional[builtins.object] = None + metadata: Optional[object] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a @@ -77,9 +75,52 @@ class Assistant(BaseModel): object: Literal["assistant"] """The object type, which is always `assistant`.""" - tools: List[Tool] + tools: List[AssistantTool] """A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types - `code_interpreter`, `retrieval`, or `function`. + `code_interpreter`, `file_search`, or `function`. + """ + + response_format: Optional[AssistantResponseFormatOption] = None + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. 
+ + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] = None + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] = None + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + top_p: Optional[float] = None + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. """ diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index 539897a7ba..67e7f7e78c 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -1,23 +1,50 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union, Optional +from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from .assistant_tool_param import AssistantToolParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ "AssistantCreateParams", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "ToolResourcesFileSearchVectorStore", ] class AssistantCreateParams(TypedDict, total=False): - model: Required[str] + model: Required[ + Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + ] """ID of the model to use. You can use the @@ -30,17 +57,10 @@ class AssistantCreateParams(TypedDict, total=False): description: Optional[str] """The description of the assistant. The maximum length is 512 characters.""" - file_ids: List[str] - """ - A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. 
- """ - instructions: Optional[str] """The system instructions that the assistant uses. - The maximum length is 32768 characters. + The maximum length is 256,000 characters. """ metadata: Optional[object] @@ -54,29 +74,102 @@ class AssistantCreateParams(TypedDict, total=False): name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" - tools: List[Tool] + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Iterable[AssistantToolParam] """A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types - `code_interpreter`, `retrieval`, or `function`. + `code_interpreter`, `file_search`, or `function`. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" + We generally recommend altering this or temperature but not both. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. 
-class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maxium of 512 characters long. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this assistant. There can be a maximum of 1 + vector store attached to the assistant. + """ - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/assistant_deleted.py b/src/openai/types/beta/assistant_deleted.py index 23802caaf6..3be40cd6b8 100644 --- a/src/openai/types/beta/assistant_deleted.py +++ b/src/openai/types/beta/assistant_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/assistant_list_params.py b/src/openai/types/beta/assistant_list_params.py index b2d794a43a..f54f63120b 100644 --- a/src/openai/types/beta/assistant_list_params.py +++ b/src/openai/types/beta/assistant_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py new file mode 100644 index 0000000000..f53bdaf62a --- /dev/null +++ b/src/openai/types/beta/assistant_response_format.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["AssistantResponseFormat"] + + +class AssistantResponseFormat(BaseModel): + type: Optional[Literal["text", "json_object"]] = None + """Must be one of `text` or `json_object`.""" diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py new file mode 100644 index 0000000000..d4e05e0ea9 --- /dev/null +++ b/src/openai/types/beta/assistant_response_format_option.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
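A minimal sketch of creating an assistant against the new parameters shown above (the `file_search` tool replacing `retrieval`, `tool_resources` replacing `file_ids`, and the JSON-mode `response_format`). The vector store ID and instructions are hypothetical placeholders, and the client is assumed to read OPENAI_API_KEY from the environment.

from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4o",  # `model` now accepts a plain string or one of the listed literals
    name="Docs helper",
    # JSON mode requires instructing the model to produce JSON, per the note above.
    instructions="Answer questions about the attached documents. Always reply with a JSON object.",
    tools=[{"type": "file_search"}],  # formerly `retrieval`
    tool_resources={
        "file_search": {"vector_store_ids": ["vs_abc123"]},  # hypothetical vector store ID
    },
    response_format={"type": "json_object"},
    temperature=0.2,
)
print(assistant.id)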
+ +from typing import Union +from typing_extensions import Literal + +from .assistant_response_format import AssistantResponseFormat + +__all__ = ["AssistantResponseFormatOption"] + +AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat] diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py new file mode 100644 index 0000000000..46e04125d1 --- /dev/null +++ b/src/openai/types/beta/assistant_response_format_option_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +from .assistant_response_format_param import AssistantResponseFormatParam + +__all__ = ["AssistantResponseFormatOptionParam"] + +AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam] diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py new file mode 100644 index 0000000000..96e1d02115 --- /dev/null +++ b/src/openai/types/beta/assistant_response_format_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["AssistantResponseFormatParam"] + + +class AssistantResponseFormatParam(TypedDict, total=False): + type: Literal["text", "json_object"] + """Must be one of `text` or `json_object`.""" diff --git a/src/openai/types/beta/assistant_stream_event.py b/src/openai/types/beta/assistant_stream_event.py new file mode 100644 index 0000000000..91925e93b3 --- /dev/null +++ b/src/openai/types/beta/assistant_stream_event.py @@ -0,0 +1,279 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated + +from .thread import Thread +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .threads.run import Run +from .threads.message import Message +from ..shared.error_object import ErrorObject +from .threads.runs.run_step import RunStep +from .threads.message_delta_event import MessageDeltaEvent +from .threads.runs.run_step_delta_event import RunStepDeltaEvent + +__all__ = [ + "AssistantStreamEvent", + "ThreadCreated", + "ThreadRunCreated", + "ThreadRunQueued", + "ThreadRunInProgress", + "ThreadRunRequiresAction", + "ThreadRunCompleted", + "ThreadRunFailed", + "ThreadRunCancelling", + "ThreadRunCancelled", + "ThreadRunExpired", + "ThreadRunStepCreated", + "ThreadRunStepInProgress", + "ThreadRunStepDelta", + "ThreadRunStepCompleted", + "ThreadRunStepFailed", + "ThreadRunStepCancelled", + "ThreadRunStepExpired", + "ThreadMessageCreated", + "ThreadMessageInProgress", + "ThreadMessageDelta", + "ThreadMessageCompleted", + "ThreadMessageIncomplete", + "ErrorEvent", +] + + +class ThreadCreated(BaseModel): + data: Thread + """ + Represents a thread that contains + [messages](https://platform.openai.com/docs/api-reference/messages). + """ + + event: Literal["thread.created"] + + +class ThreadRunCreated(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). 
+ """ + + event: Literal["thread.run.created"] + + +class ThreadRunQueued(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.queued"] + + +class ThreadRunInProgress(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.in_progress"] + + +class ThreadRunRequiresAction(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.requires_action"] + + +class ThreadRunCompleted(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.completed"] + + +class ThreadRunFailed(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.failed"] + + +class ThreadRunCancelling(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.cancelling"] + + +class ThreadRunCancelled(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.cancelled"] + + +class ThreadRunExpired(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.expired"] + + +class ThreadRunStepCreated(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.created"] + + +class ThreadRunStepInProgress(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.in_progress"] + + +class ThreadRunStepDelta(BaseModel): + data: RunStepDeltaEvent + """Represents a run step delta i.e. + + any changed fields on a run step during streaming. + """ + + event: Literal["thread.run.step.delta"] + + +class ThreadRunStepCompleted(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.completed"] + + +class ThreadRunStepFailed(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.failed"] + + +class ThreadRunStepCancelled(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.cancelled"] + + +class ThreadRunStepExpired(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.expired"] + + +class ThreadMessageCreated(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.created"] + + +class ThreadMessageInProgress(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.in_progress"] + + +class ThreadMessageDelta(BaseModel): + data: MessageDeltaEvent + """Represents a message delta i.e. + + any changed fields on a message during streaming. 
+ """ + + event: Literal["thread.message.delta"] + + +class ThreadMessageCompleted(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.completed"] + + +class ThreadMessageIncomplete(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.incomplete"] + + +class ErrorEvent(BaseModel): + data: ErrorObject + + event: Literal["error"] + + +AssistantStreamEvent = Annotated[ + Union[ + ThreadCreated, + ThreadRunCreated, + ThreadRunQueued, + ThreadRunInProgress, + ThreadRunRequiresAction, + ThreadRunCompleted, + ThreadRunFailed, + ThreadRunCancelling, + ThreadRunCancelled, + ThreadRunExpired, + ThreadRunStepCreated, + ThreadRunStepInProgress, + ThreadRunStepDelta, + ThreadRunStepCompleted, + ThreadRunStepFailed, + ThreadRunStepCancelled, + ThreadRunStepExpired, + ThreadMessageCreated, + ThreadMessageInProgress, + ThreadMessageDelta, + ThreadMessageCompleted, + ThreadMessageIncomplete, + ErrorEvent, + ], + PropertyInfo(discriminator="event"), +] diff --git a/src/openai/types/beta/assistant_tool.py b/src/openai/types/beta/assistant_tool.py new file mode 100644 index 0000000000..7832da48cc --- /dev/null +++ b/src/openai/types/beta/assistant_tool.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ..._utils import PropertyInfo +from .function_tool import FunctionTool +from .file_search_tool import FileSearchTool +from .code_interpreter_tool import CodeInterpreterTool + +__all__ = ["AssistantTool"] + +AssistantTool = Annotated[Union[CodeInterpreterTool, FileSearchTool, FunctionTool], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py new file mode 100644 index 0000000000..d73439f006 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .assistant_tool_choice_function import AssistantToolChoiceFunction + +__all__ = ["AssistantToolChoice"] + + +class AssistantToolChoice(BaseModel): + type: Literal["function", "code_interpreter", "file_search"] + """The type of the tool. If type is `function`, the function name must be set""" + + function: Optional[AssistantToolChoiceFunction] = None diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py new file mode 100644 index 0000000000..d0d4255357 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_function.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
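A sketch of consuming the AssistantStreamEvent union defined above: creating a run with stream=True yields an iterator of events whose `event` field is the discriminator and whose `data` field is the typed payload. The thread and assistant IDs are hypothetical placeholders.

from openai import OpenAI

client = OpenAI()

stream = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # hypothetical thread ID
    assistant_id="asst_abc123",  # hypothetical assistant ID
    stream=True,                 # returns a stream of AssistantStreamEvent objects
)

for event in stream:
    if event.event == "thread.message.delta":
        # event.data is a MessageDeltaEvent; print streamed text fragments as they arrive
        for part in event.data.delta.content or []:
            if part.type == "text" and part.text and part.text.value:
                print(part.text.value, end="", flush=True)
    elif event.event == "thread.run.completed":
        print("\nrun", event.data.id, "completed")
    elif event.event == "error":
        print("\nstream error:", event.data.message)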
+ + + +from ..._models import BaseModel + +__all__ = ["AssistantToolChoiceFunction"] + + +class AssistantToolChoiceFunction(BaseModel): + name: str + """The name of the function to call.""" diff --git a/src/openai/types/beta/assistant_tool_choice_function_param.py b/src/openai/types/beta/assistant_tool_choice_function_param.py new file mode 100644 index 0000000000..428857de91 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_function_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["AssistantToolChoiceFunctionParam"] + + +class AssistantToolChoiceFunctionParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py new file mode 100644 index 0000000000..8958bc8fb0 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_option.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from .assistant_tool_choice import AssistantToolChoice + +__all__ = ["AssistantToolChoiceOption"] + +AssistantToolChoiceOption = Union[Literal["none", "auto", "required"], AssistantToolChoice] diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py new file mode 100644 index 0000000000..81b7f15136 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_option_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +from .assistant_tool_choice_param import AssistantToolChoiceParam + +__all__ = ["AssistantToolChoiceOptionParam"] + +AssistantToolChoiceOptionParam = Union[Literal["none", "auto", "required"], AssistantToolChoiceParam] diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py new file mode 100644 index 0000000000..904f489e26 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam + +__all__ = ["AssistantToolChoiceParam"] + + +class AssistantToolChoiceParam(TypedDict, total=False): + type: Required[Literal["function", "code_interpreter", "file_search"]] + """The type of the tool. If type is `function`, the function name must be set""" + + function: AssistantToolChoiceFunctionParam diff --git a/src/openai/types/beta/assistant_tool_param.py b/src/openai/types/beta/assistant_tool_param.py new file mode 100644 index 0000000000..5b1d30ba2f --- /dev/null +++ b/src/openai/types/beta/assistant_tool_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
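A sketch of the `tool_choice` option modeled by AssistantToolChoiceOptionParam: either one of the literals "none", "auto", or "required", or a dict naming a specific tool. It assumes the standard runs.create endpoint accepts the option, and uses hypothetical IDs and a hypothetical function name.

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # hypothetical thread ID
    assistant_id="asst_abc123",  # hypothetical assistant ID
    # Force the model to call one particular function tool on this run.
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)

# Alternatively, require that *some* tool is called before the model responds:
#   tool_choice="required"
# or let the model decide:
#   tool_choice="auto"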
+ +from __future__ import annotations + +from typing import Union + +from .function_tool_param import FunctionToolParam +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = ["AssistantToolParam"] + +AssistantToolParam = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index dfb5d4c553..b401e1a891 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -1,38 +1,24 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Iterable, Optional +from typing_extensions import TypedDict -from ...types import shared_params +from .assistant_tool_param import AssistantToolParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam -__all__ = [ - "AssistantUpdateParams", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", -] +__all__ = ["AssistantUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] class AssistantUpdateParams(TypedDict, total=False): description: Optional[str] """The description of the assistant. The maximum length is 512 characters.""" - file_ids: List[str] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. If a - file was previously attached to the list but does not show up in the list, it - will be deleted from the assistant. - """ - instructions: Optional[str] """The system instructions that the assistant uses. - The maximum length is 32768 characters. + The maximum length is 256,000 characters. """ metadata: Optional[object] @@ -56,29 +42,78 @@ class AssistantUpdateParams(TypedDict, total=False): name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" - tools: List[Tool] + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. 
+ + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Iterable[AssistantToolParam] """A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types - `code_interpreter`, `retrieval`, or `function`. + `code_interpreter`, `file_search`, or `function`. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" + We generally recommend altering this or temperature but not both. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + Overrides the list of + [file](https://platform.openai.com/docs/api-reference/files) IDs made available + to the `code_interpreter` tool. There can be a maximum of 20 files associated + with the tool. + """ -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + Overrides the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/assistants/__init__.py b/src/openai/types/beta/assistants/__init__.py deleted file mode 100644 index 9dbb3e2b8b..0000000000 --- a/src/openai/types/beta/assistants/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from .assistant_file import AssistantFile as AssistantFile -from .file_list_params import FileListParams as FileListParams -from .file_create_params import FileCreateParams as FileCreateParams -from .file_delete_response import FileDeleteResponse as FileDeleteResponse diff --git a/src/openai/types/beta/assistants/assistant_file.py b/src/openai/types/beta/assistants/assistant_file.py deleted file mode 100644 index 1d1573ac0f..0000000000 --- a/src/openai/types/beta/assistants/assistant_file.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
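A sketch of updating an existing assistant under the new shape: the removed `file_ids` parameter is replaced by `tool_resources`, whose `file_search` and `code_interpreter` entries override what is attached to the assistant. All IDs below are hypothetical placeholders.

from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.update(
    "asst_abc123",  # hypothetical assistant ID
    tools=[{"type": "file_search"}, {"type": "code_interpreter"}],
    tool_resources={
        # Overrides the single vector store attached to the assistant.
        "file_search": {"vector_store_ids": ["vs_abc123"]},
        # Overrides the files available to code_interpreter (up to 20).
        "code_interpreter": {"file_ids": ["file-abc123"]},
    },
)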
- -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = ["AssistantFile"] - - -class AssistantFile(BaseModel): - id: str - """The identifier, which can be referenced in API endpoints.""" - - assistant_id: str - """The assistant ID that the file is attached to.""" - - created_at: int - """The Unix timestamp (in seconds) for when the assistant file was created.""" - - object: Literal["assistant.file"] - """The object type, which is always `assistant.file`.""" diff --git a/src/openai/types/beta/assistants/file_delete_response.py b/src/openai/types/beta/assistants/file_delete_response.py deleted file mode 100644 index 52c138feda..0000000000 --- a/src/openai/types/beta/assistants/file_delete_response.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = ["FileDeleteResponse"] - - -class FileDeleteResponse(BaseModel): - id: str - - deleted: bool - - object: Literal["assistant.file.deleted"] diff --git a/src/openai/types/beta/chat/__init__.py b/src/openai/types/beta/chat/__init__.py index b2f53e3525..f8ee8b14b1 100644 --- a/src/openai/types/beta/chat/__init__.py +++ b/src/openai/types/beta/chat/__init__.py @@ -1,3 +1,3 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/code_interpreter_tool.py b/src/openai/types/beta/code_interpreter_tool.py new file mode 100644 index 0000000000..17ab3de629 --- /dev/null +++ b/src/openai/types/beta/code_interpreter_tool.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["CodeInterpreterTool"] + + +class CodeInterpreterTool(BaseModel): + type: Literal["code_interpreter"] + """The type of tool being defined: `code_interpreter`""" diff --git a/src/openai/types/beta/code_interpreter_tool_param.py b/src/openai/types/beta/code_interpreter_tool_param.py new file mode 100644 index 0000000000..4f6916d756 --- /dev/null +++ b/src/openai/types/beta/code_interpreter_tool_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["CodeInterpreterToolParam"] + + +class CodeInterpreterToolParam(TypedDict, total=False): + type: Required[Literal["code_interpreter"]] + """The type of tool being defined: `code_interpreter`""" diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py new file mode 100644 index 0000000000..eea55ea6ac --- /dev/null +++ b/src/openai/types/beta/file_search_tool.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FileSearchTool"] + + +class FileSearchTool(BaseModel): + type: Literal["file_search"] + """The type of tool being defined: `file_search`""" diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py new file mode 100644 index 0000000000..d33fd06da4 --- /dev/null +++ b/src/openai/types/beta/file_search_tool_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FileSearchToolParam"] + + +class FileSearchToolParam(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" diff --git a/src/openai/types/beta/function_tool.py b/src/openai/types/beta/function_tool.py new file mode 100644 index 0000000000..f9227678df --- /dev/null +++ b/src/openai/types/beta/function_tool.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.function_definition import FunctionDefinition + +__all__ = ["FunctionTool"] + + +class FunctionTool(BaseModel): + function: FunctionDefinition + + type: Literal["function"] + """The type of tool being defined: `function`""" diff --git a/src/openai/types/beta/function_tool_param.py b/src/openai/types/beta/function_tool_param.py new file mode 100644 index 0000000000..b44c0d47ef --- /dev/null +++ b/src/openai/types/beta/function_tool_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from ...types import shared_params + +__all__ = ["FunctionToolParam"] + + +class FunctionToolParam(TypedDict, total=False): + function: Required[shared_params.FunctionDefinition] + + type: Required[Literal["function"]] + """The type of tool being defined: `function`""" diff --git a/src/openai/types/beta/thread.py b/src/openai/types/beta/thread.py index 474527033a..6f7a6c7d0c 100644 --- a/src/openai/types/beta/thread.py +++ b/src/openai/types/beta/thread.py @@ -1,12 +1,36 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins -from typing import Optional +from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel -__all__ = ["Thread"] +__all__ = ["Thread", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] + + +class ToolResourcesCodeInterpreter(BaseModel): + file_ids: Optional[List[str]] = None + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(BaseModel): + vector_store_ids: Optional[List[str]] = None + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + +class ToolResources(BaseModel): + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None + + file_search: Optional[ToolResourcesFileSearch] = None class Thread(BaseModel): @@ -16,7 +40,7 @@ class Thread(BaseModel): created_at: int """The Unix timestamp (in seconds) for when the thread was created.""" - metadata: Optional[builtins.object] = None + metadata: Optional[object] = None """Set of 16 key-value pairs that can be attached to an object. 
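A sketch of the three tool param shapes accepted after the rename from `retrieval` to `file_search` (see AssistantToolParam above): plain `code_interpreter` and `file_search` markers, plus a function tool wrapping a function definition. The function schema below is a hypothetical example.

from openai import OpenAI

client = OpenAI()

tools = [
    {"type": "code_interpreter"},
    {"type": "file_search"},
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical function
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    },
]

assistant = client.beta.assistants.create(model="gpt-4o", tools=tools)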
This can be useful for storing additional information about the object in a @@ -26,3 +50,11 @@ class Thread(BaseModel): object: Literal["thread"] """The object type, which is always `thread`.""" + + tool_resources: Optional[ToolResources] = None + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 9f58dcd875..6efe6e7aee 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -1,24 +1,38 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union, Optional +from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from .function_tool_param import FunctionToolParam +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam +from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from .threads.message_content_part_param import MessageContentPartParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ - "ThreadCreateAndRunParams", + "ThreadCreateAndRunParamsBase", "Thread", "ThreadMessage", + "ThreadMessageAttachment", + "ThreadMessageAttachmentTool", + "ThreadToolResources", + "ThreadToolResourcesCodeInterpreter", + "ThreadToolResourcesFileSearch", + "ThreadToolResourcesFileSearchVectorStore", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "TruncationStrategy", + "ThreadCreateAndRunParamsNonStreaming", + "ThreadCreateAndRunParamsStreaming", ] -class ThreadCreateAndRunParams(TypedDict, total=False): +class ThreadCreateAndRunParamsBase(TypedDict, total=False): assistant_id: Required[str] """ The ID of the @@ -32,6 +46,24 @@ class ThreadCreateAndRunParams(TypedDict, total=False): This is useful for modifying the behavior on a per-run basis. """ + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + max_prompt_tokens: Optional[int] + """The maximum number of prompt tokens that may be used over the course of the run. + + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + metadata: Optional[object] """Set of 16 key-value pairs that can be attached to an object. @@ -40,7 +72,32 @@ class ThreadCreateAndRunParams(TypedDict, total=False): a maxium of 512 characters long. 
""" - model: Optional[str] + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -48,33 +105,102 @@ class ThreadCreateAndRunParams(TypedDict, total=False): assistant will be used. """ + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + thread: Thread """If no thread is provided, an empty thread will be created.""" - tools: Optional[List[Tool]] + tool_choice: Optional[AssistantToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Optional[Iterable[Tool]] """Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
+ """ + + truncation_strategy: Optional[TruncationStrategy] + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + +ThreadMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class ThreadMessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[ThreadMessageAttachmentTool] + """The tools to add this file to.""" + class ThreadMessage(TypedDict, total=False): - content: Required[str] - """The content of the message.""" + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" - role: Required[Literal["user"]] - """The role of the entity that is creating the message. + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: - Currently only `user` is supported. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. """ - file_ids: List[str] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. - """ + attachments: Optional[Iterable[ThreadMessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" metadata: Optional[object] """Set of 16 key-value pairs that can be attached to an object. @@ -85,8 +211,58 @@ class ThreadMessage(TypedDict, total=False): """ +class ThreadToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. + + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maxium of 512 characters long. + """ + + +class ThreadToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + vector_stores: Iterable[ThreadToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. 
+ """ + + +class ThreadToolResources(TypedDict, total=False): + code_interpreter: ThreadToolResourcesCodeInterpreter + + file_search: ThreadToolResourcesFileSearch + + class Thread(TypedDict, total=False): - messages: List[ThreadMessage] + messages: Iterable[ThreadMessage] """ A list of [messages](https://platform.openai.com/docs/api-reference/messages) to start the thread with. @@ -100,22 +276,76 @@ class Thread(TypedDict, total=False): a maxium of 512 characters long. """ + tool_resources: Optional[ThreadToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + file_search: ToolResourcesFileSearch -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +Tool = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] + + +class TruncationStrategy(TypedDict, total=False): + type: Required[Literal["auto", "last_messages"]] + """The truncation strategy to use for the thread. + + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ + + +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class ThreadCreateAndRunParamsStreaming(ThreadCreateAndRunParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. 
+ """ -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] +ThreadCreateAndRunParams = Union[ThreadCreateAndRunParamsNonStreaming, ThreadCreateAndRunParamsStreaming] diff --git a/src/openai/types/beta/thread_create_params.py b/src/openai/types/beta/thread_create_params.py index d2ec78bbc3..ccf50d58dc 100644 --- a/src/openai/types/beta/thread_create_params.py +++ b/src/openai/types/beta/thread_create_params.py @@ -1,15 +1,28 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional +from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["ThreadCreateParams", "Message"] +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam +from .threads.message_content_part_param import MessageContentPartParam + +__all__ = [ + "ThreadCreateParams", + "Message", + "MessageAttachment", + "MessageAttachmentTool", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "ToolResourcesFileSearchVectorStore", +] class ThreadCreateParams(TypedDict, total=False): - messages: List[Message] + messages: Iterable[Message] """ A list of [messages](https://platform.openai.com/docs/api-reference/messages) to start the thread with. @@ -23,24 +36,41 @@ class ThreadCreateParams(TypedDict, total=False): a maxium of 512 characters long. """ + tool_resources: Optional[ToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +MessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class MessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[MessageAttachmentTool] + """The tools to add this file to.""" + class Message(TypedDict, total=False): - content: Required[str] - """The content of the message.""" + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" - role: Required[Literal["user"]] - """The role of the entity that is creating the message. + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: - Currently only `user` is supported. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. """ - file_ids: List[str] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. - """ + attachments: Optional[Iterable[MessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" metadata: Optional[object] """Set of 16 key-value pairs that can be attached to an object. 
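A sketch of the split ThreadCreateAndRunParams above: the streaming and non-streaming variants differ only in the `stream` literal, and the new `truncation_strategy`, `max_prompt_tokens`, and `max_completion_tokens` fields bound the run's context and output. The assistant ID, message text, and numeric limits are hypothetical values.

from openai import OpenAI

client = OpenAI()

stream = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # hypothetical assistant ID
    thread={
        "messages": [
            {"role": "user", "content": "Summarize the attached report."},
        ],
    },
    max_prompt_tokens=2000,       # the run ends with status `incomplete` if this budget is exceeded
    max_completion_tokens=500,
    truncation_strategy={"type": "last_messages", "last_messages": 5},
    stream=True,                  # selects the ThreadCreateAndRunParamsStreaming overload
)

for event in stream:
    if event.event == "thread.run.completed":
        print("run", event.data.id, "completed")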
@@ -49,3 +79,53 @@ class Message(TypedDict, total=False): structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. + + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maxium of 512 characters long. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. + """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/thread_deleted.py b/src/openai/types/beta/thread_deleted.py index 410ac1aea0..d385626319 100644 --- a/src/openai/types/beta/thread_deleted.py +++ b/src/openai/types/beta/thread_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/thread_update_params.py b/src/openai/types/beta/thread_update_params.py index 6c1d32fc57..7210ab77c9 100644 --- a/src/openai/types/beta/thread_update_params.py +++ b/src/openai/types/beta/thread_update_params.py @@ -1,11 +1,11 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Optional +from typing import List, Optional from typing_extensions import TypedDict -__all__ = ["ThreadUpdateParams"] +__all__ = ["ThreadUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] class ThreadUpdateParams(TypedDict, total=False): @@ -16,3 +16,36 @@ class ThreadUpdateParams(TypedDict, total=False): structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. """ + + tool_resources: Optional[ToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
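A sketch of creating a thread whose first message uses the new `attachments` field (replacing the removed per-message `file_ids`), then attaching a vector store afterwards via the thread update params above. The file, vector store, and thread IDs are hypothetical placeholders.

from openai import OpenAI

client = OpenAI()

thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "What does the contract say about termination?",
            "attachments": [
                # Make this file available to the file_search tool for this message.
                {"file_id": "file-abc123", "tools": [{"type": "file_search"}]},
            ],
        },
    ],
)

client.beta.threads.update(
    thread.id,
    tool_resources={"file_search": {"vector_store_ids": ["vs_abc123"]}},
)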
+ """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/threads/__init__.py b/src/openai/types/beta/threads/__init__.py index 8c77466dec..023d76fc13 100644 --- a/src/openai/types/beta/threads/__init__.py +++ b/src/openai/types/beta/threads/__init__.py @@ -1,16 +1,44 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from .run import Run as Run -from .thread_message import ThreadMessage as ThreadMessage +from .text import Text as Text +from .message import Message as Message +from .image_url import ImageURL as ImageURL +from .annotation import Annotation as Annotation +from .image_file import ImageFile as ImageFile +from .run_status import RunStatus as RunStatus +from .text_delta import TextDelta as TextDelta +from .message_delta import MessageDelta as MessageDelta +from .image_url_delta import ImageURLDelta as ImageURLDelta +from .image_url_param import ImageURLParam as ImageURLParam +from .message_content import MessageContent as MessageContent +from .message_deleted import MessageDeleted as MessageDeleted from .run_list_params import RunListParams as RunListParams +from .annotation_delta import AnnotationDelta as AnnotationDelta +from .image_file_delta import ImageFileDelta as ImageFileDelta +from .image_file_param import ImageFileParam as ImageFileParam +from .text_delta_block import TextDeltaBlock as TextDeltaBlock from .run_create_params import RunCreateParams as RunCreateParams from .run_update_params import RunUpdateParams as RunUpdateParams +from .text_content_block import TextContentBlock as TextContentBlock +from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent from .message_list_params import MessageListParams as MessageListParams -from .message_content_text import MessageContentText as MessageContentText +from .file_path_annotation import FilePathAnnotation as FilePathAnnotation +from .image_url_delta_block import ImageURLDeltaBlock as ImageURLDeltaBlock +from .message_content_delta import MessageContentDelta as MessageContentDelta from .message_create_params import MessageCreateParams as MessageCreateParams from .message_update_params import MessageUpdateParams as MessageUpdateParams -from .message_content_image_file import MessageContentImageFile as MessageContentImageFile +from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock +from .image_url_content_block import ImageURLContentBlock as ImageURLContentBlock +from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation +from .image_file_content_block import ImageFileContentBlock as ImageFileContentBlock +from .text_content_block_param import TextContentBlockParam as TextContentBlockParam +from .file_path_delta_annotation import 
FilePathDeltaAnnotation as FilePathDeltaAnnotation +from .message_content_part_param import MessageContentPartParam as MessageContentPartParam +from .image_url_content_block_param import ImageURLContentBlockParam as ImageURLContentBlockParam +from .file_citation_delta_annotation import FileCitationDeltaAnnotation as FileCitationDeltaAnnotation +from .image_file_content_block_param import ImageFileContentBlockParam as ImageFileContentBlockParam from .run_submit_tool_outputs_params import RunSubmitToolOutputsParams as RunSubmitToolOutputsParams from .required_action_function_tool_call import RequiredActionFunctionToolCall as RequiredActionFunctionToolCall diff --git a/src/openai/types/beta/threads/annotation.py b/src/openai/types/beta/threads/annotation.py new file mode 100644 index 0000000000..31e228c831 --- /dev/null +++ b/src/openai/types/beta/threads/annotation.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .file_path_annotation import FilePathAnnotation +from .file_citation_annotation import FileCitationAnnotation + +__all__ = ["Annotation"] + +Annotation = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/beta/threads/annotation_delta.py b/src/openai/types/beta/threads/annotation_delta.py new file mode 100644 index 0000000000..912429672f --- /dev/null +++ b/src/openai/types/beta/threads/annotation_delta.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .file_path_delta_annotation import FilePathDeltaAnnotation +from .file_citation_delta_annotation import FileCitationDeltaAnnotation + +__all__ = ["AnnotationDelta"] + +AnnotationDelta = Annotated[ + Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/threads/file_citation_annotation.py b/src/openai/types/beta/threads/file_citation_annotation.py new file mode 100644 index 0000000000..68571cd477 --- /dev/null +++ b/src/openai/types/beta/threads/file_citation_annotation.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FileCitationAnnotation", "FileCitation"] + + +class FileCitation(BaseModel): + file_id: str + """The ID of the specific File the citation is from.""" + + quote: str + """The specific quote in the file.""" + + +class FileCitationAnnotation(BaseModel): + end_index: int + + file_citation: FileCitation + + start_index: int + + text: str + """The text in the message content that needs to be replaced.""" + + type: Literal["file_citation"] + """Always `file_citation`.""" diff --git a/src/openai/types/beta/threads/file_citation_delta_annotation.py b/src/openai/types/beta/threads/file_citation_delta_annotation.py new file mode 100644 index 0000000000..b40c0d123e --- /dev/null +++ b/src/openai/types/beta/threads/file_citation_delta_annotation.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
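A sketch of walking the Annotation union (file_citation vs. file_path) on a retrieved message, using the discriminated `type` field defined above. The thread and message IDs are hypothetical placeholders.

from openai import OpenAI

client = OpenAI()

message = client.beta.threads.messages.retrieve(
    "msg_abc123",               # hypothetical message ID
    thread_id="thread_abc123",  # hypothetical thread ID
)

for block in message.content:
    if block.type != "text":
        continue
    for annotation in block.text.annotations:
        if annotation.type == "file_citation":
            print("cited file:", annotation.file_citation.file_id)
        elif annotation.type == "file_path":
            print("generated file:", annotation.file_path.file_id)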
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FileCitationDeltaAnnotation", "FileCitation"] + + +class FileCitation(BaseModel): + file_id: Optional[str] = None + """The ID of the specific File the citation is from.""" + + quote: Optional[str] = None + """The specific quote in the file.""" + + +class FileCitationDeltaAnnotation(BaseModel): + index: int + """The index of the annotation in the text content part.""" + + type: Literal["file_citation"] + """Always `file_citation`.""" + + end_index: Optional[int] = None + + file_citation: Optional[FileCitation] = None + + start_index: Optional[int] = None + + text: Optional[str] = None + """The text in the message content that needs to be replaced.""" diff --git a/src/openai/types/beta/threads/file_path_annotation.py b/src/openai/types/beta/threads/file_path_annotation.py new file mode 100644 index 0000000000..9812737ece --- /dev/null +++ b/src/openai/types/beta/threads/file_path_annotation.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FilePathAnnotation", "FilePath"] + + +class FilePath(BaseModel): + file_id: str + """The ID of the file that was generated.""" + + +class FilePathAnnotation(BaseModel): + end_index: int + + file_path: FilePath + + start_index: int + + text: str + """The text in the message content that needs to be replaced.""" + + type: Literal["file_path"] + """Always `file_path`.""" diff --git a/src/openai/types/beta/threads/file_path_delta_annotation.py b/src/openai/types/beta/threads/file_path_delta_annotation.py new file mode 100644 index 0000000000..0cbb445e48 --- /dev/null +++ b/src/openai/types/beta/threads/file_path_delta_annotation.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FilePathDeltaAnnotation", "FilePath"] + + +class FilePath(BaseModel): + file_id: Optional[str] = None + """The ID of the file that was generated.""" + + +class FilePathDeltaAnnotation(BaseModel): + index: int + """The index of the annotation in the text content part.""" + + type: Literal["file_path"] + """Always `file_path`.""" + + end_index: Optional[int] = None + + file_path: Optional[FilePath] = None + + start_index: Optional[int] = None + + text: Optional[str] = None + """The text in the message content that needs to be replaced.""" diff --git a/src/openai/types/beta/threads/image_file.py b/src/openai/types/beta/threads/image_file.py new file mode 100644 index 0000000000..6000d97500 --- /dev/null +++ b/src/openai/types/beta/threads/image_file.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageFile"] + + +class ImageFile(BaseModel): + file_id: str + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. Set `purpose="vision"` when uploading the File if you + need to later display the file content. + """ + + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image if specified by the user. 
+ + `low` uses fewer tokens, you can opt in to high resolution using `high`. + """ diff --git a/src/openai/types/beta/threads/image_file_content_block.py b/src/openai/types/beta/threads/image_file_content_block.py new file mode 100644 index 0000000000..a909999065 --- /dev/null +++ b/src/openai/types/beta/threads/image_file_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_file import ImageFile + +__all__ = ["ImageFileContentBlock"] + + +class ImageFileContentBlock(BaseModel): + image_file: ImageFile + + type: Literal["image_file"] + """Always `image_file`.""" diff --git a/src/openai/types/beta/threads/image_file_content_block_param.py b/src/openai/types/beta/threads/image_file_content_block_param.py new file mode 100644 index 0000000000..48d94bee36 --- /dev/null +++ b/src/openai/types/beta/threads/image_file_content_block_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .image_file_param import ImageFileParam + +__all__ = ["ImageFileContentBlockParam"] + + +class ImageFileContentBlockParam(TypedDict, total=False): + image_file: Required[ImageFileParam] + + type: Required[Literal["image_file"]] + """Always `image_file`.""" diff --git a/src/openai/types/beta/threads/image_file_delta.py b/src/openai/types/beta/threads/image_file_delta.py new file mode 100644 index 0000000000..4581184c7a --- /dev/null +++ b/src/openai/types/beta/threads/image_file_delta.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageFileDelta"] + + +class ImageFileDelta(BaseModel): + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image if specified by the user. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. + """ + + file_id: Optional[str] = None + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. Set `purpose="vision"` when uploading the File if you + need to later display the file content. + """ diff --git a/src/openai/types/beta/threads/image_file_delta_block.py b/src/openai/types/beta/threads/image_file_delta_block.py new file mode 100644 index 0000000000..0a5a2e8a5f --- /dev/null +++ b/src/openai/types/beta/threads/image_file_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_file_delta import ImageFileDelta + +__all__ = ["ImageFileDeltaBlock"] + + +class ImageFileDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["image_file"] + """Always `image_file`.""" + + image_file: Optional[ImageFileDelta] = None diff --git a/src/openai/types/beta/threads/image_file_param.py b/src/openai/types/beta/threads/image_file_param.py new file mode 100644 index 0000000000..e4a85358b9 --- /dev/null +++ b/src/openai/types/beta/threads/image_file_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ImageFileParam"] + + +class ImageFileParam(TypedDict, total=False): + file_id: Required[str] + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. Set `purpose="vision"` when uploading the File if you + need to later display the file content. + """ + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image if specified by the user. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. + """ diff --git a/src/openai/types/beta/threads/image_url.py b/src/openai/types/beta/threads/image_url.py new file mode 100644 index 0000000000..d1fac147b2 --- /dev/null +++ b/src/openai/types/beta/threads/image_url.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageURL"] + + +class ImageURL(BaseModel): + url: str + """ + The external URL of the image, must be a supported image types: jpeg, jpg, png, + gif, webp. + """ + + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. Default + value is `auto` + """ diff --git a/src/openai/types/beta/threads/image_url_content_block.py b/src/openai/types/beta/threads/image_url_content_block.py new file mode 100644 index 0000000000..40a16c1df8 --- /dev/null +++ b/src/openai/types/beta/threads/image_url_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .image_url import ImageURL +from ...._models import BaseModel + +__all__ = ["ImageURLContentBlock"] + + +class ImageURLContentBlock(BaseModel): + image_url: ImageURL + + type: Literal["image_url"] + """The type of the content part.""" diff --git a/src/openai/types/beta/threads/image_url_content_block_param.py b/src/openai/types/beta/threads/image_url_content_block_param.py new file mode 100644 index 0000000000..585b926c58 --- /dev/null +++ b/src/openai/types/beta/threads/image_url_content_block_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .image_url_param import ImageURLParam + +__all__ = ["ImageURLContentBlockParam"] + + +class ImageURLContentBlockParam(TypedDict, total=False): + image_url: Required[ImageURLParam] + + type: Required[Literal["image_url"]] + """The type of the content part.""" diff --git a/src/openai/types/beta/threads/image_url_delta.py b/src/openai/types/beta/threads/image_url_delta.py new file mode 100644 index 0000000000..e402671908 --- /dev/null +++ b/src/openai/types/beta/threads/image_url_delta.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageURLDelta"] + + +class ImageURLDelta(BaseModel): + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. 
+ """ + + url: Optional[str] = None + """ + The URL of the image, must be a supported image types: jpeg, jpg, png, gif, + webp. + """ diff --git a/src/openai/types/beta/threads/image_url_delta_block.py b/src/openai/types/beta/threads/image_url_delta_block.py new file mode 100644 index 0000000000..5252da12dd --- /dev/null +++ b/src/openai/types/beta/threads/image_url_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_url_delta import ImageURLDelta + +__all__ = ["ImageURLDeltaBlock"] + + +class ImageURLDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["image_url"] + """Always `image_url`.""" + + image_url: Optional[ImageURLDelta] = None diff --git a/src/openai/types/beta/threads/image_url_param.py b/src/openai/types/beta/threads/image_url_param.py new file mode 100644 index 0000000000..6b7e427edd --- /dev/null +++ b/src/openai/types/beta/threads/image_url_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ImageURLParam"] + + +class ImageURLParam(TypedDict, total=False): + url: Required[str] + """ + The external URL of the image, must be a supported image types: jpeg, jpg, png, + gif, webp. + """ + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. Default + value is `auto` + """ diff --git a/src/openai/types/beta/threads/message.py b/src/openai/types/beta/threads/message.py new file mode 100644 index 0000000000..ebaabdb0f5 --- /dev/null +++ b/src/openai/types/beta/threads/message.py @@ -0,0 +1,89 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_content import MessageContent +from ..file_search_tool import FileSearchTool +from ..code_interpreter_tool import CodeInterpreterTool + +__all__ = ["Message", "Attachment", "AttachmentTool", "IncompleteDetails"] + +AttachmentTool = Union[CodeInterpreterTool, FileSearchTool] + + +class Attachment(BaseModel): + file_id: Optional[str] = None + """The ID of the file to attach to the message.""" + + tools: Optional[List[AttachmentTool]] = None + """The tools to add this file to.""" + + +class IncompleteDetails(BaseModel): + reason: Literal["content_filter", "max_tokens", "run_cancelled", "run_expired", "run_failed"] + """The reason the message is incomplete.""" + + +class Message(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + assistant_id: Optional[str] = None + """ + If applicable, the ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) that + authored this message. 
+ """ + + attachments: Optional[List[Attachment]] = None + """A list of files attached to the message, and the tools they were added to.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the message was completed.""" + + content: List[MessageContent] + """The content of the message in array of text and/or images.""" + + created_at: int + """The Unix timestamp (in seconds) for when the message was created.""" + + incomplete_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the message was marked as incomplete.""" + + incomplete_details: Optional[IncompleteDetails] = None + """On an incomplete message, details about why the message is incomplete.""" + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + object: Literal["thread.message"] + """The object type, which is always `thread.message`.""" + + role: Literal["user", "assistant"] + """The entity that produced the message. One of `user` or `assistant`.""" + + run_id: Optional[str] = None + """ + The ID of the [run](https://platform.openai.com/docs/api-reference/runs) + associated with the creation of this message. Value is `null` when messages are + created manually using the create message or create thread endpoints. + """ + + status: Literal["in_progress", "incomplete", "completed"] + """ + The status of the message, which can be either `in_progress`, `incomplete`, or + `completed`. + """ + + thread_id: str + """ + The [thread](https://platform.openai.com/docs/api-reference/threads) ID that + this message belongs to. + """ diff --git a/src/openai/types/beta/threads/message_content.py b/src/openai/types/beta/threads/message_content.py new file mode 100644 index 0000000000..4f17d14786 --- /dev/null +++ b/src/openai/types/beta/threads/message_content.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .text_content_block import TextContentBlock +from .image_url_content_block import ImageURLContentBlock +from .image_file_content_block import ImageFileContentBlock + +__all__ = ["MessageContent"] + +MessageContent = Annotated[ + Union[ImageFileContentBlock, ImageURLContentBlock, TextContentBlock], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/threads/message_content_delta.py b/src/openai/types/beta/threads/message_content_delta.py new file mode 100644 index 0000000000..6c5f732b12 --- /dev/null +++ b/src/openai/types/beta/threads/message_content_delta.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Annotated + +from ...._utils import PropertyInfo +from .text_delta_block import TextDeltaBlock +from .image_url_delta_block import ImageURLDeltaBlock +from .image_file_delta_block import ImageFileDeltaBlock + +__all__ = ["MessageContentDelta"] + +MessageContentDelta = Annotated[ + Union[ImageFileDeltaBlock, TextDeltaBlock, ImageURLDeltaBlock], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/threads/message_content_image_file.py b/src/openai/types/beta/threads/message_content_image_file.py deleted file mode 100644 index eeba5a633c..0000000000 --- a/src/openai/types/beta/threads/message_content_image_file.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = ["MessageContentImageFile", "ImageFile"] - - -class ImageFile(BaseModel): - file_id: str - """ - The [File](https://platform.openai.com/docs/api-reference/files) ID of the image - in the message content. - """ - - -class MessageContentImageFile(BaseModel): - image_file: ImageFile - - type: Literal["image_file"] - """Always `image_file`.""" diff --git a/src/openai/types/beta/threads/message_content_part_param.py b/src/openai/types/beta/threads/message_content_part_param.py new file mode 100644 index 0000000000..d11442a3a9 --- /dev/null +++ b/src/openai/types/beta/threads/message_content_part_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union + +from .text_content_block_param import TextContentBlockParam +from .image_url_content_block_param import ImageURLContentBlockParam +from .image_file_content_block_param import ImageFileContentBlockParam + +__all__ = ["MessageContentPartParam"] + +MessageContentPartParam = Union[ImageFileContentBlockParam, ImageURLContentBlockParam, TextContentBlockParam] diff --git a/src/openai/types/beta/threads/message_content_text.py b/src/openai/types/beta/threads/message_content_text.py deleted file mode 100644 index b529a384c6..0000000000 --- a/src/openai/types/beta/threads/message_content_text.py +++ /dev/null @@ -1,74 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
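On the request side, `MessageContentPartParam` means `content` may now be a list of typed parts rather than a bare string. A sketch of a mixed text-and-image user message; the thread and file IDs are placeholders, and the file is assumed to have been uploaded with `purpose="vision"`:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

message = client.beta.threads.messages.create(
    thread_id="thread_abc123",  # placeholder
    role="user",
    content=[
        {"type": "text", "text": "What trend does this chart show?"},
        {"type": "image_file", "image_file": {"file_id": "file_abc123", "detail": "high"}},
    ],
)
```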
- -from typing import List, Union -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = [ - "MessageContentText", - "Text", - "TextAnnotation", - "TextAnnotationFileCitation", - "TextAnnotationFileCitationFileCitation", - "TextAnnotationFilePath", - "TextAnnotationFilePathFilePath", -] - - -class TextAnnotationFileCitationFileCitation(BaseModel): - file_id: str - """The ID of the specific File the citation is from.""" - - quote: str - """The specific quote in the file.""" - - -class TextAnnotationFileCitation(BaseModel): - end_index: int - - file_citation: TextAnnotationFileCitationFileCitation - - start_index: int - - text: str - """The text in the message content that needs to be replaced.""" - - type: Literal["file_citation"] - """Always `file_citation`.""" - - -class TextAnnotationFilePathFilePath(BaseModel): - file_id: str - """The ID of the file that was generated.""" - - -class TextAnnotationFilePath(BaseModel): - end_index: int - - file_path: TextAnnotationFilePathFilePath - - start_index: int - - text: str - """The text in the message content that needs to be replaced.""" - - type: Literal["file_path"] - """Always `file_path`.""" - - -TextAnnotation = Union[TextAnnotationFileCitation, TextAnnotationFilePath] - - -class Text(BaseModel): - annotations: List[TextAnnotation] - - value: str - """The data that makes up the text.""" - - -class MessageContentText(BaseModel): - text: Text - - type: Literal["text"] - """Always `text`.""" diff --git a/src/openai/types/beta/threads/message_create_params.py b/src/openai/types/beta/threads/message_create_params.py index 8733f10b8a..3668df950d 100644 --- a/src/openai/types/beta/threads/message_create_params.py +++ b/src/openai/types/beta/threads/message_create_params.py @@ -1,30 +1,32 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["MessageCreateParams"] +from ..file_search_tool_param import FileSearchToolParam +from .message_content_part_param import MessageContentPartParam +from ..code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool"] class MessageCreateParams(TypedDict, total=False): - content: Required[str] - """The content of the message.""" + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" - role: Required[Literal["user"]] - """The role of the entity that is creating the message. + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: - Currently only `user` is supported. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. """ - file_ids: List[str] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. 
- """ + attachments: Optional[Iterable[Attachment]] + """A list of files attached to the message, and the tools they should be added to.""" metadata: Optional[object] """Set of 16 key-value pairs that can be attached to an object. @@ -33,3 +35,14 @@ class MessageCreateParams(TypedDict, total=False): structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long. """ + + +AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class Attachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AttachmentTool] + """The tools to add this file to.""" diff --git a/src/openai/types/beta/threads/message_deleted.py b/src/openai/types/beta/threads/message_deleted.py new file mode 100644 index 0000000000..48210777fa --- /dev/null +++ b/src/openai/types/beta/threads/message_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["MessageDeleted"] + + +class MessageDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["thread.message.deleted"] diff --git a/src/openai/types/beta/threads/message_delta.py b/src/openai/types/beta/threads/message_delta.py new file mode 100644 index 0000000000..ecd0dfe319 --- /dev/null +++ b/src/openai/types/beta/threads/message_delta.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_content_delta import MessageContentDelta + +__all__ = ["MessageDelta"] + + +class MessageDelta(BaseModel): + content: Optional[List[MessageContentDelta]] = None + """The content of the message in array of text and/or images.""" + + role: Optional[Literal["user", "assistant"]] = None + """The entity that produced the message. One of `user` or `assistant`.""" diff --git a/src/openai/types/beta/threads/message_delta_event.py b/src/openai/types/beta/threads/message_delta_event.py new file mode 100644 index 0000000000..3811cef679 --- /dev/null +++ b/src/openai/types/beta/threads/message_delta_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_delta import MessageDelta + +__all__ = ["MessageDeltaEvent"] + + +class MessageDeltaEvent(BaseModel): + id: str + """The identifier of the message, which can be referenced in API endpoints.""" + + delta: MessageDelta + """The delta containing the fields that have changed on the Message.""" + + object: Literal["thread.message.delta"] + """The object type, which is always `thread.message.delta`.""" diff --git a/src/openai/types/beta/threads/message_list_params.py b/src/openai/types/beta/threads/message_list_params.py index 31e407bb22..18c2442fb5 100644 --- a/src/openai/types/beta/threads/message_list_params.py +++ b/src/openai/types/beta/threads/message_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -37,3 +37,6 @@ class MessageListParams(TypedDict, total=False): `asc` for ascending order and `desc` for descending order. 
""" + + run_id: str + """Filter messages by the run ID that generated them.""" diff --git a/src/openai/types/beta/threads/message_update_params.py b/src/openai/types/beta/threads/message_update_params.py index 2e3e1b4b1a..7000f33122 100644 --- a/src/openai/types/beta/threads/message_update_params.py +++ b/src/openai/types/beta/threads/message_update_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/threads/messages/__init__.py b/src/openai/types/beta/threads/messages/__init__.py deleted file mode 100644 index 6046f68204..0000000000 --- a/src/openai/types/beta/threads/messages/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from .message_file import MessageFile as MessageFile -from .file_list_params import FileListParams as FileListParams diff --git a/src/openai/types/beta/threads/messages/message_file.py b/src/openai/types/beta/threads/messages/message_file.py deleted file mode 100644 index 5332dee962..0000000000 --- a/src/openai/types/beta/threads/messages/message_file.py +++ /dev/null @@ -1,25 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ....._models import BaseModel - -__all__ = ["MessageFile"] - - -class MessageFile(BaseModel): - id: str - """The identifier, which can be referenced in API endpoints.""" - - created_at: int - """The Unix timestamp (in seconds) for when the message file was created.""" - - message_id: str - """ - The ID of the [message](https://platform.openai.com/docs/api-reference/messages) - that the [File](https://platform.openai.com/docs/api-reference/files) is - attached to. - """ - - object: Literal["thread.message.file"] - """The object type, which is always `thread.message.file`.""" diff --git a/src/openai/types/beta/threads/required_action_function_tool_call.py b/src/openai/types/beta/threads/required_action_function_tool_call.py index 0284d0f188..a24dfd068b 100644 --- a/src/openai/types/beta/threads/required_action_function_tool_call.py +++ b/src/openai/types/beta/threads/required_action_function_tool_call.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index db4bc0e07d..8244ffd598 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -1,29 +1,38 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-import builtins -from typing import List, Union, Optional +from typing import List, Optional from typing_extensions import Literal -from ...shared import FunctionDefinition from ...._models import BaseModel +from .run_status import RunStatus +from ..assistant_tool import AssistantTool +from ..assistant_tool_choice_option import AssistantToolChoiceOption +from ..assistant_response_format_option import AssistantResponseFormatOption from .required_action_function_tool_call import RequiredActionFunctionToolCall __all__ = [ "Run", + "IncompleteDetails", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "TruncationStrategy", "Usage", ] +class IncompleteDetails(BaseModel): + reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None + """The reason why the run is incomplete. + + This will point to which specific token limit was reached over the course of the + run. + """ + + class LastError(BaseModel): - code: Literal["server_error", "rate_limit_exceeded"] - """One of `server_error` or `rate_limit_exceeded`.""" + code: Literal["server_error", "rate_limit_exceeded", "invalid_prompt"] + """One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`.""" message: str """A human-readable description of the error.""" @@ -42,24 +51,21 @@ class RequiredAction(BaseModel): """For now, this is always `submit_tool_outputs`.""" -class ToolAssistantToolsCode(BaseModel): - type: Literal["code_interpreter"] - """The type of tool being defined: `code_interpreter`""" - - -class ToolAssistantToolsRetrieval(BaseModel): - type: Literal["retrieval"] - """The type of tool being defined: `retrieval`""" - - -class ToolAssistantToolsFunction(BaseModel): - function: FunctionDefinition - - type: Literal["function"] - """The type of tool being defined: `function`""" +class TruncationStrategy(BaseModel): + type: Literal["auto", "last_messages"] + """The truncation strategy to use for the thread. + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] + last_messages: Optional[int] = None + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ class Usage(BaseModel): @@ -93,17 +99,16 @@ class Run(BaseModel): created_at: int """The Unix timestamp (in seconds) for when the run was created.""" - expires_at: int + expires_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run will expire.""" failed_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run failed.""" - file_ids: List[str] - """ - The list of [File](https://platform.openai.com/docs/api-reference/files) IDs the - [assistant](https://platform.openai.com/docs/api-reference/assistants) used for - this run. + incomplete_details: Optional[IncompleteDetails] = None + """Details on why the run is incomplete. + + Will be `null` if the run is not incomplete. """ instructions: str @@ -116,7 +121,19 @@ class Run(BaseModel): last_error: Optional[LastError] = None """The last error associated with this run. 
Will be `null` if there are no errors.""" - metadata: Optional[builtins.object] = None + max_completion_tokens: Optional[int] = None + """ + The maximum number of completion tokens specified to have been used over the + course of the run. + """ + + max_prompt_tokens: Optional[int] = None + """ + The maximum number of prompt tokens specified to have been used over the course + of the run. + """ + + metadata: Optional[object] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a @@ -140,16 +157,33 @@ class Run(BaseModel): Will be `null` if no action is required. """ + response_format: Optional[AssistantResponseFormatOption] = None + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + started_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run was started.""" - status: Literal[ - "queued", "in_progress", "requires_action", "cancelling", "cancelled", "failed", "completed", "expired" - ] + status: RunStatus """ The status of the run, which can be either `queued`, `in_progress`, - `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, or - `expired`. + `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, + `incomplete`, or `expired`. """ thread_id: str @@ -158,16 +192,39 @@ class Run(BaseModel): that was executed on as a part of this run. """ - tools: List[Tool] + tool_choice: Optional[AssistantToolChoiceOption] = None + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tools: List[AssistantTool] """ The list of tools that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run. """ + truncation_strategy: Optional[TruncationStrategy] = None + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + usage: Optional[Usage] = None """Usage statistics related to the run. This value will be `null` if the run is not in a terminal state (i.e. `in_progress`, `queued`, etc.). """ + + temperature: Optional[float] = None + """The sampling temperature used for this run. 
If not set, defaults to 1.""" + + top_p: Optional[float] = None + """The nucleus sampling value used for this run. If not set, defaults to 1.""" diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index a4f41a9338..90c9708596 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -1,22 +1,29 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -from ....types import shared_params +from ..assistant_tool_param import AssistantToolParam +from ..file_search_tool_param import FileSearchToolParam +from .message_content_part_param import MessageContentPartParam +from ..code_interpreter_tool_param import CodeInterpreterToolParam +from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ - "RunCreateParams", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "RunCreateParamsBase", + "AdditionalMessage", + "AdditionalMessageAttachment", + "AdditionalMessageAttachmentTool", + "TruncationStrategy", + "RunCreateParamsNonStreaming", + "RunCreateParamsStreaming", ] -class RunCreateParams(TypedDict, total=False): +class RunCreateParamsBase(TypedDict, total=False): assistant_id: Required[str] """ The ID of the @@ -31,6 +38,9 @@ class RunCreateParams(TypedDict, total=False): other instructions. """ + additional_messages: Optional[Iterable[AdditionalMessage]] + """Adds additional messages to the thread before creating the run.""" + instructions: Optional[str] """ Overrides the @@ -38,6 +48,24 @@ class RunCreateParams(TypedDict, total=False): of the assistant. This is useful for modifying the behavior on a per-run basis. """ + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + max_prompt_tokens: Optional[int] + """The maximum number of prompt tokens that may be used over the course of the run. + + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + metadata: Optional[object] """Set of 16 key-value pairs that can be attached to an object. @@ -46,7 +74,32 @@ class RunCreateParams(TypedDict, total=False): a maxium of 512 characters long. 
""" - model: Optional[str] + model: Union[ + str, + Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + None, + ] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -54,28 +107,134 @@ class RunCreateParams(TypedDict, total=False): assistant will be used. """ - tools: Optional[List[Tool]] + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_choice: Optional[AssistantToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tools: Optional[Iterable[AssistantToolParam]] """Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ + + truncation_strategy: Optional[TruncationStrategy] + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. 
+ """ + + +AdditionalMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class AdditionalMessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AdditionalMessageAttachmentTool] + """The tools to add this file to.""" + + +class AdditionalMessage(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ + + attachments: Optional[Iterable[AdditionalMessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" +class TruncationStrategy(TypedDict, total=False): + type: Required[Literal["auto", "last_messages"]] + """The truncation strategy to use for the thread. + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class RunCreateParamsNonStreaming(RunCreateParamsBase): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +class RunCreateParamsStreaming(RunCreateParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] +RunCreateParams = Union[RunCreateParamsNonStreaming, RunCreateParamsStreaming] diff --git a/src/openai/types/beta/threads/run_list_params.py b/src/openai/types/beta/threads/run_list_params.py index 5f41347718..1e32bca4b4 100644 --- a/src/openai/types/beta/threads/run_list_params.py +++ b/src/openai/types/beta/threads/run_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/threads/run_status.py b/src/openai/types/beta/threads/run_status.py new file mode 100644 index 0000000000..6666d00e5a --- /dev/null +++ b/src/openai/types/beta/threads/run_status.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +__all__ = ["RunStatus"] + +RunStatus = Literal[ + "queued", + "in_progress", + "requires_action", + "cancelling", + "cancelled", + "failed", + "completed", + "incomplete", + "expired", +] diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py index a960f0f06f..ccb5e5e97e 100644 --- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py +++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -1,17 +1,22 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List -from typing_extensions import Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict -__all__ = ["RunSubmitToolOutputsParams", "ToolOutput"] +__all__ = [ + "RunSubmitToolOutputsParamsBase", + "ToolOutput", + "RunSubmitToolOutputsParamsNonStreaming", + "RunSubmitToolOutputsParamsStreaming", +] -class RunSubmitToolOutputsParams(TypedDict, total=False): +class RunSubmitToolOutputsParamsBase(TypedDict, total=False): thread_id: Required[str] - tool_outputs: Required[List[ToolOutput]] + tool_outputs: Required[Iterable[ToolOutput]] """A list of tools for which the outputs are being submitted.""" @@ -24,3 +29,24 @@ class ToolOutput(TypedDict, total=False): The ID of the tool call in the `required_action` object within the run object the output is being submitted for. """ + + +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class RunSubmitToolOutputsParamsStreaming(RunSubmitToolOutputsParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +RunSubmitToolOutputsParams = Union[RunSubmitToolOutputsParamsNonStreaming, RunSubmitToolOutputsParamsStreaming] diff --git a/src/openai/types/beta/threads/run_update_params.py b/src/openai/types/beta/threads/run_update_params.py index 09f81aa003..e595eac882 100644 --- a/src/openai/types/beta/threads/run_update_params.py +++ b/src/openai/types/beta/threads/run_update_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/threads/runs/__init__.py b/src/openai/types/beta/threads/runs/__init__.py index 16cb852922..a312ce3df2 100644 --- a/src/openai/types/beta/threads/runs/__init__.py +++ b/src/openai/types/beta/threads/runs/__init__.py @@ -1,11 +1,22 @@ -# File generated from our OpenAPI spec by Stainless. 
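The widened `RunCreateParams` surface (token budgets, truncation, tool choice, sampling controls) and the shared `RunStatus` alias can be exercised in a single call. A sketch with illustrative values and placeholder IDs; it is not prescriptive about which knobs to combine:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # placeholder
    assistant_id="asst_abc123",  # placeholder
    max_prompt_tokens=2000,
    max_completion_tokens=500,
    truncation_strategy={"type": "last_messages", "last_messages": 10},
    tool_choice="auto",
    temperature=0.2,
)

# `status` is typed as RunStatus, which now includes the "incomplete" state
# reached when one of the token budgets above is exhausted.
print(run.status)
```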
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from .run_step import RunStep as RunStep -from .code_tool_call import CodeToolCall as CodeToolCall +from .tool_call import ToolCall as ToolCall +from .run_step_delta import RunStepDelta as RunStepDelta +from .tool_call_delta import ToolCallDelta as ToolCallDelta from .step_list_params import StepListParams as StepListParams from .function_tool_call import FunctionToolCall as FunctionToolCall -from .retrieval_tool_call import RetrievalToolCall as RetrievalToolCall +from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent +from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs +from .file_search_tool_call import FileSearchToolCall as FileSearchToolCall +from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject from .tool_calls_step_details import ToolCallsStepDetails as ToolCallsStepDetails +from .function_tool_call_delta import FunctionToolCallDelta as FunctionToolCallDelta +from .code_interpreter_tool_call import CodeInterpreterToolCall as CodeInterpreterToolCall +from .file_search_tool_call_delta import FileSearchToolCallDelta as FileSearchToolCallDelta +from .run_step_delta_message_delta import RunStepDeltaMessageDelta as RunStepDeltaMessageDelta +from .code_interpreter_output_image import CodeInterpreterOutputImage as CodeInterpreterOutputImage from .message_creation_step_details import MessageCreationStepDetails as MessageCreationStepDetails +from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta as CodeInterpreterToolCallDelta diff --git a/src/openai/types/beta/threads/runs/code_interpreter_logs.py b/src/openai/types/beta/threads/runs/code_interpreter_logs.py new file mode 100644 index 0000000000..0bf8c1dac2 --- /dev/null +++ b/src/openai/types/beta/threads/runs/code_interpreter_logs.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["CodeInterpreterLogs"] + + +class CodeInterpreterLogs(BaseModel): + index: int + """The index of the output in the outputs array.""" + + type: Literal["logs"] + """Always `logs`.""" + + logs: Optional[str] = None + """The text output from the Code Interpreter tool call.""" diff --git a/src/openai/types/beta/threads/runs/code_interpreter_output_image.py b/src/openai/types/beta/threads/runs/code_interpreter_output_image.py new file mode 100644 index 0000000000..2257f37e41 --- /dev/null +++ b/src/openai/types/beta/threads/runs/code_interpreter_output_image.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["CodeInterpreterOutputImage", "Image"] + + +class Image(BaseModel): + file_id: Optional[str] = None + """ + The [file](https://platform.openai.com/docs/api-reference/files) ID of the + image. 
+ """ + + +class CodeInterpreterOutputImage(BaseModel): + index: int + """The index of the output in the outputs array.""" + + type: Literal["image"] + """Always `image`.""" + + image: Optional[Image] = None diff --git a/src/openai/types/beta/threads/runs/code_tool_call.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py similarity index 78% rename from src/openai/types/beta/threads/runs/code_tool_call.py rename to src/openai/types/beta/threads/runs/code_interpreter_tool_call.py index f808005ecb..2f07243684 100644 --- a/src/openai/types/beta/threads/runs/code_tool_call.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py @@ -1,12 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union -from typing_extensions import Literal +from typing_extensions import Literal, Annotated +from ....._utils import PropertyInfo from ....._models import BaseModel __all__ = [ - "CodeToolCall", + "CodeInterpreterToolCall", "CodeInterpreter", "CodeInterpreterOutput", "CodeInterpreterOutputLogs", @@ -38,7 +39,9 @@ class CodeInterpreterOutputImage(BaseModel): """Always `image`.""" -CodeInterpreterOutput = Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage] +CodeInterpreterOutput = Annotated[ + Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") +] class CodeInterpreter(BaseModel): @@ -53,7 +56,7 @@ class CodeInterpreter(BaseModel): """ -class CodeToolCall(BaseModel): +class CodeInterpreterToolCall(BaseModel): id: str """The ID of the tool call.""" diff --git a/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py new file mode 100644 index 0000000000..eff76355b3 --- /dev/null +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .code_interpreter_logs import CodeInterpreterLogs +from .code_interpreter_output_image import CodeInterpreterOutputImage + +__all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"] + +CodeInterpreterOutput = Annotated[ + Union[CodeInterpreterLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") +] + + +class CodeInterpreter(BaseModel): + input: Optional[str] = None + """The input to the Code Interpreter tool call.""" + + outputs: Optional[List[CodeInterpreterOutput]] = None + """The outputs from the Code Interpreter tool call. + + Code Interpreter can output one or more items, including text (`logs`) or images + (`image`). Each of these are represented by a different object type. + """ + + +class CodeInterpreterToolCallDelta(BaseModel): + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["code_interpreter"] + """The type of tool call. + + This is always going to be `code_interpreter` for this type of tool call. 
+ """ + + id: Optional[str] = None + """The ID of the tool call.""" + + code_interpreter: Optional[CodeInterpreter] = None + """The Code Interpreter tool call definition.""" diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call.py b/src/openai/types/beta/threads/runs/file_search_tool_call.py new file mode 100644 index 0000000000..57c0ca9a90 --- /dev/null +++ b/src/openai/types/beta/threads/runs/file_search_tool_call.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FileSearchToolCall"] + + +class FileSearchToolCall(BaseModel): + id: str + """The ID of the tool call object.""" + + file_search: object + """For now, this is always going to be an empty object.""" + + type: Literal["file_search"] + """The type of tool call. + + This is always going to be `file_search` for this type of tool call. + """ diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call_delta.py b/src/openai/types/beta/threads/runs/file_search_tool_call_delta.py new file mode 100644 index 0000000000..df5ac217dc --- /dev/null +++ b/src/openai/types/beta/threads/runs/file_search_tool_call_delta.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FileSearchToolCallDelta"] + + +class FileSearchToolCallDelta(BaseModel): + file_search: object + """For now, this is always going to be an empty object.""" + + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["file_search"] + """The type of tool call. + + This is always going to be `file_search` for this type of tool call. + """ + + id: Optional[str] = None + """The ID of the tool call object.""" diff --git a/src/openai/types/beta/threads/runs/function_tool_call.py b/src/openai/types/beta/threads/runs/function_tool_call.py index bbd3cb7052..b1d354f894 100644 --- a/src/openai/types/beta/threads/runs/function_tool_call.py +++ b/src/openai/types/beta/threads/runs/function_tool_call.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional from typing_extensions import Literal diff --git a/src/openai/types/beta/threads/runs/function_tool_call_delta.py b/src/openai/types/beta/threads/runs/function_tool_call_delta.py new file mode 100644 index 0000000000..faaf026f7f --- /dev/null +++ b/src/openai/types/beta/threads/runs/function_tool_call_delta.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FunctionToolCallDelta", "Function"] + + +class Function(BaseModel): + arguments: Optional[str] = None + """The arguments passed to the function.""" + + name: Optional[str] = None + """The name of the function.""" + + output: Optional[str] = None + """The output of the function. + + This will be `null` if the outputs have not been + [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) + yet. + """ + + +class FunctionToolCallDelta(BaseModel): + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["function"] + """The type of tool call. 
+ + This is always going to be `function` for this type of tool call. + """ + + id: Optional[str] = None + """The ID of the tool call object.""" + + function: Optional[Function] = None + """The definition of the function that was called.""" diff --git a/src/openai/types/beta/threads/runs/message_creation_step_details.py b/src/openai/types/beta/threads/runs/message_creation_step_details.py index 13f9398515..73439079d3 100644 --- a/src/openai/types/beta/threads/runs/message_creation_step_details.py +++ b/src/openai/types/beta/threads/runs/message_creation_step_details.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/threads/runs/retrieval_tool_call.py b/src/openai/types/beta/threads/runs/retrieval_tool_call.py deleted file mode 100644 index 6cdbcdd93f..0000000000 --- a/src/openai/types/beta/threads/runs/retrieval_tool_call.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ....._models import BaseModel - -__all__ = ["RetrievalToolCall"] - - -class RetrievalToolCall(BaseModel): - id: str - """The ID of the tool call object.""" - - retrieval: object - """For now, this is always going to be an empty object.""" - - type: Literal["retrieval"] - """The type of tool call. - - This is always going to be `retrieval` for this type of tool call. - """ diff --git a/src/openai/types/beta/threads/runs/run_step.py b/src/openai/types/beta/threads/runs/run_step.py index 5f3e29a312..7c81dcac2b 100644 --- a/src/openai/types/beta/threads/runs/run_step.py +++ b/src/openai/types/beta/threads/runs/run_step.py @@ -1,9 +1,9 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import Union, Optional -from typing_extensions import Literal +from typing_extensions import Literal, Annotated +from ....._utils import PropertyInfo from ....._models import BaseModel from .tool_calls_step_details import ToolCallsStepDetails from .message_creation_step_details import MessageCreationStepDetails @@ -19,7 +19,7 @@ class LastError(BaseModel): """A human-readable description of the error.""" -StepDetails = Union[MessageCreationStepDetails, ToolCallsStepDetails] +StepDetails = Annotated[Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type")] class Usage(BaseModel): @@ -68,7 +68,7 @@ class RunStep(BaseModel): Will be `null` if there are no errors. """ - metadata: Optional[builtins.object] = None + metadata: Optional[object] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a diff --git a/src/openai/types/beta/threads/runs/run_step_delta.py b/src/openai/types/beta/threads/runs/run_step_delta.py new file mode 100644 index 0000000000..d6b4aefeb9 --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_delta.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
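# Illustrative sketch (not SDK source): the `Annotated[Union[...], PropertyInfo(discriminator="type")]`
# pattern introduced above lets the model layer dispatch on the literal `type` field instead of
# trying each union member in turn. The snippet below mimics that behaviour with plain Pydantic v2
# (`Field(discriminator=...)` plus `TypeAdapter`); it is an analogy for readers, not the SDK's
# internal `PropertyInfo` implementation.
from typing import Literal, Union

from pydantic import BaseModel, Field, TypeAdapter
from typing_extensions import Annotated


class LogsOutput(BaseModel):
    type: Literal["logs"]
    logs: str


class ImageOutput(BaseModel):
    type: Literal["image"]
    file_id: str


Output = Annotated[Union[LogsOutput, ImageOutput], Field(discriminator="type")]

# The discriminator routes the payload straight to the matching model based on `type`.
parsed = TypeAdapter(Output).validate_python({"type": "logs", "logs": "stdout here"})
assert isinstance(parsed, LogsOutput)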
+ +from typing import Union, Optional +from typing_extensions import Annotated + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .tool_call_delta_object import ToolCallDeltaObject +from .run_step_delta_message_delta import RunStepDeltaMessageDelta + +__all__ = ["RunStepDelta", "StepDetails"] + +StepDetails = Annotated[Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type")] + + +class RunStepDelta(BaseModel): + step_details: Optional[StepDetails] = None + """The details of the run step.""" diff --git a/src/openai/types/beta/threads/runs/run_step_delta_event.py b/src/openai/types/beta/threads/runs/run_step_delta_event.py new file mode 100644 index 0000000000..7f3f92aabf --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_delta_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ....._models import BaseModel +from .run_step_delta import RunStepDelta + +__all__ = ["RunStepDeltaEvent"] + + +class RunStepDeltaEvent(BaseModel): + id: str + """The identifier of the run step, which can be referenced in API endpoints.""" + + delta: RunStepDelta + """The delta containing the fields that have changed on the run step.""" + + object: Literal["thread.run.step.delta"] + """The object type, which is always `thread.run.step.delta`.""" diff --git a/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py b/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py new file mode 100644 index 0000000000..f58ed3d96d --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["RunStepDeltaMessageDelta", "MessageCreation"] + + +class MessageCreation(BaseModel): + message_id: Optional[str] = None + """The ID of the message that was created by this run step.""" + + +class RunStepDeltaMessageDelta(BaseModel): + type: Literal["message_creation"] + """Always `message_creation`.""" + + message_creation: Optional[MessageCreation] = None diff --git a/src/openai/types/beta/threads/runs/step_list_params.py b/src/openai/types/beta/threads/runs/step_list_params.py index 9c7b6c64d0..606d444539 100644 --- a/src/openai/types/beta/threads/runs/step_list_params.py +++ b/src/openai/types/beta/threads/runs/step_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/threads/runs/tool_call.py b/src/openai/types/beta/threads/runs/tool_call.py new file mode 100644 index 0000000000..77d86b46d9 --- /dev/null +++ b/src/openai/types/beta/threads/runs/tool_call.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
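# Illustrative sketch (assumptions noted): `thread.run.step.delta` events carry only the fields
# that changed, so a consumer typically folds each delta into a locally kept snapshot. The helper
# below shows that accumulation idea on plain dicts; `model_dump(exclude_none=True)` is one assumed
# way to pull out just the changed fields, and real streaming helpers also merge list items by
# their `index` (omitted here for brevity).
def apply_step_delta(snapshot: dict, delta_event) -> dict:
    changed = delta_event.delta.model_dump(exclude_none=True)
    for key, value in changed.items():
        if isinstance(value, dict) and isinstance(snapshot.get(key), dict):
            snapshot[key].update(value)  # merge nested partial objects
        else:
            snapshot[key] = value        # overwrite scalars / lists wholesale
    return snapshot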
+ +from typing import Union +from typing_extensions import Annotated + +from ....._utils import PropertyInfo +from .function_tool_call import FunctionToolCall +from .file_search_tool_call import FileSearchToolCall +from .code_interpreter_tool_call import CodeInterpreterToolCall + +__all__ = ["ToolCall"] + +ToolCall = Annotated[ + Union[CodeInterpreterToolCall, FileSearchToolCall, FunctionToolCall], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta.py b/src/openai/types/beta/threads/runs/tool_call_delta.py new file mode 100644 index 0000000000..90cfe0657e --- /dev/null +++ b/src/openai/types/beta/threads/runs/tool_call_delta.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated + +from ....._utils import PropertyInfo +from .function_tool_call_delta import FunctionToolCallDelta +from .file_search_tool_call_delta import FileSearchToolCallDelta +from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta + +__all__ = ["ToolCallDelta"] + +ToolCallDelta = Annotated[ + Union[CodeInterpreterToolCallDelta, FileSearchToolCallDelta, FunctionToolCallDelta], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta_object.py b/src/openai/types/beta/threads/runs/tool_call_delta_object.py new file mode 100644 index 0000000000..189dce772c --- /dev/null +++ b/src/openai/types/beta/threads/runs/tool_call_delta_object.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ....._models import BaseModel +from .tool_call_delta import ToolCallDelta + +__all__ = ["ToolCallDeltaObject"] + + +class ToolCallDeltaObject(BaseModel): + type: Literal["tool_calls"] + """Always `tool_calls`.""" + + tool_calls: Optional[List[ToolCallDelta]] = None + """An array of tool calls the run step was involved in. + + These can be associated with one of three types of tools: `code_interpreter`, + `file_search`, or `function`. + """ diff --git a/src/openai/types/beta/threads/runs/tool_calls_step_details.py b/src/openai/types/beta/threads/runs/tool_calls_step_details.py index 80eb90bf66..a084d387c7 100644 --- a/src/openai/types/beta/threads/runs/tool_calls_step_details.py +++ b/src/openai/types/beta/threads/runs/tool_calls_step_details.py @@ -1,16 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union +from typing import List from typing_extensions import Literal +from .tool_call import ToolCall from ....._models import BaseModel -from .code_tool_call import CodeToolCall -from .function_tool_call import FunctionToolCall -from .retrieval_tool_call import RetrievalToolCall -__all__ = ["ToolCallsStepDetails", "ToolCall"] - -ToolCall = Union[CodeToolCall, RetrievalToolCall, FunctionToolCall] +__all__ = ["ToolCallsStepDetails"] class ToolCallsStepDetails(BaseModel): @@ -18,7 +14,7 @@ class ToolCallsStepDetails(BaseModel): """An array of tool calls the run step was involved in. These can be associated with one of three types of tools: `code_interpreter`, - `retrieval`, or `function`. + `file_search`, or `function`. 
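# Illustrative sketch (not SDK source): because every member of the `ToolCall` union above carries
# a literal `type` field, downstream code can branch on that string when walking a run step's
# `step_details.tool_calls`. Attribute names follow the models in this diff; the function itself is
# only an example.
def summarize_tool_call(tool_call) -> str:
    if tool_call.type == "code_interpreter":
        return f"code_interpreter ran: {tool_call.code_interpreter.input!r}"
    if tool_call.type == "file_search":
        return f"file_search call {tool_call.id}"
    if tool_call.type == "function":
        fn = tool_call.function
        return f"function {fn.name}({fn.arguments})"
    raise ValueError(f"unexpected tool call type: {tool_call.type!r}")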
""" type: Literal["tool_calls"] diff --git a/src/openai/types/beta/threads/text.py b/src/openai/types/beta/threads/text.py new file mode 100644 index 0000000000..853bec2955 --- /dev/null +++ b/src/openai/types/beta/threads/text.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ...._models import BaseModel +from .annotation import Annotation + +__all__ = ["Text"] + + +class Text(BaseModel): + annotations: List[Annotation] + + value: str + """The data that makes up the text.""" diff --git a/src/openai/types/beta/threads/text_content_block.py b/src/openai/types/beta/threads/text_content_block.py new file mode 100644 index 0000000000..3706d6b9d8 --- /dev/null +++ b/src/openai/types/beta/threads/text_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .text import Text +from ...._models import BaseModel + +__all__ = ["TextContentBlock"] + + +class TextContentBlock(BaseModel): + text: Text + + type: Literal["text"] + """Always `text`.""" diff --git a/src/openai/types/beta/threads/text_content_block_param.py b/src/openai/types/beta/threads/text_content_block_param.py new file mode 100644 index 0000000000..6313de32cc --- /dev/null +++ b/src/openai/types/beta/threads/text_content_block_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TextContentBlockParam"] + + +class TextContentBlockParam(TypedDict, total=False): + text: Required[str] + """Text content to be sent to the model""" + + type: Required[Literal["text"]] + """Always `text`.""" diff --git a/src/openai/types/beta/threads/text_delta.py b/src/openai/types/beta/threads/text_delta.py new file mode 100644 index 0000000000..09cd357027 --- /dev/null +++ b/src/openai/types/beta/threads/text_delta.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ...._models import BaseModel +from .annotation_delta import AnnotationDelta + +__all__ = ["TextDelta"] + + +class TextDelta(BaseModel): + annotations: Optional[List[AnnotationDelta]] = None + + value: Optional[str] = None + """The data that makes up the text.""" diff --git a/src/openai/types/beta/threads/text_delta_block.py b/src/openai/types/beta/threads/text_delta_block.py new file mode 100644 index 0000000000..586116e0d6 --- /dev/null +++ b/src/openai/types/beta/threads/text_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .text_delta import TextDelta + +__all__ = ["TextDeltaBlock"] + + +class TextDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["text"] + """Always `text`.""" + + text: Optional[TextDelta] = None diff --git a/src/openai/types/beta/threads/thread_message.py b/src/openai/types/beta/threads/thread_message.py deleted file mode 100644 index 8f1ac07d0a..0000000000 --- a/src/openai/types/beta/threads/thread_message.py +++ /dev/null @@ -1,65 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -import builtins -from typing import List, Union, Optional -from typing_extensions import Literal - -from ...._models import BaseModel -from .message_content_text import MessageContentText -from .message_content_image_file import MessageContentImageFile - -__all__ = ["ThreadMessage", "Content"] - -Content = Union[MessageContentImageFile, MessageContentText] - - -class ThreadMessage(BaseModel): - id: str - """The identifier, which can be referenced in API endpoints.""" - - assistant_id: Optional[str] = None - """ - If applicable, the ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) that - authored this message. - """ - - content: List[Content] - """The content of the message in array of text and/or images.""" - - created_at: int - """The Unix timestamp (in seconds) for when the message was created.""" - - file_ids: List[str] - """ - A list of [file](https://platform.openai.com/docs/api-reference/files) IDs that - the assistant should use. Useful for tools like retrieval and code_interpreter - that can access files. A maximum of 10 files can be attached to a message. - """ - - metadata: Optional[builtins.object] = None - """Set of 16 key-value pairs that can be attached to an object. - - This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. - """ - - object: Literal["thread.message"] - """The object type, which is always `thread.message`.""" - - role: Literal["user", "assistant"] - """The entity that produced the message. One of `user` or `assistant`.""" - - run_id: Optional[str] = None - """ - If applicable, the ID of the - [run](https://platform.openai.com/docs/api-reference/runs) associated with the - authoring of this message. - """ - - thread_id: str - """ - The [thread](https://platform.openai.com/docs/api-reference/threads) ID that - this message belongs to. - """ diff --git a/src/openai/types/beta/vector_store.py b/src/openai/types/beta/vector_store.py new file mode 100644 index 0000000000..488961b444 --- /dev/null +++ b/src/openai/types/beta/vector_store.py @@ -0,0 +1,79 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["VectorStore", "FileCounts", "ExpiresAfter"] + + +class FileCounts(BaseModel): + cancelled: int + """The number of files that were cancelled.""" + + completed: int + """The number of files that have been successfully processed.""" + + failed: int + """The number of files that have failed to process.""" + + in_progress: int + """The number of files that are currently being processed.""" + + total: int + """The total number of files.""" + + +class ExpiresAfter(BaseModel): + anchor: Literal["last_active_at"] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. 
+ """ + + days: int + """The number of days after the anchor time that the vector store will expire.""" + + +class VectorStore(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the vector store was created.""" + + file_counts: FileCounts + + last_active_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the vector store was last active.""" + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: str + """The name of the vector store.""" + + object: Literal["vector_store"] + """The object type, which is always `vector_store`.""" + + status: Literal["expired", "in_progress", "completed"] + """ + The status of the vector store, which can be either `expired`, `in_progress`, or + `completed`. A status of `completed` indicates that the vector store is ready + for use. + """ + + usage_bytes: int + """The total number of bytes used by the files in the vector store.""" + + expires_after: Optional[ExpiresAfter] = None + """The expiration policy for a vector store.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the vector store will expire.""" diff --git a/src/openai/types/beta/vector_store_create_params.py b/src/openai/types/beta/vector_store_create_params.py new file mode 100644 index 0000000000..f1a3abcbdf --- /dev/null +++ b/src/openai/types/beta/vector_store_create_params.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["VectorStoreCreateParams", "ExpiresAfter"] + + +class VectorStoreCreateParams(TypedDict, total=False): + expires_after: ExpiresAfter + """The expiration policy for a vector store.""" + + file_ids: List[str] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: str + """The name of the vector store.""" + + +class ExpiresAfter(TypedDict, total=False): + anchor: Required[Literal["last_active_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: Required[int] + """The number of days after the anchor time that the vector store will expire.""" diff --git a/src/openai/types/beta/vector_store_deleted.py b/src/openai/types/beta/vector_store_deleted.py new file mode 100644 index 0000000000..21ccda1db5 --- /dev/null +++ b/src/openai/types/beta/vector_store_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["VectorStoreDeleted"] + + +class VectorStoreDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["vector_store.deleted"] diff --git a/src/openai/types/beta/assistants/file_list_params.py b/src/openai/types/beta/vector_store_list_params.py similarity index 85% rename from src/openai/types/beta/assistants/file_list_params.py rename to src/openai/types/beta/vector_store_list_params.py index 397e35a0d1..f39f67266d 100644 --- a/src/openai/types/beta/assistants/file_list_params.py +++ b/src/openai/types/beta/vector_store_list_params.py @@ -1,13 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing_extensions import Literal, TypedDict -__all__ = ["FileListParams"] +__all__ = ["VectorStoreListParams"] -class FileListParams(TypedDict, total=False): +class VectorStoreListParams(TypedDict, total=False): after: str """A cursor for use in pagination. diff --git a/src/openai/types/beta/vector_store_update_params.py b/src/openai/types/beta/vector_store_update_params.py new file mode 100644 index 0000000000..0f9593e476 --- /dev/null +++ b/src/openai/types/beta/vector_store_update_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["VectorStoreUpdateParams", "ExpiresAfter"] + + +class VectorStoreUpdateParams(TypedDict, total=False): + expires_after: Optional[ExpiresAfter] + """The expiration policy for a vector store.""" + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maxium of 512 characters long. + """ + + name: Optional[str] + """The name of the vector store.""" + + +class ExpiresAfter(TypedDict, total=False): + anchor: Required[Literal["last_active_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: Required[int] + """The number of days after the anchor time that the vector store will expire.""" diff --git a/src/openai/types/beta/vector_stores/__init__.py b/src/openai/types/beta/vector_stores/__init__.py new file mode 100644 index 0000000000..ff05dd63d8 --- /dev/null +++ b/src/openai/types/beta/vector_stores/__init__.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
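# Illustrative sketch (assumptions noted): creating a vector store with the `ExpiresAfter` policy
# described by the create/update params above. The `client.beta.vector_stores.create(...)` call
# path is assumed from the `types/beta` location of these models; the file ID is a placeholder.
from openai import OpenAI

client = OpenAI()

vector_store = client.beta.vector_stores.create(
    name="support-docs",
    file_ids=["file-abc123"],  # hypothetical file ID
    expires_after={"anchor": "last_active_at", "days": 7},
)
print(vector_store.id, vector_store.status, vector_store.file_counts.in_progress)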
+ +from __future__ import annotations + +from .file_list_params import FileListParams as FileListParams +from .vector_store_file import VectorStoreFile as VectorStoreFile +from .file_create_params import FileCreateParams as FileCreateParams +from .vector_store_file_batch import VectorStoreFileBatch as VectorStoreFileBatch +from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams +from .vector_store_file_deleted import VectorStoreFileDeleted as VectorStoreFileDeleted +from .file_batch_list_files_params import FileBatchListFilesParams as FileBatchListFilesParams diff --git a/src/openai/types/beta/vector_stores/file_batch_create_params.py b/src/openai/types/beta/vector_stores/file_batch_create_params.py new file mode 100644 index 0000000000..0882829732 --- /dev/null +++ b/src/openai/types/beta/vector_stores/file_batch_create_params.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +__all__ = ["FileBatchCreateParams"] + + +class FileBatchCreateParams(TypedDict, total=False): + file_ids: Required[List[str]] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + """ diff --git a/src/openai/types/beta/vector_stores/file_batch_list_files_params.py b/src/openai/types/beta/vector_stores/file_batch_list_files_params.py new file mode 100644 index 0000000000..24dee7d5a5 --- /dev/null +++ b/src/openai/types/beta/vector_stores/file_batch_list_files_params.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FileBatchListFilesParams"] + + +class FileBatchListFilesParams(TypedDict, total=False): + vector_store_id: Required[str] + + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + filter: Literal["in_progress", "completed", "failed", "cancelled"] + """Filter by file status. + + One of `in_progress`, `completed`, `failed`, `cancelled`. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. 
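# Illustrative sketch (assumptions noted): creating a file batch for a vector store and then
# listing its files filtered by status, using the params defined above. The
# `client.beta.vector_stores.file_batches.*` call paths are assumed from the
# `types/beta/vector_stores` location of these params; IDs are placeholders.
from openai import OpenAI

client = OpenAI()

batch = client.beta.vector_stores.file_batches.create(
    vector_store_id="vs_abc123",              # hypothetical vector store ID
    file_ids=["file-abc123", "file-def456"],  # hypothetical file IDs
)

failed = client.beta.vector_stores.file_batches.list_files(
    vector_store_id="vs_abc123",
    batch_id=batch.id,
    filter="failed",  # only files that failed to process
)
for vs_file in failed:
    print(vs_file.id, vs_file.last_error)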
+ """ diff --git a/src/openai/types/beta/assistants/file_create_params.py b/src/openai/types/beta/vector_stores/file_create_params.py similarity index 57% rename from src/openai/types/beta/assistants/file_create_params.py rename to src/openai/types/beta/vector_stores/file_create_params.py index f70f96fc1b..2fee588abf 100644 --- a/src/openai/types/beta/assistants/file_create_params.py +++ b/src/openai/types/beta/vector_stores/file_create_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -10,7 +10,7 @@ class FileCreateParams(TypedDict, total=False): file_id: Required[str] """ - A [File](https://platform.openai.com/docs/api-reference/files) ID (with - `purpose="assistants"`) that the assistant should use. Useful for tools like - `retrieval` and `code_interpreter` that can access files. + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. """ diff --git a/src/openai/types/beta/threads/messages/file_list_params.py b/src/openai/types/beta/vector_stores/file_list_params.py similarity index 77% rename from src/openai/types/beta/threads/messages/file_list_params.py rename to src/openai/types/beta/vector_stores/file_list_params.py index 3640b8508b..23dd7f0d94 100644 --- a/src/openai/types/beta/threads/messages/file_list_params.py +++ b/src/openai/types/beta/vector_stores/file_list_params.py @@ -1,15 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, TypedDict __all__ = ["FileListParams"] class FileListParams(TypedDict, total=False): - thread_id: Required[str] - after: str """A cursor for use in pagination. @@ -28,6 +26,12 @@ class FileListParams(TypedDict, total=False): of the list. """ + filter: Literal["in_progress", "completed", "failed", "cancelled"] + """Filter by file status. + + One of `in_progress`, `completed`, `failed`, `cancelled`. + """ + limit: int """A limit on the number of objects to be returned. diff --git a/src/openai/types/beta/vector_stores/vector_store_file.py b/src/openai/types/beta/vector_stores/vector_store_file.py new file mode 100644 index 0000000000..3fab489602 --- /dev/null +++ b/src/openai/types/beta/vector_stores/vector_store_file.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["VectorStoreFile", "LastError"] + + +class LastError(BaseModel): + code: Literal["internal_error", "file_not_found", "parsing_error", "unhandled_mime_type"] + """One of `server_error` or `rate_limit_exceeded`.""" + + message: str + """A human-readable description of the error.""" + + +class VectorStoreFile(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the vector store file was created.""" + + last_error: Optional[LastError] = None + """The last error associated with this vector store file. + + Will be `null` if there are no errors. 
+ """ + + object: Literal["vector_store.file"] + """The object type, which is always `vector_store.file`.""" + + status: Literal["in_progress", "completed", "cancelled", "failed"] + """ + The status of the vector store file, which can be either `in_progress`, + `completed`, `cancelled`, or `failed`. The status `completed` indicates that the + vector store file is ready for use. + """ + + usage_bytes: int + """The total vector store usage in bytes. + + Note that this may be different from the original file size. + """ + + vector_store_id: str + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + that the [File](https://platform.openai.com/docs/api-reference/files) is + attached to. + """ diff --git a/src/openai/types/beta/vector_stores/vector_store_file_batch.py b/src/openai/types/beta/vector_stores/vector_store_file_batch.py new file mode 100644 index 0000000000..df130a58de --- /dev/null +++ b/src/openai/types/beta/vector_stores/vector_store_file_batch.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["VectorStoreFileBatch", "FileCounts"] + + +class FileCounts(BaseModel): + cancelled: int + """The number of files that where cancelled.""" + + completed: int + """The number of files that have been processed.""" + + failed: int + """The number of files that have failed to process.""" + + in_progress: int + """The number of files that are currently being processed.""" + + total: int + """The total number of files.""" + + +class VectorStoreFileBatch(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """ + The Unix timestamp (in seconds) for when the vector store files batch was + created. + """ + + file_counts: FileCounts + + object: Literal["vector_store.files_batch"] + """The object type, which is always `vector_store.file_batch`.""" + + status: Literal["in_progress", "completed", "cancelled", "failed"] + """ + The status of the vector store files batch, which can be either `in_progress`, + `completed`, `cancelled` or `failed`. + """ + + vector_store_id: str + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + that the [File](https://platform.openai.com/docs/api-reference/files) is + attached to. + """ diff --git a/src/openai/types/beta/vector_stores/vector_store_file_deleted.py b/src/openai/types/beta/vector_stores/vector_store_file_deleted.py new file mode 100644 index 0000000000..ae37f84364 --- /dev/null +++ b/src/openai/types/beta/vector_stores/vector_store_file_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["VectorStoreFileDeleted"] + + +class VectorStoreFileDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["vector_store.file.deleted"] diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index 39a6335f64..0ba812ff9b 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -14,6 +14,7 @@ from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam from .chat_completion_user_message_param import ChatCompletionUserMessageParam as ChatCompletionUserMessageParam +from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam as ChatCompletionStreamOptionsParam from .chat_completion_system_message_param import ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ( ChatCompletionFunctionMessageParam as ChatCompletionFunctionMessageParam, diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index dc63d84945..61a94a258e 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py index 72a5bff83b..e1e399486e 100644 --- a/src/openai/types/chat/chat_completion_assistant_message_param.py +++ b/src/openai/types/chat/chat_completion_assistant_message_param.py @@ -1,8 +1,8 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional +from typing import Iterable, Optional from typing_extensions import Literal, Required, TypedDict from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam @@ -47,5 +47,5 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): role. """ - tool_calls: List[ChatCompletionMessageToolCallParam] + tool_calls: Iterable[ChatCompletionMessageToolCallParam] """The tool calls generated by the model, such as function calls.""" diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py index 95013e7a4f..084a5fcc07 100644 --- a/src/openai/types/chat/chat_completion_chunk.py +++ b/src/openai/types/chat/chat_completion_chunk.py @@ -1,9 +1,10 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel +from ..completion_usage import CompletionUsage from .chat_completion_token_logprob import ChatCompletionTokenLogprob __all__ = [ @@ -105,7 +106,8 @@ class ChatCompletionChunk(BaseModel): choices: List[Choice] """A list of chat completion choices. - Can be more than one if `n` is greater than 1. + Can contain more than one elements if `n` is greater than 1. Can also be empty + for the last chunk if you set `stream_options: {"include_usage": true}`. """ created: int @@ -126,3 +128,11 @@ class ChatCompletionChunk(BaseModel): Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. 
""" + + usage: Optional[CompletionUsage] = None + """ + An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value except for the last chunk which contains the token usage + statistics for the entire request. + """ diff --git a/src/openai/types/chat/chat_completion_content_part_image_param.py b/src/openai/types/chat/chat_completion_content_part_image_param.py index e6732185ef..b1a186aa6d 100644 --- a/src/openai/types/chat/chat_completion_content_part_image_param.py +++ b/src/openai/types/chat/chat_completion_content_part_image_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_content_part_param.py b/src/openai/types/chat/chat_completion_content_part_param.py index 8e58239258..f9b5f71e43 100644 --- a/src/openai/types/chat/chat_completion_content_part_param.py +++ b/src/openai/types/chat/chat_completion_content_part_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_content_part_text_param.py b/src/openai/types/chat/chat_completion_content_part_text_param.py index 38edcf054e..a270744417 100644 --- a/src/openai/types/chat/chat_completion_content_part_text_param.py +++ b/src/openai/types/chat/chat_completion_content_part_text_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_function_call_option_param.py b/src/openai/types/chat/chat_completion_function_call_option_param.py index 72d41d908c..2bc014af7a 100644 --- a/src/openai/types/chat/chat_completion_function_call_option_param.py +++ b/src/openai/types/chat/chat_completion_function_call_option_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_function_message_param.py b/src/openai/types/chat/chat_completion_function_message_param.py index 3f9a1a9039..5af12bf94f 100644 --- a/src/openai/types/chat/chat_completion_function_message_param.py +++ b/src/openai/types/chat/chat_completion_function_message_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_message.py b/src/openai/types/chat/chat_completion_message.py index da8b2fcd5c..8db7d17d24 100644 --- a/src/openai/types/chat/chat_completion_message.py +++ b/src/openai/types/chat/chat_completion_message.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Optional from typing_extensions import Literal diff --git a/src/openai/types/chat/chat_completion_message_param.py b/src/openai/types/chat/chat_completion_message_param.py index 7ec3d6a7b7..a3644a5310 100644 --- a/src/openai/types/chat/chat_completion_message_param.py +++ b/src/openai/types/chat/chat_completion_message_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_message_tool_call.py b/src/openai/types/chat/chat_completion_message_tool_call.py index 63c72fcdca..4fec667096 100644 --- a/src/openai/types/chat/chat_completion_message_tool_call.py +++ b/src/openai/types/chat/chat_completion_message_tool_call.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/chat/chat_completion_message_tool_call_param.py b/src/openai/types/chat/chat_completion_message_tool_call_param.py index a700f02c4f..f616c363d0 100644 --- a/src/openai/types/chat/chat_completion_message_tool_call_param.py +++ b/src/openai/types/chat/chat_completion_message_tool_call_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_named_tool_choice_param.py b/src/openai/types/chat/chat_completion_named_tool_choice_param.py index 0b5ffde37b..369f8b42dd 100644 --- a/src/openai/types/chat/chat_completion_named_tool_choice_param.py +++ b/src/openai/types/chat/chat_completion_named_tool_choice_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_role.py b/src/openai/types/chat/chat_completion_role.py index 9fa2acb4bb..1fd83888d3 100644 --- a/src/openai/types/chat/chat_completion_role.py +++ b/src/openai/types/chat/chat_completion_role.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/chat/chat_completion_stream_options_param.py b/src/openai/types/chat/chat_completion_stream_options_param.py new file mode 100644 index 0000000000..fbf7291821 --- /dev/null +++ b/src/openai/types/chat/chat_completion_stream_options_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ChatCompletionStreamOptionsParam"] + + +class ChatCompletionStreamOptionsParam(TypedDict, total=False): + include_usage: bool + """If set, an additional chunk will be streamed before the `data: [DONE]` message. + + The `usage` field on this chunk shows the token usage statistics for the entire + request, and the `choices` field will always be an empty array. All other chunks + will also include a `usage` field, but with a null value. 
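# Illustrative sketch: requesting token usage on a streamed chat completion via
# `stream_options={"include_usage": True}`, as documented for `ChatCompletionStreamOptionsParam`
# and `ChatCompletionChunk.usage` above. Every chunk carries `usage=None` except the final one,
# and that final chunk has an empty `choices` list. The model name is just an example.
from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
    if chunk.usage is not None:
        print(f"\n[{chunk.usage.total_tokens} tokens total]")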
+ """ diff --git a/src/openai/types/chat/chat_completion_system_message_param.py b/src/openai/types/chat/chat_completion_system_message_param.py index 6e862e75c7..94bb3f636c 100644 --- a/src/openai/types/chat/chat_completion_system_message_param.py +++ b/src/openai/types/chat/chat_completion_system_message_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_token_logprob.py b/src/openai/types/chat/chat_completion_token_logprob.py index 728845fb33..c69e258910 100644 --- a/src/openai/types/chat/chat_completion_token_logprob.py +++ b/src/openai/types/chat/chat_completion_token_logprob.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional @@ -20,7 +20,12 @@ class TopLogprob(BaseModel): """ logprob: float - """The log probability of this token.""" + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ class ChatCompletionTokenLogprob(BaseModel): @@ -36,7 +41,12 @@ class ChatCompletionTokenLogprob(BaseModel): """ logprob: float - """The log probability of this token.""" + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ top_logprobs: List[TopLogprob] """List of the most likely tokens and their log probability, at this token diff --git a/src/openai/types/chat/chat_completion_tool_choice_option_param.py b/src/openai/types/chat/chat_completion_tool_choice_option_param.py index 8104b26acb..1d3c2506ab 100644 --- a/src/openai/types/chat/chat_completion_tool_choice_option_param.py +++ b/src/openai/types/chat/chat_completion_tool_choice_option_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -9,4 +9,4 @@ __all__ = ["ChatCompletionToolChoiceOptionParam"] -ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto"], ChatCompletionNamedToolChoiceParam] +ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto", "required"], ChatCompletionNamedToolChoiceParam] diff --git a/src/openai/types/chat/chat_completion_tool_message_param.py b/src/openai/types/chat/chat_completion_tool_message_param.py index 373c5b88f4..5c590e033f 100644 --- a/src/openai/types/chat/chat_completion_tool_message_param.py +++ b/src/openai/types/chat/chat_completion_tool_message_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_tool_param.py b/src/openai/types/chat/chat_completion_tool_param.py index 54c223955e..0cf6ea7268 100644 --- a/src/openai/types/chat/chat_completion_tool_param.py +++ b/src/openai/types/chat/chat_completion_tool_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_user_message_param.py b/src/openai/types/chat/chat_completion_user_message_param.py index 07be67c405..5c15322a22 100644 --- a/src/openai/types/chat/chat_completion_user_message_param.py +++ b/src/openai/types/chat/chat_completion_user_message_param.py @@ -1,8 +1,8 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict from .chat_completion_content_part_param import ChatCompletionContentPartParam @@ -11,7 +11,7 @@ class ChatCompletionUserMessageParam(TypedDict, total=False): - content: Required[Union[str, List[ChatCompletionContentPartParam]]] + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] """The contents of the user message.""" role: Required[Literal["user"]] diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 6b38a89263..226cf15882 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -1,13 +1,15 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Dict, List, Union, Optional +from typing import Dict, List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict from ...types import shared_params +from ..chat_model import ChatModel from .chat_completion_tool_param import ChatCompletionToolParam from .chat_completion_message_param import ChatCompletionMessageParam +from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam @@ -22,33 +24,13 @@ class CompletionCreateParamsBase(TypedDict, total=False): - messages: Required[List[ChatCompletionMessageParam]] + messages: Required[Iterable[ChatCompletionMessageParam]] """A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). """ - model: Required[ - Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-16k-0613", - ], - ] - ] + model: Required[Union[str, ChatModel]] """ID of the model to use. See the @@ -78,7 +60,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): functions are present. """ - functions: List[Function] + functions: Iterable[Function] """Deprecated in favor of `tools`. A list of functions the model may generate JSON inputs for. @@ -99,8 +81,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): """Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the - `content` of `message`. This option is currently not available on the - `gpt-4-vision-preview` model. + `content` of `message`. 
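# Illustrative sketch: using the `"required"` option added to `ChatCompletionToolChoiceOptionParam`
# above, which forces the model to call at least one of the supplied tools instead of replying with
# plain text. The tool and model names here are examples only.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    tool_choice="required",  # the model must emit one or more tool calls
)
print(completion.choices[0].message.tool_calls)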
""" max_tokens: Optional[int] @@ -133,7 +114,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): response_format: ResponseFormat """An object specifying the format that the model must output. - Compatible with `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`. + Compatible with + [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and + all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. @@ -159,6 +142,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): stop: Union[Optional[str], List[str]] """Up to 4 sequences where the API will stop generating further tokens.""" + stream_options: Optional[ChatCompletionStreamOptionsParam] + """Options for streaming response. Only set this when you set `stream: true`.""" + temperature: Optional[float] """What sampling temperature to use, between 0 and 2. @@ -170,29 +156,30 @@ class CompletionCreateParamsBase(TypedDict, total=False): tool_choice: ChatCompletionToolChoiceOptionParam """ - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via + Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. - `none` is the default when no functions are present. `auto` is the default if - functions are present. + `none` is the default when no tools are present. `auto` is the default if tools + are present. """ - tools: List[ChatCompletionToolParam] + tools: Iterable[ChatCompletionToolParam] """A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of - functions the model may generate JSON inputs for. + functions the model may generate JSON inputs for. A max of 128 functions are + supported. """ top_logprobs: Optional[int] """ - An integer between 0 and 5 specifying the number of most likely tokens to return - at each token position, each with an associated log probability. `logprobs` must - be set to `true` if this parameter is used. + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. """ top_p: Optional[float] diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py new file mode 100644 index 0000000000..0d2937ea32 --- /dev/null +++ b/src/openai/types/chat_model.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +__all__ = ["ChatModel"] + +ChatModel = Literal[ + "gpt-4o", + "gpt-4o-2024-05-13", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/completion.py b/src/openai/types/completion.py index cd80498b16..d3b3102a4a 100644 --- a/src/openai/types/completion.py +++ b/src/openai/types/completion.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal diff --git a/src/openai/types/completion_choice.py b/src/openai/types/completion_choice.py index 7b08582bfd..d948ebc942 100644 --- a/src/openai/types/completion_choice.py +++ b/src/openai/types/completion_choice.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Dict, List, Optional from typing_extensions import Literal diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index e14c2860df..9fe22fe3c9 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -1,10 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Dict, List, Union, Optional +from typing import Dict, List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict +from .chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam + __all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"] @@ -19,7 +21,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): descriptions of them. """ - prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] + prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]] """ The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -123,8 +125,14 @@ class CompletionCreateParamsBase(TypedDict, total=False): The returned text will not contain the stop sequence. """ + stream_options: Optional[ChatCompletionStreamOptionsParam] + """Options for streaming response. Only set this when you set `stream: true`.""" + suffix: Optional[str] - """The suffix that comes after a completion of inserted text.""" + """The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + """ temperature: Optional[float] """What sampling temperature to use, between 0 and 2. diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index b825d5529f..0d57b96595 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,4 +1,6 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + + from .._models import BaseModel diff --git a/src/openai/types/create_embedding_response.py b/src/openai/types/create_embedding_response.py index bf64037e16..eff247a112 100644 --- a/src/openai/types/create_embedding_response.py +++ b/src/openai/types/create_embedding_response.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List from typing_extensions import Literal diff --git a/src/openai/types/embedding.py b/src/openai/types/embedding.py index 9c53704d5d..769b1d165f 100644 --- a/src/openai/types/embedding.py +++ b/src/openai/types/embedding.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List from typing_extensions import Literal diff --git a/src/openai/types/embedding_create_params.py b/src/openai/types/embedding_create_params.py index fd2fc5b48d..930b3b7914 100644 --- a/src/openai/types/embedding_create_params.py +++ b/src/openai/types/embedding_create_params.py @@ -1,15 +1,15 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union +from typing import List, Union, Iterable from typing_extensions import Literal, Required, TypedDict __all__ = ["EmbeddingCreateParams"] class EmbeddingCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str], List[int], List[List[int]]]] + input: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] """Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array @@ -20,7 +20,7 @@ class EmbeddingCreateParams(TypedDict, total=False): for counting tokens. """ - model: Required[Union[str, Literal["text-embedding-ada-002"]]] + model: Required[Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]]] """ID of the model to use. You can use the @@ -30,6 +30,12 @@ class EmbeddingCreateParams(TypedDict, total=False): descriptions of them. """ + dimensions: int + """The number of dimensions the resulting output embeddings should have. + + Only supported in `text-embedding-3` and later models. + """ + encoding_format: Literal["float", "base64"] """The format to return the embeddings in. diff --git a/src/openai/types/file_content.py b/src/openai/types/file_content.py index 92b316b9eb..b4aa08a9a3 100644 --- a/src/openai/types/file_content.py +++ b/src/openai/types/file_content.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __all__ = ["FileContent"] diff --git a/src/openai/types/file_create_params.py b/src/openai/types/file_create_params.py index a59ddb2817..caa913d4d2 100644 --- a/src/openai/types/file_create_params.py +++ b/src/openai/types/file_create_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
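# Illustrative sketch: the new `dimensions` parameter with a `text-embedding-3-*` model, per the
# `EmbeddingCreateParams` changes above.
from openai import OpenAI

client = OpenAI()

response = client.embeddings.create(
    model="text-embedding-3-small",
    input="The quick brown fox",
    dimensions=256,  # only supported on text-embedding-3 and later models
)
print(len(response.data[0].embedding))  # 256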
from __future__ import annotations @@ -13,13 +13,13 @@ class FileCreateParams(TypedDict, total=False): file: Required[FileTypes] """The File object (not file name) to be uploaded.""" - purpose: Required[Literal["fine-tune", "assistants"]] + purpose: Required[Literal["assistants", "batch", "fine-tune"]] """The intended purpose of the uploaded file. - Use "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and - "assistants" for + Use "assistants" for [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Messages](https://platform.openai.com/docs/api-reference/messages). This allows - us to validate the format of the uploaded file is correct for fine-tuning. + [Message](https://platform.openai.com/docs/api-reference/messages) files, + "vision" for Assistants image file inputs, "batch" for + [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). """ diff --git a/src/openai/types/file_deleted.py b/src/openai/types/file_deleted.py index 3ac8592ff6..f25fa87a8d 100644 --- a/src/openai/types/file_deleted.py +++ b/src/openai/types/file_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/file_list_params.py b/src/openai/types/file_list_params.py index a962dd239c..212eca13c0 100644 --- a/src/openai/types/file_list_params.py +++ b/src/openai/types/file_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/file_object.py b/src/openai/types/file_object.py index 4ae91b754e..6e2bf310a4 100644 --- a/src/openai/types/file_object.py +++ b/src/openai/types/file_object.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional from typing_extensions import Literal @@ -24,11 +24,13 @@ class FileObject(BaseModel): object: Literal["file"] """The object type, which is always `file`.""" - purpose: Literal["fine-tune", "fine-tune-results", "assistants", "assistants_output"] + purpose: Literal[ + "assistants", "assistants_output", "batch", "batch_output", "fine-tune", "fine-tune-results", "vision" + ] """The intended purpose of the file. - Supported values are `fine-tune`, `fine-tune-results`, `assistants`, and - `assistants_output`. + Supported values are `assistants`, `assistants_output`, `batch`, `batch_output`, + `fine-tune`, `fine-tune-results` and `vision`. """ status: Literal["uploaded", "processed", "error"] diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py index d24160c5bd..92b81329b1 100644 --- a/src/openai/types/fine_tuning/__init__.py +++ b/src/openai/types/fine_tuning/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
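
`purpose` on file upload now accepts `"batch"` alongside `"assistants"` and `"fine-tune"`, and `FileObject.purpose` gains `"batch"`, `"batch_output"` and `"vision"`. A hedged sketch of uploading a Batch API input file; the local filename is a placeholder:

```python
from openai import OpenAI

client = OpenAI()

# "requests.jsonl" is a placeholder path; for the Batch API the file holds
# one JSON request per line.
with open("requests.jsonl", "rb") as f:
    batch_input = client.files.create(file=f, purpose="batch")

print(batch_input.id, batch_input.purpose)  # purpose will be "batch"
```
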
from __future__ import annotations @@ -7,3 +7,8 @@ from .job_create_params import JobCreateParams as JobCreateParams from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent from .job_list_events_params import JobListEventsParams as JobListEventsParams +from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration +from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration +from .fine_tuning_job_wandb_integration_object import ( + FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject, +) diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py index 5aa4f07eb1..7ac8792787 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job.py +++ b/src/openai/types/fine_tuning/fine_tuning_job.py @@ -1,9 +1,10 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional from typing_extensions import Literal from ..._models import BaseModel +from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject __all__ = ["FineTuningJob", "Error", "Hyperparameters"] @@ -80,6 +81,9 @@ class FineTuningJob(BaseModel): [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). """ + seed: int + """The seed used for the fine-tuning job.""" + status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] """ The current status of the fine-tuning job, which can be either @@ -105,3 +109,12 @@ class FineTuningJob(BaseModel): You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). """ + + estimated_finish: Optional[int] = None + """ + The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. The value will be null if the fine-tuning job is not running. + """ + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for this fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_event.py b/src/openai/types/fine_tuning/fine_tuning_job_event.py index 62f268868b..2d204bb980 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_event.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_event.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py new file mode 100644 index 0000000000..8076313cae --- /dev/null +++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + + +from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject + +FineTuningJobIntegration = FineTuningJobWandbIntegrationObject diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py new file mode 100644 index 0000000000..4ac282eb54 --- /dev/null +++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel + +__all__ = ["FineTuningJobWandbIntegration"] + + +class FineTuningJobWandbIntegration(BaseModel): + project: str + """The name of the project that the new run will be created under.""" + + entity: Optional[str] = None + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] = None + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: Optional[List[str]] = None + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py new file mode 100644 index 0000000000..5b94354d50 --- /dev/null +++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration + +__all__ = ["FineTuningJobWandbIntegrationObject"] + + +class FineTuningJobWandbIntegrationObject(BaseModel): + type: Literal["wandb"] + """The type of the integration being enabled for the fine-tuning job""" + + wandb: FineTuningJobWandbIntegration + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index da750ffc19..1925f90d12 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -1,11 +1,11 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union, Optional +from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["JobCreateParams", "Hyperparameters"] +__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"] class JobCreateParams(TypedDict, total=False): @@ -19,7 +19,7 @@ class JobCreateParams(TypedDict, total=False): training_file: Required[str] """The ID of an uploaded file that contains training data. - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. Your dataset must be formatted as a JSONL file. 
Additionally, you must upload @@ -32,6 +32,17 @@ class JobCreateParams(TypedDict, total=False): hyperparameters: Hyperparameters """The hyperparameters used for the fine-tuning job.""" + integrations: Optional[Iterable[Integration]] + """A list of integrations to enable for your fine-tuning job.""" + + seed: Optional[int] + """The seed controls the reproducibility of the job. + + Passing in the same seed and job parameters should produce the same results, but + may differ in rare cases. If a seed is not specified, one will be generated for + you. + """ + suffix: Optional[str] """ A string of up to 18 characters that will be added to your fine-tuned model @@ -76,3 +87,45 @@ class Hyperparameters(TypedDict, total=False): An epoch refers to one full cycle through the training dataset. """ + + +class IntegrationWandb(TypedDict, total=False): + project: Required[str] + """The name of the project that the new run will be created under.""" + + entity: Optional[str] + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: List[str] + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ + + +class Integration(TypedDict, total=False): + type: Required[Literal["wandb"]] + """The type of integration to enable. + + Currently, only "wandb" (Weights and Biases) is supported. + """ + + wandb: Required[IntegrationWandb] + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ diff --git a/src/openai/types/fine_tuning/job_list_events_params.py b/src/openai/types/fine_tuning/job_list_events_params.py index 7be3d53315..e1c9a64dc8 100644 --- a/src/openai/types/fine_tuning/job_list_events_params.py +++ b/src/openai/types/fine_tuning/job_list_events_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/fine_tuning/job_list_params.py b/src/openai/types/fine_tuning/job_list_params.py index 8160136901..5c075ca33f 100644 --- a/src/openai/types/fine_tuning/job_list_params.py +++ b/src/openai/types/fine_tuning/job_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/fine_tuning/jobs/__init__.py b/src/openai/types/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..6c93da1b69 --- /dev/null +++ b/src/openai/types/fine_tuning/jobs/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
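
`JobCreateParams` now takes `seed` and `integrations`, and `FineTuningJob` exposes `seed`, `estimated_finish` and `integrations` (see the model changes above). A minimal sketch of creating a job with a Weights & Biases integration; the training file ID and W&B project name are placeholders:

```python
from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file="file-abc123",  # placeholder file ID
    seed=42,  # same seed + same parameters should reproduce the same results
    integrations=[
        {
            "type": "wandb",
            "wandb": {
                "project": "my-finetune-project",  # placeholder W&B project
                "tags": ["experiment-1"],
            },
        }
    ],
)

print(job.id, job.seed, job.estimated_finish)
```
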
+ +from __future__ import annotations + +from .checkpoint_list_params import CheckpointListParams as CheckpointListParams +from .fine_tuning_job_checkpoint import FineTuningJobCheckpoint as FineTuningJobCheckpoint diff --git a/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py new file mode 100644 index 0000000000..adceb3b218 --- /dev/null +++ b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["CheckpointListParams"] + + +class CheckpointListParams(TypedDict, total=False): + after: str + """Identifier for the last checkpoint ID from the previous pagination request.""" + + limit: int + """Number of checkpoints to retrieve.""" diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py new file mode 100644 index 0000000000..bd07317a3e --- /dev/null +++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FineTuningJobCheckpoint", "Metrics"] + + +class Metrics(BaseModel): + full_valid_loss: Optional[float] = None + + full_valid_mean_token_accuracy: Optional[float] = None + + step: Optional[float] = None + + train_loss: Optional[float] = None + + train_mean_token_accuracy: Optional[float] = None + + valid_loss: Optional[float] = None + + valid_mean_token_accuracy: Optional[float] = None + + +class FineTuningJobCheckpoint(BaseModel): + id: str + """The checkpoint identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the checkpoint was created.""" + + fine_tuned_model_checkpoint: str + """The name of the fine-tuned checkpoint model that is created.""" + + fine_tuning_job_id: str + """The name of the fine-tuning job that this checkpoint was created from.""" + + metrics: Metrics + """Metrics at the step number during the fine-tuning job.""" + + object: Literal["fine_tuning.job.checkpoint"] + """The object type, which is always "fine_tuning.job.checkpoint".""" + + step_number: int + """The step number that the checkpoint was created at.""" diff --git a/src/openai/types/image.py b/src/openai/types/image.py index a040caf7b6..f48aa2c702 100644 --- a/src/openai/types/image.py +++ b/src/openai/types/image.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional diff --git a/src/openai/types/image_create_variation_params.py b/src/openai/types/image_create_variation_params.py index 7b015fc176..2549307372 100644 --- a/src/openai/types/image_create_variation_params.py +++ b/src/openai/types/image_create_variation_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -32,7 +32,8 @@ class ImageCreateVariationParams(TypedDict, total=False): response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. 
- Must be one of `url` or `b64_json`. + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. """ size: Optional[Literal["256x256", "512x512", "1024x1024"]] diff --git a/src/openai/types/image_edit_params.py b/src/openai/types/image_edit_params.py index 043885cc38..073456e349 100644 --- a/src/openai/types/image_edit_params.py +++ b/src/openai/types/image_edit_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -43,7 +43,8 @@ class ImageEditParams(TypedDict, total=False): response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. - Must be one of `url` or `b64_json`. + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. """ size: Optional[Literal["256x256", "512x512", "1024x1024"]] diff --git a/src/openai/types/image_generate_params.py b/src/openai/types/image_generate_params.py index 7eca29a7ba..18c56f8ed6 100644 --- a/src/openai/types/image_generate_params.py +++ b/src/openai/types/image_generate_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -35,7 +35,8 @@ class ImageGenerateParams(TypedDict, total=False): response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. - Must be one of `url` or `b64_json`. + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. """ size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] diff --git a/src/openai/types/images_response.py b/src/openai/types/images_response.py index 9d1bc95a42..7cee813184 100644 --- a/src/openai/types/images_response.py +++ b/src/openai/types/images_response.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List diff --git a/src/openai/types/model.py b/src/openai/types/model.py index 58f3997f70..2631ee8d1a 100644 --- a/src/openai/types/model.py +++ b/src/openai/types/model.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/model_deleted.py b/src/openai/types/model_deleted.py index 5329da1378..d9a48bb1b5 100644 --- a/src/openai/types/model_deleted.py +++ b/src/openai/types/model_deleted.py @@ -1,4 +1,6 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + from .._models import BaseModel diff --git a/src/openai/types/moderation.py b/src/openai/types/moderation.py index 09c9a6058b..5aa691823a 100644 --- a/src/openai/types/moderation.py +++ b/src/openai/types/moderation.py @@ -1,4 +1,5 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
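
The image parameter docstrings above now note that returned URLs expire roughly 60 minutes after generation. One way to sidestep the expiry is to request base64 output instead; a minimal sketch (prompt, model and output path are illustrative):

```python
import base64

from openai import OpenAI

client = OpenAI()

result = client.images.generate(
    model="dall-e-3",
    prompt="a watercolor painting of a lighthouse at dawn",  # illustrative prompt
    size="1024x1024",
    response_format="b64_json",  # avoids relying on the 60-minute URL window
)

with open("lighthouse.png", "wb") as f:
    f.write(base64.b64decode(result.data[0].b64_json))
```
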
+ from pydantic import Field as FieldInfo @@ -114,7 +115,4 @@ class Moderation(BaseModel): """A list of the categories along with their scores as predicted by model.""" flagged: bool - """ - Whether the content violates - [OpenAI's usage policies](/policies/usage-policies). - """ + """Whether any of the below categories are flagged.""" diff --git a/src/openai/types/moderation_create_params.py b/src/openai/types/moderation_create_params.py index 25ed3ce940..d4608def54 100644 --- a/src/openai/types/moderation_create_params.py +++ b/src/openai/types/moderation_create_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/moderation_create_response.py b/src/openai/types/moderation_create_response.py index 0962cdbfd9..79684f8a70 100644 --- a/src/openai/types/moderation_create_response.py +++ b/src/openai/types/moderation_create_response.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List diff --git a/src/openai/types/shared/__init__.py b/src/openai/types/shared/__init__.py index 05bc4ff9ba..e085744e29 100644 --- a/src/openai/types/shared/__init__.py +++ b/src/openai/types/shared/__init__.py @@ -1,4 +1,5 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .error_object import ErrorObject as ErrorObject from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters diff --git a/src/openai/types/shared/error_object.py b/src/openai/types/shared/error_object.py new file mode 100644 index 0000000000..32d7045e00 --- /dev/null +++ b/src/openai/types/shared/error_object.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["ErrorObject"] + + +class ErrorObject(BaseModel): + code: Optional[str] = None + + message: str + + param: Optional[str] = None + + type: str diff --git a/src/openai/types/shared/function_definition.py b/src/openai/types/shared/function_definition.py index 32658220fa..a39116d6bd 100644 --- a/src/openai/types/shared/function_definition.py +++ b/src/openai/types/shared/function_definition.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional diff --git a/src/openai/types/shared/function_parameters.py b/src/openai/types/shared/function_parameters.py index 405c2d14cc..c9524e4cb8 100644 --- a/src/openai/types/shared/function_parameters.py +++ b/src/openai/types/shared/function_parameters.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Dict diff --git a/src/openai/types/shared_params/__init__.py b/src/openai/types/shared_params/__init__.py index 05bc4ff9ba..ef638cb279 100644 --- a/src/openai/types/shared_params/__init__.py +++ b/src/openai/types/shared_params/__init__.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters diff --git a/src/openai/types/shared_params/function_definition.py b/src/openai/types/shared_params/function_definition.py index 8e89bd41dd..58d0203b4f 100644 --- a/src/openai/types/shared_params/function_definition.py +++ b/src/openai/types/shared_params/function_definition.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/shared_params/function_parameters.py b/src/openai/types/shared_params/function_parameters.py index a405f6b2e2..5b40efb78f 100644 --- a/src/openai/types/shared_params/function_parameters.py +++ b/src/openai/types/shared_params/function_parameters.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/__init__.py b/tests/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/__init__.py b/tests/api_resources/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/__init__.py +++ b/tests/api_resources/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/audio/__init__.py b/tests/api_resources/audio/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/audio/__init__.py +++ b/tests/api_resources/audio/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/audio/test_speech.py b/tests/api_resources/audio/test_speech.py index b1c7f79b1e..781ebeceb9 100644 --- a/tests/api_resources/audio/test_speech.py +++ b/tests/api_resources/audio/test_speech.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/api_resources/audio/test_transcriptions.py b/tests/api_resources/audio/test_transcriptions.py index d957871abc..ba8e9e4099 100644 --- a/tests/api_resources/audio/test_transcriptions.py +++ b/tests/api_resources/audio/test_transcriptions.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
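
The `Moderation.flagged` docstring above now reads "Whether any of the below categories are flagged." A short sketch of checking it; the input text is illustrative:

```python
from openai import OpenAI

client = OpenAI()

result = client.moderations.create(input="some user-supplied text to screen")
moderation = result.results[0]

# `flagged` is True when any individual category (harassment, violence, ...) is flagged.
if moderation.flagged:
    print("content flagged:", moderation.categories)
```
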
from __future__ import annotations @@ -34,6 +34,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: prompt="string", response_format="json", temperature=0, + timestamp_granularities=["word", "segment"], ) assert_matches_type(Transcription, transcription, path=["response"]) @@ -84,6 +85,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> prompt="string", response_format="json", temperature=0, + timestamp_granularities=["word", "segment"], ) assert_matches_type(Transcription, transcription, path=["response"]) diff --git a/tests/api_resources/audio/test_translations.py b/tests/api_resources/audio/test_translations.py index 72960c3249..f5c6c68f0b 100644 --- a/tests/api_resources/audio/test_translations.py +++ b/tests/api_resources/audio/test_translations.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/api_resources/beta/__init__.py b/tests/api_resources/beta/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/beta/__init__.py +++ b/tests/api_resources/beta/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/assistants/__init__.py b/tests/api_resources/beta/assistants/__init__.py deleted file mode 100644 index 1016754ef3..0000000000 --- a/tests/api_resources/beta/assistants/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. diff --git a/tests/api_resources/beta/chat/__init__.py b/tests/api_resources/beta/chat/__init__.py deleted file mode 100644 index 1016754ef3..0000000000 --- a/tests/api_resources/beta/chat/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py index 8db40bde93..a92acb2ca5 100644 --- a/tests/api_resources/beta/test_assistants.py +++ b/tests/api_resources/beta/test_assistants.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
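
The transcription tests above exercise the new `timestamp_granularities` parameter. A hedged usage sketch; the audio path is a placeholder, and word/segment timestamps require `response_format="verbose_json"`:

```python
from openai import OpenAI

client = OpenAI()

with open("speech.mp3", "rb") as audio:  # placeholder audio file
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio,
        response_format="verbose_json",  # required when asking for timestamps
        timestamp_granularities=["word", "segment"],
    )

print(transcript.text)
```
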
from __future__ import annotations @@ -24,27 +24,41 @@ class TestAssistants: @parametrize def test_method_create(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="string", + model="gpt-4-turbo", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="string", + model="gpt-4-turbo", description="string", - file_ids=["string", "string", "string"], instructions="string", metadata={}, name="string", + response_format="none", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.create( - model="string", + model="gpt-4-turbo", ) assert response.is_closed is True @@ -55,7 +69,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.create( - model="string", + model="gpt-4-turbo", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -115,12 +129,18 @@ def test_method_update_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( "string", description="string", - file_ids=["string", "string", "string"], instructions="string", metadata={}, model="string", name="string", + response_format="none", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -235,27 +255,41 @@ class TestAsyncAssistants: @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.create( - model="string", + model="gpt-4-turbo", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.create( - model="string", + model="gpt-4-turbo", description="string", - file_ids=["string", "string", "string"], instructions="string", metadata={}, name="string", + response_format="none", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.create( - model="string", + model="gpt-4-turbo", ) assert 
response.is_closed is True @@ -266,7 +300,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.create( - model="string", + model="gpt-4-turbo", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -326,12 +360,18 @@ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> assistant = await async_client.beta.assistants.update( "string", description="string", - file_ids=["string", "string", "string"], instructions="string", metadata={}, model="string", name="string", + response_format="none", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py index 5b347de1f0..02c6e2586e 100644 --- a/tests/api_resources/beta/test_threads.py +++ b/tests/api_resources/beta/test_threads.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -32,24 +32,111 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: messages=[ { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, ], metadata={}, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], 
+ "metadata": {}, + } + ], + }, + }, ) assert_matches_type(Thread, thread, path=["response"]) @@ -123,6 +210,10 @@ def test_method_update_with_all_params(self, client: OpenAI) -> None: thread = client.beta.threads.update( "string", metadata={}, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, ) assert_matches_type(Thread, thread, path=["response"]) @@ -196,48 +287,150 @@ def test_path_params_delete(self, client: OpenAI) -> None: ) @parametrize - def test_method_create_and_run(self, client: OpenAI) -> None: + def test_method_create_and_run_overload_1(self, client: OpenAI) -> None: thread = client.beta.threads.create_and_run( assistant_id="string", ) assert_matches_type(Run, thread, path=["response"]) @parametrize - def test_method_create_and_run_with_all_params(self, client: OpenAI) -> None: + def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI) -> None: thread = client.beta.threads.create_and_run( assistant_id="string", instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, metadata={}, - model="string", + model="gpt-4-turbo", + response_format="none", + stream=False, + temperature=1, thread={ "messages": [ { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, ], + "tool_resources": { + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, "metadata": {}, }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", 
+ "last_messages": 1, + }, ) assert_matches_type(Run, thread, path=["response"]) @parametrize - def test_raw_response_create_and_run(self, client: OpenAI) -> None: + def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.create_and_run( assistant_id="string", ) @@ -248,7 +441,7 @@ def test_raw_response_create_and_run(self, client: OpenAI) -> None: assert_matches_type(Run, thread, path=["response"]) @parametrize - def test_streaming_response_create_and_run(self, client: OpenAI) -> None: + def test_streaming_response_create_and_run_overload_1(self, client: OpenAI) -> None: with client.beta.threads.with_streaming_response.create_and_run( assistant_id="string", ) as response: @@ -260,6 +453,175 @@ def test_streaming_response_create_and_run(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True + @parametrize + def test_method_create_and_run_overload_2(self, client: OpenAI) -> None: + thread_stream = client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + ) + thread_stream.response.close() + + @parametrize + def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI) -> None: + thread_stream = client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={}, + model="gpt-4-turbo", + response_format="none", + temperature=1, + thread={ + "messages": [ + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + ], + "tool_resources": { + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, + "metadata": {}, + }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + 
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + thread_stream.response.close() + + @parametrize + def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None: + response = client.beta.threads.with_raw_response.create_and_run( + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_and_run_overload_2(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.create_and_run( + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + class TestAsyncThreads: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -275,24 +637,111 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> messages=[ { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, ], metadata={}, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, ) assert_matches_type(Thread, thread, path=["response"]) @@ -366,6 +815,10 @@ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> thread = await async_client.beta.threads.update( "string", metadata={}, + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": 
["string"]}, + }, ) assert_matches_type(Thread, thread, path=["response"]) @@ -439,48 +892,150 @@ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: ) @parametrize - async def test_method_create_and_run(self, async_client: AsyncOpenAI) -> None: + async def test_method_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.create_and_run( assistant_id="string", ) assert_matches_type(Run, thread, path=["response"]) @parametrize - async def test_method_create_and_run_with_all_params(self, async_client: AsyncOpenAI) -> None: + async def test_method_create_and_run_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.create_and_run( assistant_id="string", instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, metadata={}, - model="string", + model="gpt-4-turbo", + response_format="none", + stream=False, + temperature=1, thread={ "messages": [ { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, { "role": "user", - "content": "x", - "file_ids": ["string"], + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], "metadata": {}, }, ], + "tool_resources": { + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, "metadata": {}, }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, ) assert_matches_type(Run, thread, path=["response"]) @parametrize - async def test_raw_response_create_and_run(self, async_client: AsyncOpenAI) -> None: + async def test_raw_response_create_and_run_overload_1(self, async_client: 
AsyncOpenAI) -> None: response = await async_client.beta.threads.with_raw_response.create_and_run( assistant_id="string", ) @@ -491,7 +1046,7 @@ async def test_raw_response_create_and_run(self, async_client: AsyncOpenAI) -> N assert_matches_type(Run, thread, path=["response"]) @parametrize - async def test_streaming_response_create_and_run(self, async_client: AsyncOpenAI) -> None: + async def test_streaming_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.with_streaming_response.create_and_run( assistant_id="string", ) as response: @@ -502,3 +1057,172 @@ async def test_streaming_response_create_and_run(self, async_client: AsyncOpenAI assert_matches_type(Run, thread, path=["response"]) assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: + thread_stream = await async_client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + ) + await thread_stream.response.aclose() + + @parametrize + async def test_method_create_and_run_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + thread_stream = await async_client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={}, + model="gpt-4-turbo", + response_format="none", + temperature=1, + thread={ + "messages": [ + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + ], + "tool_resources": { + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "file_ids": ["string", "string", "string"], + "metadata": {}, + } + ], + }, + }, + "metadata": {}, + }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}, 
{"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + await thread_stream.response.aclose() + + @parametrize + async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.create_and_run( + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.create_and_run( + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/test_vector_stores.py b/tests/api_resources/beta/test_vector_stores.py new file mode 100644 index 0000000000..e671c96a45 --- /dev/null +++ b/tests/api_resources/beta/test_vector_stores.py @@ -0,0 +1,426 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.beta import ( + VectorStore, + VectorStoreDeleted, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestVectorStores: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.create() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.create( + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + file_ids=["string", "string", "string"], + metadata={}, + name="string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.vector_stores.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.vector_stores.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.retrieve( + "string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = 
client.beta.vector_stores.with_raw_response.retrieve( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.vector_stores.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.update( + "string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_method_update_with_all_params(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.update( + "string", + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + metadata={}, + name="string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.beta.vector_stores.with_raw_response.update( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.beta.vector_stores.with_streaming_response.update( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.with_raw_response.update( + "", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.list() + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.list( + after="string", + before="string", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.beta.vector_stores.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with 
client.beta.vector_stores.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + vector_store = client.beta.vector_stores.delete( + "string", + ) + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.beta.vector_stores.with_raw_response.delete( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.beta.vector_stores.with_streaming_response.delete( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.with_raw_response.delete( + "", + ) + + +class TestAsyncVectorStores: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.create() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.create( + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + file_ids=["string", "string", "string"], + metadata={}, + name="string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.retrieve( + "string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: 
AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.with_raw_response.retrieve( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.update( + "string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.update( + "string", + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + metadata={}, + name="string", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.with_raw_response.update( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.with_streaming_response.update( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.with_raw_response.update( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.list() + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.list( + after="string", + before="string", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await 
async_client.beta.vector_stores.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.beta.vector_stores.delete( + "string", + ) + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.with_raw_response.delete( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.with_streaming_response.delete( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/beta/threads/__init__.py b/tests/api_resources/beta/threads/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/beta/threads/__init__.py +++ b/tests/api_resources/beta/threads/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/threads/messages/__init__.py b/tests/api_resources/beta/threads/messages/__init__.py deleted file mode 100644 index 1016754ef3..0000000000 --- a/tests/api_resources/beta/threads/messages/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. diff --git a/tests/api_resources/beta/threads/messages/test_files.py b/tests/api_resources/beta/threads/messages/test_files.py deleted file mode 100644 index 4d0613fd2f..0000000000 --- a/tests/api_resources/beta/threads/messages/test_files.py +++ /dev/null @@ -1,263 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from openai import OpenAI, AsyncOpenAI -from tests.utils import assert_matches_type -from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.threads.messages import MessageFile - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestFiles: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: OpenAI) -> None: - file = client.beta.threads.messages.files.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="msg_abc123", - ) - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.threads.messages.files.with_raw_response.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="msg_abc123", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.threads.messages.files.with_streaming_response.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="msg_abc123", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(MessageFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.files.with_raw_response.retrieve( - "file-abc123", - thread_id="", - message_id="msg_abc123", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - client.beta.threads.messages.files.with_raw_response.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.threads.messages.files.with_raw_response.retrieve( - "", - thread_id="thread_abc123", - message_id="msg_abc123", - ) - - @parametrize - def test_method_list(self, client: OpenAI) -> None: - file = client.beta.threads.messages.files.list( - "string", - thread_id="string", - ) - assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: OpenAI) -> None: - file = client.beta.threads.messages.files.list( - "string", - thread_id="string", - after="string", - before="string", - limit=0, - order="asc", - ) - assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.threads.messages.files.with_raw_response.list( - "string", - thread_id="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: OpenAI) -> None: - with 
client.beta.threads.messages.files.with_streaming_response.list( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.files.with_raw_response.list( - "string", - thread_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - client.beta.threads.messages.files.with_raw_response.list( - "", - thread_id="string", - ) - - -class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.threads.messages.files.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="msg_abc123", - ) - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.files.with_raw_response.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="msg_abc123", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.files.with_streaming_response.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="msg_abc123", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(MessageFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.files.with_raw_response.retrieve( - "file-abc123", - thread_id="", - message_id="msg_abc123", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - await async_client.beta.threads.messages.files.with_raw_response.retrieve( - "file-abc123", - thread_id="thread_abc123", - message_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.threads.messages.files.with_raw_response.retrieve( - "", - thread_id="thread_abc123", - message_id="msg_abc123", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.threads.messages.files.list( - "string", - thread_id="string", - ) - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.threads.messages.files.list( - "string", 
- thread_id="string", - after="string", - before="string", - limit=0, - order="asc", - ) - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.files.with_raw_response.list( - "string", - thread_id="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.files.with_streaming_response.list( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.files.with_raw_response.list( - "string", - thread_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - await async_client.beta.threads.messages.files.with_raw_response.list( - "", - thread_id="string", - ) diff --git a/tests/api_resources/beta/threads/runs/__init__.py b/tests/api_resources/beta/threads/runs/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/beta/threads/runs/__init__.py +++ b/tests/api_resources/beta/threads/runs/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/threads/runs/test_steps.py b/tests/api_resources/beta/threads/runs/test_steps.py index c15848cd70..e6108d8dad 100644 --- a/tests/api_resources/beta/threads/runs/test_steps.py +++ b/tests/api_resources/beta/threads/runs/test_steps.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/api_resources/beta/threads/test_messages.py b/tests/api_resources/beta/threads/test_messages.py index 538d2f4c2a..b5be32a421 100644 --- a/tests/api_resources/beta/threads/test_messages.py +++ b/tests/api_resources/beta/threads/test_messages.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -10,7 +10,10 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.threads import ThreadMessage +from openai.types.beta.threads import ( + Message, + MessageDeleted, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -22,47 +25,60 @@ class TestMessages: def test_method_create(self, client: OpenAI) -> None: message = client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: message = client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", - file_ids=["string"], + attachments=[ + { + "file_id": "string", + "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + }, + { + "file_id": "string", + "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + }, + { + "file_id": "string", + "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + }, + ], metadata={}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.create( "string", - content="x", + content="string", role="user", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.beta.threads.messages.with_streaming_response.create( "string", - content="x", + content="string", role="user", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -71,7 +87,7 @@ def test_path_params_create(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.messages.with_raw_response.create( "", - content="x", + content="string", role="user", ) @@ -81,7 +97,7 @@ def test_method_retrieve(self, client: OpenAI) -> None: "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: @@ -93,7 +109,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: @@ -105,7 +121,7 @@ def test_streaming_response_retrieve(self, client: 
OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -129,7 +145,7 @@ def test_method_update(self, client: OpenAI) -> None: "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: @@ -138,7 +154,7 @@ def test_method_update_with_all_params(self, client: OpenAI) -> None: thread_id="string", metadata={}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: @@ -150,7 +166,7 @@ def test_raw_response_update(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: @@ -162,7 +178,7 @@ def test_streaming_response_update(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -185,7 +201,7 @@ def test_method_list(self, client: OpenAI) -> None: message = client.beta.threads.messages.list( "string", ) - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: @@ -195,8 +211,9 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: before="string", limit=0, order="asc", + run_id="string", ) - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: @@ -207,7 +224,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: @@ -218,7 +235,7 @@ def test_streaming_response_list(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -229,6 +246,54 @@ def test_path_params_list(self, client: OpenAI) -> None: "", ) + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + message = client.beta.threads.messages.delete( + "string", + thread_id="string", + ) + assert_matches_type(MessageDeleted, message, 
path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.beta.threads.messages.with_streaming_response.delete( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.delete( + "", + thread_id="string", + ) + class TestAsyncMessages: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -237,47 +302,60 @@ class TestAsyncMessages: async def test_method_create(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", - file_ids=["string"], + attachments=[ + { + "file_id": "string", + "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + }, + { + "file_id": "string", + "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + }, + { + "file_id": "string", + "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + }, + ], metadata={}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.messages.with_raw_response.create( "string", - content="x", + content="string", role="user", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.messages.with_streaming_response.create( "string", - content="x", + content="string", role="user", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = await response.parse() - assert_matches_type(ThreadMessage, message, 
path=["response"]) + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -286,7 +364,7 @@ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.create( "", - content="x", + content="string", role="user", ) @@ -296,7 +374,7 @@ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @@ -308,7 +386,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: @@ -320,7 +398,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = await response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -344,7 +422,7 @@ async def test_method_update(self, async_client: AsyncOpenAI) -> None: "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: @@ -353,7 +431,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> thread_id="string", metadata={}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @@ -365,7 +443,7 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: @@ -377,7 +455,7 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = await response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -400,7 +478,7 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.list( "string", ) - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: @@ 
-410,8 +488,9 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N before="string", limit=0, order="asc", + run_id="string", ) - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @@ -422,7 +501,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @@ -433,7 +512,7 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = await response.parse() - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) assert cast(Any, response.is_closed) is True @@ -443,3 +522,51 @@ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: await async_client.beta.threads.messages.with_raw_response.list( "", ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.delete( + "string", + thread_id="string", + ) + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.messages.with_streaming_response.delete( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.delete( + "", + thread_id="string", + ) diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py index 9e88d65eaf..089dd1253e 100644 --- a/tests/api_resources/beta/threads/test_runs.py +++ b/tests/api_resources/beta/threads/test_runs.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -14,6 +14,8 @@ Run, ) +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -21,7 +23,7 @@ class TestRuns: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - def test_method_create(self, client: OpenAI) -> None: + def test_method_create_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.create( "string", assistant_id="string", @@ -29,20 +31,126 @@ def test_method_create(self, client: OpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - def test_method_create_with_all_params(self, client: OpenAI) -> None: + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.create( "string", assistant_id="string", additional_instructions="string", + additional_messages=[ + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + ], instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, metadata={}, - model="string", + model="gpt-4-turbo", + response_format="none", + stream=False, + temperature=1, + tool_choice="none", tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, ) assert_matches_type(Run, run, path=["response"]) @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.create( "string", assistant_id="string", @@ -54,7 +162,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - def test_streaming_response_create(self, client: OpenAI) -> None: + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: with 
client.beta.threads.runs.with_streaming_response.create( "string", assistant_id="string", @@ -68,13 +176,177 @@ def test_streaming_response_create(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True @parametrize - def test_path_params_create(self, client: OpenAI) -> None: + def test_path_params_create_overload_1(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.create( "", assistant_id="string", ) + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + run_stream = client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + ) + run_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + run_stream = client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + additional_instructions="string", + additional_messages=[ + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + ], + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={}, + model="gpt-4-turbo", + response_format="none", + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + run_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.beta.threads.runs.with_raw_response.create( + "string", + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.create( + "string", + assistant_id="string", + 
stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create_overload_2(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.create( + "", + assistant_id="string", + stream=True, + ) + @parametrize def test_method_retrieve(self, client: OpenAI) -> None: run = client.beta.threads.runs.retrieve( @@ -278,7 +550,7 @@ def test_path_params_cancel(self, client: OpenAI) -> None: ) @parametrize - def test_method_submit_tool_outputs(self, client: OpenAI) -> None: + def test_method_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.submit_tool_outputs( "string", thread_id="string", @@ -287,7 +559,30 @@ def test_method_submit_tool_outputs(self, client: OpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - def test_raw_response_submit_tool_outputs(self, client: OpenAI) -> None: + def test_method_submit_tool_outputs_with_all_params_overload_1(self, client: OpenAI) -> None: + run = client.beta.threads.runs.submit_tool_outputs( + "string", + thread_id="string", + tool_outputs=[ + { + "tool_call_id": "string", + "output": "string", + }, + { + "tool_call_id": "string", + "output": "string", + }, + { + "tool_call_id": "string", + "output": "string", + }, + ], + stream=False, + ) + assert_matches_type(Run, run, path=["response"]) + + @parametrize + def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( "string", thread_id="string", @@ -300,7 +595,7 @@ def test_raw_response_submit_tool_outputs(self, client: OpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - def test_streaming_response_submit_tool_outputs(self, client: OpenAI) -> None: + def test_streaming_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( "string", thread_id="string", @@ -315,11 +610,67 @@ def test_streaming_response_submit_tool_outputs(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True @parametrize - def test_path_params_submit_tool_outputs(self, client: OpenAI) -> None: + def test_path_params_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="", + tool_outputs=[{}, {}, {}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "", + thread_id="string", + tool_outputs=[{}, {}, {}], + ) + + @parametrize + def test_method_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + run_stream = client.beta.threads.runs.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}, {}, {}], + ) + run_stream.response.close() + + @parametrize + def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="string", + 
stream=True, + tool_outputs=[{}, {}, {}], + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}, {}, {}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.submit_tool_outputs( "string", thread_id="", + stream=True, tool_outputs=[{}, {}, {}], ) @@ -327,6 +678,7 @@ def test_path_params_submit_tool_outputs(self, client: OpenAI) -> None: client.beta.threads.runs.with_raw_response.submit_tool_outputs( "", thread_id="string", + stream=True, tool_outputs=[{}, {}, {}], ) @@ -335,7 +687,7 @@ class TestAsyncRuns: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, async_client: AsyncOpenAI) -> None: + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.create( "string", assistant_id="string", @@ -343,20 +695,126 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.create( "string", assistant_id="string", additional_instructions="string", + additional_messages=[ + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + 
"tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + ], instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, metadata={}, - model="string", + model="gpt-4-turbo", + response_format="none", + stream=False, + temperature=1, + tool_choice="none", tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.create( "string", assistant_id="string", @@ -368,7 +826,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.create( "string", assistant_id="string", @@ -382,13 +840,177 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non assert cast(Any, response.is_closed) is True @parametrize - async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + async def test_path_params_create_overload_1(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.create( "", assistant_id="string", ) + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + run_stream = await async_client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + ) + await run_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + run_stream = await async_client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + additional_instructions="string", + additional_messages=[ + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + { + "role": "user", + "content": "string", + "attachments": [ + { + "file_id": "string", + "tools": [ 
+ {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + { + "file_id": "string", + "tools": [ + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + {"type": "code_interpreter"}, + ], + }, + ], + "metadata": {}, + }, + ], + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={}, + model="gpt-4-turbo", + response_format="none", + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + await run_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.create( + "string", + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.create( + "string", + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create_overload_2(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.create( + "", + assistant_id="string", + stream=True, + ) + @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.retrieve( @@ -592,7 +1214,7 @@ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: ) @parametrize - async def test_method_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None: + async def test_method_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.submit_tool_outputs( "string", thread_id="string", @@ -601,7 +1223,30 @@ async def test_method_submit_tool_outputs(self, async_client: AsyncOpenAI) -> No assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None: + async def test_method_submit_tool_outputs_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.submit_tool_outputs( + "string", + thread_id="string", + tool_outputs=[ + { + "tool_call_id": "string", + "output": "string", + }, + { + "tool_call_id": "string", + "output": "string", + }, + { + "tool_call_id": "string", + "output": "string", + }, + ], + stream=False, + ) + assert_matches_type(Run, run, path=["response"]) + + @parametrize + async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( "string", thread_id="string", @@ -614,7 +1259,7 @@ async def 
test_raw_response_submit_tool_outputs(self, async_client: AsyncOpenAI) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_streaming_response_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None: + async def test_streaming_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( "string", thread_id="string", @@ -629,11 +1274,67 @@ async def test_streaming_response_submit_tool_outputs(self, async_client: AsyncO assert cast(Any, response.is_closed) is True @parametrize - async def test_path_params_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None: + async def test_path_params_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="", + tool_outputs=[{}, {}, {}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "", + thread_id="string", + tool_outputs=[{}, {}, {}], + ) + + @parametrize + async def test_method_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + run_stream = await async_client.beta.threads.runs.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}, {}, {}], + ) + await run_stream.response.aclose() + + @parametrize + async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}, {}, {}], + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}, {}, {}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( "string", thread_id="", + stream=True, tool_outputs=[{}, {}, {}], ) @@ -641,5 +1342,6 @@ async def test_path_params_submit_tool_outputs(self, async_client: AsyncOpenAI) await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( "", thread_id="string", + stream=True, tool_outputs=[{}, {}, {}], ) diff --git a/tests/api_resources/beta/vector_stores/__init__.py b/tests/api_resources/beta/vector_stores/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/beta/vector_stores/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
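The `_overload_2` tests above cover the streaming variants of `beta.threads.runs.create` and `beta.threads.runs.submit_tool_outputs` that this diff introduces. A minimal usage sketch of that surface, assuming placeholder thread/assistant IDs (they are not values taken from this diff):

    import asyncio
    from openai import AsyncOpenAI

    async def main() -> None:
        client = AsyncOpenAI()
        # Passing stream=True returns an async event stream instead of a Run object.
        stream = await client.beta.threads.runs.create(
            "thread_abc123",             # placeholder thread ID
            assistant_id="asst_abc123",  # placeholder assistant ID
            stream=True,
        )
        async for event in stream:      # AssistantStreamEvent objects
            print(event)

    asyncio.run(main())

The tests close the stream with `await run_stream.response.aclose()`; iterating it to completion, as above, is the other common way to drain it.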
diff --git a/tests/api_resources/beta/vector_stores/test_file_batches.py b/tests/api_resources/beta/vector_stores/test_file_batches.py new file mode 100644 index 0000000000..9854d1a138 --- /dev/null +++ b/tests/api_resources/beta/vector_stores/test_file_batches.py @@ -0,0 +1,424 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.beta.vector_stores import ( + VectorStoreFile, + VectorStoreFileBatch, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestFileBatches: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + file_batch = client.beta.vector_stores.file_batches.create( + "vs_abc123", + file_ids=["string"], + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.vector_stores.file_batches.with_raw_response.create( + "vs_abc123", + file_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.vector_stores.file_batches.with_streaming_response.create( + "vs_abc123", + file_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.create( + "", + file_ids=["string"], + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + file_batch = client.beta.vector_stores.file_batches.retrieve( + "vsfb_abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.beta.vector_stores.file_batches.with_raw_response.retrieve( + "vsfb_abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.vector_stores.file_batches.with_streaming_response.retrieve( + "vsfb_abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is 
True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.retrieve( + "vsfb_abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.retrieve( + "", + vector_store_id="vs_abc123", + ) + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + file_batch = client.beta.vector_stores.file_batches.cancel( + "string", + vector_store_id="string", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.beta.vector_stores.file_batches.with_raw_response.cancel( + "string", + vector_store_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.beta.vector_stores.file_batches.with_streaming_response.cancel( + "string", + vector_store_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.cancel( + "string", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.cancel( + "", + vector_store_id="string", + ) + + @parametrize + def test_method_list_files(self, client: OpenAI) -> None: + file_batch = client.beta.vector_stores.file_batches.list_files( + "string", + vector_store_id="string", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + def test_method_list_files_with_all_params(self, client: OpenAI) -> None: + file_batch = client.beta.vector_stores.file_batches.list_files( + "string", + vector_store_id="string", + after="string", + before="string", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + def test_raw_response_list_files(self, client: OpenAI) -> None: + response = client.beta.vector_stores.file_batches.with_raw_response.list_files( + "string", + vector_store_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + def test_streaming_response_list_files(self, client: OpenAI) -> None: + with client.beta.vector_stores.file_batches.with_streaming_response.list_files( + "string", + vector_store_id="string", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list_files(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.list_files( + "string", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.beta.vector_stores.file_batches.with_raw_response.list_files( + "", + vector_store_id="string", + ) + + +class TestAsyncFileBatches: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.beta.vector_stores.file_batches.create( + "vs_abc123", + file_ids=["string"], + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.file_batches.with_raw_response.create( + "vs_abc123", + file_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.file_batches.with_streaming_response.create( + "vs_abc123", + file_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.create( + "", + file_ids=["string"], + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.beta.vector_stores.file_batches.retrieve( + "vsfb_abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( + "vsfb_abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.file_batches.with_streaming_response.retrieve( + "vsfb_abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( + "vsfb_abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( + "", + vector_store_id="vs_abc123", + ) + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.beta.vector_stores.file_batches.cancel( + "string", + vector_store_id="string", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( + "string", + vector_store_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.file_batches.with_streaming_response.cancel( + "string", + vector_store_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( + "string", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( + "", + vector_store_id="string", + ) + + @parametrize + async def test_method_list_files(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.beta.vector_stores.file_batches.list_files( + "string", + vector_store_id="string", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + async def test_method_list_files_with_all_params(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.beta.vector_stores.file_batches.list_files( + "string", + vector_store_id="string", + after="string", + before="string", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + async def test_raw_response_list_files(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( + "string", + vector_store_id="string", + ) + + assert 
response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_list_files(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.vector_stores.file_batches.with_streaming_response.list_files( + "string", + vector_store_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list_files(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( + "string", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( + "", + vector_store_id="string", + ) diff --git a/tests/api_resources/beta/assistants/test_files.py b/tests/api_resources/beta/vector_stores/test_files.py similarity index 58% rename from tests/api_resources/beta/assistants/test_files.py rename to tests/api_resources/beta/vector_stores/test_files.py index 66e3e2efe6..58301e2d37 100644 --- a/tests/api_resources/beta/assistants/test_files.py +++ b/tests/api_resources/beta/vector_stores/test_files.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -10,7 +10,10 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.assistants import AssistantFile, FileDeleteResponse +from openai.types.beta.vector_stores import ( + VectorStoreFile, + VectorStoreFileDeleted, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -20,189 +23,190 @@ class TestFiles: @parametrize def test_method_create(self, client: OpenAI) -> None: - file = client.beta.assistants.files.create( - "file-abc123", + file = client.beta.vector_stores.files.create( + "vs_abc123", file_id="string", ) - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.create( - "file-abc123", + response = client.beta.vector_stores.files.with_raw_response.create( + "vs_abc123", file_id="string", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.assistants.files.with_streaming_response.create( - "file-abc123", + with client.beta.vector_stores.files.with_streaming_response.create( + "vs_abc123", file_id="string", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_create(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - client.beta.assistants.files.with_raw_response.create( + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.files.with_raw_response.create( "", file_id="string", ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - file = client.beta.assistants.files.retrieve( - "string", - assistant_id="string", + file = client.beta.vector_stores.files.retrieve( + "file-abc123", + vector_store_id="vs_abc123", ) - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.retrieve( - "string", - assistant_id="string", + response = client.beta.vector_stores.files.with_raw_response.retrieve( + "file-abc123", + vector_store_id="vs_abc123", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.assistants.files.with_streaming_response.retrieve( - "string", - assistant_id="string", + with client.beta.vector_stores.files.with_streaming_response.retrieve( 
+ "file-abc123", + vector_store_id="vs_abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - client.beta.assistants.files.with_raw_response.retrieve( - "string", - assistant_id="", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.files.with_raw_response.retrieve( + "file-abc123", + vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.assistants.files.with_raw_response.retrieve( + client.beta.vector_stores.files.with_raw_response.retrieve( "", - assistant_id="string", + vector_store_id="vs_abc123", ) @parametrize def test_method_list(self, client: OpenAI) -> None: - file = client.beta.assistants.files.list( + file = client.beta.vector_stores.files.list( "string", ) - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - file = client.beta.assistants.files.list( + file = client.beta.vector_stores.files.list( "string", after="string", before="string", + filter="in_progress", limit=0, order="asc", ) - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.list( + response = client.beta.vector_stores.files.with_raw_response.list( "string", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.assistants.files.with_streaming_response.list( + with client.beta.vector_stores.files.with_streaming_response.list( "string", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - client.beta.assistants.files.with_raw_response.list( + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.files.with_raw_response.list( "", ) @parametrize def test_method_delete(self, client: OpenAI) -> None: - file = client.beta.assistants.files.delete( + file = client.beta.vector_stores.files.delete( "string", - assistant_id="string", + 
vector_store_id="string", ) - assert_matches_type(FileDeleteResponse, file, path=["response"]) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.delete( + response = client.beta.vector_stores.files.with_raw_response.delete( "string", - assistant_id="string", + vector_store_id="string", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(FileDeleteResponse, file, path=["response"]) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.assistants.files.with_streaming_response.delete( + with client.beta.vector_stores.files.with_streaming_response.delete( "string", - assistant_id="string", + vector_store_id="string", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(FileDeleteResponse, file, path=["response"]) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_delete(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - client.beta.assistants.files.with_raw_response.delete( + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.beta.vector_stores.files.with_raw_response.delete( "string", - assistant_id="", + vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.assistants.files.with_raw_response.delete( + client.beta.vector_stores.files.with_raw_response.delete( "", - assistant_id="string", + vector_store_id="string", ) @@ -211,187 +215,188 @@ class TestAsyncFiles: @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.assistants.files.create( - "file-abc123", + file = await async_client.beta.vector_stores.files.create( + "vs_abc123", file_id="string", ) - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.assistants.files.with_raw_response.create( - "file-abc123", + response = await async_client.beta.vector_stores.files.with_raw_response.create( + "vs_abc123", file_id="string", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.assistants.files.with_streaming_response.create( - "file-abc123", + async with async_client.beta.vector_stores.files.with_streaming_response.create( + "vs_abc123", file_id="string", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(AssistantFile, file, 
path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - await async_client.beta.assistants.files.with_raw_response.create( + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.files.with_raw_response.create( "", file_id="string", ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.assistants.files.retrieve( - "string", - assistant_id="string", + file = await async_client.beta.vector_stores.files.retrieve( + "file-abc123", + vector_store_id="vs_abc123", ) - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.assistants.files.with_raw_response.retrieve( - "string", - assistant_id="string", + response = await async_client.beta.vector_stores.files.with_raw_response.retrieve( + "file-abc123", + vector_store_id="vs_abc123", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.assistants.files.with_streaming_response.retrieve( - "string", - assistant_id="string", + async with async_client.beta.vector_stores.files.with_streaming_response.retrieve( + "file-abc123", + vector_store_id="vs_abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) + assert_matches_type(VectorStoreFile, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - await async_client.beta.assistants.files.with_raw_response.retrieve( - "string", - assistant_id="", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.files.with_raw_response.retrieve( + "file-abc123", + vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.assistants.files.with_raw_response.retrieve( + await async_client.beta.vector_stores.files.with_raw_response.retrieve( "", - assistant_id="string", + vector_store_id="vs_abc123", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.assistants.files.list( + file = await async_client.beta.vector_stores.files.list( "string", ) - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) @parametrize async def test_method_list_with_all_params(self, 
async_client: AsyncOpenAI) -> None: - file = await async_client.beta.assistants.files.list( + file = await async_client.beta.vector_stores.files.list( "string", after="string", before="string", + filter="in_progress", limit=0, order="asc", ) - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.assistants.files.with_raw_response.list( + response = await async_client.beta.vector_stores.files.with_raw_response.list( "string", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.assistants.files.with_streaming_response.list( + async with async_client.beta.vector_stores.files.with_streaming_response.list( "string", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - await async_client.beta.assistants.files.with_raw_response.list( + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.files.with_raw_response.list( "", ) @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.assistants.files.delete( + file = await async_client.beta.vector_stores.files.delete( "string", - assistant_id="string", + vector_store_id="string", ) - assert_matches_type(FileDeleteResponse, file, path=["response"]) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.assistants.files.with_raw_response.delete( + response = await async_client.beta.vector_stores.files.with_raw_response.delete( "string", - assistant_id="string", + vector_store_id="string", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(FileDeleteResponse, file, path=["response"]) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.assistants.files.with_streaming_response.delete( + async with async_client.beta.vector_stores.files.with_streaming_response.delete( "string", - assistant_id="string", + vector_store_id="string", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(FileDeleteResponse, file, path=["response"]) + 
assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): - await async_client.beta.assistants.files.with_raw_response.delete( + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.beta.vector_stores.files.with_raw_response.delete( "string", - assistant_id="", + vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.assistants.files.with_raw_response.delete( + await async_client.beta.vector_stores.files.with_raw_response.delete( "", - assistant_id="string", + vector_store_id="string", ) diff --git a/tests/api_resources/chat/__init__.py b/tests/api_resources/chat/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/chat/__init__.py +++ b/tests/api_resources/chat/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 4fa069ba2e..1c195c4001 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -9,7 +9,9 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.types.chat import ChatCompletion +from openai.types.chat import ( + ChatCompletion, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -26,7 +28,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None: "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -40,7 +42,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: "name": "string", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", frequency_penalty=-2, function_call="none", functions=[ @@ -59,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: seed=-9223372036854776000, stop="string", stream=False, + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ @@ -102,7 +105,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", ) assert response.is_closed is True @@ -119,7 +122,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -138,7 +141,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None: "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, ) completion_stream.response.close() @@ -153,7 +156,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: "name": "string", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, 
frequency_penalty=-2, function_call="none", @@ -172,6 +175,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: response_format={"type": "json_object"}, seed=-9223372036854776000, stop="string", + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ @@ -215,7 +219,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, ) @@ -232,7 +236,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, ) as response: assert not response.is_closed @@ -256,7 +260,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -270,7 +274,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn "name": "string", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", frequency_penalty=-2, function_call="none", functions=[ @@ -289,6 +293,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn seed=-9223372036854776000, stop="string", stream=False, + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ @@ -332,7 +337,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", ) assert response.is_closed is True @@ -349,7 +354,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -368,7 +373,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, ) await completion_stream.response.aclose() @@ -383,7 +388,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "name": "string", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, frequency_penalty=-2, function_call="none", @@ -402,6 +407,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn response_format={"type": "json_object"}, seed=-9223372036854776000, stop="string", + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ @@ -445,7 +451,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, ) @@ -462,7 +468,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe "role": "system", } ], - model="gpt-3.5-turbo", + model="gpt-4-turbo", stream=True, ) as response: assert not response.is_closed diff --git a/tests/api_resources/fine_tuning/__init__.py b/tests/api_resources/fine_tuning/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/fine_tuning/__init__.py +++ b/tests/api_resources/fine_tuning/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
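The chat-completions changes above switch the example model to `gpt-4-turbo` and add coverage for the new `stream_options={"include_usage": True}` parameter. A hedged sketch of calling it (the model and message are illustrative; handling of the final chunk assumes the streamed chunk type exposes an optional `usage` field, which is what `include_usage` implies):

    from openai import OpenAI

    client = OpenAI()
    stream = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    for chunk in stream:
        if chunk.choices:
            # Normal content deltas.
            print(chunk.choices[0].delta.content or "", end="")
        elif chunk.usage is not None:
            # With include_usage=True, a final chunk with empty choices
            # reports token usage for the whole request.
            print(chunk.usage)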
diff --git a/tests/api_resources/fine_tuning/jobs/__init__.py b/tests/api_resources/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/fine_tuning/jobs/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py new file mode 100644 index 0000000000..915d5c6f63 --- /dev/null +++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py @@ -0,0 +1,117 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestCheckpoints: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + checkpoint = client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + checkpoint = client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="string", + limit=0, + ) + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + checkpoint = response.parse() + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.checkpoints.with_streaming_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + checkpoint = response.parse() + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "", + ) + + +class TestAsyncCheckpoints: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + checkpoint = await 
async_client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="string", + limit=0, + ) + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + checkpoint = response.parse() + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + checkpoint = await response.parse() + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "", + ) diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py index 204cc3b1f5..1ff6d63b31 100644 --- a/tests/api_resources/fine_tuning/test_jobs.py +++ b/tests/api_resources/fine_tuning/test_jobs.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -39,6 +39,36 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: "learning_rate_multiplier": "auto", "n_epochs": "auto", }, + integrations=[ + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "string", + "entity": "string", + "tags": ["custom-tag", "custom-tag", "custom-tag"], + }, + }, + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "string", + "entity": "string", + "tags": ["custom-tag", "custom-tag", "custom-tag"], + }, + }, + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "string", + "entity": "string", + "tags": ["custom-tag", "custom-tag", "custom-tag"], + }, + }, + ], + seed=42, suffix="x", validation_file="file-abc123", ) @@ -248,6 +278,36 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> "learning_rate_multiplier": "auto", "n_epochs": "auto", }, + integrations=[ + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "string", + "entity": "string", + "tags": ["custom-tag", "custom-tag", "custom-tag"], + }, + }, + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "string", + "entity": "string", + "tags": ["custom-tag", "custom-tag", "custom-tag"], + }, + }, + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "string", + "entity": "string", + "tags": ["custom-tag", "custom-tag", "custom-tag"], + }, + }, + ], + seed=42, suffix="x", validation_file="file-abc123", ) diff --git a/tests/api_resources/test_batches.py b/tests/api_resources/test_batches.py new file mode 100644 index 0000000000..6f9b598e61 --- /dev/null +++ b/tests/api_resources/test_batches.py @@ -0,0 +1,335 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import Batch +from openai.pagination import SyncCursorPage, AsyncCursorPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestBatches: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + batch = client.batches.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + batch = client.batches.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + metadata={"foo": "string"}, + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + batch = client.batches.retrieve( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.retrieve( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.batches.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + batch = client.batches.list() + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + batch = client.batches.list( + after="string", + limit=0, + ) + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + def 
test_raw_response_list(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + batch = client.batches.cancel( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.cancel( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.cancel( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.batches.with_raw_response.cancel( + "", + ) + + +class TestAsyncBatches: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + metadata={"foo": "string"}, + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.create( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(Batch, batch, path=["response"]) 
+ + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.retrieve( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.retrieve( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.batches.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.list() + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.list( + after="string", + limit=0, + ) + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.cancel( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.cancel( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.cancel( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + 
assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.batches.with_raw_response.cancel( + "", + ) diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index 916cdd3cb6..69d914200f 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -41,6 +41,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: seed=-9223372036854776000, stop="\n", stream=False, + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, @@ -99,6 +100,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: presence_penalty=-2, seed=-9223372036854776000, stop="\n", + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, @@ -161,6 +163,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn seed=-9223372036854776000, stop="\n", stream=False, + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, @@ -219,6 +222,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn presence_penalty=-2, seed=-9223372036854776000, stop="\n", + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index cd4ff8e391..e75545b4e2 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -21,7 +21,7 @@ class TestEmbeddings: def test_method_create(self, client: OpenAI) -> None: embedding = client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) @@ -29,7 +29,8 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: embedding = client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", + dimensions=1, encoding_format="float", user="user-1234", ) @@ -39,7 +40,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.embeddings.with_raw_response.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) assert response.is_closed is True @@ -51,7 +52,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: def test_streaming_response_create(self, client: OpenAI) -> None: with client.embeddings.with_streaming_response.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -69,7 +70,7 @@ class TestAsyncEmbeddings: async def test_method_create(self, async_client: AsyncOpenAI) -> None: embedding = await async_client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) @@ -77,7 +78,8 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: embedding = await async_client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", + dimensions=1, encoding_format="float", user="user-1234", ) @@ -87,7 +89,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.embeddings.with_raw_response.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) assert response.is_closed is True @@ -99,7 +101,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.embeddings.with_streaming_response.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py index d1a17923a6..882f0ddbe7 100644 --- a/tests/api_resources/test_files.py +++ b/tests/api_resources/test_files.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -27,7 +27,7 @@ class TestFiles: def test_method_create(self, client: OpenAI) -> None: file = client.files.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) assert_matches_type(FileObject, file, path=["response"]) @@ -35,7 +35,7 @@ def test_method_create(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.files.with_raw_response.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) assert response.is_closed is True @@ -47,7 +47,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: def test_streaming_response_create(self, client: OpenAI) -> None: with client.files.with_streaming_response.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -263,7 +263,7 @@ class TestAsyncFiles: async def test_method_create(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) assert_matches_type(FileObject, file, path=["response"]) @@ -271,7 +271,7 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.files.with_raw_response.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) assert response.is_closed is True @@ -283,7 +283,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.files.with_streaming_response.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py index b6cb2572ab..2e31f3354a 100644 --- a/tests/api_resources/test_images.py +++ b/tests/api_resources/test_images.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index d031d54f6a..71f8e5834b 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/api_resources/test_moderations.py b/tests/api_resources/test_moderations.py index 285e738c0e..94b9ecd31b 100644 --- a/tests/api_resources/test_moderations.py +++ b/tests/api_resources/test_moderations.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/tests/sample_file.txt b/tests/sample_file.txt new file mode 100644 index 0000000000..af5626b4a1 --- /dev/null +++ b/tests/sample_file.txt @@ -0,0 +1 @@ +Hello, world! 
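Note: the new tests/api_resources/test_batches.py added above exercises the Batches resource end to end (create, retrieve, list, cancel). A minimal usage sketch of that surface, assuming only the method names and parameters those tests call; the input file id and metadata values below are placeholders, and the client reads the standard OPENAI_API_KEY environment variable:

import os

from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Create a batch against the chat completions endpoint; the input_file_id is a
# placeholder for a previously uploaded .jsonl file of requests.
batch = client.batches.create(
    completion_window="24h",
    endpoint="/v1/chat/completions",
    input_file_id="file-abc123",
    metadata={"run": "example"},
)

# Retrieve, list, and cancel mirror the remaining calls covered by the new tests.
batch = client.batches.retrieve(batch.id)
page = client.batches.list(limit=10)
client.batches.cancel(batch.id)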
diff --git a/tests/test_client.py b/tests/test_client.py index 3d2dd35821..c1e545e66f 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -17,7 +17,6 @@ from pydantic import ValidationError from openai import OpenAI, AsyncOpenAI, APIResponseValidationError -from openai._client import OpenAI, AsyncOpenAI from openai._models import BaseModel, FinalRequestOptions from openai._constants import RAW_RESPONSE_HEADER from openai._streaming import Stream, AsyncStream @@ -292,6 +291,16 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + async def test_invalid_http_client(self) -> None: + with pytest.raises(TypeError, match="Invalid `http_client` arg"): + async with httpx.AsyncClient() as http_client: + OpenAI( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + http_client=cast(Any, http_client), + ) + def test_default_headers_option(self) -> None: client = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} @@ -437,6 +446,35 @@ def test_request_extra_query(self) -> None: params = dict(request.url.params) assert params == {"foo": "2"} + def test_multipart_repeating_array(self, client: OpenAI) -> None: + request = client._build_request( + FinalRequestOptions.construct( + method="get", + url="/foo", + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, + json_data={"array": ["foo", "bar"]}, + files=[("foo.txt", b"hello world")], + ) + ) + + assert request.read().split(b"\r\n") == [ + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"foo", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"bar", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="foo.txt"; filename="upload"', + b"Content-Type: application/octet-stream", + b"", + b"hello world", + b"--6b7ba517decee4a450543ea6ae821c82--", + b"", + ] + @pytest.mark.respx(base_url=base_url) def test_basic_union_response(self, respx_mock: MockRouter) -> None: class Model1(BaseModel): @@ -607,6 +645,10 @@ class Model(BaseModel): assert isinstance(exc.value.__cause__, ValidationError) + def test_client_max_retries_validation(self) -> None: + with pytest.raises(TypeError, match=r"max_retries cannot be None"): + OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None)) + @pytest.mark.respx(base_url=base_url) def test_default_stream_cls(self, respx_mock: MockRouter) -> None: class Model(BaseModel): @@ -672,14 +714,17 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No with pytest.raises(APITimeoutError): self.client.post( "/chat/completions", - body=dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", + body=cast( + object, + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ), ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -695,14 +740,17 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non with 
pytest.raises(APIStatusError): self.client.post( "/chat/completions", - body=dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", + body=cast( + object, + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ), ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -959,6 +1007,16 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + def test_invalid_http_client(self) -> None: + with pytest.raises(TypeError, match="Invalid `http_client` arg"): + with httpx.Client() as http_client: + AsyncOpenAI( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + http_client=cast(Any, http_client), + ) + def test_default_headers_option(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} @@ -1104,6 +1162,35 @@ def test_request_extra_query(self) -> None: params = dict(request.url.params) assert params == {"foo": "2"} + def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: + request = async_client._build_request( + FinalRequestOptions.construct( + method="get", + url="/foo", + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, + json_data={"array": ["foo", "bar"]}, + files=[("foo.txt", b"hello world")], + ) + ) + + assert request.read().split(b"\r\n") == [ + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"foo", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"bar", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="foo.txt"; filename="upload"', + b"Content-Type: application/octet-stream", + b"", + b"hello world", + b"--6b7ba517decee4a450543ea6ae821c82--", + b"", + ] + @pytest.mark.respx(base_url=base_url) async def test_basic_union_response(self, respx_mock: MockRouter) -> None: class Model1(BaseModel): @@ -1284,6 +1371,12 @@ class Model(BaseModel): assert isinstance(exc.value.__cause__, ValidationError) + async def test_client_max_retries_validation(self) -> None: + with pytest.raises(TypeError, match=r"max_retries cannot be None"): + AsyncOpenAI( + base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None) + ) + @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio async def test_default_stream_cls(self, respx_mock: MockRouter) -> None: @@ -1352,14 +1445,17 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) with pytest.raises(APITimeoutError): await self.client.post( "/chat/completions", - body=dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", + body=cast( + object, + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ), ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -1375,14 +1471,17 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) with pytest.raises(APIStatusError): await self.client.post( "/chat/completions", - body=dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", + body=cast( + object, + dict( + 
messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ), ), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, diff --git a/tests/test_legacy_response.py b/tests/test_legacy_response.py new file mode 100644 index 0000000000..45025f81d0 --- /dev/null +++ b/tests/test_legacy_response.py @@ -0,0 +1,84 @@ +import json +from typing import cast +from typing_extensions import Annotated + +import httpx +import pytest +import pydantic + +from openai import OpenAI, BaseModel +from openai._streaming import Stream +from openai._base_client import FinalRequestOptions +from openai._legacy_response import LegacyAPIResponse + + +class PydanticModel(pydantic.BaseModel): + ... + + +def test_response_parse_mismatched_basemodel(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + with pytest.raises( + TypeError, + match="Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`", + ): + response.parse(to=PydanticModel) + + +def test_response_parse_custom_stream(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=True, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + stream = response.parse(to=Stream[int]) + assert stream._cast_to == int + + +class CustomModel(BaseModel): + foo: str + bar: int + + +def test_response_parse_custom_model(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=CustomModel) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +def test_response_parse_annotated_type(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse( + to=cast("type[CustomModel]", Annotated[CustomModel, "random metadata"]), + ) + assert obj.foo == "hello!" + assert obj.bar == 2 diff --git a/tests/test_models.py b/tests/test_models.py index 713bd2cb1b..b703444248 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,14 +1,15 @@ import json from typing import Any, Dict, List, Union, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal +from typing_extensions import Literal, Annotated import pytest import pydantic from pydantic import Field +from openai._utils import PropertyInfo from openai._compat import PYDANTIC_V2, parse_obj, model_dump, model_json -from openai._models import BaseModel +from openai._models import BaseModel, construct_type class BasicModel(BaseModel): @@ -30,7 +31,7 @@ class NestedModel(BaseModel): # mismatched types m = NestedModel.construct(nested="hello!") - assert m.nested == "hello!" + assert cast(Any, m.nested) == "hello!" 
def test_optional_nested_model() -> None: @@ -47,7 +48,7 @@ class NestedModel(BaseModel): # mismatched types m3 = NestedModel.construct(nested={"foo"}) assert isinstance(cast(Any, m3.nested), set) - assert m3.nested == {"foo"} + assert cast(Any, m3.nested) == {"foo"} def test_list_nested_model() -> None: @@ -322,7 +323,7 @@ class Model(BaseModel): assert len(m.items) == 2 assert isinstance(m.items[0], Submodel1) assert m.items[0].level == -1 - assert m.items[1] == 156 + assert cast(Any, m.items[1]) == 156 def test_union_of_lists() -> None: @@ -354,7 +355,7 @@ class Model(BaseModel): assert len(m.items) == 2 assert isinstance(m.items[0], SubModel1) assert m.items[0].level == -1 - assert m.items[1] == 156 + assert cast(Any, m.items[1]) == 156 def test_dict_of_union() -> None: @@ -500,6 +501,42 @@ class Model(BaseModel): assert "resource_id" in m.model_fields_set +def test_to_dict() -> None: + class Model(BaseModel): + foo: Optional[str] = Field(alias="FOO", default=None) + + m = Model(FOO="hello") + assert m.to_dict() == {"FOO": "hello"} + assert m.to_dict(use_api_names=False) == {"foo": "hello"} + + m2 = Model() + assert m2.to_dict() == {} + assert m2.to_dict(exclude_unset=False) == {"FOO": None} + assert m2.to_dict(exclude_unset=False, exclude_none=True) == {} + assert m2.to_dict(exclude_unset=False, exclude_defaults=True) == {} + + m3 = Model(FOO=None) + assert m3.to_dict() == {"FOO": None} + assert m3.to_dict(exclude_none=True) == {} + assert m3.to_dict(exclude_defaults=True) == {} + + if PYDANTIC_V2: + + class Model2(BaseModel): + created_at: datetime + + time_str = "2024-03-21T11:39:01.275859" + m4 = Model2.construct(created_at=time_str) + assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)} + assert m4.to_dict(mode="json") == {"created_at": time_str} + else: + with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"): + m.to_dict(mode="json") + + with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"): + m.to_dict(warnings=False) + + def test_forwards_compat_model_dump_method() -> None: class Model(BaseModel): foo: Optional[str] = Field(alias="FOO", default=None) @@ -531,6 +568,34 @@ class Model(BaseModel): m.model_dump(warnings=False) +def test_to_json() -> None: + class Model(BaseModel): + foo: Optional[str] = Field(alias="FOO", default=None) + + m = Model(FOO="hello") + assert json.loads(m.to_json()) == {"FOO": "hello"} + assert json.loads(m.to_json(use_api_names=False)) == {"foo": "hello"} + + if PYDANTIC_V2: + assert m.to_json(indent=None) == '{"FOO":"hello"}' + else: + assert m.to_json(indent=None) == '{"FOO": "hello"}' + + m2 = Model() + assert json.loads(m2.to_json()) == {} + assert json.loads(m2.to_json(exclude_unset=False)) == {"FOO": None} + assert json.loads(m2.to_json(exclude_unset=False, exclude_none=True)) == {} + assert json.loads(m2.to_json(exclude_unset=False, exclude_defaults=True)) == {} + + m3 = Model(FOO=None) + assert json.loads(m3.to_json()) == {"FOO": None} + assert json.loads(m3.to_json(exclude_none=True)) == {} + + if not PYDANTIC_V2: + with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"): + m.to_json(warnings=False) + + def test_forwards_compat_model_dump_json_method() -> None: class Model(BaseModel): foo: Optional[str] = Field(alias="FOO", default=None) @@ -571,3 +636,194 @@ class OurModel(BaseModel): foo: Optional[str] = None takes_pydantic(OurModel()) + + +def test_annotated_types() -> None: + class Model(BaseModel): + value: str + + m = 
construct_type( + value={"value": "foo"}, + type_=cast(Any, Annotated[Model, "random metadata"]), + ) + assert isinstance(m, Model) + assert m.value == "foo" + + +def test_discriminated_unions_invalid_data() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + m = construct_type( + value={"type": "b", "data": "foo"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + m = construct_type( + value={"type": "a", "data": 100}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, A) + assert m.type == "a" + if PYDANTIC_V2: + assert m.data == 100 # type: ignore[comparison-overlap] + else: + # pydantic v1 automatically converts inputs to strings + # if the expected type is a str + assert m.data == "100" + + +def test_discriminated_unions_unknown_variant() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + m = construct_type( + value={"type": "c", "data": None, "new_thing": "bar"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + + # just chooses the first variant + assert isinstance(m, A) + assert m.type == "c" # type: ignore[comparison-overlap] + assert m.data == None # type: ignore[unreachable] + assert m.new_thing == "bar" + + +def test_discriminated_unions_invalid_data_nested_unions() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + class C(BaseModel): + type: Literal["c"] + + data: bool + + m = construct_type( + value={"type": "b", "data": "foo"}, + type_=cast(Any, Annotated[Union[Union[A, B], C], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + m = construct_type( + value={"type": "c", "data": "foo"}, + type_=cast(Any, Annotated[Union[Union[A, B], C], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, C) + assert m.type == "c" + assert m.data == "foo" # type: ignore[comparison-overlap] + + +def test_discriminated_unions_with_aliases_invalid_data() -> None: + class A(BaseModel): + foo_type: Literal["a"] = Field(alias="type") + + data: str + + class B(BaseModel): + foo_type: Literal["b"] = Field(alias="type") + + data: int + + m = construct_type( + value={"type": "b", "data": "foo"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="foo_type")]), + ) + assert isinstance(m, B) + assert m.foo_type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + m = construct_type( + value={"type": "a", "data": 100}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="foo_type")]), + ) + assert isinstance(m, A) + assert m.foo_type == "a" + if PYDANTIC_V2: + assert m.data == 100 # type: ignore[comparison-overlap] + else: + # pydantic v1 automatically converts inputs to strings + # if the expected type is a str + assert m.data == "100" + + +def test_discriminated_unions_overlapping_discriminators_invalid_data() -> None: + class A(BaseModel): + type: Literal["a"] + + data: bool + + class B(BaseModel): + type: Literal["a"] + + data: int + + m = construct_type( + value={"type": "a", "data": "foo"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + 
assert isinstance(m, B) + assert m.type == "a" + assert m.data == "foo" # type: ignore[comparison-overlap] + + +def test_discriminated_unions_invalid_data_uses_cache() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + UnionType = cast(Any, Union[A, B]) + + assert not hasattr(UnionType, "__discriminator__") + + m = construct_type( + value={"type": "b", "data": "foo"}, type_=cast(Any, Annotated[UnionType, PropertyInfo(discriminator="type")]) + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + discriminator = UnionType.__discriminator__ + assert discriminator is not None + + m = construct_type( + value={"type": "b", "data": "foo"}, type_=cast(Any, Annotated[UnionType, PropertyInfo(discriminator="type")]) + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + # if the discriminator details object stays the same between invocations then + # we hit the cache + assert UnionType.__discriminator__ is discriminator diff --git a/tests/test_module_client.py b/tests/test_module_client.py index 40b0bde10b..05b5f81111 100644 --- a/tests/test_module_client.py +++ b/tests/test_module_client.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -16,6 +16,7 @@ def reset_state() -> None: openai._reset_client() openai.api_key = None or "My API Key" openai.organization = None + openai.project = None openai.base_url = None openai.timeout = DEFAULT_TIMEOUT openai.max_retries = DEFAULT_MAX_RETRIES diff --git a/tests/test_response.py b/tests/test_response.py index 335ca7922a..af153b67c4 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -1,8 +1,12 @@ -from typing import List +import json +from typing import List, cast +from typing_extensions import Annotated import httpx import pytest +import pydantic +from openai import OpenAI, BaseModel, AsyncOpenAI from openai._response import ( APIResponse, BaseAPIResponse, @@ -11,6 +15,8 @@ AsyncBinaryAPIResponse, extract_response_type, ) +from openai._streaming import Stream +from openai._base_client import FinalRequestOptions class ConcreteBaseAPIResponse(APIResponse[bytes]): @@ -48,3 +54,141 @@ def test_extract_response_type_concrete_subclasses() -> None: def test_extract_response_type_binary_response() -> None: assert extract_response_type(BinaryAPIResponse) == bytes assert extract_response_type(AsyncBinaryAPIResponse) == bytes + + +class PydanticModel(pydantic.BaseModel): + ... + + +def test_response_parse_mismatched_basemodel(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + with pytest.raises( + TypeError, + match="Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`", + ): + response.parse(to=PydanticModel) + + +@pytest.mark.asyncio +async def test_async_response_parse_mismatched_basemodel(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + with pytest.raises( + TypeError, + match="Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`", + ): + await response.parse(to=PydanticModel) + + +def test_response_parse_custom_stream(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=True, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + stream = response.parse(to=Stream[int]) + assert stream._cast_to == int + + +@pytest.mark.asyncio +async def test_async_response_parse_custom_stream(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=async_client, + stream=True, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + stream = await response.parse(to=Stream[int]) + assert stream._cast_to == int + + +class CustomModel(BaseModel): + foo: str + bar: int + + +def test_response_parse_custom_model(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=CustomModel) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +@pytest.mark.asyncio +async def test_async_response_parse_custom_model(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = await response.parse(to=CustomModel) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +def test_response_parse_annotated_type(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse( + to=cast("type[CustomModel]", Annotated[CustomModel, "random metadata"]), + ) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +async def test_async_response_parse_annotated_type(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = await response.parse( + to=cast("type[CustomModel]", Annotated[CustomModel, "random metadata"]), + ) + assert obj.foo == "hello!" 
+ assert obj.bar == 2 diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 75e4ca2699..04f8e51abd 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -1,104 +1,248 @@ +from __future__ import annotations + from typing import Iterator, AsyncIterator +import httpx import pytest -from openai._streaming import SSEDecoder +from openai import OpenAI, AsyncOpenAI +from openai._streaming import Stream, AsyncStream, ServerSentEvent @pytest.mark.asyncio -async def test_basic_async() -> None: - async def body() -> AsyncIterator[str]: - yield "event: completion" - yield 'data: {"foo":true}' - yield "" - - async for sse in SSEDecoder().aiter(body()): - assert sse.event == "completion" - assert sse.json() == {"foo": True} +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_basic(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: completion\n" + yield b'data: {"foo":true}\n' + yield b"\n" + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) -def test_basic() -> None: - def body() -> Iterator[str]: - yield "event: completion" - yield 'data: {"foo":true}' - yield "" - - it = SSEDecoder().iter(body()) - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "completion" assert sse.json() == {"foo": True} - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) -def test_data_missing_event() -> None: - def body() -> Iterator[str]: - yield 'data: {"foo":true}' - yield "" +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_data_missing_event(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b'data: {"foo":true}\n' + yield b"\n" - it = SSEDecoder().iter(body()) - sse = next(it) + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) assert sse.event is None assert sse.json() == {"foo": True} - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_event_missing_data(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"\n" -def test_event_missing_data() -> None: - def body() -> Iterator[str]: - yield "event: ping" - yield "" + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) - it = SSEDecoder().iter(body()) - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "ping" assert sse.data == "" - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) -def test_multiple_events() -> None: - def body() -> Iterator[str]: - yield "event: ping" - yield "" - yield "event: completion" - yield "" +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_events(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"\n" + yield b"event: completion\n" + yield b"\n" - it = SSEDecoder().iter(body()) + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "ping" 
assert sse.data == "" - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "completion" assert sse.data == "" - with pytest.raises(StopIteration): - next(it) - - -def test_multiple_events_with_data() -> None: - def body() -> Iterator[str]: - yield "event: ping" - yield 'data: {"foo":true}' - yield "" - yield "event: completion" - yield 'data: {"bar":false}' - yield "" + await assert_empty_iter(iterator) - it = SSEDecoder().iter(body()) - sse = next(it) +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_events_with_data(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b'data: {"foo":true}\n' + yield b"\n" + yield b"event: completion\n" + yield b'data: {"bar":false}\n' + yield b"\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) assert sse.event == "ping" assert sse.json() == {"foo": True} - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "completion" assert sse.json() == {"bar": False} - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_data_lines_with_empty_line(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"data: {\n" + yield b'data: "foo":\n' + yield b"data: \n" + yield b"data:\n" + yield b"data: true}\n" + yield b"\n\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event == "ping" + assert sse.json() == {"foo": True} + assert sse.data == '{\n"foo":\n\n\ntrue}' + + await assert_empty_iter(iterator) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_data_json_escaped_double_new_line(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b'data: {"foo": "my long\\n\\ncontent"}' + yield b"\n\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event == "ping" + assert sse.json() == {"foo": "my long\n\ncontent"} + + await assert_empty_iter(iterator) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_data_lines(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"data: {\n" + yield b'data: "foo":\n' + yield b"data: true}\n" + yield b"\n\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event == "ping" + assert sse.json() == {"foo": True} + + await assert_empty_iter(iterator) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_special_new_line_character( + sync: bool, + client: OpenAI, + async_client: AsyncOpenAI, +) -> None: + def body() -> Iterator[bytes]: + yield b'data: {"content":" culpa"}\n' + yield b"\n" + yield b'data: {"content":" \xe2\x80\xa8"}\n' + yield b"\n" + yield b'data: {"content":"foo"}\n' + yield b"\n" + + iterator = 
make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": " culpa"} + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": " 
"} + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": "foo"} + + await assert_empty_iter(iterator) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multi_byte_character_multiple_chunks( + sync: bool, + client: OpenAI, + async_client: AsyncOpenAI, +) -> None: + def body() -> Iterator[bytes]: + yield b'data: {"content":"' + # bytes taken from the string 'известни' and arbitrarily split + # so that some multi-byte characters span multiple chunks + yield b"\xd0" + yield b"\xb8\xd0\xb7\xd0" + yield b"\xb2\xd0\xb5\xd1\x81\xd1\x82\xd0\xbd\xd0\xb8" + yield b'"}\n' + yield b"\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": "известни"} + + +async def to_aiter(iter: Iterator[bytes]) -> AsyncIterator[bytes]: + for chunk in iter: + yield chunk + + +async def iter_next(iter: Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]) -> ServerSentEvent: + if isinstance(iter, AsyncIterator): + return await iter.__anext__() + + return next(iter) + + +async def assert_empty_iter(iter: Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]) -> None: + with pytest.raises((StopAsyncIteration, RuntimeError)): + await iter_next(iter) + + +def make_event_iterator( + content: Iterator[bytes], + *, + sync: bool, + client: OpenAI, + async_client: AsyncOpenAI, +) -> Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]: + if sync: + return Stream(cast_to=object, client=client, response=httpx.Response(200, content=content))._iter_events() + + return AsyncStream( + cast_to=object, client=async_client, response=httpx.Response(200, content=to_aiter(content)) + )._iter_events() diff --git a/tests/test_transform.py b/tests/test_transform.py index c4dffb3bb0..1eb6cde9d6 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -1,22 +1,50 @@ from __future__ import annotations -from typing import Any, List, Union, Optional +import io +import pathlib +from typing import Any, List, Union, TypeVar, Iterable, Optional, cast from datetime import date, datetime from typing_extensions import Required, Annotated, TypedDict import pytest -from openai._utils import PropertyInfo, transform, parse_datetime +from openai._types import Base64FileInput +from openai._utils import ( + PropertyInfo, + transform as _transform, + parse_datetime, + async_transform as _async_transform, +) from openai._compat import PYDANTIC_V2 from openai._models import BaseModel +_T = TypeVar("_T") + +SAMPLE_FILE_PATH = pathlib.Path(__file__).parent.joinpath("sample_file.txt") + + +async def transform( + data: _T, + expected_type: object, + use_async: bool, +) -> _T: + if use_async: + return await _async_transform(data, expected_type=expected_type) + + return _transform(data, expected_type=expected_type) + + +parametrize = pytest.mark.parametrize("use_async", [False, True], ids=["sync", "async"]) + class Foo1(TypedDict): foo_bar: Annotated[str, PropertyInfo(alias="fooBar")] -def test_top_level_alias() -> None: - assert transform({"foo_bar": "hello"}, expected_type=Foo1) == {"fooBar": "hello"} +@parametrize +@pytest.mark.asyncio +async def test_top_level_alias(use_async: bool) -> None: + assert await transform({"foo_bar": "hello"}, expected_type=Foo1, use_async=use_async) == {"fooBar": "hello"} class Foo2(TypedDict): @@ -32,9 +60,11 @@ class Baz2(TypedDict): my_baz: Annotated[str, 
PropertyInfo(alias="myBaz")] -def test_recursive_typeddict() -> None: - assert transform({"bar": {"this_thing": 1}}, Foo2) == {"bar": {"this__thing": 1}} - assert transform({"bar": {"baz": {"my_baz": "foo"}}}, Foo2) == {"bar": {"Baz": {"myBaz": "foo"}}} +@parametrize +@pytest.mark.asyncio +async def test_recursive_typeddict(use_async: bool) -> None: + assert await transform({"bar": {"this_thing": 1}}, Foo2, use_async) == {"bar": {"this__thing": 1}} + assert await transform({"bar": {"baz": {"my_baz": "foo"}}}, Foo2, use_async) == {"bar": {"Baz": {"myBaz": "foo"}}} class Foo3(TypedDict): @@ -45,8 +75,10 @@ class Bar3(TypedDict): my_field: Annotated[str, PropertyInfo(alias="myField")] -def test_list_of_typeddict() -> None: - result = transform({"things": [{"my_field": "foo"}, {"my_field": "foo2"}]}, expected_type=Foo3) +@parametrize +@pytest.mark.asyncio +async def test_list_of_typeddict(use_async: bool) -> None: + result = await transform({"things": [{"my_field": "foo"}, {"my_field": "foo2"}]}, Foo3, use_async) assert result == {"things": [{"myField": "foo"}, {"myField": "foo2"}]} @@ -62,10 +94,14 @@ class Baz4(TypedDict): foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] -def test_union_of_typeddict() -> None: - assert transform({"foo": {"foo_bar": "bar"}}, Foo4) == {"foo": {"fooBar": "bar"}} - assert transform({"foo": {"foo_baz": "baz"}}, Foo4) == {"foo": {"fooBaz": "baz"}} - assert transform({"foo": {"foo_baz": "baz", "foo_bar": "bar"}}, Foo4) == {"foo": {"fooBaz": "baz", "fooBar": "bar"}} +@parametrize +@pytest.mark.asyncio +async def test_union_of_typeddict(use_async: bool) -> None: + assert await transform({"foo": {"foo_bar": "bar"}}, Foo4, use_async) == {"foo": {"fooBar": "bar"}} + assert await transform({"foo": {"foo_baz": "baz"}}, Foo4, use_async) == {"foo": {"fooBaz": "baz"}} + assert await transform({"foo": {"foo_baz": "baz", "foo_bar": "bar"}}, Foo4, use_async) == { + "foo": {"fooBaz": "baz", "fooBar": "bar"} + } class Foo5(TypedDict): @@ -80,9 +116,11 @@ class Baz5(TypedDict): foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] -def test_union_of_list() -> None: - assert transform({"foo": {"foo_bar": "bar"}}, Foo5) == {"FOO": {"fooBar": "bar"}} - assert transform( +@parametrize +@pytest.mark.asyncio +async def test_union_of_list(use_async: bool) -> None: + assert await transform({"foo": {"foo_bar": "bar"}}, Foo5, use_async) == {"FOO": {"fooBar": "bar"}} + assert await transform( { "foo": [ {"foo_baz": "baz"}, @@ -90,6 +128,7 @@ def test_union_of_list() -> None: ] }, Foo5, + use_async, ) == {"FOO": [{"fooBaz": "baz"}, {"fooBaz": "baz"}]} @@ -97,8 +136,10 @@ class Foo6(TypedDict): bar: Annotated[str, PropertyInfo(alias="Bar")] -def test_includes_unknown_keys() -> None: - assert transform({"bar": "bar", "baz_": {"FOO": 1}}, Foo6) == { +@parametrize +@pytest.mark.asyncio +async def test_includes_unknown_keys(use_async: bool) -> None: + assert await transform({"bar": "bar", "baz_": {"FOO": 1}}, Foo6, use_async) == { "Bar": "bar", "baz_": {"FOO": 1}, } @@ -113,9 +154,11 @@ class Bar7(TypedDict): foo: str -def test_ignores_invalid_input() -> None: - assert transform({"bar": ""}, Foo7) == {"bAr": ""} - assert transform({"foo": ""}, Foo7) == {"foo": ""} +@parametrize +@pytest.mark.asyncio +async def test_ignores_invalid_input(use_async: bool) -> None: + assert await transform({"bar": ""}, Foo7, use_async) == {"bAr": ""} + assert await transform({"foo": ""}, Foo7, use_async) == {"foo": ""} class DatetimeDict(TypedDict, total=False): @@ -134,52 +177,66 @@ class DateDict(TypedDict, 
total=False): foo: Annotated[date, PropertyInfo(format="iso8601")] -def test_iso8601_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_iso8601_format(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"foo": dt}, DatetimeDict) == {"foo": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] dt = dt.replace(tzinfo=None) - assert transform({"foo": dt}, DatetimeDict) == {"foo": "2023-02-23T14:16:36.337692"} # type: ignore[comparison-overlap] + assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692"} # type: ignore[comparison-overlap] - assert transform({"foo": None}, DateDict) == {"foo": None} # type: ignore[comparison-overlap] - assert transform({"foo": date.fromisoformat("2023-02-23")}, DateDict) == {"foo": "2023-02-23"} # type: ignore[comparison-overlap] + assert await transform({"foo": None}, DateDict, use_async) == {"foo": None} # type: ignore[comparison-overlap] + assert await transform({"foo": date.fromisoformat("2023-02-23")}, DateDict, use_async) == {"foo": "2023-02-23"} # type: ignore[comparison-overlap] -def test_optional_iso8601_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_optional_iso8601_format(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"bar": dt}, DatetimeDict) == {"bar": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + assert await transform({"bar": dt}, DatetimeDict, use_async) == {"bar": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] - assert transform({"bar": None}, DatetimeDict) == {"bar": None} + assert await transform({"bar": None}, DatetimeDict, use_async) == {"bar": None} -def test_required_iso8601_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_required_iso8601_format(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"required": dt}, DatetimeDict) == {"required": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + assert await transform({"required": dt}, DatetimeDict, use_async) == { + "required": "2023-02-23T14:16:36.337692+00:00" + } # type: ignore[comparison-overlap] - assert transform({"required": None}, DatetimeDict) == {"required": None} + assert await transform({"required": None}, DatetimeDict, use_async) == {"required": None} -def test_union_datetime() -> None: +@parametrize +@pytest.mark.asyncio +async def test_union_datetime(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"union": dt}, DatetimeDict) == { # type: ignore[comparison-overlap] + assert await transform({"union": dt}, DatetimeDict, use_async) == { # type: ignore[comparison-overlap] "union": "2023-02-23T14:16:36.337692+00:00" } - assert transform({"union": "foo"}, DatetimeDict) == {"union": "foo"} + assert await transform({"union": "foo"}, DatetimeDict, use_async) == {"union": "foo"} -def test_nested_list_iso6801_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_nested_list_iso6801_format(use_async: bool) -> None: dt1 = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") dt2 = parse_datetime("2022-01-15T06:34:23Z") - assert transform({"list_": [dt1, dt2]}, DatetimeDict) == { # type: 
ignore[comparison-overlap] + assert await transform({"list_": [dt1, dt2]}, DatetimeDict, use_async) == { # type: ignore[comparison-overlap] "list_": ["2023-02-23T14:16:36.337692+00:00", "2022-01-15T06:34:23+00:00"] } -def test_datetime_custom_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_datetime_custom_format(use_async: bool) -> None: dt = parse_datetime("2022-01-15T06:34:23Z") - result = transform(dt, Annotated[datetime, PropertyInfo(format="custom", format_template="%H")]) + result = await transform(dt, Annotated[datetime, PropertyInfo(format="custom", format_template="%H")], use_async) assert result == "06" # type: ignore[comparison-overlap] @@ -187,58 +244,74 @@ class DateDictWithRequiredAlias(TypedDict, total=False): required_prop: Required[Annotated[date, PropertyInfo(format="iso8601", alias="prop")]] -def test_datetime_with_alias() -> None: - assert transform({"required_prop": None}, DateDictWithRequiredAlias) == {"prop": None} # type: ignore[comparison-overlap] - assert transform({"required_prop": date.fromisoformat("2023-02-23")}, DateDictWithRequiredAlias) == { - "prop": "2023-02-23" - } # type: ignore[comparison-overlap] +@parametrize +@pytest.mark.asyncio +async def test_datetime_with_alias(use_async: bool) -> None: + assert await transform({"required_prop": None}, DateDictWithRequiredAlias, use_async) == {"prop": None} # type: ignore[comparison-overlap] + assert await transform( + {"required_prop": date.fromisoformat("2023-02-23")}, DateDictWithRequiredAlias, use_async + ) == {"prop": "2023-02-23"} # type: ignore[comparison-overlap] class MyModel(BaseModel): foo: str -def test_pydantic_model_to_dictionary() -> None: - assert transform(MyModel(foo="hi!"), Any) == {"foo": "hi!"} - assert transform(MyModel.construct(foo="hi!"), Any) == {"foo": "hi!"} +@parametrize +@pytest.mark.asyncio +async def test_pydantic_model_to_dictionary(use_async: bool) -> None: + assert cast(Any, await transform(MyModel(foo="hi!"), Any, use_async)) == {"foo": "hi!"} + assert cast(Any, await transform(MyModel.construct(foo="hi!"), Any, use_async)) == {"foo": "hi!"} -def test_pydantic_empty_model() -> None: - assert transform(MyModel.construct(), Any) == {} +@parametrize +@pytest.mark.asyncio +async def test_pydantic_empty_model(use_async: bool) -> None: + assert cast(Any, await transform(MyModel.construct(), Any, use_async)) == {} -def test_pydantic_unknown_field() -> None: - assert transform(MyModel.construct(my_untyped_field=True), Any) == {"my_untyped_field": True} +@parametrize +@pytest.mark.asyncio +async def test_pydantic_unknown_field(use_async: bool) -> None: + assert cast(Any, await transform(MyModel.construct(my_untyped_field=True), Any, use_async)) == { + "my_untyped_field": True + } -def test_pydantic_mismatched_types() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_mismatched_types(use_async: bool) -> None: model = MyModel.construct(foo=True) if PYDANTIC_V2: with pytest.warns(UserWarning): - params = transform(model, Any) + params = await transform(model, Any, use_async) else: - params = transform(model, Any) - assert params == {"foo": True} + params = await transform(model, Any, use_async) + assert cast(Any, params) == {"foo": True} -def test_pydantic_mismatched_object_type() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_mismatched_object_type(use_async: bool) -> None: model = MyModel.construct(foo=MyModel.construct(hello="world")) if PYDANTIC_V2: with pytest.warns(UserWarning): - params = transform(model, Any) + 
params = await transform(model, Any, use_async) else: - params = transform(model, Any) - assert params == {"foo": {"hello": "world"}} + params = await transform(model, Any, use_async) + assert cast(Any, params) == {"foo": {"hello": "world"}} class ModelNestedObjects(BaseModel): nested: MyModel -def test_pydantic_nested_objects() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_nested_objects(use_async: bool) -> None: model = ModelNestedObjects.construct(nested={"foo": "stainless"}) assert isinstance(model.nested, MyModel) - assert transform(model, Any) == {"nested": {"foo": "stainless"}} + assert cast(Any, await transform(model, Any, use_async)) == {"nested": {"foo": "stainless"}} class ModelWithDefaultField(BaseModel): @@ -247,21 +320,91 @@ class ModelWithDefaultField(BaseModel): with_str_default: str = "foo" -def test_pydantic_default_field() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_default_field(use_async: bool) -> None: # should be excluded when defaults are used model = ModelWithDefaultField.construct() assert model.with_none_default is None assert model.with_str_default == "foo" - assert transform(model, Any) == {} + assert cast(Any, await transform(model, Any, use_async)) == {} # should be included when the default value is explicitly given model = ModelWithDefaultField.construct(with_none_default=None, with_str_default="foo") assert model.with_none_default is None assert model.with_str_default == "foo" - assert transform(model, Any) == {"with_none_default": None, "with_str_default": "foo"} + assert cast(Any, await transform(model, Any, use_async)) == {"with_none_default": None, "with_str_default": "foo"} # should be included when a non-default value is explicitly given model = ModelWithDefaultField.construct(with_none_default="bar", with_str_default="baz") assert model.with_none_default == "bar" assert model.with_str_default == "baz" - assert transform(model, Any) == {"with_none_default": "bar", "with_str_default": "baz"} + assert cast(Any, await transform(model, Any, use_async)) == {"with_none_default": "bar", "with_str_default": "baz"} + + +class TypedDictIterableUnion(TypedDict): + foo: Annotated[Union[Bar8, Iterable[Baz8]], PropertyInfo(alias="FOO")] + + +class Bar8(TypedDict): + foo_bar: Annotated[str, PropertyInfo(alias="fooBar")] + + +class Baz8(TypedDict): + foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] + + +@parametrize +@pytest.mark.asyncio +async def test_iterable_of_dictionaries(use_async: bool) -> None: + assert await transform({"foo": [{"foo_baz": "bar"}]}, TypedDictIterableUnion, use_async) == { + "FOO": [{"fooBaz": "bar"}] + } + assert cast(Any, await transform({"foo": ({"foo_baz": "bar"},)}, TypedDictIterableUnion, use_async)) == { + "FOO": [{"fooBaz": "bar"}] + } + + def my_iter() -> Iterable[Baz8]: + yield {"foo_baz": "hello"} + yield {"foo_baz": "world"} + + assert await transform({"foo": my_iter()}, TypedDictIterableUnion, use_async) == { + "FOO": [{"fooBaz": "hello"}, {"fooBaz": "world"}] + } + + +class TypedDictIterableUnionStr(TypedDict): + foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")] + + +@parametrize +@pytest.mark.asyncio +async def test_iterable_union_str(use_async: bool) -> None: + assert await transform({"foo": "bar"}, TypedDictIterableUnionStr, use_async) == {"FOO": "bar"} + assert cast(Any, await transform(iter([{"foo_baz": "bar"}]), Union[str, Iterable[Baz8]], use_async)) == [ + {"fooBaz": "bar"} + ] + + +class TypedDictBase64Input(TypedDict): + foo: 
+    foo: Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_base64_file_input(use_async: bool) -> None:
+    # strings are left as-is
+    assert await transform({"foo": "bar"}, TypedDictBase64Input, use_async) == {"foo": "bar"}
+
+    # pathlib.Path is automatically converted to base64
+    assert await transform({"foo": SAMPLE_FILE_PATH}, TypedDictBase64Input, use_async) == {
+        "foo": "SGVsbG8sIHdvcmxkIQo="
+    }  # type: ignore[comparison-overlap]
+
+    # io instances are automatically converted to base64
+    assert await transform({"foo": io.StringIO("Hello, world!")}, TypedDictBase64Input, use_async) == {
+        "foo": "SGVsbG8sIHdvcmxkIQ=="
+    }  # type: ignore[comparison-overlap]
+    assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == {
+        "foo": "SGVsbG8sIHdvcmxkIQ=="
+    }  # type: ignore[comparison-overlap]
diff --git a/tests/utils.py b/tests/utils.py
index 216b333550..060b99339f 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -14,6 +14,8 @@
     is_list,
     is_list_type,
     is_union_type,
+    extract_type_arg,
+    is_annotated_type,
 )
 from openai._compat import PYDANTIC_V2, field_outer_type, get_model_fields
 from openai._models import BaseModel
@@ -49,6 +51,10 @@ def assert_matches_type(
     path: list[str],
     allow_none: bool = False,
 ) -> None:
+    # unwrap `Annotated[T, ...]` -> `T`
+    if is_annotated_type(type_):
+        type_ = extract_type_arg(type_, 0)
+
     if allow_none and value is None:
         return
 
@@ -91,7 +97,22 @@ def assert_matches_type(
             assert_matches_type(key_type, key, path=[*path, "<dict key>"])
             assert_matches_type(items_type, item, path=[*path, "<dict item>"])
     elif is_union_type(type_):
-        for i, variant in enumerate(get_args(type_)):
+        variants = get_args(type_)
+
+        try:
+            none_index = variants.index(type(None))
+        except ValueError:
+            pass
+        else:
+            # special case Optional[T] for better error messages
+            if len(variants) == 2:
+                if value is None:
+                    # valid
+                    return
+
+                return assert_matches_type(type_=variants[not none_index], value=value, path=path)
+
+        for i, variant in enumerate(variants):
             try:
                 assert_matches_type(variant, value, path=[*path, f"variant {i}"])
                 return