Skip to content

Commit 0ca70d0

Browse files
feat(xai): add Grok Imagine for image and video generation (withceleste#128)
* feat(xai): add grok-imagine-image and grok-imagine-video models

  Add xAI Grok Imagine support for image and video generation:

  Images (grok-imagine-image):
  - Generate and edit operations
  - Parameters: aspect_ratio, num_images, output_format
  - Aspect ratios: 1:1, 3:4, 4:3, 9:16, 16:9, 2:3, 3:2, and more

  Videos (grok-imagine-video):
  - Generate and edit operations
  - Async polling pattern (HTTP 200=ready, 202=processing)
  - Parameters: duration (1-15s), aspect_ratio, resolution

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* fix(xai): serialize image artifacts and validate video URLs

  - Image edit: serialize ImageArtifact to URL or base64 string instead of passing object directly (xAI API expects string)
  - Video edit: validate video has URL before using, raise clear error if not (xAI only supports URL, not base64/path)

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* fix(xai): remove client-side video URL validation

  Let the xAI API handle validation for video edit requests.

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4381e8a commit 0ca70d0

File tree

27 files changed

+832
-34
lines changed

27 files changed

+832
-34
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,4 @@ uv.lock
159159
bandit-report.json
160160

161161
scripts/
162+
assets/

notebooks/working-with-images.ipynb

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020
},
2121
{
2222
"cell_type": "code",
23+
"execution_count": null,
2324
"metadata": {},
25+
"outputs": [],
2426
"source": [
2527
"import celeste\n",
2628
"from IPython.display import Image, display"
27-
],
28-
"outputs": [],
29-
"execution_count": null
29+
]
3030
},
3131
{
3232
"cell_type": "markdown",
@@ -41,24 +41,24 @@
4141
},
4242
{
4343
"cell_type": "code",
44+
"execution_count": null,
4445
"metadata": {},
46+
"outputs": [],
4547
"source": [
4648
"img_gen_result = await celeste.images.generate(\n",
4749
" \"A nano banana on the beach\",\n",
4850
" model=\"gemini-2.5-flash-image\",\n",
4951
")"
50-
],
51-
"outputs": [],
52-
"execution_count": null
52+
]
5353
},
5454
{
5555
"cell_type": "code",
56+
"execution_count": null,
5657
"metadata": {},
58+
"outputs": [],
5759
"source": [
5860
"display(Image(data=img_gen_result.content.data))"
59-
],
60-
"outputs": [],
61-
"execution_count": null
61+
]
6262
},
6363
{
6464
"cell_type": "markdown",
@@ -73,25 +73,25 @@
7373
},
7474
{
7575
"cell_type": "code",
76+
"execution_count": null,
7677
"metadata": {},
78+
"outputs": [],
7779
"source": [
7880
"img_edit_result = await celeste.images.edit(\n",
7981
" image=img_gen_result.content,\n",
8082
" prompt=\"Make it night time\",\n",
8183
" model=\"gemini-2.5-flash-image\",\n",
8284
")"
83-
],
84-
"outputs": [],
85-
"execution_count": null
85+
]
8686
},
8787
{
8888
"cell_type": "code",
89+
"execution_count": null,
8990
"metadata": {},
91+
"outputs": [],
9092
"source": [
9193
"display(Image(data=img_edit_result.content.data))"
92-
],
93-
"outputs": [],
94-
"execution_count": null
94+
]
9595
},
9696
{
9797
"cell_type": "markdown",
@@ -106,25 +106,25 @@
106106
},
107107
{
108108
"cell_type": "code",
109+
"execution_count": null,
109110
"metadata": {},
111+
"outputs": [],
110112
"source": [
111113
"analyze_result = await celeste.images.analyze(\n",
112114
" prompt=\"What fruit is in this image and what color is it?\",\n",
113115
" image=img_gen_result.content,\n",
114116
" model=\"gemini-2.5-flash-lite\",\n",
115117
")"
116-
],
117-
"outputs": [],
118-
"execution_count": null
118+
]
119119
},
120120
{
121121
"cell_type": "code",
122+
"execution_count": null,
122123
"metadata": {},
124+
"outputs": [],
123125
"source": [
124126
"print(analyze_result.content)"
125-
],
126-
"outputs": [],
127-
"execution_count": null
127+
]
128128
},
129129
{
130130
"cell_type": "markdown",
@@ -159,7 +159,9 @@
159159
},
160160
{
161161
"cell_type": "code",
162+
"execution_count": null,
162163
"metadata": {},
164+
"outputs": [],
163165
"source": [
164166
"prompt = \"A blurry iPhone-style photograph showing the window of a moving train. Through the window, a scenic landscape appears: tall green cliffs running alongside a river, with a small European village built on the slopes. The motion blur suggests the train is moving quickly, with soft reflections on the glass, natural daylight, and a casual handheld phone-camera aesthetic. Sharp textures where possible, rich colors, and a realistic sense of depth and distance.\"\n",
165167
"\n",
@@ -170,13 +172,11 @@
170172
" steps=1,\n",
171173
")\n",
172174
"display(Image(data=local_result.content.data))"
173-
],
174-
"outputs": [],
175-
"execution_count": null
175+
]
176176
},
177177
{
178-
"metadata": {},
179178
"cell_type": "markdown",
179+
"metadata": {},
180180
"source": [
181181
"---\n",
182182
"\n",
@@ -187,7 +187,9 @@
187187
},
188188
{
189189
"cell_type": "code",
190+
"execution_count": null,
190191
"metadata": {},
192+
"outputs": [],
191193
"source": [
192194
"from tqdm.asyncio import tqdm\n",
193195
"\n",
@@ -204,24 +206,22 @@
204206
" pass\n",
205207
"\n",
206208
"display(Image(data=chunk.content.data))"
207-
],
208-
"outputs": [],
209-
"execution_count": null
209+
]
210210
},
211211
{
212-
"metadata": {},
213212
"cell_type": "markdown",
213+
"metadata": {},
214214
"source": [
215215
"---\n",
216216
"Star on GitHub 👉 [withceleste/celeste-python](https://github.com/withceleste/celeste-python)"
217217
]
218218
},
219219
{
220-
"metadata": {},
221220
"cell_type": "code",
222-
"outputs": [],
223221
"execution_count": null,
224-
"source": ""
222+
"metadata": {},
223+
"outputs": [],
224+
"source": []
225225
}
226226
],
227227
"metadata": {

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "celeste-ai"
3-
version = "0.9.5"
3+
version = "0.9.6"
44
description = "Open source, type-safe primitives for multi-modal AI. All capabilities, all providers, one interface"
55
authors = [{name = "Kamilbenkirane", email = "kamil@withceleste.ai"}]
66
readme = "README.md"

src/celeste/modalities/images/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
from .providers.byteplus.models import MODELS as BYTEPLUS_MODELS
77
from .providers.google.models import MODELS as GOOGLE_MODELS
88
from .providers.openai.models import MODELS as OPENAI_MODELS
9+
from .providers.xai.models import MODELS as XAI_MODELS
910

1011
MODELS: list[Model] = [
1112
*BFL_MODELS,
1213
*BYTEPLUS_MODELS,
1314
*GOOGLE_MODELS,
1415
*OPENAI_MODELS,
16+
*XAI_MODELS,
1517
]

src/celeste/modalities/images/providers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
from .google import GoogleImagesClient
99
from .ollama import OllamaImagesClient
1010
from .openai import OpenAIImagesClient
11+
from .xai import XAIImagesClient
1112

1213
PROVIDERS: dict[Provider, type[ImagesClient]] = {
1314
Provider.BFL: BFLImagesClient,
1415
Provider.BYTEPLUS: BytePlusImagesClient,
1516
Provider.GOOGLE: GoogleImagesClient,
1617
Provider.OLLAMA: OllamaImagesClient,
1718
Provider.OPENAI: OpenAIImagesClient,
19+
Provider.XAI: XAIImagesClient,
1820
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""xAI provider for images modality."""
2+
3+
from .client import XAIImagesClient
4+
from .models import MODELS
5+
6+
__all__ = ["MODELS", "XAIImagesClient"]
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
"""xAI images client."""
2+
3+
from typing import Any, Unpack
4+
5+
from celeste.artifacts import ImageArtifact
6+
from celeste.parameters import ParameterMapper
7+
from celeste.providers.xai.images import config
8+
from celeste.providers.xai.images.client import XAIImagesClient as XAIImagesMixin
9+
10+
from ...client import ImagesClient
11+
from ...io import (
12+
ImageFinishReason,
13+
ImageInput,
14+
ImageOutput,
15+
ImageUsage,
16+
)
17+
from ...parameters import ImageParameters
18+
from .parameters import XAI_PARAMETER_MAPPERS
19+
20+
21+
class XAIImagesClient(XAIImagesMixin, ImagesClient):
    """xAI images client.

    Combines the xAI provider mixin with the modality-level ``ImagesClient``
    and exposes the ``generate`` and ``edit`` operations.
    """

    @classmethod
    def parameter_mappers(cls) -> list[ParameterMapper]:
        """Return the parameter mappers applied to xAI image requests."""
        return XAI_PARAMETER_MAPPERS

    def _init_request(self, inputs: ImageInput) -> dict[str, Any]:
        """Build the base request payload from the inputs.

        The payload always carries the prompt. When an input image is
        present it is serialized to a string: its URL if one is set,
        otherwise the result of ``get_base64()``.
        """
        request: dict[str, Any] = {"prompt": inputs.prompt}
        if inputs.image is not None:
            # xAI accepts URL or base64 string
            if inputs.image.url:
                request["image"] = inputs.image.url
            else:
                request["image"] = inputs.image.get_base64()
        return request

    async def generate(
        self,
        prompt: str,
        **parameters: Unpack[ImageParameters],
    ) -> ImageOutput:
        """Generate images from prompt."""
        inputs = ImageInput(prompt=prompt)
        return await self._predict(
            inputs,
            endpoint=config.XAIImagesEndpoint.CREATE_IMAGE,
            **parameters,
        )

    async def edit(
        self,
        image: ImageArtifact,
        prompt: str,
        **parameters: Unpack[ImageParameters],
    ) -> ImageOutput:
        """Edit an image with text instructions."""
        inputs = ImageInput(image=image, prompt=prompt)
        return await self._predict(
            inputs,
            endpoint=config.XAIImagesEndpoint.CREATE_EDIT,
            **parameters,
        )

    def _parse_usage(self, response_data: dict[str, Any]) -> ImageUsage:
        """Parse usage from response.

        The parent class's parsed usage is unpacked as keyword arguments
        into ``ImageUsage``.
        """
        usage = super()._parse_usage(response_data)
        return ImageUsage(**usage)

    def _parse_content(
        self,
        response_data: dict[str, Any],
        **parameters: Unpack[ImageParameters],
    ) -> ImageArtifact:
        """Parse content from response.

        Returns an ``ImageArtifact`` built from the first entry of the data
        parsed by the parent class: decoded bytes when ``b64_json`` is
        present, otherwise a URL-backed artifact when ``url`` is present.

        Raises:
            ValueError: If the parsed data is empty, or its first entry has
                neither ``b64_json`` nor ``url``.
        """
        import base64

        data = super()._parse_content(response_data)
        if not data:
            # An empty result would otherwise surface as a bare IndexError
            # on data[0]; report it with the same error as a missing payload.
            msg = "No image URL or base64 data in response"
            raise ValueError(msg)

        # NOTE(review): only the first entry is returned even when several
        # images were requested -- confirm this is intended.
        image_data = data[0]

        # xAI returns either b64_json or url
        b64_json = image_data.get("b64_json")
        if b64_json:
            image_bytes = base64.b64decode(b64_json)
            return ImageArtifact(data=image_bytes)

        url = image_data.get("url")
        if url:
            return ImageArtifact(url=url)

        msg = "No image URL or base64 data in response"
        raise ValueError(msg)

    def _parse_finish_reason(self, response_data: dict[str, Any]) -> ImageFinishReason:
        """Parse finish reason from response.

        Re-wraps the ``reason`` of the parent class's finish reason in an
        ``ImageFinishReason``.
        """
        finish_reason = super()._parse_finish_reason(response_data)
        return ImageFinishReason(reason=finish_reason.reason)
99+
100+
101+
__all__ = ["XAIImagesClient"]
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""xAI models for images modality."""
2+
3+
from celeste.constraints import Choice, Range
4+
from celeste.core import Modality, Operation, Provider
5+
from celeste.models import Model
6+
7+
from ...parameters import ImageParameter
8+
9+
# Aspect ratios accepted by grok-imagine-image ("auto" is passed through as-is).
_GROK_IMAGINE_IMAGE_ASPECT_RATIOS: list[str] = [
    "1:1",
    "3:4",
    "4:3",
    "9:16",
    "16:9",
    "2:3",
    "3:2",
    "9:19.5",
    "19.5:9",
    "9:20",
    "20:9",
    "1:2",
    "2:1",
    "auto",
]

# Output formats accepted by grok-imagine-image.
_GROK_IMAGINE_IMAGE_OUTPUT_FORMATS: list[str] = ["url", "b64_json"]

# Registry of xAI models available for the images modality.
MODELS: list[Model] = [
    Model(
        id="grok-imagine-image",
        provider=Provider.XAI,
        display_name="Grok Imagine Image",
        operations={Modality.IMAGES: {Operation.GENERATE, Operation.EDIT}},
        parameter_constraints={
            ImageParameter.NUM_IMAGES: Range(min=1, max=10),
            ImageParameter.ASPECT_RATIO: Choice(
                options=_GROK_IMAGINE_IMAGE_ASPECT_RATIOS,
            ),
            ImageParameter.OUTPUT_FORMAT: Choice(
                options=_GROK_IMAGINE_IMAGE_OUTPUT_FORMATS,
            ),
        },
    ),
]

0 commit comments

Comments
 (0)