Skip to content

Commit eeea2bb

Browse files
authored
🤖 Add image pasting support to chat input (#151)
Enables pasting images directly into the chat input box for multimodal AI conversations.

## Changes

**Frontend**
- New `ImageAttachments` component displays pasted images as thumbnails with remove buttons (80x80px in input, 300x300px max in messages)
- `ChatInput` handles paste events to extract images from clipboard
- Images converted to base64 data URLs for preview and transmission
- Updated send logic to allow sending messages with only images (no text required)
- `UserMessage` component now renders inline images in chat history

**Backend**
- Extended `CmuxMessage` type with `CmuxImagePart` (image data + mimeType)
- Updated IPC `sendMessage` to accept optional `imageParts` array
- Modified message construction in `ipcMain` to include image parts
- `StreamingMessageAggregator` extracts image parts when converting to `DisplayedMessage`
- Images flow through AI SDK's `convertToModelMessages` to provider APIs

**Type Updates**
- `DisplayedMessage` type for user messages now includes optional `imageParts` field
- Image parts properly preserved through entire message pipeline (input → IPC → history → aggregator → UI)

## Implementation Notes

The AI SDK automatically converts image parts to the appropriate format for each provider (Anthropic, OpenAI, Google, etc.), so no provider-specific handling is needed.

Image attachments are:
- Cleared on successful send or when explicitly removed by user
- Displayed as thumbnails in input area before sending
- Rendered inline in message history after sending
- Included in chat history for context in subsequent messages

## Testing

Manually tested:
1. Copy image to clipboard
2. Paste into chat input (Cmd+V / Ctrl+V)
3. Verify thumbnail appears with remove button
4. Send message and confirm image renders in chat
5. Verify AI can see and respond to image content

_Generated with `cmux`_
1 parent 71f7749 commit eeea2bb

File tree

8 files changed

+212
-18
lines changed

8 files changed

+212
-18
lines changed

src/components/ChatInput.tsx

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { matchesKeybind, formatKeybind, KEYBINDS, isEditableElement } from "@/ut
2323
import { ModelSelector, type ModelSelectorRef } from "./ModelSelector";
2424
import { useModelLRU } from "@/hooks/useModelLRU";
2525
import { VimTextArea } from "./VimTextArea";
26+
import { ImageAttachments, type ImageAttachment } from "./ImageAttachments";
2627

2728
import type { ThinkingLevel } from "@/types/thinking";
2829

@@ -293,6 +294,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
293294
const [commandSuggestions, setCommandSuggestions] = useState<SlashSuggestion[]>([]);
294295
const [providerNames, setProviderNames] = useState<string[]>([]);
295296
const [toast, setToast] = useState<Toast | null>(null);
297+
const [imageAttachments, setImageAttachments] = useState<ImageAttachment[]>([]);
296298
const handleToastDismiss = useCallback(() => {
297299
setToast(null);
298300
}, []);
@@ -450,6 +452,42 @@ export const ChatInput: React.FC<ChatInputProps> = ({
450452
window.removeEventListener(CUSTOM_EVENTS.THINKING_LEVEL_TOAST, handler as EventListener);
451453
}, [workspaceId, setToast]);
452454

455+
/**
 * Paste handler for the chat textarea: turns each image on the clipboard into
 * a pending attachment.
 *
 * Every clipboard item whose MIME type starts with "image/" is read as a
 * base64 data URL and appended to `imageAttachments`. Other items are skipped,
 * so a paste that contains no image keeps the textarea's default behavior.
 */
const handlePaste = useCallback((e: React.ClipboardEvent<HTMLTextAreaElement>) => {
  const items = e.clipboardData?.items;
  if (!items) return;

  for (const item of Array.from(items)) {
    if (!item.type.startsWith("image/")) continue;

    // An image item is being handled here, so suppress the default paste.
    e.preventDefault();

    const file = item.getAsFile();
    if (!file) continue;

    // Convert to a base64 data URL; the read completes asynchronously.
    const reader = new FileReader();
    reader.onload = () => {
      // Narrow at runtime instead of asserting: only a non-empty string is a
      // usable data URL.
      const dataUrl = reader.result;
      if (typeof dataUrl !== "string" || dataUrl.length === 0) return;

      const attachment: ImageAttachment = {
        // substring(2, 11) yields the same 9 characters the deprecated
        // substr(2, 9) did.
        id: `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`,
        dataUrl,
        mimeType: file.type,
      };
      // Functional update so images finishing out of order are all kept.
      setImageAttachments((prev) => [...prev, attachment]);
    };
    reader.readAsDataURL(file);
  }
}, []);
485+
486+
// Drop the pending attachment whose id matches; all others are kept in order.
const handleRemoveImage = useCallback((id: string) => {
  setImageAttachments((current) => current.filter((attachment) => attachment.id !== id));
}, []);
490+
453491
// Handle command selection
454492
const handleCommandSelect = useCallback(
455493
(suggestion: SlashSuggestion) => {
@@ -461,7 +499,9 @@ export const ChatInput: React.FC<ChatInputProps> = ({
461499
);
462500

463501
const handleSend = async () => {
464-
if (!input.trim() || disabled || isSending || isCompacting) return;
502+
// Allow sending if there's text or images
503+
if ((!input.trim() && imageAttachments.length === 0) || disabled || isSending || isCompacting)
504+
return;
465505

466506
const messageText = input.trim();
467507

@@ -608,9 +648,16 @@ export const ChatInput: React.FC<ChatInputProps> = ({
608648
setIsSending(true);
609649

610650
try {
651+
// Prepare image parts if any
652+
const imageParts = imageAttachments.map((img) => ({
653+
image: img.dataUrl,
654+
mimeType: img.mimeType,
655+
}));
656+
611657
const result = await window.api.workspace.sendMessage(workspaceId, messageText, {
612658
...sendMessageOptions,
613659
editMessageId: editingMessage?.id,
660+
imageParts: imageParts.length > 0 ? imageParts : undefined,
614661
});
615662

616663
if (!result.success) {
@@ -621,8 +668,9 @@ export const ChatInput: React.FC<ChatInputProps> = ({
621668
// Restore input on error so user can try again
622669
setInput(messageText);
623670
} else {
624-
// Success - clear input
671+
// Success - clear input and images
625672
setInput("");
673+
setImageAttachments([]);
626674
// Reset textarea height
627675
if (inputRef.current) {
628676
inputRef.current.style.height = "36px";
@@ -743,6 +791,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
743791
mode={mode}
744792
onChange={setInput}
745793
onKeyDown={handleKeyDown}
794+
onPaste={handlePaste}
746795
suppressKeys={showCommandSuggestions ? COMMAND_SUGGESTION_KEYS : undefined}
747796
placeholder={placeholder}
748797
disabled={disabled || isSending || isCompacting}
@@ -754,6 +803,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
754803
aria-expanded={showCommandSuggestions && commandSuggestions.length > 0}
755804
/>
756805
</InputControls>
806+
<ImageAttachments images={imageAttachments} onRemove={handleRemoveImage} />
757807
<ModeToggles data-component="ChatModeToggles">
758808
{editingMessage && (
759809
<EditingIndicator>
src/components/ImageAttachments.tsx

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import React from "react";
2+
import styled from "@emotion/styled";
3+
4+
/** Wrapping flex row that holds the attachment thumbnails. */
const AttachmentsContainer = styled.div`
  padding: 8px 0;
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
`;

/** Fixed 80x80 frame for one thumbnail; positioned so the remove button can overlay it. */
const ImagePreview = styled.div`
  width: 80px;
  height: 80px;
  position: relative;
  overflow: hidden;
  background: #1e1e1e;
  border: 1px solid #3e3e42;
  border-radius: 4px;
`;

/** The thumbnail image itself, cropped to fill its frame. */
const PreviewImage = styled.img`
  object-fit: cover;
  width: 100%;
  height: 100%;
`;

/** Small round button pinned to the top-right corner of a thumbnail. */
const RemoveButton = styled.button`
  position: absolute;
  top: 4px;
  right: 4px;

  width: 20px;
  height: 20px;
  padding: 0;
  border: none;
  border-radius: 50%;

  display: flex;
  align-items: center;
  justify-content: center;

  color: white;
  background: rgba(0, 0, 0, 0.7);
  font-size: 14px;
  line-height: 1;
  cursor: pointer;

  &:hover {
    background: rgba(0, 0, 0, 0.9);
  }
`;
49+
50+
/** One image attached to the message being composed. */
export interface ImageAttachment {
  /** Identifier for this attachment. */
  id: string;
  /** Image content as a data URL. */
  dataUrl: string;
  /** MIME type of the image. */
  mimeType: string;
}

/** Props accepted by the attachment thumbnail list. */
interface ImageAttachmentsProps {
  /** Attachments to display. */
  images: ImageAttachment[];
  /** Callback that receives the id of the attachment to remove. */
  onRemove: (id: string) => void;
}
60+
61+
/**
 * Thumbnail strip for the images attached to the message being composed.
 *
 * Renders nothing when `images` is empty. Otherwise renders one preview per
 * image, each with a remove button that calls `onRemove` with that image's id.
 */
export const ImageAttachments: React.FC<ImageAttachmentsProps> = ({ images, onRemove }) => {
  if (images.length === 0) return null;

  return (
    <AttachmentsContainer>
      {images.map((image) => (
        <ImagePreview key={image.id}>
          <PreviewImage src={image.dataUrl} alt="Attached image" />
          {/* type="button" stops this acting as a submit button if it is ever
              rendered inside a form; aria-label gives assistive technology a
              name other than the "×" glyph. */}
          <RemoveButton
            type="button"
            onClick={() => onRemove(image.id)}
            title="Remove image"
            aria-label="Remove image"
          >
            ×
          </RemoveButton>
        </ImagePreview>
      ))}
    </AttachmentsContainer>
  );
};

src/components/Messages/UserMessage.tsx

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,20 @@ const FormattedContent = styled.pre`
1616
opacity: 0.9;
1717
`;
1818

19+
/** Wrapping flex row for a message's images, with space above it. */
const ImageContainer = styled.div`
  margin-top: 8px;
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
`;

/** A single inline image, capped at 300x300 pixels. */
const MessageImage = styled.img`
  max-height: 300px;
  max-width: 300px;
  border: 1px solid #3e3e42;
  border-radius: 4px;
`;
32+
1933
interface UserMessageProps {
2034
message: DisplayedMessage & { type: "user" };
2135
className?: string;
@@ -91,7 +105,14 @@ export const UserMessage: React.FC<UserMessageProps> = ({ message, className, on
91105
buttons={buttons}
92106
className={className}
93107
>
94-
<FormattedContent>{content}</FormattedContent>
108+
{content && <FormattedContent>{content}</FormattedContent>}
109+
{message.imageParts && message.imageParts.length > 0 && (
110+
<ImageContainer>
111+
{message.imageParts.map((img, idx) => (
112+
<MessageImage key={idx} src={img.image} alt={`Attachment ${idx + 1}`} />
113+
))}
114+
</ImageContainer>
115+
)}
95116
</MessageWindow>
96117
);
97118
};

src/services/ipcMain.ts

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,12 @@ export class IpcMain {
492492

493493
ipcMain.handle(
494494
IPC_CHANNELS.WORKSPACE_SEND_MESSAGE,
495-
async (_event, workspaceId: string, message: string, options?: SendMessageOptions) => {
495+
async (
496+
_event,
497+
workspaceId: string,
498+
message: string,
499+
options?: SendMessageOptions & { imageParts?: Array<{ image: string; mimeType: string }> }
500+
) => {
496501
const {
497502
editMessageId,
498503
thinkingLevel,
@@ -501,6 +506,7 @@ export class IpcMain {
501506
additionalSystemInstructions,
502507
maxOutputTokens,
503508
providerOptions,
509+
imageParts,
504510
} = options ?? {};
505511
log.debug("sendMessage handler: Received", {
506512
workspaceId,
@@ -514,9 +520,9 @@ export class IpcMain {
514520
providerOptions,
515521
});
516522
try {
517-
// Early exit: empty message = either interrupt (if streaming) or invalid input
523+
// Early exit: empty message and no images = either interrupt (if streaming) or invalid input
518524
// This prevents race conditions where empty messages arrive after streaming stops
519-
if (!message.trim()) {
525+
if (!message.trim() && (!imageParts || imageParts.length === 0)) {
520526
// If streaming, this is an interrupt request (from Esc key)
521527
if (this.aiService.isStreaming(workspaceId)) {
522528
log.debug("sendMessage handler: Empty message during streaming, interrupting");
@@ -553,13 +559,30 @@ export class IpcMain {
553559
// replacement automatically when the new message arrives with the same historySequence
554560
}
555561

556-
// Create user message
562+
// Create user message with text and optional image parts
557563
const messageId = `user-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
558-
const userMessage = createCmuxMessage(messageId, "user", message, {
559-
// historySequence will be assigned by historyService.appendToHistory()
560-
timestamp: Date.now(),
561-
toolPolicy, // Store for historical record and compaction detection
562-
});
564+
const additionalParts = imageParts?.map((img) => ({
565+
type: "image" as const,
566+
image: img.image,
567+
mimeType: img.mimeType,
568+
}));
569+
if (additionalParts && additionalParts.length > 0) {
570+
log.debug("sendMessage: Creating message with images", {
571+
imageCount: additionalParts.length,
572+
mimeTypes: additionalParts.map((p) => p.mimeType),
573+
});
574+
}
575+
const userMessage = createCmuxMessage(
576+
messageId,
577+
"user",
578+
message,
579+
{
580+
// historySequence will be assigned by historyService.appendToHistory()
581+
timestamp: Date.now(),
582+
toolPolicy, // Store for historical record and compaction detection
583+
},
584+
additionalParts
585+
);
563586

564587
// Append user message to history
565588
const appendResult = await this.historyService.appendToHistory(workspaceId, userMessage);

src/types/ipc.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ export interface IPCApi {
177177
sendMessage(
178178
workspaceId: string,
179179
message: string,
180-
options?: SendMessageOptions
180+
options?: SendMessageOptions & { imageParts?: Array<{ image: string; mimeType: string }> }
181181
): Promise<Result<void, SendMessageError>>;
182182
resumeStream(
183183
workspaceId: string,

src/types/message.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,17 @@ export interface CmuxReasoningPart {
4343
text: string;
4444
}
4545

46+
/** Image part of a multimodal message. */
export interface CmuxImagePart {
  /** Discriminant marking this part as an image. */
  type: "image";
  /** Image payload: a base64 string, binary data, or a URL. */
  image: string | Uint8Array | ArrayBuffer | URL;
  /** MIME type of the image, e.g. "image/png" or "image/jpeg". */
  mimeType?: string;
}
52+
4653
// CmuxMessage extends UIMessage with our metadata and custom parts
47-
// Supports text, reasoning, and tool parts (including interrupted tool calls)
54+
// Supports text, reasoning, image, and tool parts (including interrupted tool calls)
4855
export type CmuxMessage = Omit<UIMessage<CmuxMetadata, never, never>, "parts"> & {
49-
parts: Array<CmuxTextPart | CmuxReasoningPart | CmuxToolPart>;
56+
parts: Array<CmuxTextPart | CmuxReasoningPart | CmuxImagePart | CmuxToolPart>;
5057
};
5158

5259
// DisplayedMessage represents a single UI message block
@@ -57,6 +64,7 @@ export type DisplayedMessage =
5764
id: string; // Display ID for UI/React keys
5865
historyId: string; // Original CmuxMessage ID for history operations
5966
content: string;
67+
imageParts?: Array<{ image: string; mimeType?: string }>; // Optional image attachments
6068
historySequence: number; // Global ordering across all messages
6169
timestamp?: number;
6270
}

src/utils/messages/StreamingMessageAggregator.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,17 +415,25 @@ export class StreamingMessageAggregator {
415415
const historySequence = message.metadata?.historySequence ?? 0;
416416

417417
if (message.role === "user") {
418-
// User messages: combine all text parts into single block
418+
// User messages: combine all text parts into single block, extract images
419419
const content = message.parts
420420
.filter((p) => p.type === "text")
421421
.map((p) => p.text)
422422
.join("");
423423

424+
const imageParts = message.parts
425+
.filter((p) => p.type === "image")
426+
.map((p) => ({
427+
image: typeof p.image === "string" ? p.image : "",
428+
mimeType: p.mimeType,
429+
}));
430+
424431
displayedMessages.push({
425432
type: "user",
426433
id: message.id,
427434
historyId: message.id,
428435
content,
436+
imageParts: imageParts.length > 0 ? imageParts : undefined,
429437
historySequence,
430438
timestamp: baseTimestamp,
431439
});

src/utils/messages/modelMessageTransform.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,10 +413,18 @@ function mergeConsecutiveUserMessages(messages: ModelMessage[]): ModelMessage[]
413413
// Merge with newline prefix
414414
const mergedText = prevText + "\n" + currentText;
415415

416-
// Update the previous message
416+
// Collect image parts from both messages
417+
const prevImageParts = Array.isArray(prevMsg.content)
418+
? prevMsg.content.filter((c) => c.type === "image")
419+
: [];
420+
const currentImageParts = Array.isArray(msg.content)
421+
? msg.content.filter((c) => c.type === "image")
422+
: [];
423+
424+
// Update the previous message with merged text and all image parts
417425
merged[merged.length - 1] = {
418426
role: "user",
419-
content: [{ type: "text", text: mergedText }],
427+
content: [{ type: "text", text: mergedText }, ...prevImageParts, ...currentImageParts],
420428
};
421429
} else {
422430
// Not consecutive user message, add as-is

0 commit comments

Comments
 (0)