import { describe, expect, test } from "bun:test";
import { applyCacheControl } from "./cacheStrategy";
import type { ModelMessage } from "ai";

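// What these tests pin down about the strategy (inferred from the assertions
// below rather than from Anthropic's documentation): caching is applied only
// for Anthropic model ids and only with 2+ messages; Sonnet needs roughly
// 1024 tokens (~4 chars/token) of cacheable content while Haiku 3.5 needs
// 2048; at most 4 breakpoints are placed; system prompts are cached with a
// 1h TTL, conversation-tail breakpoints with 5m; the final message is never cached.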
describe("applyCacheControl", () => {
  test("should not apply cache control for non-Anthropic models", () => {
    const messages: ModelMessage[] = [
      { role: "user", content: "Hello" },
      { role: "assistant", content: "Hi there" },
    ];

    const result = applyCacheControl(messages, "openai:gpt-5");
    expect(result).toEqual(messages);
  });

  test("should not apply cache control with fewer than 2 messages", () => {
    const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];

    const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5");
    expect(result).toEqual(messages);
  });

  test("should apply single cache breakpoint for short conversation", () => {
    const messages: ModelMessage[] = [
| 25 | + { role: "user", content: "What is the capital of France? ".repeat(200) }, // ~6400 chars > 1024 tokens |
| 26 | + { role: "assistant", content: "Paris is the capital. ".repeat(100) }, |
| 27 | + { role: "user", content: "What about Germany?" }, |
| 28 | + ]; |
| 29 | + |
| 30 | + const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5"); |
| 31 | + |
| 32 | + // With the improved strategy, should cache at index 1 (second-to-last message) |
| 33 | + // First message may also be cached if it has enough content |
| 34 | + const hasCaching = result.some((msg) => msg.providerOptions?.anthropic?.cacheControl); |
| 35 | + expect(hasCaching).toBe(true); |
| 36 | + |
| 37 | + // The last message (current user input) should never be cached |
| 38 | + expect(result[2].providerOptions?.anthropic?.cacheControl).toBeUndefined(); |
| 39 | + }); |
| 40 | + |
| 41 | + test("should cache system message with 1h TTL", () => { |
| 42 | + const largeSystemPrompt = "You are a helpful assistant. ".repeat(200); // ~6000 chars |
| 43 | + const messages: ModelMessage[] = [ |
| 44 | + { role: "system", content: largeSystemPrompt }, |
| 45 | + { role: "user", content: "Hello" }, |
| 46 | + { role: "assistant", content: "Hi!" }, |
| 47 | + { role: "user", content: "How are you?" }, |
| 48 | + ]; |
| 49 | + |
| 50 | + const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5"); |
| 51 | + |
| 52 | + // System message should be cached with 1h TTL |
| 53 | + expect(result[0].providerOptions?.anthropic?.cacheControl).toEqual({ |
| 54 | + type: "ephemeral", |
| 55 | + ttl: "1h", |
| 56 | + }); |
| 57 | + |
| 58 | + // Should also cache before last message with 5m TTL |
| 59 | + expect(result[2].providerOptions?.anthropic?.cacheControl).toEqual({ |
| 60 | + type: "ephemeral", |
| 61 | + ttl: "5m", |
| 62 | + }); |
| 63 | + }); |
| 64 | + |
| 65 | + test("should apply multiple breakpoints for long conversation", () => { |
| 66 | + const messages: ModelMessage[] = [ |
| 67 | + { role: "system", content: "System instructions. ".repeat(200) }, // Large system |
| 68 | + { role: "user", content: "Question 1 ".repeat(100) }, |
| 69 | + { role: "assistant", content: "Answer 1 ".repeat(100) }, |
| 70 | + { role: "user", content: "Question 2 ".repeat(100) }, |
| 71 | + { role: "assistant", content: "Answer 2 ".repeat(100) }, |
| 72 | + { role: "user", content: "Question 3 ".repeat(100) }, |
| 73 | + { role: "assistant", content: "Answer 3 ".repeat(100) }, |
| 74 | + { role: "user", content: "Question 4 ".repeat(100) }, |
| 75 | + { role: "assistant", content: "Answer 4 ".repeat(100) }, |
| 76 | + { role: "user", content: "Question 5" }, |
| 77 | + ]; |
| 78 | + |
| 79 | + const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5"); |
| 80 | + |
| 81 | + // Count breakpoints |
| 82 | + const breakpointIndices = result |
| 83 | + .map((msg, idx) => (msg.providerOptions?.anthropic?.cacheControl ? idx : -1)) |
| 84 | + .filter((idx) => idx >= 0); |
| 85 | + |
| 86 | + // Should have multiple breakpoints (max 4) |
| 87 | + expect(breakpointIndices.length).toBeGreaterThan(1); |
| 88 | + expect(breakpointIndices.length).toBeLessThanOrEqual(4); |
| 89 | + |
| 90 | + // System message should have 1h TTL |
| 91 | + const systemCacheControl = result[0].providerOptions?.anthropic?.cacheControl; |
| 92 | + if (systemCacheControl && typeof systemCacheControl === "object" && "ttl" in systemCacheControl) { |
| 93 | + expect(systemCacheControl.ttl).toBe("1h"); |
| 94 | + } |
| 95 | + |
| 96 | + // Last cached message should have 5m TTL |
| 97 | + const lastCachedIdx = breakpointIndices[breakpointIndices.length - 1]; |
| 98 | + const lastCacheControl = result[lastCachedIdx].providerOptions?.anthropic?.cacheControl; |
| 99 | + if (lastCacheControl && typeof lastCacheControl === "object" && "ttl" in lastCacheControl) { |
| 100 | + expect(lastCacheControl.ttl).toBe("5m"); |
| 101 | + } |
| 102 | + }); |
| 103 | + |
| 104 | + test("should respect Haiku minimum token requirement (2048)", () => { |
    // Small messages that don't meet the Haiku threshold
    const messages: ModelMessage[] = [
      { role: "user", content: "Short question" }, // a few tokens, far below the 2048-token minimum
| 108 | + { role: "assistant", content: "Short answer" }, |
| 109 | + { role: "user", content: "Another question" }, |
| 110 | + ]; |
| 111 | + |
| 112 | + const result = applyCacheControl(messages, "anthropic:claude-haiku-3-5"); |
| 113 | + |
| 114 | + // Should not apply caching for Haiku with small content |
| 115 | + const hasCaching = result.some((msg) => msg.providerOptions?.anthropic?.cacheControl); |
| 116 | + expect(hasCaching).toBe(false); |
| 117 | + }); |
| 118 | + |
| 119 | + test("should apply caching for Haiku with sufficient content", () => { |
| 120 | + const messages: ModelMessage[] = [ |
| 121 | + { role: "user", content: "Long message ".repeat(400) }, // ~5200 chars > 2048 tokens |
| 122 | + { role: "assistant", content: "Response ".repeat(400) }, |
| 123 | + { role: "user", content: "Follow up" }, |
| 124 | + ]; |
| 125 | + |
| 126 | + const result = applyCacheControl(messages, "anthropic:claude-haiku-3-5"); |
| 127 | + |
| 128 | + // Should cache with Haiku when content is large enough |
| 129 | + const hasCaching = result.some((msg) => msg.providerOptions?.anthropic?.cacheControl); |
| 130 | + expect(hasCaching).toBe(true); |
| 131 | + }); |
| 132 | + |
| 133 | + test("should handle messages with array content", () => { |
| 134 | + const messages: ModelMessage[] = [ |
| 135 | + { |
| 136 | + role: "user", |
| 137 | + content: [ |
| 138 | + { type: "text", text: "Here is a long document. ".repeat(200) }, |
| 139 | + { type: "text", text: "Additional context. ".repeat(100) }, |
| 140 | + ], |
| 141 | + }, |
| 142 | + { role: "assistant", content: "I understand" }, |
| 143 | + { role: "user", content: "What did I say?" }, |
| 144 | + ]; |
| 145 | + |
| 146 | + const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5"); |
| 147 | + |
| 148 | + // Should handle multi-part content and apply caching |
| 149 | + expect(result[1].providerOptions?.anthropic?.cacheControl).toEqual({ |
| 150 | + type: "ephemeral", |
| 151 | + ttl: "5m", |
| 152 | + }); |
| 153 | + }); |
| 154 | + |
| 155 | + test("should preserve existing providerOptions", () => { |
| 156 | + const messages: ModelMessage[] = [ |
| 157 | + { |
| 158 | + role: "system", |
| 159 | + content: "System prompt with detailed instructions. ".repeat(300), // ~12600 chars > 1024 tokens |
| 160 | + providerOptions: { |
| 161 | + anthropic: { |
| 162 | + customOption: "value", |
| 163 | + }, |
| 164 | + }, |
| 165 | + }, |
| 166 | + { role: "user", content: "Hello" }, |
| 167 | + { role: "assistant", content: "Hi there!" }, |
| 168 | + { role: "user", content: "Continue" }, |
| 169 | + ]; |
| 170 | + |
| 171 | + const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5"); |
| 172 | + |
| 173 | + // Should preserve existing options while adding cacheControl |
| 174 | + const anthropicOptions = result[0].providerOptions?.anthropic as Record<string, unknown>; |
| 175 | + expect(anthropicOptions?.customOption).toBe("value"); |
| 176 | + expect(anthropicOptions?.cacheControl).toBeDefined(); |
| 177 | + }); |
| 178 | + |
| 179 | + test("should not exceed 4 breakpoint limit", () => { |
| 180 | + // Create a very long conversation |
| 181 | + const messages: ModelMessage[] = [ |
| 182 | + { role: "system", content: "System ".repeat(300) }, |
| 183 | + ]; |
| 184 | + |
| 185 | + // Add 20 message pairs |
| 186 | + for (let i = 0; i < 20; i++) { |
| 187 | + messages.push({ role: "user", content: `User message ${i} `.repeat(100) }); |
| 188 | + messages.push({ role: "assistant", content: `Assistant ${i} `.repeat(100) }); |
| 189 | + } |
| 190 | + |
| 191 | + const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5"); |
| 192 | + |
| 193 | + // Count breakpoints |
| 194 | + const breakpointCount = result.filter( |
| 195 | + (msg) => msg.providerOptions?.anthropic?.cacheControl |
| 196 | + ).length; |
| 197 | + |
| 198 | + // Should never exceed 4 breakpoints |
| 199 | + expect(breakpointCount).toBeLessThanOrEqual(4); |
| 200 | + expect(breakpointCount).toBeGreaterThan(0); |
| 201 | + }); |
| 202 | + |
| 203 | + test("should place 1h TTL before 5m TTL", () => { |
    const messages: ModelMessage[] = [
      { role: "system", content: "System instructions. ".repeat(200) },
      { role: "user", content: "Q1 ".repeat(100) },
      { role: "assistant", content: "A1 ".repeat(100) },
      { role: "user", content: "Q2 ".repeat(100) },
      { role: "assistant", content: "A2 ".repeat(100) },
      { role: "user", content: "Q3" },
    ];

    const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5");

    // Collect breakpoints with their TTLs
    const breakpoints = result
      .map((msg, idx) => {
        const cacheControl = msg.providerOptions?.anthropic?.cacheControl;
        const ttl =
          cacheControl && typeof cacheControl === "object" && "ttl" in cacheControl
            ? (cacheControl.ttl as "5m" | "1h" | undefined)
            : undefined;
        return { idx, ttl };
      })
      .filter((bp): bp is { idx: number; ttl: "5m" | "1h" } => bp.ttl !== undefined);

    // Find first 1h and first 5m
    const firstOneHour = breakpoints.find((bp) => bp.ttl === "1h");
    const firstFiveMin = breakpoints.find((bp) => bp.ttl === "5m");

    // If both exist, 1h should come before 5m
    if (firstOneHour && firstFiveMin) {
      expect(firstOneHour.idx).toBeLessThan(firstFiveMin.idx);
    }
  });

  test("should handle image content in token estimation", () => {
    const messages: ModelMessage[] = [
      {
        role: "user",
        content: [
          { type: "text", text: "Analyze this image: ".repeat(100) },
          { type: "image", image: "data:image/png;base64,..." },
        ],
      },
      { role: "assistant", content: "I see a test image" },
      { role: "user", content: "What else?" },
    ];

    const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5");

    // Should account for image tokens and apply caching
    const hasCaching = result.some((msg) => msg.providerOptions?.anthropic?.cacheControl);
    expect(hasCaching).toBe(true);
  });

  test("should handle edge case with exact minimum tokens", () => {
    // Create content that's exactly at the threshold (1024 tokens ≈ 4096 chars)
    const messages: ModelMessage[] = [
      { role: "user", content: "x".repeat(4096) },
      { role: "assistant", content: "ok" },
      { role: "user", content: "continue" },
    ];

    const result = applyCacheControl(messages, "anthropic:claude-sonnet-4-5");

    // Should apply caching at the threshold
    const hasCaching = result.some((msg) => msg.providerOptions?.anthropic?.cacheControl);
    expect(hasCaching).toBe(true);
  });
});
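
// Illustrative wiring (a sketch, not exercised by this suite): the cached
// messages are meant to be passed straight to the AI SDK, e.g.
//
//   const cached = applyCacheControl(messages, "anthropic:claude-sonnet-4-5");
//   const { text } = await generateText({
//     model: anthropic("claude-sonnet-4-5"),
//     messages: cached,
//   });
//
// where `generateText` comes from "ai" and `anthropic` from
// "@ai-sdk/anthropic"; only `applyCacheControl` is assumed from this module.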