|
15 | 15 | import org.elasticsearch.common.util.LazyInitializable;
|
16 | 16 | import org.elasticsearch.core.Nullable;
|
17 | 17 | import org.elasticsearch.core.TimeValue;
|
| 18 | +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; |
18 | 19 | import org.elasticsearch.inference.ChunkedInference;
|
19 | 20 | import org.elasticsearch.inference.ChunkingSettings;
|
20 | 21 | import org.elasticsearch.inference.InferenceServiceConfiguration;
|
@@ -349,19 +350,18 @@ public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) {
|
349 | 350 | }
|
350 | 351 |
|
351 | 352 | /**
|
352 |
| - * Return the default similarity measure for the embedding type. |
353 |
| - * Cohere embeddings are normalized to unit vectors therefor Dot |
354 |
| - * Product similarity can be used and is the default for all Cohere |
355 |
| - * models. |
| 353 | + * Returns the default similarity measure for the embedding type. |
| 354 | + * Cohere embeddings are expected to be normalized to unit vectors, but due to floating point precision issues, |
| 355 | + * our check ({@link DenseVectorFieldMapper#isNotUnitVector(float)}) often fails. |
| 356 | + * Therefore, we use cosine similarity to ensure compatibility. |
356 | 357 | *
|
357 |
| - * @return The default similarity. |
| 358 | + * @return The default similarity measure. |
358 | 359 | */
|
359 | 360 | static SimilarityMeasure defaultSimilarity(CohereEmbeddingType embeddingType) {
|
360 | 361 | if (embeddingType == CohereEmbeddingType.BIT || embeddingType == CohereEmbeddingType.BINARY) {
|
361 | 362 | return SimilarityMeasure.L2_NORM;
|
362 | 363 | }
|
363 |
| - |
364 |
| - return SimilarityMeasure.DOT_PRODUCT; |
| 364 | + return SimilarityMeasure.COSINE; |
365 | 365 | }
|
366 | 366 |
|
367 | 367 | @Override
|
|
0 commit comments