Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use FallbackSyntheticSourceBlockLoader for shape and geo_shape #124927

Merged
merged 7 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/124927.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 124927
summary: Use `FallbackSyntheticSourceBlockLoader` for `shape` and `geo_shape`
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ public Builder builder(BlockFactory factory, int expectedCount) {
private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
var nullValueAdjusted = nullValue != null ? adjustSourceValue(nullValue, scalingFactor) : null;

return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<Double>(nullValue) {
return new FallbackSyntheticSourceBlockLoader.SingleValueReader<Double>(nullValue) {
@Override
public void convertValue(Object value, List<Double> accumulator) {
if (coerce && value.equals("")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.GeometryFormatterFactory;
Expand Down Expand Up @@ -67,12 +68,16 @@ public abstract void parse(XContentParser parser, CheckedConsumer<T, IOException

private void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
try (XContentParser parser = wrapObject(sourceMap)) {
parse(parser, v -> consumer.accept(normalizeFromSource(v)), NoopMalformedValueHandler.INSTANCE);
parseFromSource(parser, consumer);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

private void parseFromSource(XContentParser parser, Consumer<T> consumer) throws IOException {
parse(parser, v -> consumer.accept(normalizeFromSource(v)), NoopMalformedValueHandler.INSTANCE);
}

/**
* Normalize a geometry when reading from source. When reading from source we can skip
* some expensive steps as the geometry has already been indexed.
Expand Down Expand Up @@ -187,6 +192,80 @@ protected BlockLoader blockLoaderFromSource(BlockLoaderContext blContext) {
}

protected abstract Object nullValueAsSource(T nullValue);

protected BlockLoader blockLoaderFromFallbackSyntheticSource(BlockLoaderContext blContext) {
return new FallbackSyntheticSourceBlockLoader(new GeometriesFallbackSyntheticSourceReader(), name()) {
@Override
public Builder builder(BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}
};
}

private class GeometriesFallbackSyntheticSourceReader implements FallbackSyntheticSourceBlockLoader.Reader<BytesRef> {
private final Function<List<T>, List<Object>> formatter;

private GeometriesFallbackSyntheticSourceReader() {
this.formatter = getFormatter(GeometryFormatterFactory.WKB);
}

@Override
public void convertValue(Object value, List<BytesRef> accumulator) {
final List<T> values = new ArrayList<>();

geometryParser.fetchFromSource(value, v -> {
if (v != null) {
values.add(v);
} else if (nullValue != null) {
values.add(nullValue);
}
});
var formatted = formatter.apply(values);

for (var formattedValue : formatted) {
if (formattedValue instanceof byte[] wkb) {
accumulator.add(new BytesRef(wkb));
} else {
throw new IllegalArgumentException(
"Unsupported source type for spatial geometry: " + formattedValue.getClass().getSimpleName()
);
}
}
}

@Override
public void parse(XContentParser parser, List<BytesRef> accumulator) throws IOException {
final List<T> values = new ArrayList<>();

geometryParser.parseFromSource(parser, v -> {
if (v != null) {
values.add(v);
} else if (nullValue != null) {
values.add(nullValue);
}
});
var formatted = formatter.apply(values);

for (var formattedValue : formatted) {
if (formattedValue instanceof byte[] wkb) {
accumulator.add(new BytesRef(wkb));
} else {
throw new IllegalArgumentException(
"Unsupported source type for spatial geometry: " + formattedValue.getClass().getSimpleName()
);
}
}
}

@Override
public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;

for (var value : values) {
bytesRefBuilder.appendBytesRef(value);
}
}
}
}

private final Explicit<Boolean> ignoreMalformed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ public Builder builder(BlockFactory factory, int expectedCount) {
}

private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<Boolean>(nullValue) {
return new FallbackSyntheticSourceBlockLoader.SingleValueReader<Boolean>(nullValue) {
@Override
public void convertValue(Object value, List<Boolean> accumulator) {
try {
Expand Down Expand Up @@ -360,10 +360,10 @@ protected void parseNonNullValue(XContentParser parser, List<Boolean> accumulato

@Override
public void writeToBlock(List<Boolean> values, BlockLoader.Builder blockBuilder) {
var longBuilder = (BlockLoader.BooleanBuilder) blockBuilder;
var booleanBuilder = (BlockLoader.BooleanBuilder) blockBuilder;

for (var value : values) {
longBuilder.appendBoolean(value);
booleanBuilder.appendBoolean(value);
}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,7 @@ public Builder builder(BlockFactory factory, int expectedCount) {
private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
Function<String, Long> dateParser = this::parse;

return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<Long>(nullValue) {
return new FallbackSyntheticSourceBlockLoader.SingleValueReader<Long>(nullValue) {
@Override
public void convertValue(Object value, List<Long> accumulator) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,6 @@ private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue,
}

private void parseWithReader(XContentParser parser, List<T> blockValues) throws IOException {
if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
reader.parse(parser, blockValues);
}
return;
}

reader.parse(parser, blockValues);
}

Expand Down Expand Up @@ -274,10 +267,15 @@ public interface Reader<T> {
void writeToBlock(List<T> values, Builder blockBuilder);
}

public abstract static class ReaderWithNullValueSupport<T> implements Reader<T> {
/**
* Reader for field types that don't parse arrays (arrays are always treated as multiple values)
* as opposed to field types that treat arrays as special cases (for example point).
* @param <T>
*/
public abstract static class SingleValueReader<T> implements Reader<T> {
private final Object nullValue;

public ReaderWithNullValueSupport(Object nullValue) {
public SingleValueReader(Object nullValue) {
this.nullValue = nullValue;
}

Expand All @@ -289,6 +287,18 @@ public void parse(XContentParser parser, List<T> accumulator) throws IOException
}
return;
}
if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
if (nullValue != null) {
convertValue(nullValue, accumulator);
}
} else {
parseNonNullValue(parser, accumulator);
}
}
return;
}

parseNonNullValue(parser, accumulator);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ public Builder builder(BlockFactory factory, int expectedCount) {

private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
var nullValueBytes = nullValue != null ? new BytesRef(nullValue) : null;
return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<BytesRef>(nullValueBytes) {
return new FallbackSyntheticSourceBlockLoader.SingleValueReader<BytesRef>(nullValueBytes) {
@Override
public void convertValue(Object value, List<BytesRef> accumulator) {
String stringValue = ((BytesRef) value).utf8ToString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1729,8 +1729,7 @@ public Builder builder(BlockFactory factory, int expectedCount) {
};
}

abstract static class NumberFallbackSyntheticSourceReader extends FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<
Number> {
abstract static class NumberFallbackSyntheticSourceReader extends FallbackSyntheticSourceBlockLoader.SingleValueReader<Number> {
private final NumberType type;
private final Number nullValue;
private final boolean coerce;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

public class BooleanFieldBlockLoaderTests extends BlockLoaderTestCase {
public BooleanFieldBlockLoaderTests(Params params) {
super(FieldType.BOOLEAN, params);
super(FieldType.BOOLEAN.toString(), params);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

public class DateFieldBlockLoaderTests extends BlockLoaderTestCase {
public DateFieldBlockLoaderTests(Params params) {
super(FieldType.DATE, params);
super(FieldType.DATE.toString(), params);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

public class KeywordFieldBlockLoaderTests extends BlockLoaderTestCase {
public KeywordFieldBlockLoaderTests(Params params) {
super(FieldType.KEYWORD, params);
super(FieldType.KEYWORD.toString(), params);
}

@SuppressWarnings("unchecked")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
import org.elasticsearch.logsdb.datageneration.DocumentGenerator;
import org.elasticsearch.logsdb.datageneration.FieldType;
import org.elasticsearch.logsdb.datageneration.Mapping;
import org.elasticsearch.logsdb.datageneration.MappingGenerator;
import org.elasticsearch.logsdb.datageneration.Template;
Expand All @@ -35,6 +34,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -59,14 +59,18 @@ public static List<Object[]> args() {

public record Params(boolean syntheticSource, MappedFieldType.FieldExtractPreference preference) {}

private final FieldType fieldType;
private final String fieldType;
protected final Params params;

private final String fieldName;
private final MappingGenerator mappingGenerator;
private final DocumentGenerator documentGenerator;

protected BlockLoaderTestCase(FieldType fieldType, Params params) {
protected BlockLoaderTestCase(String fieldType, Params params) {
this(fieldType, List.of(), params);
}

protected BlockLoaderTestCase(String fieldType, Collection<DataSourceHandler> customHandlers, Params params) {
this.fieldType = fieldType;
this.params = params;
this.fieldName = randomAlphaOfLengthBetween(5, 10);
Expand All @@ -87,6 +91,7 @@ public DataSourceResponse.ObjectMappingParametersGenerator handle(
return new DataSourceResponse.ObjectMappingParametersGenerator(HashMap::new); // just defaults
}
}))
.withDataSourceHandlers(customHandlers)
.build();

this.mappingGenerator = new MappingGenerator(specification);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

public abstract class NumberFieldBlockLoaderTestCase<T extends Number> extends BlockLoaderTestCase {
public NumberFieldBlockLoaderTestCase(FieldType fieldType, Params params) {
super(fieldType, params);
super(fieldType.toString(), params);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ private void generateFields(Map<String, Object> document, Map<String, Template.E
// Unsigned long does not play well when dynamically mapped because
// it gets mapped as just long and large values fail to index.
// Just skip it.
if (leaf.type() == FieldType.UNSIGNED_LONG && fieldMapping == null) {
// TODO we can actually handle this in UnsignedLongFieldDataGenerator
if (leaf.type().equals(FieldType.UNSIGNED_LONG.toString()) && fieldMapping == null) {
continue;
}

var generator = leaf.type().generator(fieldName, specification.dataSource());

var generator = specification.dataSource()
.get(new DataSourceRequest.FieldDataGenerator(fieldName, leaf.type(), specification.dataSource()))
.generator();
document.put(fieldName, generator.generateValue(fieldMapping));
} else if (templateEntry instanceof Template.Object object) {
Optional<Integer> arrayLength = objectArrayGenerator.lengthGenerator().get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.elasticsearch.logsdb.datageneration.fields.leaf.UnsignedLongFieldDataGenerator;

/**
* Lists all leaf field types that are supported for data generation.
* Lists all leaf field types that are supported for data generation by default.
*/
public enum FieldType {
KEYWORD("keyword"),
Expand Down Expand Up @@ -66,6 +66,25 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
};
}

public static FieldType tryParse(String name) {
return switch (name) {
case "keyword" -> FieldType.KEYWORD;
case "long" -> FieldType.LONG;
case "unsigned_long" -> FieldType.UNSIGNED_LONG;
case "integer" -> FieldType.INTEGER;
case "short" -> FieldType.SHORT;
case "byte" -> FieldType.BYTE;
case "double" -> FieldType.DOUBLE;
case "float" -> FieldType.FLOAT;
case "half_float" -> FieldType.HALF_FLOAT;
case "scaled_float" -> FieldType.SCALED_FLOAT;
case "counted_keyword" -> FieldType.COUNTED_KEYWORD;
case "boolean" -> FieldType.BOOLEAN;
case "date" -> FieldType.DATE;
default -> null;
};
}

@Override
public String toString() {
return name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ private void generateMapping(
)
.mappingGenerator();

mappingParameters.put("type", leaf.type().toString());
mappingParameters.put("type", leaf.type());
mappingParameters.putAll(mappingParametersGenerator.get());

// For simplicity we only copy to keyword fields, synthetic source logic to handle copy_to is generic.
if (leaf.type() == FieldType.KEYWORD) {
if (leaf.type().equals(FieldType.KEYWORD.toString())) {
context.addCopyToCandidate(fieldName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
public record Template(Map<String, Entry> template) {
public sealed interface Entry permits Leaf, Object {}

public record Leaf(String name, FieldType type) implements Entry {}
public record Leaf(String name, String type) implements Entry {}

public record Object(String name, boolean nested, Map<String, Entry> children) implements Entry {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ private void generateChildFields(Map<String, Template.Entry> mapping, int depth,
generateChildFields(children, depth + 1, nestedFieldsCount);
} else {
var fieldTypeInfo = fieldTypeGenerator.get();
mapping.put(fieldName, new Template.Leaf(fieldName, fieldTypeInfo.fieldType()));
mapping.put(fieldName, new Template.Leaf(fieldName, fieldTypeInfo.fieldType().toString()));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public DataSource(Collection<DataSourceHandler> additionalHandlers) {

this.handlers.addAll(additionalHandlers);

this.handlers.add(new DefaultFieldDataGeneratorHandler());
this.handlers.add(new DefaultPrimitiveTypesHandler());
this.handlers.add(new DefaultWrappersHandler());
this.handlers.add(new DefaultObjectGenerationHandler());
Expand Down
Loading