Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.common.xcontent;

import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

/**
* A filter that filter fields away from source
*/
public interface XContentFieldFilter {
/**
* filter source in {@link BytesReference} format and in {@link XContentType} content type
* note that xContentType may be null in some case, we should guess xContentType from sourceBytes in such cases
*/
BytesReference apply(BytesReference sourceBytes, @Nullable XContentType xContentType) throws IOException;

/**
* Construct {@link XContentFieldFilter} using given includes and excludes
*
* @param includes fields to keep, wildcard supported
* @param excludes fields to remove, wildcard supported
* @return filter using {@link XContentMapValues#filter(String[], String[])} if wildcard found in excludes
* , otherwise return filter using {@link XContentParser}
*/
static XContentFieldFilter newFieldFilter(String[] includes, String[] excludes) {
if ((CollectionUtils.isEmpty(excludes) == false) && Arrays.stream(excludes).filter(field -> field.contains("*")).count() > 0) {
return (originalSource, contentType) -> {
Function<Map<String, ?>, Map<String, Object>> mapFilter = XContentMapValues.filter(includes, excludes);
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
Map<String, Object> filteredSource = mapFilter.apply(mapTuple.v2());
BytesStreamOutput bStream = new BytesStreamOutput();
XContentType actualContentType = mapTuple.v1();
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
builder.close();
return bStream.bytes();
};
} else {
final XContentParserConfiguration parserConfig = XContentParserConfiguration.EMPTY.withFiltering(
Set.of(includes),
Set.of(excludes)
);
return (originalSource, contentType) -> {
if (contentType == null) {
contentType = XContentHelper.xContentTypeMayCompressed(originalSource);
}
BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, originalSource.length()));
XContentBuilder builder = new XContentBuilder(contentType.xContent(), streamOutput);
XContentParser parser = contentType.xContent().createParser(parserConfig, originalSource.streamInput());
builder.copyCurrentStructure(parser);
return BytesReference.bytes(builder);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

XContentParser resource leak in parser-based filtering branch

Medium Severity

The XContentParser created at line 70 is never closed. XContentParser extends Closeable and wraps an input stream that needs proper cleanup. This resource leak occurs every time the filter is applied when excludes doesn't contain wildcards. The codebase pattern (visible in IndexAbstraction.java and IndexRouting.java) uses try-with-resources for parsers created with streamInput(), but this implementation omits that.

Fix in Cursor Fix in Web

};
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compressed source not decompressed before parsing in filter

High Severity

The parser-based filtering branch detects content type using xContentTypeMayCompressed() which handles compressed bytes, but then passes originalSource.streamInput() directly to the parser without decompressing. If the source is compressed, the parser receives compressed bytes and will fail or produce incorrect results. The map-based branch correctly handles compression via XContentHelper.convertToMap. This is a regression since the old code in ShardGetService always used convertToMap which handles compression properly.

Fix in Cursor Fix in Web

}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,32 @@ public static BytesReference toXContent(ToXContent toXContent, XContentType xCon
}
}

/**
* Guesses the content type based on the provided bytes which may be compressed.
*
* @deprecated the content type should not be guessed except for few cases where we effectively don't know the content type.
* The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
* This method is deprecated to prevent usages of it from spreading further without specific reasons.
*/
@Deprecated
public static XContentType xContentTypeMayCompressed(BytesReference bytes) {
Compressor compressor = CompressorFactory.compressor(bytes);
if (compressor != null) {
try {
InputStream compressedStreamInput = compressor.threadLocalInputStream(bytes.streamInput());
if (compressedStreamInput.markSupported() == false) {
compressedStreamInput = new BufferedInputStream(compressedStreamInput);
}
return XContentFactory.xContentType(compressedStreamInput);
} catch (IOException e) {
assert false : "Should not happen, we're just reading bytes from memory";
throw new UncheckedIOException(e);
}
} else {
return XContentHelper.xContentType(bytes);
}
}

/**
* Guesses the content type based on the provided bytes.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@
import org.elasticsearch.common.metrics.CounterMetric;
import org.elasticsearch.common.metrics.MeanMetric;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.common.xcontent.XContentFieldFilter;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.engine.Engine;
Expand All @@ -33,8 +31,6 @@
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.HashMap;
Expand Down Expand Up @@ -253,15 +249,11 @@ private GetResult innerGetLoadFromStoredFields(
if (fetchSourceContext.fetchSource() == false) {
source = null;
} else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
Map<String, Object> sourceAsMap;
// TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different.
// Do we care?
Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
XContentType sourceContentType = typeMapTuple.v1();
sourceAsMap = typeMapTuple.v2();
sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
try {
source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
source = XContentFieldFilter.newFieldFilter(fetchSourceContext.includes(), fetchSourceContext.excludes())
.apply(source, null);
} catch (IOException e) {
throw new ElasticsearchException("Failed to get id [" + id + "] with includes/excludes set", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,24 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.common.xcontent.XContentFieldFilter;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

public class SourceFieldMapper extends MetadataFieldMapper {

public static final String NAME = "_source";
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";

public static final String CONTENT_TYPE = "_source";
private final Function<Map<String, ?>, Map<String, Object>> filter;
private final XContentFieldFilter filter;

private static final SourceFieldMapper DEFAULT = new SourceFieldMapper(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);

Expand Down Expand Up @@ -145,7 +137,9 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes)
this.includes = includes;
this.excludes = excludes;
final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false;
this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null;
this.filter = enabled && filtered
? XContentFieldFilter.newFieldFilter(includes, excludes)
: (sourceBytes, contentType) -> sourceBytes;
this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes);
}

Expand Down Expand Up @@ -180,18 +174,7 @@ public void preParse(DocumentParserContext context) throws IOException {
public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException {
if (enabled && originalSource != null) {
// Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data
if (filter != null) {
// we don't update the context source if we filter, we want to keep it as is...
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
Map<String, Object> filteredSource = filter.apply(mapTuple.v2());
BytesStreamOutput bStream = new BytesStreamOutput();
XContentType actualContentType = mapTuple.v1();
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
builder.close();
return bStream.bytes();
} else {
return originalSource;
}
return filter.apply(originalSource, contentType);
} else {
return null;
}
Expand Down