ignore_above
ignore_above
长度超过 ignore_above
设置的字符串将不会被索引或存储。对于字符串数组,ignore_above
将分别应用于每个数组元素,并且长度超过 ignore_above
的字符串元素将不会被索引或存储。
如果启用了 _source
字段(Elasticsearch 中的默认设置),所有字符串/数组元素仍然会存在于该字段中。
resp = client.indices.create( index="my-index-000001", mappings={ "properties": { "message": { "type": "keyword", "ignore_above": 20 } } }, ) print(resp) resp1 = client.index( index="my-index-000001", id="1", document={ "message": "Syntax error" }, ) print(resp1) resp2 = client.index( index="my-index-000001", id="2", document={ "message": "Syntax error with some long stacktrace" }, ) print(resp2) resp3 = client.search( index="my-index-000001", aggs={ "messages": { "terms": { "field": "message" } } }, ) print(resp3)
response = client.indices.create( index: 'my-index-000001', body: { mappings: { properties: { message: { type: 'keyword', ignore_above: 20 } } } } ) puts response response = client.index( index: 'my-index-000001', id: 1, body: { message: 'Syntax error' } ) puts response response = client.index( index: 'my-index-000001', id: 2, body: { message: 'Syntax error with some long stacktrace' } ) puts response response = client.search( index: 'my-index-000001', body: { aggregations: { messages: { terms: { field: 'message' } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", mappings: { properties: { message: { type: "keyword", ignore_above: 20, }, }, }, }); console.log(response); const response1 = await client.index({ index: "my-index-000001", id: 1, document: { message: "Syntax error", }, }); console.log(response1); const response2 = await client.index({ index: "my-index-000001", id: 2, document: { message: "Syntax error with some long stacktrace", }, }); console.log(response2); const response3 = await client.search({ index: "my-index-000001", aggs: { messages: { terms: { field: "message", }, }, }, }); console.log(response3);
PUT my-index-000001 { "mappings": { "properties": { "message": { "type": "keyword", "ignore_above": 20 } } } } PUT my-index-000001/_doc/1 { "message": "Syntax error" } PUT my-index-000001/_doc/2 { "message": "Syntax error with some long stacktrace" } GET my-index-000001/_search { "aggs": { "messages": { "terms": { "field": "message" } } } }
可以使用更新映射 API 在现有字段上更新 ignore_above
设置。
此选项还有助于避免字符串超出 Lucene 的词条字节长度上限 32766
。
ignore_above
的值是字符数,但 Lucene 计算的是字节数。如果您使用包含许多非 ASCII 字符的 UTF-8 文本,您可能需要将限制设置为 32766 / 4 = 8191
,因为 UTF-8 字符最多可能占用 4 个字节。