normalizer
keyword 字段的 normalizer 属性类似于 analyzer,但它保证分析链只生成一个词元。
normalizer 会在索引 keyword 值之前应用;在搜索时,无论是通过查询解析器(例如 match 查询)还是通过词条级别查询(例如 term 查询)搜索 keyword 字段,也同样会应用 normalizer。
Elasticsearch 提供了一个名为 lowercase
的简单 normalizer,可以直接使用。自定义的 normalizer 可以按照如下方式在分析设置中定义。
resp = client.indices.create( index="index", settings={ "analysis": { "normalizer": { "my_normalizer": { "type": "custom", "char_filter": [], "filter": [ "lowercase", "asciifolding" ] } } } }, mappings={ "properties": { "foo": { "type": "keyword", "normalizer": "my_normalizer" } } }, ) print(resp) resp1 = client.index( index="index", id="1", document={ "foo": "BÀR" }, ) print(resp1) resp2 = client.index( index="index", id="2", document={ "foo": "bar" }, ) print(resp2) resp3 = client.index( index="index", id="3", document={ "foo": "baz" }, ) print(resp3) resp4 = client.indices.refresh( index="index", ) print(resp4) resp5 = client.search( index="index", query={ "term": { "foo": "BAR" } }, ) print(resp5) resp6 = client.search( index="index", query={ "match": { "foo": "BAR" } }, ) print(resp6)
response = client.indices.create( index: 'index', body: { settings: { analysis: { normalizer: { my_normalizer: { type: 'custom', char_filter: [], filter: [ 'lowercase', 'asciifolding' ] } } } }, mappings: { properties: { foo: { type: 'keyword', normalizer: 'my_normalizer' } } } } ) puts response response = client.index( index: 'index', id: 1, body: { foo: 'BÀR' } ) puts response response = client.index( index: 'index', id: 2, body: { foo: 'bar' } ) puts response response = client.index( index: 'index', id: 3, body: { foo: 'baz' } ) puts response response = client.indices.refresh( index: 'index' ) puts response response = client.search( index: 'index', body: { query: { term: { foo: 'BAR' } } } ) puts response response = client.search( index: 'index', body: { query: { match: { foo: 'BAR' } } } ) puts response
const response = await client.indices.create({ index: "index", settings: { analysis: { normalizer: { my_normalizer: { type: "custom", char_filter: [], filter: ["lowercase", "asciifolding"], }, }, }, }, mappings: { properties: { foo: { type: "keyword", normalizer: "my_normalizer", }, }, }, }); console.log(response); const response1 = await client.index({ index: "index", id: 1, document: { foo: "BÀR", }, }); console.log(response1); const response2 = await client.index({ index: "index", id: 2, document: { foo: "bar", }, }); console.log(response2); const response3 = await client.index({ index: "index", id: 3, document: { foo: "baz", }, }); console.log(response3); const response4 = await client.indices.refresh({ index: "index", }); console.log(response4); const response5 = await client.search({ index: "index", query: { term: { foo: "BAR", }, }, }); console.log(response5); const response6 = await client.search({ index: "index", query: { match: { foo: "BAR", }, }, }); console.log(response6);
PUT index { "settings": { "analysis": { "normalizer": { "my_normalizer": { "type": "custom", "char_filter": [], "filter": ["lowercase", "asciifolding"] } } } }, "mappings": { "properties": { "foo": { "type": "keyword", "normalizer": "my_normalizer" } } } } PUT index/_doc/1 { "foo": "BÀR" } PUT index/_doc/2 { "foo": "bar" } PUT index/_doc/3 { "foo": "baz" } POST index/_refresh GET index/_search { "query": { "term": { "foo": "BAR" } } } GET index/_search { "query": { "match": { "foo": "BAR" } } }
上述两个查询都匹配文档 1 和 2,因为索引时的 BÀR 和查询时的 BAR 都会被 normalizer 转换为 bar。
{ "took": $body.took, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped" : 0, "failed": 0 }, "hits": { "total" : { "value": 2, "relation": "eq" }, "max_score": 0.4700036, "hits": [ { "_index": "index", "_id": "1", "_score": 0.4700036, "_source": { "foo": "BÀR" } }, { "_index": "index", "_id": "2", "_score": 0.4700036, "_source": { "foo": "bar" } } ] } }
此外,由于 keyword 值在索引之前就已被转换,聚合返回的也是规范化之后的值:
resp = client.search( index="index", size=0, aggs={ "foo_terms": { "terms": { "field": "foo" } } }, ) print(resp)
response = client.search( index: 'index', body: { size: 0, aggregations: { foo_terms: { terms: { field: 'foo' } } } } ) puts response
const response = await client.search({ index: "index", size: 0, aggs: { foo_terms: { terms: { field: "foo", }, }, }, }); console.log(response);
GET index/_search { "size": 0, "aggs": { "foo_terms": { "terms": { "field": "foo" } } } }
返回结果如下:
{ "took": 43, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped" : 0, "failed": 0 }, "hits": { "total" : { "value": 3, "relation": "eq" }, "max_score": null, "hits": [] }, "aggregations": { "foo_terms": { "doc_count_error_upper_bound": 0, "sum_other_doc_count": 0, "buckets": [ { "key": "bar", "doc_count": 2 }, { "key": "baz", "doc_count": 1 } ] } } }