展平图标记过滤器
展平图标记过滤器
展平由图标记过滤器生成的标记图,例如 synonym_graph
或 word_delimiter_graph
。
展平包含多位置标记的标记图,使该图适合索引。否则,索引不支持包含多位置标记的标记图。
展平图是一个有损的过程。
如果可能,请避免使用 flatten_graph
过滤器,而应仅在搜索分析器中使用图标记过滤器。这样就不再需要 flatten_graph
过滤器。
flatten_graph
过滤器使用 Lucene 的 FlattenGraphFilter。
示例
要了解 flatten_graph
过滤器的工作原理,首先需要生成一个包含多位置标记的标记图。
以下分析 API 请求使用 synonym_graph
过滤器将 dns
添加为文本 domain name system is fragile
中 domain name system
的多位置同义词:
resp = client.indices.analyze( tokenizer="standard", filter=[ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] } ], text="domain name system is fragile", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ { type: 'synonym_graph', synonyms: [ 'dns, domain name system' ] } ], text: 'domain name system is fragile' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: [ { type: "synonym_graph", synonyms: ["dns, domain name system"], }, ], text: "domain name system is fragile", }); console.log(response);
GET /_analyze { "tokenizer": "standard", "filter": [ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] } ], "text": "domain name system is fragile" }
该过滤器生成以下标记图,其中 dns
为多位置标记。
索引不支持包含多位置标记的标记图。为了使此标记图适合索引,需要将其展平。
要展平标记图,请在之前的分析 API 请求中,在 synonym_graph
过滤器之后添加 flatten_graph
过滤器。
resp = client.indices.analyze( tokenizer="standard", filter=[ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] }, "flatten_graph" ], text="domain name system is fragile", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ { type: 'synonym_graph', synonyms: [ 'dns, domain name system' ] }, 'flatten_graph' ], text: 'domain name system is fragile' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: [ { type: "synonym_graph", synonyms: ["dns, domain name system"], }, "flatten_graph", ], text: "domain name system is fragile", }); console.log(response);
GET /_analyze { "tokenizer": "standard", "filter": [ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] }, "flatten_graph" ], "text": "domain name system is fragile" }
该过滤器生成以下展平的标记图,该图适合索引。
添加到分析器
以下创建索引 API 请求使用 flatten_graph
标记过滤器来配置新的自定义分析器。
在此分析器中,自定义的 word_delimiter_graph
过滤器生成包含连接的多位置标记的标记图。flatten_graph
过滤器展平这些标记图,使其适合索引。
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "my_custom_index_analyzer": { "type": "custom", "tokenizer": "standard", "filter": [ "my_custom_word_delimiter_graph_filter", "flatten_graph" ] } }, "filter": { "my_custom_word_delimiter_graph_filter": { "type": "word_delimiter_graph", "catenate_all": True } } } }, ) print(resp)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { my_custom_index_analyzer: { type: 'custom', tokenizer: 'standard', filter: [ 'my_custom_word_delimiter_graph_filter', 'flatten_graph' ] } }, filter: { my_custom_word_delimiter_graph_filter: { type: 'word_delimiter_graph', catenate_all: true } } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { my_custom_index_analyzer: { type: "custom", tokenizer: "standard", filter: ["my_custom_word_delimiter_graph_filter", "flatten_graph"], }, }, filter: { my_custom_word_delimiter_graph_filter: { type: "word_delimiter_graph", catenate_all: true, }, }, }, }, }); console.log(response);
PUT /my-index-000001 { "settings": { "analysis": { "analyzer": { "my_custom_index_analyzer": { "type": "custom", "tokenizer": "standard", "filter": [ "my_custom_word_delimiter_graph_filter", "flatten_graph" ] } }, "filter": { "my_custom_word_delimiter_graph_filter": { "type": "word_delimiter_graph", "catenate_all": true } } } } }