展平图标记过滤器
展平图标记过滤器
展平由图标记过滤器(例如 synonym_graph 或 word_delimiter_graph)生成的标记图。
展平包含 多位置标记 的标记图,使该图适合于 索引。否则,索引不支持包含多位置标记的标记图。
展平图是一个有损的过程。
如果可能,请避免使用 flatten_graph
过滤器。相反,仅在 搜索分析器 中使用图标记过滤器。这样就无需使用 flatten_graph
过滤器。
flatten_graph
过滤器使用 Lucene 的 FlattenGraphFilter。
示例
要了解 flatten_graph
过滤器的使用方法,首先需要生成一个包含多位置标记的标记图。
以下 分析 API 请求使用 synonym_graph 过滤器,将 dns 作为 domain name system 的多位置同义词,添加到文本 domain name system is fragile 中。
resp = client.indices.analyze( tokenizer="standard", filter=[ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] } ], text="domain name system is fragile", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ { type: 'synonym_graph', synonyms: [ 'dns, domain name system' ] } ], text: 'domain name system is fragile' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: [ { type: "synonym_graph", synonyms: ["dns, domain name system"], }, ], text: "domain name system is fragile", }); console.log(response);
GET /_analyze { "tokenizer": "standard", "filter": [ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] } ], "text": "domain name system is fragile" }
该过滤器生成以下标记图,其中 dns
为多位置标记。
索引不支持包含多位置标记的标记图。要使此标记图适合于索引,需要将其展平。
要展平标记图,请在前面的分析 API 请求中,在 synonym_graph
过滤器之后添加 flatten_graph
过滤器。
resp = client.indices.analyze( tokenizer="standard", filter=[ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] }, "flatten_graph" ], text="domain name system is fragile", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ { type: 'synonym_graph', synonyms: [ 'dns, domain name system' ] }, 'flatten_graph' ], text: 'domain name system is fragile' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: [ { type: "synonym_graph", synonyms: ["dns, domain name system"], }, "flatten_graph", ], text: "domain name system is fragile", }); console.log(response);
GET /_analyze { "tokenizer": "standard", "filter": [ { "type": "synonym_graph", "synonyms": [ "dns, domain name system" ] }, "flatten_graph" ], "text": "domain name system is fragile" }
该过滤器生成以下展平的标记图,该图适合于索引。
添加到分析器
以下 创建索引 API 请求使用 flatten_graph
标记过滤器配置新的 自定义分析器。
在此分析器中,自定义的 word_delimiter_graph
过滤器生成包含连接的多位置标记的标记图。 flatten_graph
过滤器展平这些标记图,使其适合于索引。
resp = client.indices.create( index="my-index-000001", settings={ "analysis": { "analyzer": { "my_custom_index_analyzer": { "type": "custom", "tokenizer": "standard", "filter": [ "my_custom_word_delimiter_graph_filter", "flatten_graph" ] } }, "filter": { "my_custom_word_delimiter_graph_filter": { "type": "word_delimiter_graph", "catenate_all": True } } } }, ) print(resp)
response = client.indices.create( index: 'my-index-000001', body: { settings: { analysis: { analyzer: { my_custom_index_analyzer: { type: 'custom', tokenizer: 'standard', filter: [ 'my_custom_word_delimiter_graph_filter', 'flatten_graph' ] } }, filter: { my_custom_word_delimiter_graph_filter: { type: 'word_delimiter_graph', catenate_all: true } } } } } ) puts response
const response = await client.indices.create({ index: "my-index-000001", settings: { analysis: { analyzer: { my_custom_index_analyzer: { type: "custom", tokenizer: "standard", filter: ["my_custom_word_delimiter_graph_filter", "flatten_graph"], }, }, filter: { my_custom_word_delimiter_graph_filter: { type: "word_delimiter_graph", catenate_all: true, }, }, }, }, }); console.log(response);
PUT /my-index-000001 { "settings": { "analysis": { "analyzer": { "my_custom_index_analyzer": { "type": "custom", "tokenizer": "standard", "filter": [ "my_custom_word_delimiter_graph_filter", "flatten_graph" ] } }, "filter": { "my_custom_word_delimiter_graph_filter": { "type": "word_delimiter_graph", "catenate_all": true } } } } }