ASCII 折叠词元过滤器
ASCII 折叠词元过滤器
将不在基本拉丁 Unicode 区块(前 127 个 ASCII 字符)中的字母、数字和符号字符转换为其 ASCII 等效字符(如果存在)。例如,该过滤器将 à 更改为 a。
此过滤器使用 Lucene 的 ASCIIFoldingFilter。
示例
以下 analyze API 请求使用 asciifolding 过滤器去除 açaí à la carte 中的变音符号。
resp = client.indices.analyze( tokenizer="standard", filter=[ "asciifolding" ], text="açaí à la carte", ) print(resp)
response = client.indices.analyze( body: { tokenizer: 'standard', filter: [ 'asciifolding' ], text: 'açaí à la carte' } ) puts response
const response = await client.indices.analyze({ tokenizer: "standard", filter: ["asciifolding"], text: "açaí à la carte", }); console.log(response);
GET /_analyze { "tokenizer" : "standard", "filter" : ["asciifolding"], "text" : "açaí à la carte" }
该过滤器生成以下词元:
[ acai, a, la, carte ]
添加到分析器
以下创建索引 API 请求使用 asciifolding 过滤器来配置新的自定义分析器。
resp = client.indices.create( index="asciifold_example", settings={ "analysis": { "analyzer": { "standard_asciifolding": { "tokenizer": "standard", "filter": [ "asciifolding" ] } } } }, ) print(resp)
response = client.indices.create( index: 'asciifold_example', body: { settings: { analysis: { analyzer: { standard_asciifolding: { tokenizer: 'standard', filter: [ 'asciifolding' ] } } } } } ) puts response
const response = await client.indices.create({ index: "asciifold_example", settings: { analysis: { analyzer: { standard_asciifolding: { tokenizer: "standard", filter: ["asciifolding"], }, }, }, }, }); console.log(response);
PUT /asciifold_example { "settings": { "analysis": { "analyzer": { "standard_asciifolding": { "tokenizer": "standard", "filter": [ "asciifolding" ] } } } } }
可配置参数
preserve_original
- (可选,布尔值)如果为 true,则同时发出原始词元和折叠后的词元。默认为 false。
自定义
要自定义 asciifolding 过滤器,请复制该过滤器,并以其为基础创建新的自定义词元过滤器。您可以使用其可配置参数修改过滤器。
例如,以下请求创建一个将 preserve_original 设置为 true 的自定义 asciifolding 过滤器:
resp = client.indices.create( index="asciifold_example", settings={ "analysis": { "analyzer": { "standard_asciifolding": { "tokenizer": "standard", "filter": [ "my_ascii_folding" ] } }, "filter": { "my_ascii_folding": { "type": "asciifolding", "preserve_original": True } } } }, ) print(resp)
response = client.indices.create( index: 'asciifold_example', body: { settings: { analysis: { analyzer: { standard_asciifolding: { tokenizer: 'standard', filter: [ 'my_ascii_folding' ] } }, filter: { my_ascii_folding: { type: 'asciifolding', preserve_original: true } } } } } ) puts response
const response = await client.indices.create({ index: "asciifold_example", settings: { analysis: { analyzer: { standard_asciifolding: { tokenizer: "standard", filter: ["my_ascii_folding"], }, }, filter: { my_ascii_folding: { type: "asciifolding", preserve_original: true, }, }, }, }, }); console.log(response);
PUT /asciifold_example { "settings": { "analysis": { "analyzer": { "standard_asciifolding": { "tokenizer": "standard", "filter": [ "my_ascii_folding" ] } }, "filter": { "my_ascii_folding": { "type": "asciifolding", "preserve_original": true } } } } }