Mapping character filter
The mapping character filter accepts a map of keys and values. Whenever it encounters a string of characters that is the same as a key, it replaces them with the value associated with that key.

Matching is greedy; the longest pattern matching at a given point wins. Replacements are allowed to be the empty string.

The mapping filter uses Lucene's MappingCharFilter.
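For instance, greedy matching means that when one key is a prefix of another, the longer key is applied, and mapping a key to an empty value simply deletes the matched characters. The following is a minimal sketch against the analyze API illustrating both points; the emoticon keys, the "~" rule, and the client variable are illustrative and not part of the official examples that follow.

resp = client.indices.analyze(
    tokenizer="keyword",
    char_filter=[
        {
            "type": "mapping",
            "mappings": [
                # ":))" shares the prefix ":)"; on the text ":))" the longer key wins.
                ":) => _happy_",
                ":)) => _very_happy_",
                # Mapping to an empty value removes the "~" character entirely.
                "~ => "
            ]
        }
    ],
    text="Great~ :))",
)
print(resp)
# Expected token, per the greedy-matching rule above: [ Great _very_happy_ ]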
Example

The following analyze API request uses the mapping filter to convert Hindu-Arabic numerals (٠١٢٣٤٥٦٧٨٩) into their Arabic-Latin equivalents (0123456789), changing the text My license plate is ٢٥٠١٥ to My license plate is 25015.
resp = client.indices.analyze(
    tokenizer="keyword",
    char_filter=[
        {
            "type": "mapping",
            "mappings": [
                "٠ => 0",
                "١ => 1",
                "٢ => 2",
                "٣ => 3",
                "٤ => 4",
                "٥ => 5",
                "٦ => 6",
                "٧ => 7",
                "٨ => 8",
                "٩ => 9"
            ]
        }
    ],
    text="My license plate is ٢٥٠١٥",
)
print(resp)
response = client.indices.analyze(
  body: {
    tokenizer: 'keyword',
    char_filter: [
      {
        type: 'mapping',
        mappings: [
          '٠ => 0',
          '١ => 1',
          '٢ => 2',
          '٣ => 3',
          '٤ => 4',
          '٥ => 5',
          '٦ => 6',
          '٧ => 7',
          '٨ => 8',
          '٩ => 9'
        ]
      }
    ],
    text: 'My license plate is ٢٥٠١٥'
  }
)
puts response
const response = await client.indices.analyze({
  tokenizer: "keyword",
  char_filter: [
    {
      type: "mapping",
      mappings: [
        "٠ => 0",
        "١ => 1",
        "٢ => 2",
        "٣ => 3",
        "٤ => 4",
        "٥ => 5",
        "٦ => 6",
        "٧ => 7",
        "٨ => 8",
        "٩ => 9",
      ],
    },
  ],
  text: "My license plate is ٢٥٠١٥",
});
console.log(response);
GET /_analyze
{
  "tokenizer": "keyword",
  "char_filter": [
    {
      "type": "mapping",
      "mappings": [
        "٠ => 0",
        "١ => 1",
        "٢ => 2",
        "٣ => 3",
        "٤ => 4",
        "٥ => 5",
        "٦ => 6",
        "٧ => 7",
        "٨ => 8",
        "٩ => 9"
      ]
    }
  ],
  "text": "My license plate is ٢٥٠١٥"
}
The filter produces the following text:
[ My license plate is 25015 ]
Configurable parameters

mappings
    (Required*, array of strings) Array of mappings, with each element having the form key => value. Either this parameter or the mappings_path parameter must be specified.

mappings_path
    (Required*, string) Path to a file containing key => value mappings. This path must be absolute or relative to the config location, and the file must be UTF-8 encoded. Each mapping in the file must be separated by a line break. Either this parameter or the mappings parameter must be specified (a sketch using mappings_path follows this list).
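As a sketch of the mappings_path variant, the request below defines the same kind of filter but loads its rules from a file. The index name, the filter name, and the analysis/mappings.txt path are hypothetical; the file would need to exist under the node's config directory, UTF-8 encoded, with one key => value rule per line.

# Hypothetical file under the config directory (e.g. $ES_PATH_CONF/analysis/mappings.txt),
# one rule per line, for example:
#   ٠ => 0
#   ١ => 1
resp = client.indices.create(
    index="my-index-000002",
    settings={
        "analysis": {
            "char_filter": {
                "my_file_mappings_char_filter": {
                    "type": "mapping",
                    "mappings_path": "analysis/mappings.txt"
                }
            }
        }
    },
)
print(resp)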
Customize and add to an analyzer

To customize the mappings filter, duplicate it to create the basis for a new custom character filter. You can modify the filter using its configurable parameters.

The following create index API request configures a new custom analyzer using a custom mappings filter, my_mappings_char_filter.

The my_mappings_char_filter filter replaces the :) and :( emoticons with a text equivalent.
resp = client.indices.create(
    index="my-index-000001",
    settings={
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "standard",
                    "char_filter": [
                        "my_mappings_char_filter"
                    ]
                }
            },
            "char_filter": {
                "my_mappings_char_filter": {
                    "type": "mapping",
                    "mappings": [
                        ":) => _happy_",
                        ":( => _sad_"
                    ]
                }
            }
        }
    },
)
print(resp)
response = client.indices.create(
  index: 'my-index-000001',
  body: {
    settings: {
      analysis: {
        analyzer: {
          my_analyzer: {
            tokenizer: 'standard',
            char_filter: [
              'my_mappings_char_filter'
            ]
          }
        },
        char_filter: {
          my_mappings_char_filter: {
            type: 'mapping',
            mappings: [
              ':) => _happy_',
              ':( => _sad_'
            ]
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "my-index-000001",
  settings: {
    analysis: {
      analyzer: {
        my_analyzer: {
          tokenizer: "standard",
          char_filter: ["my_mappings_char_filter"],
        },
      },
      char_filter: {
        my_mappings_char_filter: {
          type: "mapping",
          mappings: [":) => _happy_", ":( => _sad_"],
        },
      },
    },
  },
});
console.log(response);
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "char_filter": [
            "my_mappings_char_filter"
          ]
        }
      },
      "char_filter": {
        "my_mappings_char_filter": {
          "type": "mapping",
          "mappings": [
            ":) => _happy_",
            ":( => _sad_"
          ]
        }
      }
    }
  }
}
The following analyze API request uses the custom my_mappings_char_filter to replace :( with _sad_ in the text I'm delighted about it :(.
resp = client.indices.analyze(
    index="my-index-000001",
    tokenizer="keyword",
    char_filter=[
        "my_mappings_char_filter"
    ],
    text="I'm delighted about it :(",
)
print(resp)
const response = await client.indices.analyze({
  index: "my-index-000001",
  tokenizer: "keyword",
  char_filter: ["my_mappings_char_filter"],
  text: "I'm delighted about it :(",
});
console.log(response);
GET /my-index-000001/_analyze
{
  "tokenizer": "keyword",
  "char_filter": [ "my_mappings_char_filter" ],
  "text": "I'm delighted about it :("
}
The filter produces the following text:
[ I'm delighted about it _sad_ ]
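To use the custom analyzer at index time, you would typically assign it to a text field in the index mapping. The following is a minimal sketch of that step; the comment field name is hypothetical and my-index-000003 is a fresh index rather than the one created above.

resp = client.indices.create(
    index="my-index-000003",
    settings={
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "standard",
                    "char_filter": ["my_mappings_char_filter"]
                }
            },
            "char_filter": {
                "my_mappings_char_filter": {
                    "type": "mapping",
                    "mappings": [":) => _happy_", ":( => _sad_"]
                }
            }
        }
    },
    mappings={
        "properties": {
            # Hypothetical field; values indexed here pass through my_analyzer,
            # so ":)" and ":(" are rewritten before tokenization.
            "comment": {"type": "text", "analyzer": "my_analyzer"}
        }
    },
)
print(resp)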