谓词脚本分词过滤器
谓词脚本分词过滤器
移除与所提供的谓词脚本不匹配的分词。该过滤器仅支持内联 Painless 脚本。脚本在 分析谓词上下文 中进行评估。
示例
以下 分析 API 请求使用 predicate_token_filter 过滤器,仅输出 the fox jumps the lazy dog 中长度超过三个字符的分词。
# Analyze text with a predicate_token_filter that keeps only tokens
# whose term is longer than three characters.
predicate_filter = {
    "type": "predicate_token_filter",
    "script": {"source": "\n token.term.length() > 3\n "},
}
resp = client.indices.analyze(
    tokenizer="whitespace",
    filter=[predicate_filter],
    text="the fox jumps the lazy dog",
)
print(resp)
# Analyze text with a predicate_token_filter that keeps only tokens
# whose term is longer than three characters.
predicate_filter = {
  type: 'predicate_token_filter',
  script: { source: "\n token.term.length() > 3\n " }
}
response = client.indices.analyze(
  body: {
    tokenizer: 'whitespace',
    filter: [predicate_filter],
    text: 'the fox jumps the lazy dog'
  }
)
puts response
// Analyze text with a predicate_token_filter that keeps only tokens
// whose term is longer than three characters.
const predicateFilter = {
  type: "predicate_token_filter",
  script: {
    source: "\n token.term.length() > 3\n ",
  },
};

const response = await client.indices.analyze({
  tokenizer: "whitespace",
  filter: [predicateFilter],
  text: "the fox jumps the lazy dog",
});
console.log(response);
GET /_analyze { "tokenizer": "whitespace", "filter": [ { "type": "predicate_token_filter", "script": { "source": """ token.term.length() > 3 """ } } ], "text": "the fox jumps the lazy dog" }
该过滤器生成以下分词。
[ jumps, lazy ]
API 响应包含每个输出分词的位置和偏移量。请注意,predicate_token_filter 过滤器不会更改分词的原始位置或偏移量。
可配置参数
自定义并添加到分析器
要自定义 predicate_token_filter 过滤器,请复制它以创建新自定义分词过滤器的基础。您可以使用其可配置参数修改该过滤器。
以下 创建索引 API 请求使用自定义 predicate_token_filter
过滤器 my_script_filter
配置新的 自定义分析器。
my_script_filter
过滤器会移除除 ALPHANUM
以外任何类型的分词。
# Create an index whose custom analyzer uses a predicate_token_filter
# that removes every token whose type does not contain ALPHANUM.
analysis_settings = {
    "analyzer": {
        "my_analyzer": {
            "tokenizer": "standard",
            "filter": ["my_script_filter"],
        }
    },
    "filter": {
        "my_script_filter": {
            "type": "predicate_token_filter",
            "script": {"source": "\n token.type.contains(\"ALPHANUM\")\n "},
        }
    },
}
resp = client.indices.create(
    index="my-index-000001",
    settings={"analysis": analysis_settings},
)
print(resp)
# Create an index whose custom analyzer uses a predicate_token_filter
# that removes every token whose type does not contain ALPHANUM.
script_filter = {
  type: 'predicate_token_filter',
  script: { source: "\n token.type.contains(\"ALPHANUM\")\n " }
}
response = client.indices.create(
  index: 'my-index-000001',
  body: {
    settings: {
      analysis: {
        analyzer: {
          my_analyzer: {
            tokenizer: 'standard',
            filter: ['my_script_filter']
          }
        },
        filter: { my_script_filter: script_filter }
      }
    }
  }
)
puts response
// Create an index whose custom analyzer uses a predicate_token_filter
// that removes every token whose type does not contain ALPHANUM.
const scriptFilter = {
  type: "predicate_token_filter",
  script: {
    source: '\n token.type.contains("ALPHANUM")\n ',
  },
};

const response = await client.indices.create({
  index: "my-index-000001",
  settings: {
    analysis: {
      analyzer: {
        my_analyzer: {
          tokenizer: "standard",
          filter: ["my_script_filter"],
        },
      },
      filter: {
        my_script_filter: scriptFilter,
      },
    },
  },
});
console.log(response);
PUT /my-index-000001 { "settings": { "analysis": { "analyzer": { "my_analyzer": { "tokenizer": "standard", "filter": [ "my_script_filter" ] } }, "filter": { "my_script_filter": { "type": "predicate_token_filter", "script": { "source": """ token.type.contains("ALPHANUM") """ } } } } } }