加权词元查询
编辑加权词元查询
自 8.15.0 版本起已弃用。
此查询已被 稀疏向量查询 取代,并将在未来的版本中移除。
此功能处于技术预览阶段,可能在将来的版本中发生更改或被移除。Elastic 将努力修复任何问题,但技术预览中的功能不受官方 GA 功能的支持 SLA 的约束。
加权词元查询需要一个词元-权重对列表,该列表随查询一同发送,而不是由自然语言处理模型计算得出。随后,这些词元-权重对将用于查询 稀疏向量 或 排序特征 字段。
当您想使用外部查询扩展模型或快速原型化更改而不重新索引新模型时,加权词元查询非常有用。
示例请求
编辑resp = client.search( query={ "weighted_tokens": { "query_expansion_field": { "tokens": { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": False } } } }, ) print(resp)
response = client.search( body: { query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false } } } } } ) puts response
const response = await client.search({ query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012, }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false, }, }, }, }, }); console.log(response);
POST _search { "query": { "weighted_tokens": { "query_expansion_field": { "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012}, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": false } } } } }
weighted_tokens
的顶级参数
编辑-
<tokens>
-
(必填,字典) 词元-权重对的字典。
-
pruning_config
-
(可选,对象) 可选的剪枝配置。如果启用,这将省略查询中不重要的词元以提高查询性能。默认:禁用。
<pruning_config>
的参数为-
tokens_freq_ratio_threshold
- (可选,整数) 频率超过指定字段中所有词元平均频率
tokens_freq_ratio_threshold
倍的词元被视为异常值并被剪枝。此值必须在 1 到 100 之间。默认值:5
。 -
tokens_weight_threshold
- (可选,浮点数) 权重小于
tokens_weight_threshold
的词元被视为不重要并被剪枝。此值必须在 0 到 1 之间。默认值:0.4
。 -
only_score_pruned_tokens
- (可选,布尔值) 如果为
true
,则仅将被剪枝掉的词元用于评分,并丢弃未被剪枝的词元。强烈建议在主查询中将此项设置为 false
,但在重评分查询中可以将其设置为 true
,以获得更相关的结果。默认值:false
。
tokens_freq_ratio_threshold
和 tokens_weight_threshold
的默认值是根据使用 ELSER 进行的测试选定的,在这些测试中这组取值的效果最佳。 -
-
带有剪枝配置和重评分的示例加权词元查询
以下示例向 weighted_tokens
查询添加剪枝配置。剪枝配置会识别出查询中不重要的词元并将其剪枝,以提高查询性能。
词元剪枝发生在分片级别。虽然通常各个分片会将相同的词元标记为不重要,但由于各分片的构成不同,这一点无法保证。因此,如果您在多分片索引上运行带有 pruning_config
的 weighted_tokens
查询,我们强烈建议添加一个 重评分过滤后的搜索结果 函数,并在其中使用最初从查询中被剪枝的词元。这将有助于缓解剪枝词元在分片级别的不一致,并在总体上提供更好的相关性。
resp = client.search( index="my-index", query={ "weighted_tokens": { "query_expansion_field": { "tokens": { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": False } } } }, rescore={ "window_size": 100, "query": { "rescore_query": { "weighted_tokens": { "query_expansion_field": { "tokens": { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": True } } } } } }, ) print(resp)
response = client.search( index: 'my-index', body: { query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false } } } }, rescore: { window_size: 100, query: { rescore_query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: true } } } } } } } ) puts response
const response = await client.search({ index: "my-index", query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012, }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false, }, }, }, }, rescore: { window_size: 100, query: { rescore_query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012, }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: true, }, }, }, }, }, }, }); console.log(response);
GET my-index/_search { "query":{ "weighted_tokens": { "query_expansion_field": { "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012}, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": false } } } }, "rescore": { "window_size": 100, "query": { "rescore_query": { "weighted_tokens": { "query_expansion_field": { "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012}, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": true } } } } } } }