加权词条查询
编辑加权词条查询
在 8.15.0 中已弃用。
此查询已被 稀疏向量 查询取代,并将在未来的版本中移除。
此功能为技术预览版,可能会在未来版本中更改或删除。Elastic 将努力修复任何问题,但技术预览版中的功能不受官方 GA 功能的支持 SLA 的约束。
加权词条查询需要一个词条-权重对的列表,该列表随查询一起发送,而不是由自然语言处理模型计算得出。随后,这些词条-权重对会被用于查询 稀疏向量 或 排名特征 字段。
当您想使用外部查询扩展模型,或者想在无需为新模型重新索引的情况下快速对更改进行原型验证时,加权词条查询非常有用。
请求示例
resp = client.search( query={ "weighted_tokens": { "query_expansion_field": { "tokens": { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": False } } } }, ) print(resp)
response = client.search( body: { query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false } } } } } ) puts response
const response = await client.search({ query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012, }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false, }, }, }, }, }); console.log(response);
POST _search { "query": { "weighted_tokens": { "query_expansion_field": { "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012}, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": false } } } } }
weighted_tokens
的顶层参数
-
<tokens>
-
(必需,字典) 一个词条-权重对的字典。
-
pruning_config
-
(可选,对象) 可选的修剪配置。如果启用,这将从查询中省略不重要的词条,以提高查询性能。默认值:禁用。
<pruning_config>
的参数为:
-
tokens_freq_ratio_threshold
- (可选,整数) 如果某个词条的频率超过指定字段中所有词条平均频率的
tokens_freq_ratio_threshold
倍,则该词条被视为异常值并被修剪。此值必须介于 1 和 100 之间。默认值:5。
-
tokens_weight_threshold
- (可选,浮点数) 权重小于
tokens_weight_threshold
的词条被认为是不重要的并被修剪。此值必须介于 0 和 1 之间。默认值:0.4。
-
only_score_pruned_tokens
- (可选,布尔值) 如果为
true
,则只将被修剪掉的词条用于评分,并丢弃未被修剪的词条。强烈建议在主查询中将其设置为 false
,但在重新评分查询中可以将其设置为 true
,以获得更相关的结果。默认值:false。
tokens_freq_ratio_threshold
和 tokens_weight_threshold
的默认值是根据使用 ELSER 进行的测试选定的,这些取值在测试中取得了最佳结果。
-
带有修剪配置和重新评分的加权词条查询示例
以下示例向 weighted_tokens
查询添加了修剪配置。修剪配置用于标识应从查询中修剪掉的不重要词条,以提高查询性能。
词条修剪发生在分片级别。虽然这通常会使各个分片将相同的词条标记为不重要,但由于每个分片的组成不同,这一点无法得到保证。因此,如果您在多分片索引上使用 pruning_config
运行 weighted_tokens
查询,我们强烈建议添加一个 对过滤的搜索结果重新评分 函数,并在其中包含最初从查询中被修剪掉的词条。这将有助于缓解词条修剪在分片级别上的不一致,并提供更好的整体相关性。
resp = client.search( index="my-index", query={ "weighted_tokens": { "query_expansion_field": { "tokens": { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": False } } } }, rescore={ "window_size": 100, "query": { "rescore_query": { "weighted_tokens": { "query_expansion_field": { "tokens": { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": True } } } } } }, ) print(resp)
response = client.search( index: 'my-index', body: { query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false } } } }, rescore: { window_size: 100, query: { rescore_query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012 }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: true } } } } } } } ) puts response
const response = await client.search({ index: "my-index", query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012, }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: false, }, }, }, }, rescore: { window_size: 100, query: { rescore_query: { weighted_tokens: { query_expansion_field: { tokens: { "2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012, }, pruning_config: { tokens_freq_ratio_threshold: 5, tokens_weight_threshold: 0.4, only_score_pruned_tokens: true, }, }, }, }, }, }, }); console.log(response);
GET my-index/_search { "query":{ "weighted_tokens": { "query_expansion_field": { "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012}, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": false } } } }, "rescore": { "window_size": 100, "query": { "rescore_query": { "weighted_tokens": { "query_expansion_field": { "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012}, "pruning_config": { "tokens_freq_ratio_threshold": 5, "tokens_weight_threshold": 0.4, "only_score_pruned_tokens": true } } } } } } }