加权词条查询

编辑

在 8.15.0 中已弃用。

此查询已被 稀疏向量 查询取代,并将在未来的版本中移除。

此功能为技术预览版,可能会在未来版本中更改或删除。Elastic 将努力修复任何问题,但技术预览版中的功能不受官方 GA 功能的支持 SLA 的约束。

加权词条查询需要一个词条-权重对的列表,该列表随查询一起发送,而不是由自然语言处理模型计算得出。然后,这些词条-权重对将用于查询 稀疏向量 或 排名特征 字段。

当您想使用外部查询扩展模型,或者快速原型化更改而无需重新索引新模型时,加权词条查询非常有用。

请求示例

编辑
resp = client.search(
    query={
        "weighted_tokens": {
            "query_expansion_field": {
                "tokens": {
                    "2161": 0.4679,
                    "2621": 0.307,
                    "2782": 0.1299,
                    "2851": 0.1056,
                    "3088": 0.3041,
                    "3376": 0.1038,
                    "3467": 0.4873,
                    "3684": 0.8958,
                    "4380": 0.334,
                    "4542": 0.4636,
                    "4633": 2.2805,
                    "4785": 1.2628,
                    "4860": 1.0655,
                    "5133": 1.0709,
                    "7139": 1.0016,
                    "7224": 0.2486,
                    "7387": 0.0985,
                    "7394": 0.0542,
                    "8915": 0.369,
                    "9156": 2.8947,
                    "10505": 0.2771,
                    "11464": 0.3996,
                    "13525": 0.0088,
                    "14178": 0.8161,
                    "16893": 0.1376,
                    "17851": 1.5348,
                    "19939": 0.6012
                },
                "pruning_config": {
                    "tokens_freq_ratio_threshold": 5,
                    "tokens_weight_threshold": 0.4,
                    "only_score_pruned_tokens": False
                }
            }
        }
    },
)
print(resp)
response = client.search(
  body: {
    query: {
      weighted_tokens: {
        query_expansion_field: {
          tokens: {
            "2161": 0.4679,
            "2621": 0.307,
            "2782": 0.1299,
            "2851": 0.1056,
            "3088": 0.3041,
            "3376": 0.1038,
            "3467": 0.4873,
            "3684": 0.8958,
            "4380": 0.334,
            "4542": 0.4636,
            "4633": 2.2805,
            "4785": 1.2628,
            "4860": 1.0655,
            "5133": 1.0709,
            "7139": 1.0016,
            "7224": 0.2486,
            "7387": 0.0985,
            "7394": 0.0542,
            "8915": 0.369,
            "9156": 2.8947,
            "10505": 0.2771,
            "11464": 0.3996,
            "13525": 0.0088,
            "14178": 0.8161,
            "16893": 0.1376,
            "17851": 1.5348,
            "19939": 0.6012
          },
          pruning_config: {
            tokens_freq_ratio_threshold: 5,
            tokens_weight_threshold: 0.4,
            only_score_pruned_tokens: false
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  query: {
    weighted_tokens: {
      query_expansion_field: {
        tokens: {
          "2161": 0.4679,
          "2621": 0.307,
          "2782": 0.1299,
          "2851": 0.1056,
          "3088": 0.3041,
          "3376": 0.1038,
          "3467": 0.4873,
          "3684": 0.8958,
          "4380": 0.334,
          "4542": 0.4636,
          "4633": 2.2805,
          "4785": 1.2628,
          "4860": 1.0655,
          "5133": 1.0709,
          "7139": 1.0016,
          "7224": 0.2486,
          "7387": 0.0985,
          "7394": 0.0542,
          "8915": 0.369,
          "9156": 2.8947,
          "10505": 0.2771,
          "11464": 0.3996,
          "13525": 0.0088,
          "14178": 0.8161,
          "16893": 0.1376,
          "17851": 1.5348,
          "19939": 0.6012,
        },
        pruning_config: {
          tokens_freq_ratio_threshold: 5,
          tokens_weight_threshold: 0.4,
          only_score_pruned_tokens: false,
        },
      },
    },
  },
});
console.log(response);
POST _search
{
  "query": {
    "weighted_tokens": {
      "query_expansion_field": {
        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
        "pruning_config": {
          "tokens_freq_ratio_threshold": 5,
          "tokens_weight_threshold": 0.4,
          "only_score_pruned_tokens": false
        }
      }
    }
  }
}

weighted_tokens 的顶层参数

编辑
<tokens>

(必需,字典) 一个词条-权重对的字典。

pruning_config

(可选,对象) 可选的修剪配置。如果启用,这将从查询中省略不重要的词条,以提高查询性能。默认值:禁用。

<pruning_config> 的参数如下:

tokens_freq_ratio_threshold
(可选,整数) 如果某个词条的频率超过指定字段中所有词条平均频率的 tokens_freq_ratio_threshold 倍,则该词条被视为异常值并被修剪。此值必须介于 1 和 100 之间。默认值:5
tokens_weight_threshold
(可选,浮点数) 权重小于 tokens_weight_threshold 的词条被认为是不重要的并被修剪。此值必须介于 0 和 1 之间。默认值:0.4
only_score_pruned_tokens
(可选,布尔值) 如果为 true,则只将被修剪掉的词条用于评分,并丢弃未被修剪的词条。强烈建议在主查询中将此参数设置为 false;但在重新评分查询中可以将其设置为 true,以获得更相关的结果。默认值:false

tokens_freq_ratio_threshold 和 tokens_weight_threshold 的默认值是根据使用 ELSER 进行的测试选定的,这些取值在测试中取得了最佳结果。

带有修剪配置和重新评分的加权词条查询示例

编辑

以下示例向 weighted_tokens 查询添加了修剪配置。修剪配置用于识别并从查询中修剪不重要的词条,以提高查询性能。

词条修剪发生在分片级别。虽然通常情况下各分片会将相同的词条标记为不重要,但由于每个分片的数据构成不同,这一点无法得到保证。因此,如果您在多分片索引上运行带有 pruning_config 的 weighted_tokens 查询,我们强烈建议添加一个 对过滤的搜索结果重新评分 函数,并在其中使用最初从查询中被修剪掉的词条。这有助于缓解被修剪词条在分片级别的不一致,并提供更好的整体相关性。

resp = client.search(
    index="my-index",
    query={
        "weighted_tokens": {
            "query_expansion_field": {
                "tokens": {
                    "2161": 0.4679,
                    "2621": 0.307,
                    "2782": 0.1299,
                    "2851": 0.1056,
                    "3088": 0.3041,
                    "3376": 0.1038,
                    "3467": 0.4873,
                    "3684": 0.8958,
                    "4380": 0.334,
                    "4542": 0.4636,
                    "4633": 2.2805,
                    "4785": 1.2628,
                    "4860": 1.0655,
                    "5133": 1.0709,
                    "7139": 1.0016,
                    "7224": 0.2486,
                    "7387": 0.0985,
                    "7394": 0.0542,
                    "8915": 0.369,
                    "9156": 2.8947,
                    "10505": 0.2771,
                    "11464": 0.3996,
                    "13525": 0.0088,
                    "14178": 0.8161,
                    "16893": 0.1376,
                    "17851": 1.5348,
                    "19939": 0.6012
                },
                "pruning_config": {
                    "tokens_freq_ratio_threshold": 5,
                    "tokens_weight_threshold": 0.4,
                    "only_score_pruned_tokens": False
                }
            }
        }
    },
    rescore={
        "window_size": 100,
        "query": {
            "rescore_query": {
                "weighted_tokens": {
                    "query_expansion_field": {
                        "tokens": {
                            "2161": 0.4679,
                            "2621": 0.307,
                            "2782": 0.1299,
                            "2851": 0.1056,
                            "3088": 0.3041,
                            "3376": 0.1038,
                            "3467": 0.4873,
                            "3684": 0.8958,
                            "4380": 0.334,
                            "4542": 0.4636,
                            "4633": 2.2805,
                            "4785": 1.2628,
                            "4860": 1.0655,
                            "5133": 1.0709,
                            "7139": 1.0016,
                            "7224": 0.2486,
                            "7387": 0.0985,
                            "7394": 0.0542,
                            "8915": 0.369,
                            "9156": 2.8947,
                            "10505": 0.2771,
                            "11464": 0.3996,
                            "13525": 0.0088,
                            "14178": 0.8161,
                            "16893": 0.1376,
                            "17851": 1.5348,
                            "19939": 0.6012
                        },
                        "pruning_config": {
                            "tokens_freq_ratio_threshold": 5,
                            "tokens_weight_threshold": 0.4,
                            "only_score_pruned_tokens": True
                        }
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'my-index',
  body: {
    query: {
      weighted_tokens: {
        query_expansion_field: {
          tokens: {
            "2161": 0.4679,
            "2621": 0.307,
            "2782": 0.1299,
            "2851": 0.1056,
            "3088": 0.3041,
            "3376": 0.1038,
            "3467": 0.4873,
            "3684": 0.8958,
            "4380": 0.334,
            "4542": 0.4636,
            "4633": 2.2805,
            "4785": 1.2628,
            "4860": 1.0655,
            "5133": 1.0709,
            "7139": 1.0016,
            "7224": 0.2486,
            "7387": 0.0985,
            "7394": 0.0542,
            "8915": 0.369,
            "9156": 2.8947,
            "10505": 0.2771,
            "11464": 0.3996,
            "13525": 0.0088,
            "14178": 0.8161,
            "16893": 0.1376,
            "17851": 1.5348,
            "19939": 0.6012
          },
          pruning_config: {
            tokens_freq_ratio_threshold: 5,
            tokens_weight_threshold: 0.4,
            only_score_pruned_tokens: false
          }
        }
      }
    },
    rescore: {
      window_size: 100,
      query: {
        rescore_query: {
          weighted_tokens: {
            query_expansion_field: {
              tokens: {
                "2161": 0.4679,
                "2621": 0.307,
                "2782": 0.1299,
                "2851": 0.1056,
                "3088": 0.3041,
                "3376": 0.1038,
                "3467": 0.4873,
                "3684": 0.8958,
                "4380": 0.334,
                "4542": 0.4636,
                "4633": 2.2805,
                "4785": 1.2628,
                "4860": 1.0655,
                "5133": 1.0709,
                "7139": 1.0016,
                "7224": 0.2486,
                "7387": 0.0985,
                "7394": 0.0542,
                "8915": 0.369,
                "9156": 2.8947,
                "10505": 0.2771,
                "11464": 0.3996,
                "13525": 0.0088,
                "14178": 0.8161,
                "16893": 0.1376,
                "17851": 1.5348,
                "19939": 0.6012
              },
              pruning_config: {
                tokens_freq_ratio_threshold: 5,
                tokens_weight_threshold: 0.4,
                only_score_pruned_tokens: true
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "my-index",
  query: {
    weighted_tokens: {
      query_expansion_field: {
        tokens: {
          "2161": 0.4679,
          "2621": 0.307,
          "2782": 0.1299,
          "2851": 0.1056,
          "3088": 0.3041,
          "3376": 0.1038,
          "3467": 0.4873,
          "3684": 0.8958,
          "4380": 0.334,
          "4542": 0.4636,
          "4633": 2.2805,
          "4785": 1.2628,
          "4860": 1.0655,
          "5133": 1.0709,
          "7139": 1.0016,
          "7224": 0.2486,
          "7387": 0.0985,
          "7394": 0.0542,
          "8915": 0.369,
          "9156": 2.8947,
          "10505": 0.2771,
          "11464": 0.3996,
          "13525": 0.0088,
          "14178": 0.8161,
          "16893": 0.1376,
          "17851": 1.5348,
          "19939": 0.6012,
        },
        pruning_config: {
          tokens_freq_ratio_threshold: 5,
          tokens_weight_threshold: 0.4,
          only_score_pruned_tokens: false,
        },
      },
    },
  },
  rescore: {
    window_size: 100,
    query: {
      rescore_query: {
        weighted_tokens: {
          query_expansion_field: {
            tokens: {
              "2161": 0.4679,
              "2621": 0.307,
              "2782": 0.1299,
              "2851": 0.1056,
              "3088": 0.3041,
              "3376": 0.1038,
              "3467": 0.4873,
              "3684": 0.8958,
              "4380": 0.334,
              "4542": 0.4636,
              "4633": 2.2805,
              "4785": 1.2628,
              "4860": 1.0655,
              "5133": 1.0709,
              "7139": 1.0016,
              "7224": 0.2486,
              "7387": 0.0985,
              "7394": 0.0542,
              "8915": 0.369,
              "9156": 2.8947,
              "10505": 0.2771,
              "11464": 0.3996,
              "13525": 0.0088,
              "14178": 0.8161,
              "16893": 0.1376,
              "17851": 1.5348,
              "19939": 0.6012,
            },
            pruning_config: {
              tokens_freq_ratio_threshold: 5,
              tokens_weight_threshold: 0.4,
              only_score_pruned_tokens: true,
            },
          },
        },
      },
    },
  },
});
console.log(response);
GET my-index/_search
{
   "query":{
      "weighted_tokens": {
      "query_expansion_field": {
        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
        "pruning_config": {
          "tokens_freq_ratio_threshold": 5,
          "tokens_weight_threshold": 0.4,
          "only_score_pruned_tokens": false
        }
      }
    }
   },
   "rescore": {
      "window_size": 100,
      "query": {
         "rescore_query": {
            "weighted_tokens": {
              "query_expansion_field": {
                "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
                "pruning_config": {
                  "tokens_freq_ratio_threshold": 5,
                  "tokens_weight_threshold": 0.4,
                  "only_score_pruned_tokens": true
                }
              }
            }
         }
      }
   }
}