加权词元查询

编辑

自 8.15.0 版本起已弃用。

此查询已被 稀疏向量查询 取代,并将在未来的版本中移除。

此功能处于技术预览阶段,可能在将来的版本中发生更改或被移除。Elastic 将努力修复任何问题,但技术预览中的功能不受官方 GA 功能的支持 SLA 的约束。

加权词元查询需要一个词元-权重对列表,这些词元-权重对随查询一起发送,而不是由自然语言处理模型计算得出。然后,这些词元-权重对将用于针对 稀疏向量 或 排序特征 字段的查询。

当您想使用外部查询扩展模型或快速原型化更改而不重新索引新模型时,加权词元查询非常有用。

示例请求

编辑
resp = client.search(
    query={
        "weighted_tokens": {
            "query_expansion_field": {
                "tokens": {
                    "2161": 0.4679,
                    "2621": 0.307,
                    "2782": 0.1299,
                    "2851": 0.1056,
                    "3088": 0.3041,
                    "3376": 0.1038,
                    "3467": 0.4873,
                    "3684": 0.8958,
                    "4380": 0.334,
                    "4542": 0.4636,
                    "4633": 2.2805,
                    "4785": 1.2628,
                    "4860": 1.0655,
                    "5133": 1.0709,
                    "7139": 1.0016,
                    "7224": 0.2486,
                    "7387": 0.0985,
                    "7394": 0.0542,
                    "8915": 0.369,
                    "9156": 2.8947,
                    "10505": 0.2771,
                    "11464": 0.3996,
                    "13525": 0.0088,
                    "14178": 0.8161,
                    "16893": 0.1376,
                    "17851": 1.5348,
                    "19939": 0.6012
                },
                "pruning_config": {
                    "tokens_freq_ratio_threshold": 5,
                    "tokens_weight_threshold": 0.4,
                    "only_score_pruned_tokens": False
                }
            }
        }
    },
)
print(resp)
response = client.search(
  body: {
    query: {
      weighted_tokens: {
        query_expansion_field: {
          tokens: {
            "2161": 0.4679,
            "2621": 0.307,
            "2782": 0.1299,
            "2851": 0.1056,
            "3088": 0.3041,
            "3376": 0.1038,
            "3467": 0.4873,
            "3684": 0.8958,
            "4380": 0.334,
            "4542": 0.4636,
            "4633": 2.2805,
            "4785": 1.2628,
            "4860": 1.0655,
            "5133": 1.0709,
            "7139": 1.0016,
            "7224": 0.2486,
            "7387": 0.0985,
            "7394": 0.0542,
            "8915": 0.369,
            "9156": 2.8947,
            "10505": 0.2771,
            "11464": 0.3996,
            "13525": 0.0088,
            "14178": 0.8161,
            "16893": 0.1376,
            "17851": 1.5348,
            "19939": 0.6012
          },
          pruning_config: {
            tokens_freq_ratio_threshold: 5,
            tokens_weight_threshold: 0.4,
            only_score_pruned_tokens: false
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  query: {
    weighted_tokens: {
      query_expansion_field: {
        tokens: {
          "2161": 0.4679,
          "2621": 0.307,
          "2782": 0.1299,
          "2851": 0.1056,
          "3088": 0.3041,
          "3376": 0.1038,
          "3467": 0.4873,
          "3684": 0.8958,
          "4380": 0.334,
          "4542": 0.4636,
          "4633": 2.2805,
          "4785": 1.2628,
          "4860": 1.0655,
          "5133": 1.0709,
          "7139": 1.0016,
          "7224": 0.2486,
          "7387": 0.0985,
          "7394": 0.0542,
          "8915": 0.369,
          "9156": 2.8947,
          "10505": 0.2771,
          "11464": 0.3996,
          "13525": 0.0088,
          "14178": 0.8161,
          "16893": 0.1376,
          "17851": 1.5348,
          "19939": 0.6012,
        },
        pruning_config: {
          tokens_freq_ratio_threshold: 5,
          tokens_weight_threshold: 0.4,
          only_score_pruned_tokens: false,
        },
      },
    },
  },
});
console.log(response);
POST _search
{
  "query": {
    "weighted_tokens": {
      "query_expansion_field": {
        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
        "pruning_config": {
          "tokens_freq_ratio_threshold": 5,
          "tokens_weight_threshold": 0.4,
          "only_score_pruned_tokens": false
        }
      }
    }
  }
}

weighted_tokens 的顶级参数

编辑
<tokens>

(必填,字典) 词元-权重对的字典。

pruning_config

(可选,对象) 可选的剪枝配置。如果启用,这将省略查询中不重要的词元以提高查询性能。默认:禁用。

<pruning_config> 的参数如下:

tokens_freq_ratio_threshold
(可选,整数) 频率超过指定字段中所有词元平均频率 tokens_freq_ratio_threshold 倍的词元被视为异常值并被剪枝。此值必须在 1 到 100 之间。默认值:5
tokens_weight_threshold
(可选,浮点数) 权重小于 tokens_weight_threshold 的词元被视为不重要并被剪枝。此值必须在 0 到 1 之间。默认值:0.4
only_score_pruned_tokens
(可选,布尔值) 如果为 true,我们只将剪枝后的词元输入评分,并丢弃未剪枝的词元。强烈建议将此设置为 false 用于主查询,但这可以设置为 true 用于重评分查询以获得更相关的结果。默认值:false

tokens_freq_ratio_threshold 和 tokens_weight_threshold 的默认值是根据使用 ELSER 进行的测试选定的,这些取值在测试中取得了最佳结果。

带有剪枝配置和重评分的示例加权词元查询

编辑

以下示例向 weighted_tokens 查询添加剪枝配置。剪枝配置用于识别不重要的词元并将其从查询中剪枝,以提高查询性能。

词元剪枝发生在分片级别。虽然这通常会使相同的词元在各个分片中都被标记为不重要,但由于各分片的构成不同,这一点并不能得到保证。因此,如果您在多分片索引上运行带有 pruning_config 的 weighted_tokens 查询,我们强烈建议添加一个 重评分过滤后的搜索结果 函数,并在其中使用最初从查询中被剪枝的词元。这将有助于减轻剪枝词元在分片级别的不一致性,并总体上提供更好的相关性。

resp = client.search(
    index="my-index",
    query={
        "weighted_tokens": {
            "query_expansion_field": {
                "tokens": {
                    "2161": 0.4679,
                    "2621": 0.307,
                    "2782": 0.1299,
                    "2851": 0.1056,
                    "3088": 0.3041,
                    "3376": 0.1038,
                    "3467": 0.4873,
                    "3684": 0.8958,
                    "4380": 0.334,
                    "4542": 0.4636,
                    "4633": 2.2805,
                    "4785": 1.2628,
                    "4860": 1.0655,
                    "5133": 1.0709,
                    "7139": 1.0016,
                    "7224": 0.2486,
                    "7387": 0.0985,
                    "7394": 0.0542,
                    "8915": 0.369,
                    "9156": 2.8947,
                    "10505": 0.2771,
                    "11464": 0.3996,
                    "13525": 0.0088,
                    "14178": 0.8161,
                    "16893": 0.1376,
                    "17851": 1.5348,
                    "19939": 0.6012
                },
                "pruning_config": {
                    "tokens_freq_ratio_threshold": 5,
                    "tokens_weight_threshold": 0.4,
                    "only_score_pruned_tokens": False
                }
            }
        }
    },
    rescore={
        "window_size": 100,
        "query": {
            "rescore_query": {
                "weighted_tokens": {
                    "query_expansion_field": {
                        "tokens": {
                            "2161": 0.4679,
                            "2621": 0.307,
                            "2782": 0.1299,
                            "2851": 0.1056,
                            "3088": 0.3041,
                            "3376": 0.1038,
                            "3467": 0.4873,
                            "3684": 0.8958,
                            "4380": 0.334,
                            "4542": 0.4636,
                            "4633": 2.2805,
                            "4785": 1.2628,
                            "4860": 1.0655,
                            "5133": 1.0709,
                            "7139": 1.0016,
                            "7224": 0.2486,
                            "7387": 0.0985,
                            "7394": 0.0542,
                            "8915": 0.369,
                            "9156": 2.8947,
                            "10505": 0.2771,
                            "11464": 0.3996,
                            "13525": 0.0088,
                            "14178": 0.8161,
                            "16893": 0.1376,
                            "17851": 1.5348,
                            "19939": 0.6012
                        },
                        "pruning_config": {
                            "tokens_freq_ratio_threshold": 5,
                            "tokens_weight_threshold": 0.4,
                            "only_score_pruned_tokens": True
                        }
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'my-index',
  body: {
    query: {
      weighted_tokens: {
        query_expansion_field: {
          tokens: {
            "2161": 0.4679,
            "2621": 0.307,
            "2782": 0.1299,
            "2851": 0.1056,
            "3088": 0.3041,
            "3376": 0.1038,
            "3467": 0.4873,
            "3684": 0.8958,
            "4380": 0.334,
            "4542": 0.4636,
            "4633": 2.2805,
            "4785": 1.2628,
            "4860": 1.0655,
            "5133": 1.0709,
            "7139": 1.0016,
            "7224": 0.2486,
            "7387": 0.0985,
            "7394": 0.0542,
            "8915": 0.369,
            "9156": 2.8947,
            "10505": 0.2771,
            "11464": 0.3996,
            "13525": 0.0088,
            "14178": 0.8161,
            "16893": 0.1376,
            "17851": 1.5348,
            "19939": 0.6012
          },
          pruning_config: {
            tokens_freq_ratio_threshold: 5,
            tokens_weight_threshold: 0.4,
            only_score_pruned_tokens: false
          }
        }
      }
    },
    rescore: {
      window_size: 100,
      query: {
        rescore_query: {
          weighted_tokens: {
            query_expansion_field: {
              tokens: {
                "2161": 0.4679,
                "2621": 0.307,
                "2782": 0.1299,
                "2851": 0.1056,
                "3088": 0.3041,
                "3376": 0.1038,
                "3467": 0.4873,
                "3684": 0.8958,
                "4380": 0.334,
                "4542": 0.4636,
                "4633": 2.2805,
                "4785": 1.2628,
                "4860": 1.0655,
                "5133": 1.0709,
                "7139": 1.0016,
                "7224": 0.2486,
                "7387": 0.0985,
                "7394": 0.0542,
                "8915": 0.369,
                "9156": 2.8947,
                "10505": 0.2771,
                "11464": 0.3996,
                "13525": 0.0088,
                "14178": 0.8161,
                "16893": 0.1376,
                "17851": 1.5348,
                "19939": 0.6012
              },
              pruning_config: {
                tokens_freq_ratio_threshold: 5,
                tokens_weight_threshold: 0.4,
                only_score_pruned_tokens: true
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "my-index",
  query: {
    weighted_tokens: {
      query_expansion_field: {
        tokens: {
          "2161": 0.4679,
          "2621": 0.307,
          "2782": 0.1299,
          "2851": 0.1056,
          "3088": 0.3041,
          "3376": 0.1038,
          "3467": 0.4873,
          "3684": 0.8958,
          "4380": 0.334,
          "4542": 0.4636,
          "4633": 2.2805,
          "4785": 1.2628,
          "4860": 1.0655,
          "5133": 1.0709,
          "7139": 1.0016,
          "7224": 0.2486,
          "7387": 0.0985,
          "7394": 0.0542,
          "8915": 0.369,
          "9156": 2.8947,
          "10505": 0.2771,
          "11464": 0.3996,
          "13525": 0.0088,
          "14178": 0.8161,
          "16893": 0.1376,
          "17851": 1.5348,
          "19939": 0.6012,
        },
        pruning_config: {
          tokens_freq_ratio_threshold: 5,
          tokens_weight_threshold: 0.4,
          only_score_pruned_tokens: false,
        },
      },
    },
  },
  rescore: {
    window_size: 100,
    query: {
      rescore_query: {
        weighted_tokens: {
          query_expansion_field: {
            tokens: {
              "2161": 0.4679,
              "2621": 0.307,
              "2782": 0.1299,
              "2851": 0.1056,
              "3088": 0.3041,
              "3376": 0.1038,
              "3467": 0.4873,
              "3684": 0.8958,
              "4380": 0.334,
              "4542": 0.4636,
              "4633": 2.2805,
              "4785": 1.2628,
              "4860": 1.0655,
              "5133": 1.0709,
              "7139": 1.0016,
              "7224": 0.2486,
              "7387": 0.0985,
              "7394": 0.0542,
              "8915": 0.369,
              "9156": 2.8947,
              "10505": 0.2771,
              "11464": 0.3996,
              "13525": 0.0088,
              "14178": 0.8161,
              "16893": 0.1376,
              "17851": 1.5348,
              "19939": 0.6012,
            },
            pruning_config: {
              tokens_freq_ratio_threshold: 5,
              tokens_weight_threshold: 0.4,
              only_score_pruned_tokens: true,
            },
          },
        },
      },
    },
  },
});
console.log(response);
GET my-index/_search
{
   "query":{
      "weighted_tokens": {
      "query_expansion_field": {
        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
        "pruning_config": {
          "tokens_freq_ratio_threshold": 5,
          "tokens_weight_threshold": 0.4,
          "only_score_pruned_tokens": false
        }
      }
    }
   },
   "rescore": {
      "window_size": 100,
      "query": {
         "rescore_query": {
            "weighted_tokens": {
              "query_expansion_field": {
                "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
                "pruning_config": {
                  "tokens_freq_ratio_threshold": 5,
                  "tokens_weight_threshold": 0.4,
                  "only_score_pruned_tokens": true
                }
              }
            }
         }
      }
   }
}