过滤搜索结果
编辑过滤搜索结果
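你可以使用两种方法来过滤搜索结果:
- 使用带有 filter 子句的布尔(bool)查询。搜索请求会将布尔过滤器同时应用于搜索命中结果和聚合。
- 使用搜索 API 的 post_filter 参数。搜索请求仅将后置过滤器应用于搜索命中结果,而不应用于聚合。你可以使用后置过滤器基于更广泛的结果集计算聚合,然后进一步缩小搜索结果的范围。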
后置过滤器
当你使用 post_filter
参数来过滤搜索结果时,搜索命中结果会在聚合计算之后被过滤。后置过滤器对聚合结果没有影响。
例如,你正在销售具有以下属性的衬衫
resp = client.indices.create( index="shirts", mappings={ "properties": { "brand": { "type": "keyword" }, "color": { "type": "keyword" }, "model": { "type": "keyword" } } }, ) print(resp) resp1 = client.index( index="shirts", id="1", refresh=True, document={ "brand": "gucci", "color": "red", "model": "slim" }, ) print(resp1)
response = client.indices.create( index: 'shirts', body: { mappings: { properties: { brand: { type: 'keyword' }, color: { type: 'keyword' }, model: { type: 'keyword' } } } } ) puts response response = client.index( index: 'shirts', id: 1, refresh: true, body: { brand: 'gucci', color: 'red', model: 'slim' } ) puts response
const response = await client.indices.create({ index: "shirts", mappings: { properties: { brand: { type: "keyword", }, color: { type: "keyword", }, model: { type: "keyword", }, }, }, }); console.log(response); const response1 = await client.index({ index: "shirts", id: 1, refresh: "true", document: { brand: "gucci", color: "red", model: "slim", }, }); console.log(response1);
PUT /shirts { "mappings": { "properties": { "brand": { "type": "keyword"}, "color": { "type": "keyword"}, "model": { "type": "keyword"} } } } PUT /shirts/_doc/1?refresh { "brand": "gucci", "color": "red", "model": "slim" }
假设用户指定了两个过滤器
color:red
和 brand:gucci
。你只想在搜索结果中向他们展示 Gucci 制造的红色衬衫。通常,你可以使用bool
查询来完成此操作
resp = client.search( index="shirts", query={ "bool": { "filter": [ { "term": { "color": "red" } }, { "term": { "brand": "gucci" } } ] } }, ) print(resp)
response = client.search( index: 'shirts', body: { query: { bool: { filter: [ { term: { color: 'red' } }, { term: { brand: 'gucci' } } ] } } } ) puts response
const response = await client.search({ index: "shirts", query: { bool: { filter: [ { term: { color: "red", }, }, { term: { brand: "gucci", }, }, ], }, }, }); console.log(response);
GET /shirts/_search { "query": { "bool": { "filter": [ { "term": { "color": "red" }}, { "term": { "brand": "gucci" }} ] } } }
但是,你也希望使用分面导航来显示用户可以点击的其他选项的列表。也许你有一个 model
字段,可以让用户将他们的搜索结果限制为红色的 Gucci T 恤
或 正装衬衫
。
这可以通过terms
聚合来完成
resp = client.search( index="shirts", query={ "bool": { "filter": [ { "term": { "color": "red" } }, { "term": { "brand": "gucci" } } ] } }, aggs={ "models": { "terms": { "field": "model" } } }, ) print(resp)
response = client.search( index: 'shirts', body: { query: { bool: { filter: [ { term: { color: 'red' } }, { term: { brand: 'gucci' } } ] } }, aggregations: { models: { terms: { field: 'model' } } } } ) puts response
const response = await client.search({ index: "shirts", query: { bool: { filter: [ { term: { color: "red", }, }, { term: { brand: "gucci", }, }, ], }, }, aggs: { models: { terms: { field: "model", }, }, }, }); console.log(response);
GET /shirts/_search { "query": { "bool": { "filter": [ { "term": { "color": "red" }}, { "term": { "brand": "gucci" }} ] } }, "aggs": { "models": { "terms": { "field": "model" } } } }
但是,你可能还想告诉用户,有多少 Gucci 衬衫在其他颜色中可用。如果你只是在 color
字段上添加一个 terms
聚合,你只会得到 red
颜色,因为你的查询只返回 Gucci 的红色衬衫。
相反,你希望在聚合期间包含所有颜色的衬衫,然后仅将 colors
过滤器应用于搜索结果。这就是 post_filter
的目的
resp = client.search( index="shirts", query={ "bool": { "filter": { "term": { "brand": "gucci" } } } }, aggs={ "colors": { "terms": { "field": "color" } }, "color_red": { "filter": { "term": { "color": "red" } }, "aggs": { "models": { "terms": { "field": "model" } } } } }, post_filter={ "term": { "color": "red" } }, ) print(resp)
response = client.search( index: 'shirts', body: { query: { bool: { filter: { term: { brand: 'gucci' } } } }, aggregations: { colors: { terms: { field: 'color' } }, color_red: { filter: { term: { color: 'red' } }, aggregations: { models: { terms: { field: 'model' } } } } }, post_filter: { term: { color: 'red' } } } ) puts response
const response = await client.search({ index: "shirts", query: { bool: { filter: { term: { brand: "gucci", }, }, }, }, aggs: { colors: { terms: { field: "color", }, }, color_red: { filter: { term: { color: "red", }, }, aggs: { models: { terms: { field: "model", }, }, }, }, }, post_filter: { term: { color: "red", }, }, }); console.log(response);
GET /shirts/_search { "query": { "bool": { "filter": { "term": { "brand": "gucci" } } } }, "aggs": { "colors": { "terms": { "field": "color" } }, "color_red": { "filter": { "term": { "color": "red" } }, "aggs": { "models": { "terms": { "field": "model" } } } } }, "post_filter": { "term": { "color": "red" } } }
1. 现在,主查询会查找所有 Gucci 衬衫,而不管颜色如何。
2. colors 聚合返回 Gucci 衬衫的各种颜色。
3. color_red 聚合将 models 子聚合限制为红色的 Gucci 衬衫。
4. 最后,post_filter 会从搜索命中结果中移除红色以外的其他颜色。
重计分过滤后的搜索结果
重计分使用次要的(通常开销更大的)算法,仅对由 query
和 post_filter
阶段返回的顶部文档(例如 100 - 500 个)重新排序,而不是将这种开销较大的算法应用于索引中的所有文档,从而帮助提高精度。
在每个分片将其结果返回以由处理整个搜索请求的节点进行排序之前,会在每个分片上执行 rescore
请求。
当前,重计分 API 只有一个实现:查询重计分器,它使用查询来调整评分。将来,可能会提供其他重计分器,例如,成对重计分器。
如果显式sort
(除了按降序排列的 _score
)与 rescore
查询一起提供,则会引发错误。
在向用户公开分页时,你不应该在逐页浏览(即传递不同的 from
值)的过程中更改 window_size
,因为这可能会改变顶部命中,导致结果在用户翻页时令人困惑地发生变动。
查询重计分器
查询重计分器仅在由 query
和post_filter
阶段返回的 Top-K 结果上执行第二次查询。每个分片上将检查的文档数量可以通过 window_size
参数控制,该参数默认为 10。
默认情况下,原始查询和重计分查询的分数会线性组合,从而为每个文档生成最终的 _score
。原始查询和重计分查询的相对重要性可以分别通过 query_weight
和 rescore_query_weight
来控制。两者都默认为 1
。
例如
resp = client.search( query={ "match": { "message": { "operator": "or", "query": "the quick brown" } } }, rescore={ "window_size": 50, "query": { "rescore_query": { "match_phrase": { "message": { "query": "the quick brown", "slop": 2 } } }, "query_weight": 0.7, "rescore_query_weight": 1.2 } }, ) print(resp)
response = client.search( body: { query: { match: { message: { operator: 'or', query: 'the quick brown' } } }, rescore: { window_size: 50, query: { rescore_query: { match_phrase: { message: { query: 'the quick brown', slop: 2 } } }, query_weight: 0.7, rescore_query_weight: 1.2 } } } ) puts response
const response = await client.search({ query: { match: { message: { operator: "or", query: "the quick brown", }, }, }, rescore: { window_size: 50, query: { rescore_query: { match_phrase: { message: { query: "the quick brown", slop: 2, }, }, }, query_weight: 0.7, rescore_query_weight: 1.2, }, }, }); console.log(response);
POST /_search { "query" : { "match" : { "message" : { "operator" : "or", "query" : "the quick brown" } } }, "rescore" : { "window_size" : 50, "query" : { "rescore_query" : { "match_phrase" : { "message" : { "query" : "the quick brown", "slop" : 2 } } }, "query_weight" : 0.7, "rescore_query_weight" : 1.2 } } }
分数的组合方式可以使用 score_mode
来控制
| 评分模式 | 描述 |
|---|---|
| total | 将原始分数与重计分查询分数相加。默认值。 |
| multiply | 将原始分数乘以重计分查询分数。对于 function query 重计分很有用。 |
| avg | 取原始分数和重计分查询分数的平均值。 |
| max | 取原始分数和重计分查询分数的最大值。 |
| min | 取原始分数和重计分查询分数的最小值。 |
多次重计分
也可以按顺序执行多次重计分:
resp = client.search( query={ "match": { "message": { "operator": "or", "query": "the quick brown" } } }, rescore=[ { "window_size": 100, "query": { "rescore_query": { "match_phrase": { "message": { "query": "the quick brown", "slop": 2 } } }, "query_weight": 0.7, "rescore_query_weight": 1.2 } }, { "window_size": 10, "query": { "score_mode": "multiply", "rescore_query": { "function_score": { "script_score": { "script": { "source": "Math.log10(doc.count.value + 2)" } } } } } } ], ) print(resp)
response = client.search( body: { query: { match: { message: { operator: 'or', query: 'the quick brown' } } }, rescore: [ { window_size: 100, query: { rescore_query: { match_phrase: { message: { query: 'the quick brown', slop: 2 } } }, query_weight: 0.7, rescore_query_weight: 1.2 } }, { window_size: 10, query: { score_mode: 'multiply', rescore_query: { function_score: { script_score: { script: { source: 'Math.log10(doc.count.value + 2)' } } } } } } ] } ) puts response
const response = await client.search({ query: { match: { message: { operator: "or", query: "the quick brown", }, }, }, rescore: [ { window_size: 100, query: { rescore_query: { match_phrase: { message: { query: "the quick brown", slop: 2, }, }, }, query_weight: 0.7, rescore_query_weight: 1.2, }, }, { window_size: 10, query: { score_mode: "multiply", rescore_query: { function_score: { script_score: { script: { source: "Math.log10(doc.count.value + 2)", }, }, }, }, }, }, ], }); console.log(response);
POST /_search { "query" : { "match" : { "message" : { "operator" : "or", "query" : "the quick brown" } } }, "rescore" : [ { "window_size" : 100, "query" : { "rescore_query" : { "match_phrase" : { "message" : { "query" : "the quick brown", "slop" : 2 } } }, "query_weight" : 0.7, "rescore_query_weight" : 1.2 } }, { "window_size" : 10, "query" : { "score_mode": "multiply", "rescore_query" : { "function_score" : { "script_score": { "script": { "source": "Math.log10(doc.count.value + 2)" } } } } } } ] }
第一个重计分器获取查询结果,然后第二个重计分器获取第一个重计分器的结果,依此类推。第二个重计分器会“看到”第一个重计分器完成的排序,因此可以在第一个重计分器上使用较大的窗口将文档拉入第二个重计分器的较小窗口中。