折叠搜索结果
编辑折叠搜索结果
您可以使用 collapse
参数基于字段值折叠搜索结果。折叠是通过为每个折叠键仅选择排序靠前的文档来完成的。
例如,以下搜索按 user.id
折叠结果,并按 http.response.bytes
排序。
resp = client.search( index="my-index-000001", query={ "match": { "message": "GET /search" } }, collapse={ "field": "user.id" }, sort=[ { "http.response.bytes": { "order": "desc" } } ], from_=0, ) print(resp)
const response = await client.search({ index: "my-index-000001", query: { match: { message: "GET /search", }, }, collapse: { field: "user.id", }, sort: [ { "http.response.bytes": { order: "desc", }, }, ], from: 0, }); console.log(response);
GET my-index-000001/_search { "query": { "match": { "message": "GET /search" } }, "collapse": { "field": "user.id" }, "sort": [ { "http.response.bytes": { "order": "desc" } } ], "from": 0 }
响应中的命中总数表示不进行折叠时匹配的文档数。不同组的总数未知。
用于折叠的字段必须是单值 keyword
或 numeric
字段,并且启用了 doc_values
。
折叠仅应用于排名靠前的命中(top hits),不影响聚合。
展开折叠结果
还可以使用 inner_hits
选项展开每个折叠后的最匹配结果。
resp = client.search( index="my-index-000001", query={ "match": { "message": "GET /search" } }, collapse={ "field": "user.id", "inner_hits": { "name": "most_recent", "size": 5, "sort": [ { "@timestamp": "desc" } ] }, "max_concurrent_group_searches": 4 }, sort=[ { "http.response.bytes": { "order": "desc" } } ], ) print(resp)
const response = await client.search({ index: "my-index-000001", query: { match: { message: "GET /search", }, }, collapse: { field: "user.id", inner_hits: { name: "most_recent", size: 5, sort: [ { "@timestamp": "desc", }, ], }, max_concurrent_group_searches: 4, }, sort: [ { "http.response.bytes": { order: "desc", }, }, ], }); console.log(response);
GET /my-index-000001/_search { "query": { "match": { "message": "GET /search" } }, "collapse": { "field": "user.id", "inner_hits": { "name": "most_recent", "size": 5, "sort": [ { "@timestamp": "desc" } ] }, "max_concurrent_group_searches": 4 }, "sort": [ { "http.response.bytes": { "order": "desc" } } ] }
有关支持的选项的完整列表和响应的格式,请参阅 内部命中。
也可以为每个折叠的命中请求多个 inner hits
。当您想要获得折叠命中的多个表示形式时,这会很有用。
resp = client.search( index="my-index-000001", query={ "match": { "message": "GET /search" } }, collapse={ "field": "user.id", "inner_hits": [ { "name": "largest_responses", "size": 3, "sort": [ { "http.response.bytes": { "order": "desc" } } ] }, { "name": "most_recent", "size": 3, "sort": [ { "@timestamp": { "order": "desc" } } ] } ] }, sort=[ "http.response.bytes" ], ) print(resp)
const response = await client.search({ index: "my-index-000001", query: { match: { message: "GET /search", }, }, collapse: { field: "user.id", inner_hits: [ { name: "largest_responses", size: 3, sort: [ { "http.response.bytes": { order: "desc", }, }, ], }, { name: "most_recent", size: 3, sort: [ { "@timestamp": { order: "desc", }, }, ], }, ], }, sort: ["http.response.bytes"], }); console.log(response);
GET /my-index-000001/_search { "query": { "match": { "message": "GET /search" } }, "collapse": { "field": "user.id", "inner_hits": [ { "name": "largest_responses", "size": 3, "sort": [ { "http.response.bytes": { "order": "desc" } } ] }, { "name": "most_recent", "size": 3, "sort": [ { "@timestamp": { "order": "desc" } } ] } ] }, "sort": [ "http.response.bytes" ] }
组的展开方式是:针对响应中返回的每个折叠命中,为每个 inner_hit
请求各发送一次附加查询。如果组或 inner_hit
请求过多,这会显著减慢您的搜索速度。
max_concurrent_group_searches
请求参数可用于控制此阶段允许的最大并发搜索数。默认值基于数据节点数和默认搜索线程池大小。
collapse
不能与 scroll 一起使用。
使用 search_after
进行折叠
字段折叠可以与 search_after
参数一起使用。仅当在同一字段上进行排序和折叠时才支持使用 search_after
。此外,不允许使用次级排序。例如,我们可以对 user.id
进行折叠和排序,同时使用 search_after
翻阅结果:
resp = client.search( index="my-index-000001", query={ "match": { "message": "GET /search" } }, collapse={ "field": "user.id" }, sort=[ "user.id" ], search_after=[ "dd5ce1ad" ], ) print(resp)
const response = await client.search({ index: "my-index-000001", query: { match: { message: "GET /search", }, }, collapse: { field: "user.id", }, sort: ["user.id"], search_after: ["dd5ce1ad"], }); console.log(response);
GET /my-index-000001/_search { "query": { "match": { "message": "GET /search" } }, "collapse": { "field": "user.id" }, "sort": [ "user.id" ], "search_after": ["dd5ce1ad"] }
重新评分折叠结果
您可以将字段折叠与 rescore
搜索参数一起使用。重新评分器会在每个分片上针对每个折叠字段值排名最靠前的文档运行。为了保持可靠的顺序,建议将共享相同折叠字段值的文档聚集在同一个分片上。这可以通过在索引期间将折叠字段值指定为 路由键 来实现。
resp = client.index( index="my-index-000001", routing="xyz", document={ "@timestamp": "2099-11-15T13:12:00", "message": "You know for search!", "user.id": "xyz" }, ) print(resp)
const response = await client.index({ index: "my-index-000001", routing: "xyz", document: { "@timestamp": "2099-11-15T13:12:00", message: "You know for search!", "user.id": "xyz", }, }); console.log(response);
POST /my-index-000001/_doc?routing=xyz { "@timestamp": "2099-11-15T13:12:00", "message": "You know for search!", "user.id": "xyz" }
通过这样做,您保证每个折叠键只有一个最靠前的文档在全局范围内被重新评分。
以下请求在 user.id
字段上利用字段折叠,然后使用 查询重新评分器 对最靠前的组进行重新评分:
resp = client.search( index="my-index-000001", query={ "match": { "message": "you know for search" } }, collapse={ "field": "user.id" }, rescore={ "window_size": 50, "query": { "rescore_query": { "match_phrase": { "message": "you know for search" } }, "query_weight": 0.3, "rescore_query_weight": 1.4 } }, ) print(resp)
const response = await client.search({ index: "my-index-000001", query: { match: { message: "you know for search", }, }, collapse: { field: "user.id", }, rescore: { window_size: 50, query: { rescore_query: { match_phrase: { message: "you know for search", }, }, query_weight: 0.3, rescore_query_weight: 1.4, }, }, }); console.log(response);
GET /my-index-000001/_search { "query": { "match": { "message": "you know for search" } }, "collapse": { "field": "user.id" }, "rescore" : { "window_size" : 50, "query" : { "rescore_query" : { "match_phrase": { "message": "you know for search" } }, "query_weight" : 0.3, "rescore_query_weight" : 1.4 } } }
重新评分器不应用于 inner hits
。
第二级折叠
还支持第二级折叠,并应用于 inner_hits
。
例如,以下搜索按 geo.country_name
折叠结果。在每个 geo.country_name
中,内部命中按 user.id
折叠。
第二级折叠不允许 inner_hits
。
resp = client.search( index="my-index-000001", query={ "match": { "message": "GET /search" } }, collapse={ "field": "geo.country_name", "inner_hits": { "name": "by_location", "collapse": { "field": "user.id" }, "size": 3 } }, ) print(resp)
const response = await client.search({ index: "my-index-000001", query: { match: { message: "GET /search", }, }, collapse: { field: "geo.country_name", inner_hits: { name: "by_location", collapse: { field: "user.id", }, size: 3, }, }, }); console.log(response);
GET /my-index-000001/_search { "query": { "match": { "message": "GET /search" } }, "collapse": { "field": "geo.country_name", "inner_hits": { "name": "by_location", "collapse": { "field": "user.id" }, "size": 3 } } }
{ "hits" : { "hits" : [ { "_index" : "my-index-000001", "_id" : "oX9uXXoB0da05OCR3adK", "_score" : 0.5753642, "_source" : { "@timestamp" : "2099-11-15T14:12:12", "geo" : { "country_name" : "Amsterdam" }, "http" : { "request" : { "method" : "get" }, "response" : { "bytes" : 1070000, "status_code" : 200 }, "version" : "1.1" }, "message" : "GET /search HTTP/1.1 200 1070000", "source" : { "ip" : "127.0.0.1" }, "user" : { "id" : "kimchy" } }, "fields" : { "geo.country_name" : [ "Amsterdam" ] }, "inner_hits" : { "by_location" : { "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 0.5753642, "hits" : [ { "_index" : "my-index-000001", "_id" : "oX9uXXoB0da05OCR3adK", "_score" : 0.5753642, "_source" : { "@timestamp" : "2099-11-15T14:12:12", "geo" : { "country_name" : "Amsterdam" }, "http" : { "request" : { "method" : "get" }, "response" : { "bytes" : 1070000, "status_code" : 200 }, "version" : "1.1" }, "message" : "GET /search HTTP/1.1 200 1070000", "source" : { "ip" : "127.0.0.1" }, "user" : { "id" : "kimchy" } }, "fields" : { "user.id" : [ "kimchy" ] } } ] } } } } ] } }
跟踪分数
当 collapse
与字段上的 sort
一起使用时,不会计算分数。将 track_scores
设置为 true 会指示 Elasticsearch 计算并跟踪分数。