排序搜索结果
编辑排序搜索结果编辑
允许您在特定字段上添加一个或多个排序。每个排序也可以反转。排序是按字段定义的,其中特殊字段名 _score
用于按分数排序,_doc
用于按索引顺序排序。
假设以下索引映射
response = client.indices.create( index: 'my-index-000001', body: { mappings: { properties: { post_date: { type: 'date' }, user: { type: 'keyword' }, name: { type: 'keyword' }, age: { type: 'integer' } } } } ) puts response
res, err := es.Indices.Create( "my-index-000001", es.Indices.Create.WithBody(strings.NewReader(`{ "mappings": { "properties": { "post_date": { "type": "date" }, "user": { "type": "keyword" }, "name": { "type": "keyword" }, "age": { "type": "integer" } } } }`)), ) fmt.Println(res, err)
PUT /my-index-000001 { "mappings": { "properties": { "post_date": { "type": "date" }, "user": { "type": "keyword" }, "name": { "type": "keyword" }, "age": { "type": "integer" } } } }
response = client.search( index: 'my-index-000001', body: { sort: [ { post_date: { order: 'asc', format: 'strict_date_optional_time_nanos' } }, 'user', { name: 'desc' }, { age: 'desc' }, '_score' ], query: { term: { user: 'kimchy' } } } ) puts response
GET /my-index-000001/_search { "sort" : [ { "post_date" : {"order" : "asc", "format": "strict_date_optional_time_nanos"}}, "user", { "name" : "desc" }, { "age" : "desc" }, "_score" ], "query" : { "term" : { "user" : "kimchy" } } }
_doc
除了是最有效的排序顺序之外,没有实际用例。因此,如果您不关心文档返回的顺序,那么您应该按 _doc
排序。这在 滚动 时尤其有用。
排序值编辑
搜索响应包含每个文档的 sort
值。使用 format
参数可为 date
和 date_nanos
字段的 sort
值指定日期格式。以下搜索将返回 post_date
字段的 sort
值,格式为 strict_date_optional_time_nanos
。
response = client.search( index: 'my-index-000001', body: { sort: [ { post_date: { format: 'strict_date_optional_time_nanos' } } ], query: { term: { user: 'kimchy' } } } ) puts response
GET /my-index-000001/_search { "sort" : [ { "post_date" : {"format": "strict_date_optional_time_nanos"}} ], "query" : { "term" : { "user" : "kimchy" } } }
排序顺序编辑
order
选项可以具有以下值
| asc | 按升序排序 |
| desc | 按降序排序 |
当按 _score
排序时,顺序默认为 desc
,当按其他任何内容排序时,顺序默认为 asc
。
排序模式选项编辑
Elasticsearch 支持按数组或多值字段排序。 mode
选项控制为其所属文档选择哪个数组值进行排序。 mode
选项可以具有以下值
| min | 选择最低的值。 |
| max | 选择最高的值。 |
| sum | 使用所有值的总和作为排序值。仅适用于基于数字的数组字段。 |
| avg | 使用所有值的平均值作为排序值。仅适用于基于数字的数组字段。 |
| median | 使用所有值的中间值作为排序值。仅适用于基于数字的数组字段。 |
升序排序中的默认排序模式为 min
- 选择最低的值。降序排序中的默认排序模式为 max
- 选择最高的值。
排序模式示例用法编辑
在下面的示例中,字段 price 每个文档有多个价格。在这种情况下,结果命中将按每个文档的平均价格升序排序。
response = client.index( index: 'my-index-000001', id: 1, refresh: true, body: { product: 'chocolate', price: [ 20, 4 ] } ) puts response response = client.search( body: { query: { term: { product: 'chocolate' } }, sort: [ { price: { order: 'asc', mode: 'avg' } } ] } ) puts response
{ res, err := es.Index( "my-index-000001", strings.NewReader(`{ "product": "chocolate", "price": [ 20, 4 ] }`), es.Index.WithDocumentID("1"), es.Index.WithRefresh("true"), es.Index.WithPretty(), ) fmt.Println(res, err) } { res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "query": { "term": { "product": "chocolate" } }, "sort": [ { "price": { "order": "asc", "mode": "avg" } } ] }`)), es.Search.WithPretty(), ) fmt.Println(res, err) }
PUT /my-index-000001/_doc/1?refresh { "product": "chocolate", "price": [20, 4] } POST /_search { "query" : { "term" : { "product" : "chocolate" } }, "sort" : [ {"price" : {"order" : "asc", "mode" : "avg"}} ] }
排序数字字段编辑
对于数字字段,还可以使用 numeric_type
选项将值从一种类型转换为另一种类型。此选项接受以下值:["double", "long", "date", "date_nanos"
],对于跨多个数据流或索引的搜索非常有用,其中排序字段的映射不同。
例如,考虑以下两个索引
response = client.indices.create( index: 'index_double', body: { mappings: { properties: { field: { type: 'double' } } } } ) puts response
res, err := es.Indices.Create( "index_double", es.Indices.Create.WithBody(strings.NewReader(`{ "mappings": { "properties": { "field": { "type": "double" } } } }`)), ) fmt.Println(res, err)
PUT /index_double { "mappings": { "properties": { "field": { "type": "double" } } } }
response = client.indices.create( index: 'index_long', body: { mappings: { properties: { field: { type: 'long' } } } } ) puts response
res, err := es.Indices.Create( "index_long", es.Indices.Create.WithBody(strings.NewReader(`{ "mappings": { "properties": { "field": { "type": "long" } } } }`)), ) fmt.Println(res, err)
PUT /index_long { "mappings": { "properties": { "field": { "type": "long" } } } }
由于 field
在第一个索引中映射为 double
,在第二个索引中映射为 long
,因此默认情况下无法使用此字段对查询这两个索引的请求进行排序。但是,您可以使用 numeric_type
选项强制类型为其中之一,以强制对所有索引使用特定类型
$params = [ 'index' => 'index_long,index_double', 'body' => [ 'sort' => [ [ 'field' => [ 'numeric_type' => 'double', ], ], ], ], ]; $response = $client->search($params);
response = client.search( index: 'index_long,index_double', body: { sort: [ { field: { numeric_type: 'double' } } ] } ) puts response
res, err := es.Search( es.Search.WithIndex("index_long,index_double"), es.Search.WithBody(strings.NewReader(`{ "sort": [ { "field": { "numeric_type": "double" } } ] }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
const response = await client.search({ index: 'index_long,index_double', body: { sort: [ { field: { numeric_type: 'double' } } ] } }) console.log(response)
POST /index_long,index_double/_search { "sort" : [ { "field" : { "numeric_type" : "double" } } ] }
在上面的示例中,index_long
索引的值被强制转换为 double,以与 index_double
索引生成的值兼容。也可以将浮点字段转换为 long
,但请注意,在这种情况下,浮点数将被替换为小于或等于(如果值为负,则大于或等于)参数且等于数学整数的最大值。
此选项还可用于将使用毫秒分辨率的 date
字段转换为使用纳秒分辨率的 date_nanos
字段。例如,考虑以下两个索引
response = client.indices.create( index: 'index_double', body: { mappings: { properties: { field: { type: 'date' } } } } ) puts response
res, err := es.Indices.Create( "index_double", es.Indices.Create.WithBody(strings.NewReader(`{ "mappings": { "properties": { "field": { "type": "date" } } } }`)), ) fmt.Println(res, err)
PUT /index_double { "mappings": { "properties": { "field": { "type": "date" } } } }
response = client.indices.create( index: 'index_long', body: { mappings: { properties: { field: { type: 'date_nanos' } } } } ) puts response
res, err := es.Indices.Create( "index_long", es.Indices.Create.WithBody(strings.NewReader(`{ "mappings": { "properties": { "field": { "type": "date_nanos" } } } }`)), ) fmt.Println(res, err)
PUT /index_long { "mappings": { "properties": { "field": { "type": "date_nanos" } } } }
这些索引中的值以不同的分辨率存储,因此对这些字段进行排序将始终在 date_nanos
之前对 date
进行排序(升序)。使用 numeric_type
类型选项,可以为排序设置单个分辨率,设置为 date
将 date_nanos
转换为毫秒分辨率,而 date_nanos
将 date
字段中的值转换为纳秒分辨率
$params = [ 'index' => 'index_long,index_double', 'body' => [ 'sort' => [ [ 'field' => [ 'numeric_type' => 'date_nanos', ], ], ], ], ]; $response = $client->search($params);
res, err := es.Search( es.Search.WithIndex("index_long,index_double"), es.Search.WithBody(strings.NewReader(`{ "sort": [ { "field": { "numeric_type": "date_nanos" } } ] }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
const response = await client.search({ index: 'index_long,index_double', body: { sort: [ { field: { numeric_type: 'date_nanos' } } ] } }) console.log(response)
POST /index_long,index_double/_search { "sort" : [ { "field" : { "numeric_type" : "date_nanos" } } ] }
为了避免溢出,转换为 date_nanos
无法应用于 1970 年之前和 2262 年之后的日期,因为纳秒表示为长整数。
嵌套对象内的排序。编辑
Elasticsearch 还支持按位于一个或多个嵌套对象内的字段排序。按嵌套字段排序支持具有以下属性的 nested
排序选项
-
path
- 定义要排序的嵌套对象。实际排序字段必须是此嵌套对象内的直接字段。按嵌套字段排序时,此字段是必需的。
-
filter
- 嵌套路径内的内部对象应匹配的过滤器,以便其字段值被排序考虑在内。常见情况是在嵌套过滤器或查询中重复查询/过滤器。默认情况下,没有
filter
处于活动状态。 -
max_children
- 在为每个根文档选择排序值时要考虑的子项的最大数量。默认为无限制。
-
nested
- 与顶层
nested
相同,但适用于当前嵌套对象内的另一个嵌套路径。
如果在没有 nested
上下文的情况下在排序中定义了嵌套字段,Elasticsearch 将抛出错误。
嵌套排序示例编辑
在下面的示例中,offer
是类型为 nested
的字段。需要指定嵌套 path
;否则,Elasticsearch 无法知道需要在哪个嵌套级别捕获排序值。
$params = [ 'body' => [ 'query' => [ 'term' => [ 'product' => 'chocolate', ], ], 'sort' => [ [ 'offer.price' => [ 'mode' => 'avg', 'order' => 'asc', 'nested' => [ 'path' => 'offer', 'filter' => [ 'term' => [ 'offer.color' => 'blue', ], ], ], ], ], ], ], ]; $response = $client->search($params);
response = client.search( body: { query: { term: { product: 'chocolate' } }, sort: [ { 'offer.price' => { mode: 'avg', order: 'asc', nested: { path: 'offer', filter: { term: { 'offer.color' => 'blue' } } } } } ] } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "query": { "term": { "product": "chocolate" } }, "sort": [ { "offer.price": { "mode": "avg", "order": "asc", "nested": { "path": "offer", "filter": { "term": { "offer.color": "blue" } } } } } ] }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
const response = await client.search({ body: { query: { term: { product: 'chocolate' } }, sort: [ { 'offer.price': { mode: 'avg', order: 'asc', nested: { path: 'offer', filter: { term: { 'offer.color': 'blue' } } } } } ] } }) console.log(response)
POST /_search { "query" : { "term" : { "product" : "chocolate" } }, "sort" : [ { "offer.price" : { "mode" : "avg", "order" : "asc", "nested": { "path": "offer", "filter": { "term" : { "offer.color" : "blue" } } } } } ] }
在下面的示例中,parent
和 child
字段是类型为 nested
的字段。需要在每个级别指定 nested.path
;否则,Elasticsearch 无法知道需要在哪个嵌套级别捕获排序值。
$params = [ 'body' => [ 'query' => [ 'nested' => [ 'path' => 'parent', 'query' => [ 'bool' => [ 'must' => [ 'range' => [ 'parent.age' => [ 'gte' => 21, ], ], ], 'filter' => [ 'nested' => [ 'path' => 'parent.child', 'query' => [ 'match' => [ 'parent.child.name' => 'matt', ], ], ], ], ], ], ], ], 'sort' => [ [ 'parent.child.age' => [ 'mode' => 'min', 'order' => 'asc', 'nested' => [ 'path' => 'parent', 'filter' => [ 'range' => [ 'parent.age' => [ 'gte' => 21, ], ], ], 'nested' => [ 'path' => 'parent.child', 'filter' => [ 'match' => [ 'parent.child.name' => 'matt', ], ], ], ], ], ], ], ], ]; $response = $client->search($params);
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "query": { "nested": { "path": "parent", "query": { "bool": { "must": { "range": { "parent.age": { "gte": 21 } } }, "filter": { "nested": { "path": "parent.child", "query": { "match": { "parent.child.name": "matt" } } } } } } } }, "sort": [ { "parent.child.age": { "mode": "min", "order": "asc", "nested": { "path": "parent", "filter": { "range": { "parent.age": { "gte": 21 } } }, "nested": { "path": "parent.child", "filter": { "match": { "parent.child.name": "matt" } } } } } } ] }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
const response = await client.search({ body: { query: { nested: { path: 'parent', query: { bool: { must: { range: { 'parent.age': { gte: 21 } } }, filter: { nested: { path: 'parent.child', query: { match: { 'parent.child.name': 'matt' } } } } } } } }, sort: [ { 'parent.child.age': { mode: 'min', order: 'asc', nested: { path: 'parent', filter: { range: { 'parent.age': { gte: 21 } } }, nested: { path: 'parent.child', filter: { match: { 'parent.child.name': 'matt' } } } } } } ] } }) console.log(response)
POST /_search { "query": { "nested": { "path": "parent", "query": { "bool": { "must": {"range": {"parent.age": {"gte": 21}}}, "filter": { "nested": { "path": "parent.child", "query": {"match": {"parent.child.name": "matt"}} } } } } } }, "sort" : [ { "parent.child.age" : { "mode" : "min", "order" : "asc", "nested": { "path": "parent", "filter": { "range": {"parent.age": {"gte": 21}} }, "nested": { "path": "parent.child", "filter": { "match": {"parent.child.name": "matt"} } } } } } ] }
按脚本排序和按地理距离排序时也支持嵌套排序。
缺失值编辑
missing
参数指定如何处理缺少排序字段的文档:missing
值可以设置为 _last
、_first
或自定义值(将用作缺少该排序字段的文档的排序值)。默认值为 _last
。
例如
response = client.search( body: { sort: [ { price: { missing: '_last' } } ], query: { term: { product: 'chocolate' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "price": { "missing": "_last" } } ], "query": { "term": { "product": "chocolate" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort" : [ { "price" : {"missing" : "_last"} } ], "query" : { "term" : { "product" : "chocolate" } } }
如果嵌套内部对象与 nested.filter
不匹配,则使用缺失值。
忽略未映射的字段编辑
默认情况下,如果与字段关联的映射不存在,则搜索请求将失败。 unmapped_type
选项允许您忽略没有映射的字段,并且不按它们排序。此参数的值用于确定要发出的排序值。以下是如何使用它的示例
response = client.search( body: { sort: [ { price: { unmapped_type: 'long' } } ], query: { term: { product: 'chocolate' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "price": { "unmapped_type": "long" } } ], "query": { "term": { "product": "chocolate" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort" : [ { "price" : {"unmapped_type" : "long"} } ], "query" : { "term" : { "product" : "chocolate" } } }
如果所查询的索引中有某个索引没有 price
字段的映射,则 Elasticsearch 会按该索引存在类型为 long
的映射来处理,并将该索引中的所有文档视为此字段没有值。
地理距离排序编辑
允许按 _geo_distance
排序。以下是一个示例,假设 pin.location
是类型为 geo_point
的字段
response = client.search( body: { sort: [ { _geo_distance: { 'pin.location' => [ -70, 40 ], order: 'asc', unit: 'km', mode: 'min', distance_type: 'arc', ignore_unmapped: true } } ], query: { term: { user: 'kimchy' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "_geo_distance": { "pin.location": [ -70, 40 ], "order": "asc", "unit": "km", "mode": "min", "distance_type": "arc", "ignore_unmapped": true } } ], "query": { "term": { "user": "kimchy" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort" : [ { "_geo_distance" : { "pin.location" : [-70, 40], "order" : "asc", "unit" : "km", "mode" : "min", "distance_type" : "arc", "ignore_unmapped": true } } ], "query" : { "term" : { "user" : "kimchy" } } }
-
distance_type
- 如何计算距离。可以是
arc
(默认)或plane
(更快,但在长距离和靠近极点时不准确)。 -
mode
- 如果字段有多个地理点,该怎么办。默认情况下,在升序排序时考虑最短距离,在降序排序时考虑最长距离。支持的值为
min
、max
、median
和avg
。 -
unit
- 计算排序值时要使用的单位。默认值为
m
(米)。 -
ignore_unmapped
- 指示是否应将未映射的字段视为缺失值。将其设置为
true
等效于在字段排序中指定unmapped_type
。默认值为false
(未映射的字段会导致搜索失败)。
地理距离排序不支持可配置的缺失值:当文档没有用于距离计算的字段的值时,距离将始终被视为等于 Infinity
。
支持使用以下格式提供坐标:
纬度经度作为属性编辑
response = client.search( body: { sort: [ { _geo_distance: { 'pin.location' => { lat: 40, lon: -70 }, order: 'asc', unit: 'km' } } ], query: { term: { user: 'kimchy' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "_geo_distance": { "pin.location": { "lat": 40, "lon": -70 }, "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort" : [ { "_geo_distance" : { "pin.location" : { "lat" : 40, "lon" : -70 }, "order" : "asc", "unit" : "km" } } ], "query" : { "term" : { "user" : "kimchy" } } }
纬度经度作为 WKT 字符串编辑
格式为 Well-Known Text。
response = client.search( body: { sort: [ { _geo_distance: { 'pin.location' => 'POINT (-70 40)', order: 'asc', unit: 'km' } } ], query: { term: { user: 'kimchy' } } } ) puts response
GET /_search { "sort": [ { "_geo_distance": { "pin.location": "POINT (-70 40)", "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }
Geohash编辑
response = client.search( body: { sort: [ { _geo_distance: { 'pin.location' => 'drm3btev3e86', order: 'asc', unit: 'km' } } ], query: { term: { user: 'kimchy' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "_geo_distance": { "pin.location": "drm3btev3e86", "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort": [ { "_geo_distance": { "pin.location": "drm3btev3e86", "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }
纬度经度作为数组编辑
格式为 [lon, lat]
。注意:此处采用经度在前、纬度在后的顺序,以符合 GeoJSON 规范。
response = client.search( body: { sort: [ { _geo_distance: { 'pin.location' => [ -70, 40 ], order: 'asc', unit: 'km' } } ], query: { term: { user: 'kimchy' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "_geo_distance": { "pin.location": [ -70, 40 ], "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort": [ { "_geo_distance": { "pin.location": [ -70, 40 ], "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }
多个参考点编辑
可以将多个地理点作为包含任何 geo_point
格式的数组传递,例如
response = client.search( body: { sort: [ { _geo_distance: { 'pin.location' => [ [ -70, 40 ], [ -71, 42 ] ], order: 'asc', unit: 'km' } } ], query: { term: { user: 'kimchy' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "sort": [ { "_geo_distance": { "pin.location": [ [ -70, 40 ], [ -71, 42 ] ], "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "sort": [ { "_geo_distance": { "pin.location": [ [ -70, 40 ], [ -71, 42 ] ], "order": "asc", "unit": "km" } } ], "query": { "term": { "user": "kimchy" } } }
等等。
文档的最终距离将是文档中所有点到排序请求中所有点的距离的 min
/max
/avg
(通过 mode
定义)。
基于脚本的排序编辑
允许根据自定义脚本进行排序,以下是一个示例
response = client.search( body: { query: { term: { user: 'kimchy' } }, sort: { _script: { type: 'number', script: { lang: 'painless', source: "doc['field_name'].value * params.factor", params: { factor: 1.1 } }, order: 'asc' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "query": { "term": { "user": "kimchy" } }, "sort": { "_script": { "type": "number", "script": { "lang": "painless", "source": "doc['field_name'].value * params.factor", "params": { "factor": 1.1 } }, "order": "asc" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "query": { "term": { "user": "kimchy" } }, "sort": { "_script": { "type": "number", "script": { "lang": "painless", "source": "doc['field_name'].value * params.factor", "params": { "factor": 1.1 } }, "order": "asc" } } }
跟踪分数编辑
在对字段进行排序时,不会计算分数。通过将 track_scores
设置为 true,分数将继续计算并跟踪。
response = client.search( body: { track_scores: true, sort: [ { post_date: { order: 'desc' } }, { name: 'desc' }, { age: 'desc' } ], query: { term: { user: 'kimchy' } } } ) puts response
res, err := es.Search( es.Search.WithBody(strings.NewReader(`{ "track_scores": true, "sort": [ { "post_date": { "order": "desc" } }, { "name": "desc" }, { "age": "desc" } ], "query": { "term": { "user": "kimchy" } } }`)), es.Search.WithPretty(), ) fmt.Println(res, err)
GET /_search { "track_scores": true, "sort" : [ { "post_date" : {"order" : "desc"} }, { "name" : "desc" }, { "age" : "desc" } ], "query" : { "term" : { "user" : "kimchy" } } }
内存注意事项编辑
排序时,相关的排序字段值将加载到内存中。这意味着每个分片应该有足够的内存来容纳它们。对于基于字符串的类型,排序的字段不应该被分析/标记化。对于数值类型,如果可能,建议明确将类型设置为更窄的类型(如 short
、integer
和 float
)。