地理距离聚合
地理距离聚合
一种多桶聚合,作用于 geo_point 字段,概念上与范围(range)聚合非常相似。用户可以定义一个原点和一组距离范围桶。聚合会计算每个文档值与原点的距离,并根据范围确定它所属的桶(如果文档与原点之间的距离落在某个桶的距离范围内,则该文档属于该桶)。
resp = client.indices.create( index="museums", mappings={ "properties": { "location": { "type": "geo_point" } } }, ) print(resp) resp1 = client.bulk( index="museums", refresh=True, operations=[ { "index": { "_id": 1 } }, { "location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum" }, { "index": { "_id": 2 } }, { "location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis" }, { "index": { "_id": 3 } }, { "location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum" }, { "index": { "_id": 4 } }, { "location": "POINT (4.405200 51.222900)", "name": "Letterenhuis" }, { "index": { "_id": 5 } }, { "location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre" }, { "index": { "_id": 6 } }, { "location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay" } ], ) print(resp1) resp2 = client.search( index="museums", size="0", aggs={ "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000 }, { "from": 100000, "to": 300000 }, { "from": 300000 } ] } } }, ) print(resp2)
response = client.indices.create( index: 'museums', body: { mappings: { properties: { location: { type: 'geo_point' } } } } ) puts response response = client.bulk( index: 'museums', refresh: true, body: [ { index: { _id: 1 } }, { location: 'POINT (4.912350 52.374081)', name: 'NEMO Science Museum' }, { index: { _id: 2 } }, { location: 'POINT (4.901618 52.369219)', name: 'Museum Het Rembrandthuis' }, { index: { _id: 3 } }, { location: 'POINT (4.914722 52.371667)', name: 'Nederlands Scheepvaartmuseum' }, { index: { _id: 4 } }, { location: 'POINT (4.405200 51.222900)', name: 'Letterenhuis' }, { index: { _id: 5 } }, { location: 'POINT (2.336389 48.861111)', name: 'Musée du Louvre' }, { index: { _id: 6 } }, { location: 'POINT (2.327000 48.860000)', name: "Musée d'Orsay" } ] ) puts response response = client.search( index: 'museums', size: 0, body: { aggregations: { rings_around_amsterdam: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', ranges: [ { to: 100_000 }, { from: 100_000, to: 300_000 }, { from: 300_000 } ] } } } } ) puts response
const response = await client.indices.create({ index: "museums", mappings: { properties: { location: { type: "geo_point", }, }, }, }); console.log(response); const response1 = await client.bulk({ index: "museums", refresh: "true", operations: [ { index: { _id: 1, }, }, { location: "POINT (4.912350 52.374081)", name: "NEMO Science Museum", }, { index: { _id: 2, }, }, { location: "POINT (4.901618 52.369219)", name: "Museum Het Rembrandthuis", }, { index: { _id: 3, }, }, { location: "POINT (4.914722 52.371667)", name: "Nederlands Scheepvaartmuseum", }, { index: { _id: 4, }, }, { location: "POINT (4.405200 51.222900)", name: "Letterenhuis", }, { index: { _id: 5, }, }, { location: "POINT (2.336389 48.861111)", name: "Musée du Louvre", }, { index: { _id: 6, }, }, { location: "POINT (2.327000 48.860000)", name: "Musée d'Orsay", }, ], }); console.log(response1); const response2 = await client.search({ index: "museums", size: 0, aggs: { rings_around_amsterdam: { geo_distance: { field: "location", origin: "POINT (4.894 52.3760)", ranges: [ { to: 100000, }, { from: 100000, to: 300000, }, { from: 300000, }, ], }, }, }, }); console.log(response2);
PUT /museums { "mappings": { "properties": { "location": { "type": "geo_point" } } } } POST /museums/_bulk?refresh {"index":{"_id":1}} {"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} {"index":{"_id":2}} {"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} {"index":{"_id":3}} {"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} {"index":{"_id":4}} {"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} {"index":{"_id":5}} {"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} {"index":{"_id":6}} {"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} POST /museums/_search?size=0 { "aggs": { "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000 }, { "from": 100000, "to": 300000 }, { "from": 300000 } ] } } } }
响应
{ ... "aggregations": { "rings_around_amsterdam": { "buckets": [ { "key": "*-100000.0", "from": 0.0, "to": 100000.0, "doc_count": 3 }, { "key": "100000.0-300000.0", "from": 100000.0, "to": 300000.0, "doc_count": 1 }, { "key": "300000.0-*", "from": 300000.0, "doc_count": 2 } ] } } }
指定的字段必须是 geo_point 类型(该类型只能在映射中显式设置)。该字段也可以保存 geo_point 值的数组,在这种情况下,聚合时会将所有值都考虑在内。原点可以接受 geo_point 类型支持的所有格式:
- 对象格式:
{ "lat" : 52.3760, "lon" : 4.894 }
  这是最安全的格式,因为它最明确地标出了 lat 和 lon 的值。
- 字符串格式:
"52.3760, 4.894"
  其中第一个数字是 lat,第二个数字是 lon。
- 数组格式:
[4.894, 52.3760]
  基于 GeoJSON 标准,其中第一个数字是 lon,第二个数字是 lat。
默认情况下,距离单位是 m(米),但也可以使用:mi(英里)、in(英寸)、yd(码)、km(公里)、cm(厘米)、mm(毫米)。
resp = client.search( index="museums", size="0", aggs={ "rings": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "unit": "km", "ranges": [ { "to": 100 }, { "from": 100, "to": 300 }, { "from": 300 } ] } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', unit: 'km', ranges: [ { to: 100 }, { from: 100, to: 300 }, { from: 300 } ] } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggs: { rings: { geo_distance: { field: "location", origin: "POINT (4.894 52.3760)", unit: "km", ranges: [ { to: 100, }, { from: 100, to: 300, }, { from: 300, }, ], }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggs": { "rings": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "unit": "km", "ranges": [ { "to": 100 }, { "from": 100, "to": 300 }, { "from": 300 } ] } } } }
有两种距离计算模式:arc(默认)和 plane。arc 计算最准确;plane 最快,但最不准确。当您的搜索上下文“狭窄”、只跨越较小的地理区域(约 5 公里)时,可以考虑使用 plane。对于跨越很大区域的搜索(例如跨大陆搜索),plane 的误差会更大。可以使用 distance_type 参数设置距离计算类型。
resp = client.search( index="museums", size="0", aggs={ "rings": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "unit": "km", "distance_type": "plane", "ranges": [ { "to": 100 }, { "from": 100, "to": 300 }, { "from": 300 } ] } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', unit: 'km', distance_type: 'plane', ranges: [ { to: 100 }, { from: 100, to: 300 }, { from: 300 } ] } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggs: { rings: { geo_distance: { field: "location", origin: "POINT (4.894 52.3760)", unit: "km", distance_type: "plane", ranges: [ { to: 100, }, { from: 100, to: 300, }, { from: 300, }, ], }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggs": { "rings": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "unit": "km", "distance_type": "plane", "ranges": [ { "to": 100 }, { "from": 100, "to": 300 }, { "from": 300 } ] } } } }
键控响应
将 keyed 标志设置为 true 会为每个桶关联一个唯一的字符串键,并将范围作为哈希(对象)而不是数组返回。
resp = client.search( index="museums", size="0", aggs={ "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000 }, { "from": 100000, "to": 300000 }, { "from": 300000 } ], "keyed": True } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings_around_amsterdam: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', ranges: [ { to: 100_000 }, { from: 100_000, to: 300_000 }, { from: 300_000 } ], keyed: true } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggs: { rings_around_amsterdam: { geo_distance: { field: "location", origin: "POINT (4.894 52.3760)", ranges: [ { to: 100000, }, { from: 100000, to: 300000, }, { from: 300000, }, ], keyed: true, }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggs": { "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000 }, { "from": 100000, "to": 300000 }, { "from": 300000 } ], "keyed": true } } } }
响应
{ ... "aggregations": { "rings_around_amsterdam": { "buckets": { "*-100000.0": { "from": 0.0, "to": 100000.0, "doc_count": 3 }, "100000.0-300000.0": { "from": 100000.0, "to": 300000.0, "doc_count": 1 }, "300000.0-*": { "from": 300000.0, "doc_count": 2 } } } } }
也可以自定义每个范围的键。
resp = client.search( index="museums", size="0", aggs={ "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000, "key": "first_ring" }, { "from": 100000, "to": 300000, "key": "second_ring" }, { "from": 300000, "key": "third_ring" } ], "keyed": True } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { rings_around_amsterdam: { geo_distance: { field: 'location', origin: 'POINT (4.894 52.3760)', ranges: [ { to: 100_000, key: 'first_ring' }, { from: 100_000, to: 300_000, key: 'second_ring' }, { from: 300_000, key: 'third_ring' } ], keyed: true } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggs: { rings_around_amsterdam: { geo_distance: { field: "location", origin: "POINT (4.894 52.3760)", ranges: [ { to: 100000, key: "first_ring", }, { from: 100000, to: 300000, key: "second_ring", }, { from: 300000, key: "third_ring", }, ], keyed: true, }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggs": { "rings_around_amsterdam": { "geo_distance": { "field": "location", "origin": "POINT (4.894 52.3760)", "ranges": [ { "to": 100000, "key": "first_ring" }, { "from": 100000, "to": 300000, "key": "second_ring" }, { "from": 300000, "key": "third_ring" } ], "keyed": true } } } }
响应
{ ... "aggregations": { "rings_around_amsterdam": { "buckets": { "first_ring": { "from": 0.0, "to": 100000.0, "doc_count": 3 }, "second_ring": { "from": 100000.0, "to": 300000.0, "doc_count": 1 }, "third_ring": { "from": 300000.0, "doc_count": 2 } } } } }