Geohash 网格聚合
编辑Geohash 网格聚合
一种多桶聚合,它将 geo_point 和 geo_shape 值分组到表示网格的桶中。生成的网格可以是稀疏的,并且仅包含具有匹配数据的单元格。每个单元格都使用 geohash 进行标记,该 geohash 的精度可由用户定义。
- 高精度 geohash 具有较长的字符串长度,并表示仅覆盖小区域的单元格。
- 低精度 geohash 具有较短的字符串长度,其表示的每个单元格都覆盖较大的区域。
此聚合中使用的 Geohash 可以选择 1 到 12 之间的精度。
长度为 12 的最高精度 geohash 生成的单元格覆盖面积小于一平方米,因此高精度请求在 RAM 和结果大小方面可能非常昂贵。请参阅下面的示例,了解如何在请求高细节级别之前首先将聚合过滤到较小的地理区域。
您只能使用 geohash_grid
来聚合显式映射的 geo_point
或 geo_shape
字段。如果 geo_point
字段包含一个数组,则 geohash_grid
会聚合所有数组值。
简单的低精度请求
编辑resp = client.indices.create( index="museums", mappings={ "properties": { "location": { "type": "geo_point" } } }, ) print(resp) resp1 = client.bulk( index="museums", refresh=True, operations=[ { "index": { "_id": 1 } }, { "location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum" }, { "index": { "_id": 2 } }, { "location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis" }, { "index": { "_id": 3 } }, { "location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum" }, { "index": { "_id": 4 } }, { "location": "POINT (4.405200 51.222900)", "name": "Letterenhuis" }, { "index": { "_id": 5 } }, { "location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre" }, { "index": { "_id": 6 } }, { "location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay" } ], ) print(resp1) resp2 = client.search( index="museums", size="0", aggregations={ "large-grid": { "geohash_grid": { "field": "location", "precision": 3 } } }, ) print(resp2)
response = client.indices.create( index: 'museums', body: { mappings: { properties: { location: { type: 'geo_point' } } } } ) puts response response = client.bulk( index: 'museums', refresh: true, body: [ { index: { _id: 1 } }, { location: 'POINT (4.912350 52.374081)', name: 'NEMO Science Museum' }, { index: { _id: 2 } }, { location: 'POINT (4.901618 52.369219)', name: 'Museum Het Rembrandthuis' }, { index: { _id: 3 } }, { location: 'POINT (4.914722 52.371667)', name: 'Nederlands Scheepvaartmuseum' }, { index: { _id: 4 } }, { location: 'POINT (4.405200 51.222900)', name: 'Letterenhuis' }, { index: { _id: 5 } }, { location: 'POINT (2.336389 48.861111)', name: 'Musée du Louvre' }, { index: { _id: 6 } }, { location: 'POINT (2.327000 48.860000)', name: "Musée d'Orsay" } ] ) puts response response = client.search( index: 'museums', size: 0, body: { aggregations: { "large-grid": { geohash_grid: { field: 'location', precision: 3 } } } } ) puts response
const response = await client.indices.create({ index: "museums", mappings: { properties: { location: { type: "geo_point", }, }, }, }); console.log(response); const response1 = await client.bulk({ index: "museums", refresh: "true", operations: [ { index: { _id: 1, }, }, { location: "POINT (4.912350 52.374081)", name: "NEMO Science Museum", }, { index: { _id: 2, }, }, { location: "POINT (4.901618 52.369219)", name: "Museum Het Rembrandthuis", }, { index: { _id: 3, }, }, { location: "POINT (4.914722 52.371667)", name: "Nederlands Scheepvaartmuseum", }, { index: { _id: 4, }, }, { location: "POINT (4.405200 51.222900)", name: "Letterenhuis", }, { index: { _id: 5, }, }, { location: "POINT (2.336389 48.861111)", name: "Musée du Louvre", }, { index: { _id: 6, }, }, { location: "POINT (2.327000 48.860000)", name: "Musée d'Orsay", }, ], }); console.log(response1); const response2 = await client.search({ index: "museums", size: 0, aggregations: { "large-grid": { geohash_grid: { field: "location", precision: 3, }, }, }, }); console.log(response2);
PUT /museums { "mappings": { "properties": { "location": { "type": "geo_point" } } } } POST /museums/_bulk?refresh {"index":{"_id":1}} {"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} {"index":{"_id":2}} {"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} {"index":{"_id":3}} {"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} {"index":{"_id":4}} {"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} {"index":{"_id":5}} {"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} {"index":{"_id":6}} {"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} POST /museums/_search?size=0 { "aggregations": { "large-grid": { "geohash_grid": { "field": "location", "precision": 3 } } } }
响应
{ ... "aggregations": { "large-grid": { "buckets": [ { "key": "u17", "doc_count": 3 }, { "key": "u09", "doc_count": 2 }, { "key": "u15", "doc_count": 1 } ] } } }
高精度请求
当请求详细的桶时(通常用于显示“放大”的地图),应使用类似 geo_bounding_box 的过滤器来缩小目标区域,否则可能会创建并返回数百万个桶。
resp = client.search( index="museums", size="0", aggregations={ "zoomed-in": { "filter": { "geo_bounding_box": { "location": { "top_left": "POINT (4.9 52.4)", "bottom_right": "POINT (5.0 52.3)" } } }, "aggregations": { "zoom1": { "geohash_grid": { "field": "location", "precision": 8 } } } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { "zoomed-in": { filter: { geo_bounding_box: { location: { top_left: 'POINT (4.9 52.4)', bottom_right: 'POINT (5.0 52.3)' } } }, aggregations: { "zoom1": { geohash_grid: { field: 'location', precision: 8 } } } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggregations: { "zoomed-in": { filter: { geo_bounding_box: { location: { top_left: "POINT (4.9 52.4)", bottom_right: "POINT (5.0 52.3)", }, }, }, aggregations: { zoom1: { geohash_grid: { field: "location", precision: 8, }, }, }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggregations": { "zoomed-in": { "filter": { "geo_bounding_box": { "location": { "top_left": "POINT (4.9 52.4)", "bottom_right": "POINT (5.0 52.3)" } } }, "aggregations": { "zoom1": { "geohash_grid": { "field": "location", "precision": 8 } } } } } }
geohash_grid 聚合返回的 geohash 也可用于放大。要放大上一个示例中返回的第一个 geohash u17,应将其同时指定为 top_left 和 bottom_right 两个角。
resp = client.search( index="museums", size="0", aggregations={ "zoomed-in": { "filter": { "geo_bounding_box": { "location": { "top_left": "u17", "bottom_right": "u17" } } }, "aggregations": { "zoom1": { "geohash_grid": { "field": "location", "precision": 8 } } } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { "zoomed-in": { filter: { geo_bounding_box: { location: { top_left: 'u17', bottom_right: 'u17' } } }, aggregations: { "zoom1": { geohash_grid: { field: 'location', precision: 8 } } } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggregations: { "zoomed-in": { filter: { geo_bounding_box: { location: { top_left: "u17", bottom_right: "u17", }, }, }, aggregations: { zoom1: { geohash_grid: { field: "location", precision: 8, }, }, }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggregations": { "zoomed-in": { "filter": { "geo_bounding_box": { "location": { "top_left": "u17", "bottom_right": "u17" } } }, "aggregations": { "zoom1": { "geohash_grid": { "field": "location", "precision": 8 } } } } } }
{ ... "aggregations": { "zoomed-in": { "doc_count": 3, "zoom1": { "buckets": [ { "key": "u173zy3j", "doc_count": 1 }, { "key": "u173zvfz", "doc_count": 1 }, { "key": "u173zt90", "doc_count": 1 } ] } } } }
要在不支持 geohash 的系统上进行“放大”,应使用现有的某个 geohash 库将桶键转换为边界框。例如,对于 JavaScript,可以使用 node-geohash 库:
var geohash = require('ngeohash'); // bbox will contain [ 52.03125, 4.21875, 53.4375, 5.625 ] // [ minlat, minlon, maxlat, maxlon] var bbox = geohash.decode_bbox('u17');
带有附加边界框过滤的请求
geohash_grid 聚合支持一个可选的 bounds 参数,该参数将所考虑的单元格限制为与所提供边界相交的单元格。bounds 参数接受的边界框格式与地理边界框查询中指定边界时可接受的所有格式相同。此边界框既可以单独使用,也可以与聚合上下文中的其他 geo_bounding_box 查询一起使用。它是一个独立的边界框,可以与聚合上下文中定义的任何其他 geo_bounding_box 查询相交、相等或不相交。
resp = client.search( index="museums", size="0", aggregations={ "tiles-in-bounds": { "geohash_grid": { "field": "location", "precision": 8, "bounds": { "top_left": "POINT (4.21875 53.4375)", "bottom_right": "POINT (5.625 52.03125)" } } } }, ) print(resp)
response = client.search( index: 'museums', size: 0, body: { aggregations: { "tiles-in-bounds": { geohash_grid: { field: 'location', precision: 8, bounds: { top_left: 'POINT (4.21875 53.4375)', bottom_right: 'POINT (5.625 52.03125)' } } } } } ) puts response
const response = await client.search({ index: "museums", size: 0, aggregations: { "tiles-in-bounds": { geohash_grid: { field: "location", precision: 8, bounds: { top_left: "POINT (4.21875 53.4375)", bottom_right: "POINT (5.625 52.03125)", }, }, }, }, }); console.log(response);
POST /museums/_search?size=0 { "aggregations": { "tiles-in-bounds": { "geohash_grid": { "field": "location", "precision": 8, "bounds": { "top_left": "POINT (4.21875 53.4375)", "bottom_right": "POINT (5.625 52.03125)" } } } } }
{ ... "aggregations": { "tiles-in-bounds": { "buckets": [ { "key": "u173zy3j", "doc_count": 1 }, { "key": "u173zvfz", "doc_count": 1 }, { "key": "u173zt90", "doc_count": 1 } ] } } }
赤道处的单元格尺寸
下表显示了不同字符串长度的 geohash 所覆盖单元格的公制尺寸。单元格尺寸随纬度而变化,因此该表给出的是赤道处的最坏情况。
GeoHash 长度 |
区域宽度 x 高度 |
1 |
5,009.4km x 4,992.6km |
2 |
1,252.3km x 624.1km |
3 |
156.5km x 156km |
4 |
39.1km x 19.5km |
5 |
4.9km x 4.9km |
6 |
1.2km x 609.4m |
7 |
152.9m x 152.4m |
8 |
38.2m x 19m |
9 |
4.8m x 4.8m |
10 |
1.2m x 59.5cm |
11 |
14.9cm x 14.9cm |
12 |
3.7cm x 1.9cm |
聚合 geo_shape
字段
对 geo_shape 字段进行聚合的工作方式与对点进行聚合相同,只是单个形状可能被计入多个图块。只要形状的任何部分与某个图块相交,该形状就会计入该图块匹配值的计数。下面是一张演示此情况的图像。
选项
编辑
field |
必填。包含已索引的地理坐标点或地理形状值的字段。必须显式映射为 geo_point 或 geo_shape 字段。 |
precision |
可选。结果中用于定义单元格/桶的 geohash 的字符串长度。默认为 5。精度可以用上述整数精度级别来定义,[1,12] 之外的值将被拒绝。或者,也可以根据“1km”、“10m”之类的距离度量来近似得出精度级别。计算精度级别时,会保证单元格不超过所需精度指定的大小(对角线)。如果由此得出的精度级别高于所支持的 12 级(例如,距离小于 5.6 厘米时),该值将被拒绝。 |
bounds |
可选。用于过滤桶中点的边界框。 |
size |
可选。要返回的最大 geohash 桶数(默认为 10,000)。当结果被修剪时,会根据桶所包含的文档数量确定其优先级。 |
shard_size |
可选。为了更准确地计算最终结果中返回的顶部单元格,聚合默认为从每个分片返回 |