Geohash 网格聚合

编辑

一种多桶聚合,它将 geo_point 和 geo_shape 值分组到表示网格的桶中。生成的网格可以是稀疏的,并且仅包含具有匹配数据的单元格。每个单元格都使用 geohash 进行标记,该 geohash 的精度可由用户定义。

  • 高精度 geohash 具有较长的字符串长度,并表示仅覆盖小区域的单元格。
  • 低精度 geohash 具有较短的字符串长度,其表示的每个单元格都覆盖较大的区域。

此聚合中使用的 Geohash 可以选择 1 到 12 之间的精度。

长度为 12 的最高精度 geohash 生成的单元格覆盖面积小于一平方米,因此高精度请求在 RAM 和结果大小方面可能非常昂贵。请参阅下面的示例,了解如何在请求高细节级别之前首先将聚合过滤到较小的地理区域。

您只能使用 geohash_grid 来聚合显式映射的 geo_point 或 geo_shape 字段。如果 geo_point 字段包含一个数组,则 geohash_grid 会聚合所有数组值。

简单的低精度请求

编辑
resp = client.indices.create(
    index="museums",
    mappings={
        "properties": {
            "location": {
                "type": "geo_point"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="museums",
    refresh=True,
    operations=[
        {
            "index": {
                "_id": 1
            }
        },
        {
            "location": "POINT (4.912350 52.374081)",
            "name": "NEMO Science Museum"
        },
        {
            "index": {
                "_id": 2
            }
        },
        {
            "location": "POINT (4.901618 52.369219)",
            "name": "Museum Het Rembrandthuis"
        },
        {
            "index": {
                "_id": 3
            }
        },
        {
            "location": "POINT (4.914722 52.371667)",
            "name": "Nederlands Scheepvaartmuseum"
        },
        {
            "index": {
                "_id": 4
            }
        },
        {
            "location": "POINT (4.405200 51.222900)",
            "name": "Letterenhuis"
        },
        {
            "index": {
                "_id": 5
            }
        },
        {
            "location": "POINT (2.336389 48.861111)",
            "name": "Musée du Louvre"
        },
        {
            "index": {
                "_id": 6
            }
        },
        {
            "location": "POINT (2.327000 48.860000)",
            "name": "Musée d'Orsay"
        }
    ],
)
print(resp1)

resp2 = client.search(
    index="museums",
    size="0",
    aggregations={
        "large-grid": {
            "geohash_grid": {
                "field": "location",
                "precision": 3
            }
        }
    },
)
print(resp2)
response = client.indices.create(
  index: 'museums',
  body: {
    mappings: {
      properties: {
        location: {
          type: 'geo_point'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'museums',
  refresh: true,
  body: [
    {
      index: {
        _id: 1
      }
    },
    {
      location: 'POINT (4.912350 52.374081)',
      name: 'NEMO Science Museum'
    },
    {
      index: {
        _id: 2
      }
    },
    {
      location: 'POINT (4.901618 52.369219)',
      name: 'Museum Het Rembrandthuis'
    },
    {
      index: {
        _id: 3
      }
    },
    {
      location: 'POINT (4.914722 52.371667)',
      name: 'Nederlands Scheepvaartmuseum'
    },
    {
      index: {
        _id: 4
      }
    },
    {
      location: 'POINT (4.405200 51.222900)',
      name: 'Letterenhuis'
    },
    {
      index: {
        _id: 5
      }
    },
    {
      location: 'POINT (2.336389 48.861111)',
      name: 'Musée du Louvre'
    },
    {
      index: {
        _id: 6
      }
    },
    {
      location: 'POINT (2.327000 48.860000)',
      name: "Musée d'Orsay"
    }
  ]
)
puts response

response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      "large-grid": {
        geohash_grid: {
          field: 'location',
          precision: 3
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "museums",
  mappings: {
    properties: {
      location: {
        type: "geo_point",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "museums",
  refresh: "true",
  operations: [
    {
      index: {
        _id: 1,
      },
    },
    {
      location: "POINT (4.912350 52.374081)",
      name: "NEMO Science Museum",
    },
    {
      index: {
        _id: 2,
      },
    },
    {
      location: "POINT (4.901618 52.369219)",
      name: "Museum Het Rembrandthuis",
    },
    {
      index: {
        _id: 3,
      },
    },
    {
      location: "POINT (4.914722 52.371667)",
      name: "Nederlands Scheepvaartmuseum",
    },
    {
      index: {
        _id: 4,
      },
    },
    {
      location: "POINT (4.405200 51.222900)",
      name: "Letterenhuis",
    },
    {
      index: {
        _id: 5,
      },
    },
    {
      location: "POINT (2.336389 48.861111)",
      name: "Musée du Louvre",
    },
    {
      index: {
        _id: 6,
      },
    },
    {
      location: "POINT (2.327000 48.860000)",
      name: "Musée d'Orsay",
    },
  ],
});
console.log(response1);

const response2 = await client.search({
  index: "museums",
  size: 0,
  aggregations: {
    "large-grid": {
      geohash_grid: {
        field: "location",
        precision: 3,
      },
    },
  },
});
console.log(response2);
PUT /museums
{
  "mappings": {
    "properties": {
      "location": {
        "type": "geo_point"
      }
    }
  }
}

POST /museums/_bulk?refresh
{"index":{"_id":1}}
{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"}
{"index":{"_id":2}}
{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"}
{"index":{"_id":3}}
{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"}
{"index":{"_id":4}}
{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"}
{"index":{"_id":5}}
{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"}
{"index":{"_id":6}}
{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"}

POST /museums/_search?size=0
{
  "aggregations": {
    "large-grid": {
      "geohash_grid": {
        "field": "location",
        "precision": 3
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
  "large-grid": {
    "buckets": [
      {
        "key": "u17",
        "doc_count": 3
      },
      {
        "key": "u09",
        "doc_count": 2
      },
      {
        "key": "u15",
        "doc_count": 1
      }
    ]
  }
}
}

高精度请求

编辑

当请求详细的桶时(通常用于显示“放大”的地图),应应用类似 geo_bounding_box 的过滤器来缩小主题区域,否则可能会创建并返回数百万个桶。

resp = client.search(
    index="museums",
    size="0",
    aggregations={
        "zoomed-in": {
            "filter": {
                "geo_bounding_box": {
                    "location": {
                        "top_left": "POINT (4.9 52.4)",
                        "bottom_right": "POINT (5.0 52.3)"
                    }
                }
            },
            "aggregations": {
                "zoom1": {
                    "geohash_grid": {
                        "field": "location",
                        "precision": 8
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      "zoomed-in": {
        filter: {
          geo_bounding_box: {
            location: {
              top_left: 'POINT (4.9 52.4)',
              bottom_right: 'POINT (5.0 52.3)'
            }
          }
        },
        aggregations: {
          "zoom1": {
            geohash_grid: {
              field: 'location',
              precision: 8
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggregations: {
    "zoomed-in": {
      filter: {
        geo_bounding_box: {
          location: {
            top_left: "POINT (4.9 52.4)",
            bottom_right: "POINT (5.0 52.3)",
          },
        },
      },
      aggregations: {
        zoom1: {
          geohash_grid: {
            field: "location",
            precision: 8,
          },
        },
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggregations": {
    "zoomed-in": {
      "filter": {
        "geo_bounding_box": {
          "location": {
            "top_left": "POINT (4.9 52.4)",
            "bottom_right": "POINT (5.0 52.3)"
          }
        }
      },
      "aggregations": {
        "zoom1": {
          "geohash_grid": {
            "field": "location",
            "precision": 8
          }
        }
      }
    }
  }
}

geohash_grid 聚合返回的 geohash 也可用于放大。要放大上一个示例中返回的第一个 geohash u17,应将其同时指定为 top_left 和 bottom_right 角。

resp = client.search(
    index="museums",
    size="0",
    aggregations={
        "zoomed-in": {
            "filter": {
                "geo_bounding_box": {
                    "location": {
                        "top_left": "u17",
                        "bottom_right": "u17"
                    }
                }
            },
            "aggregations": {
                "zoom1": {
                    "geohash_grid": {
                        "field": "location",
                        "precision": 8
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      "zoomed-in": {
        filter: {
          geo_bounding_box: {
            location: {
              top_left: 'u17',
              bottom_right: 'u17'
            }
          }
        },
        aggregations: {
          "zoom1": {
            geohash_grid: {
              field: 'location',
              precision: 8
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggregations: {
    "zoomed-in": {
      filter: {
        geo_bounding_box: {
          location: {
            top_left: "u17",
            bottom_right: "u17",
          },
        },
      },
      aggregations: {
        zoom1: {
          geohash_grid: {
            field: "location",
            precision: 8,
          },
        },
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggregations": {
    "zoomed-in": {
      "filter": {
        "geo_bounding_box": {
          "location": {
            "top_left": "u17",
            "bottom_right": "u17"
          }
        }
      },
      "aggregations": {
        "zoom1": {
          "geohash_grid": {
            "field": "location",
            "precision": 8
          }
        }
      }
    }
  }
}
{
  ...
  "aggregations": {
    "zoomed-in": {
      "doc_count": 3,
      "zoom1": {
        "buckets": [
          {
            "key": "u173zy3j",
            "doc_count": 1
          },
          {
            "key": "u173zvfz",
            "doc_count": 1
          },
          {
            "key": "u173zt90",
            "doc_count": 1
          }
        ]
      }
    }
  }
}

对于不支持 geohash 的系统上的“放大”,应使用可用的 geohash 库之一将桶键转换为边界框。例如,对于 javascript,可以使用 node-geohash 库:

var geohash = require('ngeohash');

// bbox will contain [ 52.03125, 4.21875, 53.4375, 5.625 ]
//                   [   minlat,  minlon,  maxlat, maxlon]
var bbox = geohash.decode_bbox('u17');

带有附加边界框过滤的请求

编辑

geohash_grid 聚合支持一个可选的 bounds 参数,该参数将考虑的单元格限制为与提供的边界相交的单元格。bounds 参数接受的边界框格式,与地理边界框查询中指定边界时所接受的所有格式相同。此边界框可以与聚合上下文中的任何其他 geo_bounding_box 查询一起使用,也可以不使用。它是一个独立的边界框,可以与在聚合上下文中定义的任何其他 geo_bounding_box 查询相交、相等或不相交。

resp = client.search(
    index="museums",
    size="0",
    aggregations={
        "tiles-in-bounds": {
            "geohash_grid": {
                "field": "location",
                "precision": 8,
                "bounds": {
                    "top_left": "POINT (4.21875 53.4375)",
                    "bottom_right": "POINT (5.625 52.03125)"
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      "tiles-in-bounds": {
        geohash_grid: {
          field: 'location',
          precision: 8,
          bounds: {
            top_left: 'POINT (4.21875 53.4375)',
            bottom_right: 'POINT (5.625 52.03125)'
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggregations: {
    "tiles-in-bounds": {
      geohash_grid: {
        field: "location",
        precision: 8,
        bounds: {
          top_left: "POINT (4.21875 53.4375)",
          bottom_right: "POINT (5.625 52.03125)",
        },
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggregations": {
    "tiles-in-bounds": {
      "geohash_grid": {
        "field": "location",
        "precision": 8,
        "bounds": {
          "top_left": "POINT (4.21875 53.4375)",
          "bottom_right": "POINT (5.625 52.03125)"
        }
      }
    }
  }
}
{
  ...
  "aggregations": {
    "tiles-in-bounds": {
      "buckets": [
        {
          "key": "u173zy3j",
          "doc_count": 1
        },
        {
          "key": "u173zvfz",
          "doc_count": 1
        },
        {
          "key": "u173zt90",
          "doc_count": 1
        }
      ]
    }
  }
}

赤道的单元格尺寸

编辑

下表显示了 geohash 的各种字符串长度所覆盖的单元格的度量尺寸。单元格尺寸随纬度而变化,因此该表适用于赤道的最坏情况。

GeoHash 长度

区域宽度 x 高度

1

5,009.4km x 4,992.6km

2

1,252.3km x 624.1km

3

156.5km x 156km

4

39.1km x 19.5km

5

4.9km x 4.9km

6

1.2km x 609.4m

7

152.9m x 152.4m

8

38.2m x 19m

9

4.8m x 4.8m

10

1.2m x 59.5cm

11

14.9cm x 14.9cm

12

3.7cm x 1.9cm

聚合 geo_shape 字段

编辑

对 Geoshape 字段进行聚合的工作方式与对点进行聚合的方式相同,只是一个形状可以在多个图块中被计数。如果形状的任何部分与某个图块相交,则该形状将计入该图块匹配值的计数。下面是一张演示此情况的图像:

geoshape grid

选项

编辑

field

必填。包含已索引的地理坐标点或地理形状值的字段。必须显式映射为 geo_point 或 geo_shape 字段。如果该字段包含数组,则 geohash_grid 会聚合所有数组值。

precision

可选。用于定义结果中单元格/桶的 geohash 的字符串长度。默认为 5。精度可以根据上述整数精度级别定义。将拒绝 [1,12] 之外的值。或者,可以从诸如“1km”、“10m”之类的距离度量来近似精度级别。精度级别的计算方式是,单元格不会超过所需精度的指定大小(对角线)。当这导致高于支持的 12 级的精度级别时(例如,对于小于 5.6 厘米的距离),该值将被拒绝。

bounds

可选。用于过滤桶中点的边界框。

size

可选。要返回的最大 geohash 桶数(默认为 10,000)。当结果被修剪时,桶会根据它们包含的文档量进行优先级排序。

shard_size

可选。为了更准确地计算最终结果中返回的顶部单元格,聚合默认为从每个分片返回 max(10,(size x number-of-shards)) 个桶。如果这种启发式方法不可取,可以使用此参数覆盖从每个分片考虑的数量。