地理距离聚合

编辑

一种多桶聚合,作用于 geo_point 字段,概念上与 范围 聚合非常相似。用户可以定义一个原点和一组距离范围桶。聚合会评估每个文档值与原点的距离,并根据范围确定它所属的桶(如果文档与原点之间的距离落在桶的距离范围内,则该文档属于该桶)。

resp = client.indices.create(
    index="museums",
    mappings={
        "properties": {
            "location": {
                "type": "geo_point"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="museums",
    refresh=True,
    operations=[
        {
            "index": {
                "_id": 1
            }
        },
        {
            "location": "POINT (4.912350 52.374081)",
            "name": "NEMO Science Museum"
        },
        {
            "index": {
                "_id": 2
            }
        },
        {
            "location": "POINT (4.901618 52.369219)",
            "name": "Museum Het Rembrandthuis"
        },
        {
            "index": {
                "_id": 3
            }
        },
        {
            "location": "POINT (4.914722 52.371667)",
            "name": "Nederlands Scheepvaartmuseum"
        },
        {
            "index": {
                "_id": 4
            }
        },
        {
            "location": "POINT (4.405200 51.222900)",
            "name": "Letterenhuis"
        },
        {
            "index": {
                "_id": 5
            }
        },
        {
            "location": "POINT (2.336389 48.861111)",
            "name": "Musée du Louvre"
        },
        {
            "index": {
                "_id": 6
            }
        },
        {
            "location": "POINT (2.327000 48.860000)",
            "name": "Musée d'Orsay"
        }
    ],
)
print(resp1)

resp2 = client.search(
    index="museums",
    size="0",
    aggs={
        "rings_around_amsterdam": {
            "geo_distance": {
                "field": "location",
                "origin": "POINT (4.894 52.3760)",
                "ranges": [
                    {
                        "to": 100000
                    },
                    {
                        "from": 100000,
                        "to": 300000
                    },
                    {
                        "from": 300000
                    }
                ]
            }
        }
    },
)
print(resp2)
response = client.indices.create(
  index: 'museums',
  body: {
    mappings: {
      properties: {
        location: {
          type: 'geo_point'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'museums',
  refresh: true,
  body: [
    {
      index: {
        _id: 1
      }
    },
    {
      location: 'POINT (4.912350 52.374081)',
      name: 'NEMO Science Museum'
    },
    {
      index: {
        _id: 2
      }
    },
    {
      location: 'POINT (4.901618 52.369219)',
      name: 'Museum Het Rembrandthuis'
    },
    {
      index: {
        _id: 3
      }
    },
    {
      location: 'POINT (4.914722 52.371667)',
      name: 'Nederlands Scheepvaartmuseum'
    },
    {
      index: {
        _id: 4
      }
    },
    {
      location: 'POINT (4.405200 51.222900)',
      name: 'Letterenhuis'
    },
    {
      index: {
        _id: 5
      }
    },
    {
      location: 'POINT (2.336389 48.861111)',
      name: 'Musée du Louvre'
    },
    {
      index: {
        _id: 6
      }
    },
    {
      location: 'POINT (2.327000 48.860000)',
      name: "Musée d'Orsay"
    }
  ]
)
puts response

response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      rings_around_amsterdam: {
        geo_distance: {
          field: 'location',
          origin: 'POINT (4.894 52.3760)',
          ranges: [
            {
              to: 100_000
            },
            {
              from: 100_000,
              to: 300_000
            },
            {
              from: 300_000
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "museums",
  mappings: {
    properties: {
      location: {
        type: "geo_point",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "museums",
  refresh: "true",
  operations: [
    {
      index: {
        _id: 1,
      },
    },
    {
      location: "POINT (4.912350 52.374081)",
      name: "NEMO Science Museum",
    },
    {
      index: {
        _id: 2,
      },
    },
    {
      location: "POINT (4.901618 52.369219)",
      name: "Museum Het Rembrandthuis",
    },
    {
      index: {
        _id: 3,
      },
    },
    {
      location: "POINT (4.914722 52.371667)",
      name: "Nederlands Scheepvaartmuseum",
    },
    {
      index: {
        _id: 4,
      },
    },
    {
      location: "POINT (4.405200 51.222900)",
      name: "Letterenhuis",
    },
    {
      index: {
        _id: 5,
      },
    },
    {
      location: "POINT (2.336389 48.861111)",
      name: "Musée du Louvre",
    },
    {
      index: {
        _id: 6,
      },
    },
    {
      location: "POINT (2.327000 48.860000)",
      name: "Musée d'Orsay",
    },
  ],
});
console.log(response1);

const response2 = await client.search({
  index: "museums",
  size: 0,
  aggs: {
    rings_around_amsterdam: {
      geo_distance: {
        field: "location",
        origin: "POINT (4.894 52.3760)",
        ranges: [
          {
            to: 100000,
          },
          {
            from: 100000,
            to: 300000,
          },
          {
            from: 300000,
          },
        ],
      },
    },
  },
});
console.log(response2);
PUT /museums
{
  "mappings": {
    "properties": {
      "location": {
        "type": "geo_point"
      }
    }
  }
}

POST /museums/_bulk?refresh
{"index":{"_id":1}}
{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"}
{"index":{"_id":2}}
{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"}
{"index":{"_id":3}}
{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"}
{"index":{"_id":4}}
{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"}
{"index":{"_id":5}}
{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"}
{"index":{"_id":6}}
{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"}

POST /museums/_search?size=0
{
  "aggs": {
    "rings_around_amsterdam": {
      "geo_distance": {
        "field": "location",
        "origin": "POINT (4.894 52.3760)",
        "ranges": [
          { "to": 100000 },
          { "from": 100000, "to": 300000 },
          { "from": 300000 }
        ]
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "rings_around_amsterdam": {
      "buckets": [
        {
          "key": "*-100000.0",
          "from": 0.0,
          "to": 100000.0,
          "doc_count": 3
        },
        {
          "key": "100000.0-300000.0",
          "from": 100000.0,
          "to": 300000.0,
          "doc_count": 1
        },
        {
          "key": "300000.0-*",
          "from": 300000.0,
          "doc_count": 2
        }
      ]
    }
  }
}

指定的字段必须是 geo_point 类型(只能在映射中显式设置)。它也可以包含 geo_point 字段的数组,在这种情况下,数组中的所有点都将在聚合期间被考虑在内。原点可以接受 geo_point 类型支持的所有格式:

  • 对象格式: { "lat" : 52.3760, "lon" : 4.894 } - 这是最安全的格式,因为它最明确地说明了 lat 和 lon 值。
  • 字符串格式: "52.3760, 4.894" - 其中第一个数字是 lat,第二个数字是 lon
  • 数组格式: [4.894, 52.3760] - 基于 GeoJSON 标准,其中第一个数字是 lon,第二个数字是 lat

默认情况下,距离单位是 m(米),但它也可以接受:mi(英里),in(英寸),yd(码),km(公里),cm(厘米),mm(毫米)。

resp = client.search(
    index="museums",
    size="0",
    aggs={
        "rings": {
            "geo_distance": {
                "field": "location",
                "origin": "POINT (4.894 52.3760)",
                "unit": "km",
                "ranges": [
                    {
                        "to": 100
                    },
                    {
                        "from": 100,
                        "to": 300
                    },
                    {
                        "from": 300
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      rings: {
        geo_distance: {
          field: 'location',
          origin: 'POINT (4.894 52.3760)',
          unit: 'km',
          ranges: [
            {
              to: 100
            },
            {
              from: 100,
              to: 300
            },
            {
              from: 300
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggs: {
    rings: {
      geo_distance: {
        field: "location",
        origin: "POINT (4.894 52.3760)",
        unit: "km",
        ranges: [
          {
            to: 100,
          },
          {
            from: 100,
            to: 300,
          },
          {
            from: 300,
          },
        ],
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggs": {
    "rings": {
      "geo_distance": {
        "field": "location",
        "origin": "POINT (4.894 52.3760)",
        "unit": "km", 
        "ranges": [
          { "to": 100 },
          { "from": 100, "to": 300 },
          { "from": 300 }
        ]
      }
    }
  }
}

距离将以公里为单位计算

有两种距离计算模式:arc(默认)和 plane。arc 计算是最准确的。plane 是最快的但最不准确的。当您的搜索上下文“狭窄”并且跨越较小的地理区域(约 5 公里)时,请考虑使用 plane。plane 将为跨越大范围的搜索(例如,跨大陆搜索)返回更高的误差范围。可以使用 distance_type 参数设置距离计算类型。

resp = client.search(
    index="museums",
    size="0",
    aggs={
        "rings": {
            "geo_distance": {
                "field": "location",
                "origin": "POINT (4.894 52.3760)",
                "unit": "km",
                "distance_type": "plane",
                "ranges": [
                    {
                        "to": 100
                    },
                    {
                        "from": 100,
                        "to": 300
                    },
                    {
                        "from": 300
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      rings: {
        geo_distance: {
          field: 'location',
          origin: 'POINT (4.894 52.3760)',
          unit: 'km',
          distance_type: 'plane',
          ranges: [
            {
              to: 100
            },
            {
              from: 100,
              to: 300
            },
            {
              from: 300
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggs: {
    rings: {
      geo_distance: {
        field: "location",
        origin: "POINT (4.894 52.3760)",
        unit: "km",
        distance_type: "plane",
        ranges: [
          {
            to: 100,
          },
          {
            from: 100,
            to: 300,
          },
          {
            from: 300,
          },
        ],
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggs": {
    "rings": {
      "geo_distance": {
        "field": "location",
        "origin": "POINT (4.894 52.3760)",
        "unit": "km",
        "distance_type": "plane",
        "ranges": [
          { "to": 100 },
          { "from": 100, "to": 300 },
          { "from": 300 }
        ]
      }
    }
  }
}

键控响应

编辑

将 keyed 标志设置为 true 会为每个桶关联一个唯一的字符串键,并将范围作为哈希而不是数组返回。

resp = client.search(
    index="museums",
    size="0",
    aggs={
        "rings_around_amsterdam": {
            "geo_distance": {
                "field": "location",
                "origin": "POINT (4.894 52.3760)",
                "ranges": [
                    {
                        "to": 100000
                    },
                    {
                        "from": 100000,
                        "to": 300000
                    },
                    {
                        "from": 300000
                    }
                ],
                "keyed": True
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      rings_around_amsterdam: {
        geo_distance: {
          field: 'location',
          origin: 'POINT (4.894 52.3760)',
          ranges: [
            {
              to: 100_000
            },
            {
              from: 100_000,
              to: 300_000
            },
            {
              from: 300_000
            }
          ],
          keyed: true
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggs: {
    rings_around_amsterdam: {
      geo_distance: {
        field: "location",
        origin: "POINT (4.894 52.3760)",
        ranges: [
          {
            to: 100000,
          },
          {
            from: 100000,
            to: 300000,
          },
          {
            from: 300000,
          },
        ],
        keyed: true,
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggs": {
    "rings_around_amsterdam": {
      "geo_distance": {
        "field": "location",
        "origin": "POINT (4.894 52.3760)",
        "ranges": [
          { "to": 100000 },
          { "from": 100000, "to": 300000 },
          { "from": 300000 }
        ],
        "keyed": true
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "rings_around_amsterdam": {
      "buckets": {
        "*-100000.0": {
          "from": 0.0,
          "to": 100000.0,
          "doc_count": 3
        },
        "100000.0-300000.0": {
          "from": 100000.0,
          "to": 300000.0,
          "doc_count": 1
        },
        "300000.0-*": {
          "from": 300000.0,
          "doc_count": 2
        }
      }
    }
  }
}

也可以自定义每个范围的键。

resp = client.search(
    index="museums",
    size="0",
    aggs={
        "rings_around_amsterdam": {
            "geo_distance": {
                "field": "location",
                "origin": "POINT (4.894 52.3760)",
                "ranges": [
                    {
                        "to": 100000,
                        "key": "first_ring"
                    },
                    {
                        "from": 100000,
                        "to": 300000,
                        "key": "second_ring"
                    },
                    {
                        "from": 300000,
                        "key": "third_ring"
                    }
                ],
                "keyed": True
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'museums',
  size: 0,
  body: {
    aggregations: {
      rings_around_amsterdam: {
        geo_distance: {
          field: 'location',
          origin: 'POINT (4.894 52.3760)',
          ranges: [
            {
              to: 100_000,
              key: 'first_ring'
            },
            {
              from: 100_000,
              to: 300_000,
              key: 'second_ring'
            },
            {
              from: 300_000,
              key: 'third_ring'
            }
          ],
          keyed: true
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "museums",
  size: 0,
  aggs: {
    rings_around_amsterdam: {
      geo_distance: {
        field: "location",
        origin: "POINT (4.894 52.3760)",
        ranges: [
          {
            to: 100000,
            key: "first_ring",
          },
          {
            from: 100000,
            to: 300000,
            key: "second_ring",
          },
          {
            from: 300000,
            key: "third_ring",
          },
        ],
        keyed: true,
      },
    },
  },
});
console.log(response);
POST /museums/_search?size=0
{
  "aggs": {
    "rings_around_amsterdam": {
      "geo_distance": {
        "field": "location",
        "origin": "POINT (4.894 52.3760)",
        "ranges": [
          { "to": 100000, "key": "first_ring" },
          { "from": 100000, "to": 300000, "key": "second_ring" },
          { "from": 300000, "key": "third_ring" }
        ],
        "keyed": true
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "rings_around_amsterdam": {
      "buckets": {
        "first_ring": {
          "from": 0.0,
          "to": 100000.0,
          "doc_count": 3
        },
        "second_ring": {
          "from": 100000.0,
          "to": 300000.0,
          "doc_count": 1
        },
        "third_ring": {
          "from": 300000.0,
          "doc_count": 2
        }
      }
    }
  }
}