地理线聚合

编辑

geo_line 聚合将存储桶内的所有 geo_point 值聚合为 LineString,并按选定的 sort 字段排序。 例如,此 sort 可以是日期字段。 返回的存储桶是一个有效的 GeoJSON Feature,表示线几何形状。

resp = client.indices.create(
    index="test",
    mappings={
        "properties": {
            "my_location": {
                "type": "geo_point"
            },
            "group": {
                "type": "keyword"
            },
            "@timestamp": {
                "type": "date"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="test",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "my_location": {
                "lat": 52.373184,
                "lon": 4.889187
            },
            "@timestamp": "2023-01-02T09:00:00Z"
        },
        {
            "index": {}
        },
        {
            "my_location": {
                "lat": 52.370159,
                "lon": 4.885057
            },
            "@timestamp": "2023-01-02T10:00:00Z"
        },
        {
            "index": {}
        },
        {
            "my_location": {
                "lat": 52.369219,
                "lon": 4.901618
            },
            "@timestamp": "2023-01-02T13:00:00Z"
        },
        {
            "index": {}
        },
        {
            "my_location": {
                "lat": 52.374081,
                "lon": 4.91235
            },
            "@timestamp": "2023-01-02T16:00:00Z"
        },
        {
            "index": {}
        },
        {
            "my_location": {
                "lat": 52.371667,
                "lon": 4.914722
            },
            "@timestamp": "2023-01-03T12:00:00Z"
        }
    ],
)
print(resp1)

resp2 = client.search(
    index="test",
    filter_path="aggregations",
    aggs={
        "line": {
            "geo_line": {
                "point": {
                    "field": "my_location"
                },
                "sort": {
                    "field": "@timestamp"
                }
            }
        }
    },
)
print(resp2)
response = client.indices.create(
  index: 'test',
  body: {
    mappings: {
      properties: {
        my_location: {
          type: 'geo_point'
        },
        group: {
          type: 'keyword'
        },
        "@timestamp": {
          type: 'date'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'test',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      my_location: {
        lat: 52.373184,
        lon: 4.889187
      },
      "@timestamp": '2023-01-02T09:00:00Z'
    },
    {
      index: {}
    },
    {
      my_location: {
        lat: 52.370159,
        lon: 4.885057
      },
      "@timestamp": '2023-01-02T10:00:00Z'
    },
    {
      index: {}
    },
    {
      my_location: {
        lat: 52.369219,
        lon: 4.901618
      },
      "@timestamp": '2023-01-02T13:00:00Z'
    },
    {
      index: {}
    },
    {
      my_location: {
        lat: 52.374081,
        lon: 4.91235
      },
      "@timestamp": '2023-01-02T16:00:00Z'
    },
    {
      index: {}
    },
    {
      my_location: {
        lat: 52.371667,
        lon: 4.914722
      },
      "@timestamp": '2023-01-03T12:00:00Z'
    }
  ]
)
puts response

response = client.search(
  index: 'test',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      line: {
        geo_line: {
          point: {
            field: 'my_location'
          },
          sort: {
            field: '@timestamp'
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "test",
  mappings: {
    properties: {
      my_location: {
        type: "geo_point",
      },
      group: {
        type: "keyword",
      },
      "@timestamp": {
        type: "date",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "test",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      my_location: {
        lat: 52.373184,
        lon: 4.889187,
      },
      "@timestamp": "2023-01-02T09:00:00Z",
    },
    {
      index: {},
    },
    {
      my_location: {
        lat: 52.370159,
        lon: 4.885057,
      },
      "@timestamp": "2023-01-02T10:00:00Z",
    },
    {
      index: {},
    },
    {
      my_location: {
        lat: 52.369219,
        lon: 4.901618,
      },
      "@timestamp": "2023-01-02T13:00:00Z",
    },
    {
      index: {},
    },
    {
      my_location: {
        lat: 52.374081,
        lon: 4.91235,
      },
      "@timestamp": "2023-01-02T16:00:00Z",
    },
    {
      index: {},
    },
    {
      my_location: {
        lat: 52.371667,
        lon: 4.914722,
      },
      "@timestamp": "2023-01-03T12:00:00Z",
    },
  ],
});
console.log(response1);

const response2 = await client.search({
  index: "test",
  filter_path: "aggregations",
  aggs: {
    line: {
      geo_line: {
        point: {
          field: "my_location",
        },
        sort: {
          field: "@timestamp",
        },
      },
    },
  },
});
console.log(response2);
PUT test
{
    "mappings": {
        "properties": {
            "my_location": { "type": "geo_point" },
            "group":       { "type": "keyword" },
            "@timestamp":  { "type": "date" }
        }
    }
}

POST /test/_bulk?refresh
{"index":{}}
{"my_location": {"lat":52.373184, "lon":4.889187}, "@timestamp": "2023-01-02T09:00:00Z"}
{"index":{}}
{"my_location": {"lat":52.370159, "lon":4.885057}, "@timestamp": "2023-01-02T10:00:00Z"}
{"index":{}}
{"my_location": {"lat":52.369219, "lon":4.901618}, "@timestamp": "2023-01-02T13:00:00Z"}
{"index":{}}
{"my_location": {"lat":52.374081, "lon":4.912350}, "@timestamp": "2023-01-02T16:00:00Z"}
{"index":{}}
{"my_location": {"lat":52.371667, "lon":4.914722}, "@timestamp": "2023-01-03T12:00:00Z"}

POST /test/_search?filter_path=aggregations
{
  "aggs": {
    "line": {
      "geo_line": {
        "point": {"field": "my_location"},
        "sort":  {"field": "@timestamp"}
      }
    }
  }
}

返回结果

{
  "aggregations": {
    "line": {
      "type": "Feature",
      "geometry": {
        "type": "LineString",
        "coordinates": [
            [ 4.889187, 52.373184 ],
            [ 4.885057, 52.370159 ],
            [ 4.901618, 52.369219 ],
            [ 4.912350, 52.374081 ],
            [ 4.914722, 52.371667 ]
        ]
      },
      "properties": {
        "complete": true
      }
    }
  }
}

生成的 GeoJSON Feature 同时包含由聚合生成的路径的 LineString 几何形状以及 properties 的映射。 属性 complete 说明是否使用了所有匹配的文档来生成几何形状。 可以使用 size 选项来限制聚合中包含的文档数量,此时结果中可能出现 complete: false。 哪些文档会从结果中被删除,取决于聚合是否基于 time_series。

此结果可以显示在地图用户界面中

Kibana map with museum tour of Amsterdam

选项

编辑
point
(必需)

此选项指定 geo_point 字段的名称

配置 my_location 作为点字段的示例用法

"point": {
  "field": "my_location"
}
sort
(在 time_series 聚合之外是必需的)

此选项指定用作点排序键的数值字段的名称。 当 geo_line 聚合嵌套在 time_series 聚合内部时,此字段默认为 @timestamp,任何其他值都会导致错误。

配置 @timestamp 作为排序键的示例用法

"sort": {
  "field": "@timestamp"
}
include_sort
(可选,布尔值,默认值:false)当为 true 时,此选项会在要素属性中包含一个额外的排序值数组。
sort_order
(可选,字符串,默认值:"ASC")此选项接受两个值之一:"ASC"、"DESC"。 当设置为 "ASC" 时,线上的点按排序键升序排列;当设置为 "DESC" 时,线上的点按排序键降序排列。
size
(可选,整数,默认值:10000)聚合中表示的线的最大长度(即点的数量)。 有效大小介于 1 和 10000 之间。 在 time_series 中,聚合使用线简化来约束大小,否则它使用截断。 有关所涉及的细微之处的讨论,请参阅为什么要按时间序列分组?

分组

编辑

这个简单的示例为查询选择的所有数据生成单个轨迹。 但是,更常见的是需要将数据分组到多个轨迹中。 例如,在按时间戳对每个航班进行排序并为每个航班生成单独的轨迹之前,按航班呼号对飞行应答器测量值进行分组。

在以下示例中,我们将对阿姆斯特丹、安特卫普和巴黎的城市中感兴趣的点的位置进行分组。 这些轨迹将按照博物馆和其他景点的步行游览的计划访问顺序进行排序。

为了演示时间序列分组和非时间序列分组之间的区别,我们将首先创建一个启用时间序列的索引,然后给出不使用时间序列和使用时间序列分组相同数据的示例。

resp = client.indices.create(
    index="tour",
    mappings={
        "properties": {
            "city": {
                "type": "keyword",
                "time_series_dimension": True
            },
            "category": {
                "type": "keyword"
            },
            "route": {
                "type": "long"
            },
            "name": {
                "type": "keyword"
            },
            "location": {
                "type": "geo_point"
            },
            "@timestamp": {
                "type": "date"
            }
        }
    },
    settings={
        "index": {
            "mode": "time_series",
            "routing_path": [
                "city"
            ],
            "time_series": {
                "start_time": "2023-01-01T00:00:00Z",
                "end_time": "2024-01-01T00:00:00Z"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="tour",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-02T09:00:00Z",
            "route": 0,
            "location": "POINT(4.889187 52.373184)",
            "city": "Amsterdam",
            "category": "Attraction",
            "name": "Royal Palace Amsterdam"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-02T10:00:00Z",
            "route": 1,
            "location": "POINT(4.885057 52.370159)",
            "city": "Amsterdam",
            "category": "Attraction",
            "name": "The Amsterdam Dungeon"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-02T13:00:00Z",
            "route": 2,
            "location": "POINT(4.901618 52.369219)",
            "city": "Amsterdam",
            "category": "Museum",
            "name": "Museum Het Rembrandthuis"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-02T16:00:00Z",
            "route": 3,
            "location": "POINT(4.912350 52.374081)",
            "city": "Amsterdam",
            "category": "Museum",
            "name": "NEMO Science Museum"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-03T12:00:00Z",
            "route": 4,
            "location": "POINT(4.914722 52.371667)",
            "city": "Amsterdam",
            "category": "Museum",
            "name": "Nederlands Scheepvaartmuseum"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-04T09:00:00Z",
            "route": 5,
            "location": "POINT(4.401384 51.220292)",
            "city": "Antwerp",
            "category": "Attraction",
            "name": "Cathedral of Our Lady"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-04T12:00:00Z",
            "route": 6,
            "location": "POINT(4.405819 51.221758)",
            "city": "Antwerp",
            "category": "Museum",
            "name": "Snijders&Rockoxhuis"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-04T15:00:00Z",
            "route": 7,
            "location": "POINT(4.405200 51.222900)",
            "city": "Antwerp",
            "category": "Museum",
            "name": "Letterenhuis"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-05T10:00:00Z",
            "route": 8,
            "location": "POINT(2.336389 48.861111)",
            "city": "Paris",
            "category": "Museum",
            "name": "Musée du Louvre"
        },
        {
            "index": {}
        },
        {
            "@timestamp": "2023-01-05T14:00:00Z",
            "route": 9,
            "location": "POINT(2.327000 48.860000)",
            "city": "Paris",
            "category": "Museum",
            "name": "Musée dOrsay"
        }
    ],
)
print(resp1)
response = client.indices.create(
  index: 'tour',
  body: {
    mappings: {
      properties: {
        city: {
          type: 'keyword',
          time_series_dimension: true
        },
        category: {
          type: 'keyword'
        },
        route: {
          type: 'long'
        },
        name: {
          type: 'keyword'
        },
        location: {
          type: 'geo_point'
        },
        "@timestamp": {
          type: 'date'
        }
      }
    },
    settings: {
      index: {
        mode: 'time_series',
        routing_path: [
          'city'
        ],
        time_series: {
          start_time: '2023-01-01T00:00:00Z',
          end_time: '2024-01-01T00:00:00Z'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'tour',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-02T09:00:00Z',
      route: 0,
      location: 'POINT(4.889187 52.373184)',
      city: 'Amsterdam',
      category: 'Attraction',
      name: 'Royal Palace Amsterdam'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-02T10:00:00Z',
      route: 1,
      location: 'POINT(4.885057 52.370159)',
      city: 'Amsterdam',
      category: 'Attraction',
      name: 'The Amsterdam Dungeon'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-02T13:00:00Z',
      route: 2,
      location: 'POINT(4.901618 52.369219)',
      city: 'Amsterdam',
      category: 'Museum',
      name: 'Museum Het Rembrandthuis'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-02T16:00:00Z',
      route: 3,
      location: 'POINT(4.912350 52.374081)',
      city: 'Amsterdam',
      category: 'Museum',
      name: 'NEMO Science Museum'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-03T12:00:00Z',
      route: 4,
      location: 'POINT(4.914722 52.371667)',
      city: 'Amsterdam',
      category: 'Museum',
      name: 'Nederlands Scheepvaartmuseum'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-04T09:00:00Z',
      route: 5,
      location: 'POINT(4.401384 51.220292)',
      city: 'Antwerp',
      category: 'Attraction',
      name: 'Cathedral of Our Lady'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-04T12:00:00Z',
      route: 6,
      location: 'POINT(4.405819 51.221758)',
      city: 'Antwerp',
      category: 'Museum',
      name: 'Snijders&Rockoxhuis'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-04T15:00:00Z',
      route: 7,
      location: 'POINT(4.405200 51.222900)',
      city: 'Antwerp',
      category: 'Museum',
      name: 'Letterenhuis'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-05T10:00:00Z',
      route: 8,
      location: 'POINT(2.336389 48.861111)',
      city: 'Paris',
      category: 'Museum',
      name: 'Musée du Louvre'
    },
    {
      index: {}
    },
    {
      "@timestamp": '2023-01-05T14:00:00Z',
      route: 9,
      location: 'POINT(2.327000 48.860000)',
      city: 'Paris',
      category: 'Museum',
      name: 'Musée dOrsay'
    }
  ]
)
puts response
const response = await client.indices.create({
  index: "tour",
  mappings: {
    properties: {
      city: {
        type: "keyword",
        time_series_dimension: true,
      },
      category: {
        type: "keyword",
      },
      route: {
        type: "long",
      },
      name: {
        type: "keyword",
      },
      location: {
        type: "geo_point",
      },
      "@timestamp": {
        type: "date",
      },
    },
  },
  settings: {
    index: {
      mode: "time_series",
      routing_path: ["city"],
      time_series: {
        start_time: "2023-01-01T00:00:00Z",
        end_time: "2024-01-01T00:00:00Z",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "tour",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-02T09:00:00Z",
      route: 0,
      location: "POINT(4.889187 52.373184)",
      city: "Amsterdam",
      category: "Attraction",
      name: "Royal Palace Amsterdam",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-02T10:00:00Z",
      route: 1,
      location: "POINT(4.885057 52.370159)",
      city: "Amsterdam",
      category: "Attraction",
      name: "The Amsterdam Dungeon",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-02T13:00:00Z",
      route: 2,
      location: "POINT(4.901618 52.369219)",
      city: "Amsterdam",
      category: "Museum",
      name: "Museum Het Rembrandthuis",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-02T16:00:00Z",
      route: 3,
      location: "POINT(4.912350 52.374081)",
      city: "Amsterdam",
      category: "Museum",
      name: "NEMO Science Museum",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-03T12:00:00Z",
      route: 4,
      location: "POINT(4.914722 52.371667)",
      city: "Amsterdam",
      category: "Museum",
      name: "Nederlands Scheepvaartmuseum",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-04T09:00:00Z",
      route: 5,
      location: "POINT(4.401384 51.220292)",
      city: "Antwerp",
      category: "Attraction",
      name: "Cathedral of Our Lady",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-04T12:00:00Z",
      route: 6,
      location: "POINT(4.405819 51.221758)",
      city: "Antwerp",
      category: "Museum",
      name: "Snijders&Rockoxhuis",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-04T15:00:00Z",
      route: 7,
      location: "POINT(4.405200 51.222900)",
      city: "Antwerp",
      category: "Museum",
      name: "Letterenhuis",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-05T10:00:00Z",
      route: 8,
      location: "POINT(2.336389 48.861111)",
      city: "Paris",
      category: "Museum",
      name: "Musée du Louvre",
    },
    {
      index: {},
    },
    {
      "@timestamp": "2023-01-05T14:00:00Z",
      route: 9,
      location: "POINT(2.327000 48.860000)",
      city: "Paris",
      category: "Museum",
      name: "Musée dOrsay",
    },
  ],
});
console.log(response1);
PUT tour
{
    "mappings": {
        "properties": {
            "city": {
                "type": "keyword",
                "time_series_dimension": true
            },
            "category":   { "type": "keyword" },
            "route":      { "type": "long" },
            "name":       { "type": "keyword" },
            "location":   { "type": "geo_point" },
            "@timestamp": { "type": "date" }
        }
    },
    "settings": {
        "index": {
            "mode": "time_series",
            "routing_path": [ "city" ],
            "time_series": {
                "start_time": "2023-01-01T00:00:00Z",
                "end_time": "2024-01-01T00:00:00Z"
            }
        }
    }
}

POST /tour/_bulk?refresh
{"index":{}}
{"@timestamp": "2023-01-02T09:00:00Z", "route": 0, "location": "POINT(4.889187 52.373184)", "city": "Amsterdam", "category": "Attraction", "name": "Royal Palace Amsterdam"}
{"index":{}}
{"@timestamp": "2023-01-02T10:00:00Z", "route": 1, "location": "POINT(4.885057 52.370159)", "city": "Amsterdam", "category": "Attraction", "name": "The Amsterdam Dungeon"}
{"index":{}}
{"@timestamp": "2023-01-02T13:00:00Z", "route": 2, "location": "POINT(4.901618 52.369219)", "city": "Amsterdam", "category": "Museum", "name": "Museum Het Rembrandthuis"}
{"index":{}}
{"@timestamp": "2023-01-02T16:00:00Z", "route": 3, "location": "POINT(4.912350 52.374081)", "city": "Amsterdam", "category": "Museum", "name": "NEMO Science Museum"}
{"index":{}}
{"@timestamp": "2023-01-03T12:00:00Z", "route": 4, "location": "POINT(4.914722 52.371667)", "city": "Amsterdam", "category": "Museum", "name": "Nederlands Scheepvaartmuseum"}
{"index":{}}
{"@timestamp": "2023-01-04T09:00:00Z", "route": 5, "location": "POINT(4.401384 51.220292)", "city": "Antwerp", "category": "Attraction", "name": "Cathedral of Our Lady"}
{"index":{}}
{"@timestamp": "2023-01-04T12:00:00Z", "route": 6, "location": "POINT(4.405819 51.221758)", "city": "Antwerp", "category": "Museum", "name": "Snijders&Rockoxhuis"}
{"index":{}}
{"@timestamp": "2023-01-04T15:00:00Z", "route": 7, "location": "POINT(4.405200 51.222900)", "city": "Antwerp", "category": "Museum", "name": "Letterenhuis"}
{"index":{}}
{"@timestamp": "2023-01-05T10:00:00Z", "route": 8, "location": "POINT(2.336389 48.861111)", "city": "Paris", "category": "Museum", "name": "Musée du Louvre"}
{"index":{}}
{"@timestamp": "2023-01-05T14:00:00Z", "route": 9, "location": "POINT(2.327000 48.860000)", "city": "Paris", "category": "Museum", "name": "Musée dOrsay"}

使用 terms 聚合分组

编辑

对于非时间序列用例,可以使用基于城市名称的 terms 聚合来完成此数据的分组。 无论我们是否将 tour 索引定义为时间序列索引,这都将起作用。

resp = client.search(
    index="tour",
    filter_path="aggregations",
    aggregations={
        "path": {
            "terms": {
                "field": "city"
            },
            "aggregations": {
                "museum_tour": {
                    "geo_line": {
                        "point": {
                            "field": "location"
                        },
                        "sort": {
                            "field": "@timestamp"
                        }
                    }
                }
            }
        }
    },
)
print(resp)
const response = await client.search({
  index: "tour",
  filter_path: "aggregations",
  aggregations: {
    path: {
      terms: {
        field: "city",
      },
      aggregations: {
        museum_tour: {
          geo_line: {
            point: {
              field: "location",
            },
            sort: {
              field: "@timestamp",
            },
          },
        },
      },
    },
  },
});
console.log(response);
POST /tour/_search?filter_path=aggregations
{
  "aggregations": {
    "path": {
      "terms": {"field": "city"},
      "aggregations": {
        "museum_tour": {
          "geo_line": {
            "point": {"field": "location"},
            "sort": {"field": "@timestamp"}
          }
        }
      }
    }
  }
}

返回结果

{
  "aggregations": {
    "path": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "Amsterdam",
          "doc_count": 5,
          "museum_tour": {
            "type": "Feature",
            "geometry": {
              "coordinates": [ [ 4.889187, 52.373184 ], [ 4.885057, 52.370159 ], [ 4.901618, 52.369219 ], [ 4.91235, 52.374081 ], [ 4.914722, 52.371667 ] ],
              "type": "LineString"
            },
            "properties": {
              "complete": true
            }
          }
        },
        {
          "key": "Antwerp",
          "doc_count": 3,
          "museum_tour": {
            "type": "Feature",
            "geometry": {
              "coordinates": [ [ 4.401384, 51.220292 ], [ 4.405819, 51.221758 ], [ 4.4052, 51.2229 ] ],
              "type": "LineString"
            },
            "properties": {
              "complete": true
            }
          }
        },
        {
          "key": "Paris",
          "doc_count": 2,
          "museum_tour": {
            "type": "Feature",
            "geometry": {
              "coordinates": [ [ 2.336389, 48.861111 ], [ 2.327, 48.86 ] ],
              "type": "LineString"
            },
            "properties": {
              "complete": true
            }
          }
        }
      ]
    }
  }
}

这些结果包含一个存储桶数组,其中每个存储桶都是一个 JSON 对象,其中 key 显示 city 字段的名称,内部聚合结果称为 museum_tour,其中包含一个 GeoJSON Feature,描述该城市各种景点之间的实际路线。 每个结果还包括一个 properties 对象,其中包含 complete 值;如果几何图形被截断到 size 参数指定的上限,则该值为 false。 请注意,当我们在下一个示例中使用 time_series 时,我们将获得结构略有不同的相同结果。

使用时间序列分组

编辑

此功能处于技术预览状态,可能会在将来的版本中更改或删除。 Elastic 将致力于修复任何问题,但技术预览中的功能不受官方 GA 功能的支持 SLA 的约束。

使用与之前相同的数据,我们还可以使用 time_series 聚合执行分组。 这将按 TSID 分组,TSID 定义为所有具有 time_series_dimension: true 的字段的组合,在本例中与之前的 terms 聚合中使用的 city 字段相同。 只有在我们使用 index.mode="time_series" 将 tour 索引定义为时间序列索引时,此示例才有效。

resp = client.search(
    index="tour",
    filter_path="aggregations",
    aggregations={
        "path": {
            "time_series": {},
            "aggregations": {
                "museum_tour": {
                    "geo_line": {
                        "point": {
                            "field": "location"
                        }
                    }
                }
            }
        }
    },
)
print(resp)
const response = await client.search({
  index: "tour",
  filter_path: "aggregations",
  aggregations: {
    path: {
      time_series: {},
      aggregations: {
        museum_tour: {
          geo_line: {
            point: {
              field: "location",
            },
          },
        },
      },
    },
  },
});
console.log(response);
POST /tour/_search?filter_path=aggregations
{
  "aggregations": {
    "path": {
      "time_series": {},
      "aggregations": {
        "museum_tour": {
          "geo_line": {
            "point": {"field": "location"}
          }
        }
      }
    }
  }
}

当嵌套在 time_series 聚合内时,geo_line 聚合不再需要 sort 字段。 这是因为排序字段设置为 @timestamp,所有时间序列索引都预先按此字段排序。 如果您确实设置此参数,并且将其设置为 @timestamp 之外的值,则会收到错误。

此查询将产生

{
  "aggregations": {
    "path": {
      "buckets": {
        "{city=Paris}": {
          "key": {
            "city": "Paris"
          },
          "doc_count": 2,
          "museum_tour": {
            "type": "Feature",
            "geometry": {
              "coordinates": [ [ 2.336389, 48.861111 ], [ 2.327, 48.86 ] ],
              "type": "LineString"
            },
            "properties": {
              "complete": true
            }
          }
        },
        "{city=Antwerp}": {
          "key": {
            "city": "Antwerp"
          },
          "doc_count": 3,
          "museum_tour": {
            "type": "Feature",
            "geometry": {
              "coordinates": [ [ 4.401384, 51.220292 ], [ 4.405819, 51.221758 ], [ 4.4052, 51.2229 ] ],
              "type": "LineString"
            },
            "properties": {
              "complete": true
            }
          }
        },
        "{city=Amsterdam}": {
          "key": {
            "city": "Amsterdam"
          },
          "doc_count": 5,
          "museum_tour": {
            "type": "Feature",
            "geometry": {
              "coordinates": [ [ 4.889187, 52.373184 ], [ 4.885057, 52.370159 ], [ 4.901618, 52.369219 ], [ 4.91235, 52.374081 ], [ 4.914722, 52.371667 ] ],
              "type": "LineString"
            },
            "properties": {
              "complete": true
            }
          }
        }
      }
    }
  }
}

这些结果与之前的 terms 聚合示例基本相同,但结构不同。 在这里,我们看到存储桶作为映射返回,其中键是 TSID 的内部描述。 此 TSID 对于所有具有 time_series_dimension: true 的字段的唯一组合是唯一的。 每个存储桶包含一个 key 字段,该字段也是 TSID 的所有维度值的映射,在本例中,仅使用城市名称进行分组。 此外,还有一个内部聚合结果称为 museum_tour,其中包含一个 GeoJSON Feature,描述该城市各种景点之间的实际路线。 每个结果还包括一个 properties 对象,其中包含 complete 值;如果几何图形被简化到 size 参数指定的上限,则该值为 false。

为什么要按时间序列分组?

编辑

在查看这些示例时,您可能会认为使用 terms 或 time_series 对地理线进行分组几乎没有区别。 但是,这两种情况之间的行为存在一些重要差异。 时间序列索引以非常特定的顺序存储在磁盘上。 它们按时间序列维度字段预先分组,并按 @timestamp 字段预先排序。 这允许对 geo_line 聚合进行大幅优化:

  • 为第一个存储桶分配的相同内存可以重复用于所有后续存储桶。 这比非时间序列情况所需的内存少得多,在非时间序列情况下,所有存储桶是同时收集的。
  • 不需要进行排序,因为数据是按 @timestamp 预先排序的。 时间序列数据自然会以 DESC 顺序到达聚合收集器。 这意味着,如果我们指定 sort_order:ASC(默认值),我们仍然以 DESC 顺序收集,但在生成最终的 LineString 几何图形之前执行高效的内存反向排序。
  • 可以使用 size 参数来执行流式线简化算法。 如果没有时间序列,我们会被迫截断数据(默认情况下,每个存储桶在 10000 个文档之后截断),以防止内存使用不受限制。 这可能会导致地理线被截断,从而丢失重要数据。 使用时间序列,我们可以运行流式线简化算法,在控制内存使用的同时保持整体几何形状。 实际上,对于大多数用例,将此 size 参数设置为低得多的上限也是可行的,并且可以节省更多内存。 例如,如果要以特定分辨率在显示地图上绘制 geo_line,则简化到只有 100 或 200 个点可能看起来也一样好。 这将节省服务器、网络和客户端上的内存。

注意:使用时间序列数据和使用 time_series 索引模式还有其他显著优势。 这些在有关时间序列数据流的文档中进行了讨论。

流式线简化

编辑

线简化是减小发送给客户端并在地图用户界面中显示的最终结果大小的好方法。然而,通常这些算法需要大量内存来执行简化,要求将整个几何图形连同用于简化的支持数据一起保存在内存中。使用流式线简化算法可以通过将内存限制在为简化几何图形定义的范围内,从而在简化过程中实现最小的内存使用。只有在不需要排序的情况下才有可能实现这一点,而当分组是由 time_series 聚合在具有 time_series 索引模式的索引上运行时,情况就是如此。

在这些条件下,geo_line 聚合会为指定的 size 分配内存,然后用传入的文档填充该内存。一旦内存完全填满,当添加新文档时,将删除该线内的文档。选择要删除的文档是为了最大限度地减少对几何图形的视觉影响。此过程使用了 Visvalingam-Whyatt 算法。本质上,这意味着如果点具有最小的三角形面积,则会删除这些点,三角形由考虑中的点以及线中在其之前和之后的两个点定义。此外,我们使用球面坐标计算面积,以便没有平面扭曲会影响选择。

为了演示线简化比线截断好多少,请考虑这个科迪亚克岛北岸的例子。此数据仅有 209 个点,但如果我们要将 size 设置为 100,我们会得到戏剧性的截断。

North shore of Kodiak Island truncated to 100 points

灰色线是 209 个点的整个几何图形,而蓝色线是前 100 个点,与原始几何图形截然不同。

现在考虑将相同的几何图形简化为 100 个点。

North shore of Kodiak Island simplified to 100 points

为了进行比较,我们以灰色显示了原始几何图形,以蓝色显示了截断的几何图形,并以洋红色显示了新的简化几何图形。可以看到新的简化线偏离原始线的位置,但整体几何形状几乎相同,并且仍然可以清楚地识别为科迪亚克岛的北岸。