累积基数聚合
累积基数聚合
一个父管道聚合,用于计算父直方图(histogram 或 date_histogram)聚合中的累积基数。指定的度量必须是基数(cardinality)聚合,并且外层直方图必须将 min_doc_count 设置为 0(这是 histogram 聚合的默认值)。
cumulative_cardinality
聚合对于查找“总新项目”很有用,例如每天访问您网站的新访客数量。常规基数聚合会告诉您每天有多少唯一访客,但不会区分“新”访客或“回头”访客。累积基数聚合可用于确定每天有多少唯一访客是“新”访客。
语法
一个独立的 cumulative_cardinality 聚合如下所示:
{ "cumulative_cardinality": { "buckets_path": "my_cardinality_agg" } }
表 59. cumulative_cardinality 参数
参数名称 | 描述 | 必需 | 默认值 |
---|---|---|---|
buckets_path | 我们要查找累积基数的基数聚合的路径(有关更多详细信息,请参阅 buckets_path 语法) | 必需 | |
format | 输出值的 DecimalFormat 模式。如果指定,格式化的值将返回到聚合的 value_as_string 属性中 | 可选 | null |
以下代码片段计算每日 users 总数的累积基数:
resp = client.search( index="user_hits", size=0, aggs={ "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } } } } }, ) print(resp)
response = client.search( index: 'user_hits', body: { size: 0, aggregations: { users_per_day: { date_histogram: { field: 'timestamp', calendar_interval: 'day' }, aggregations: { distinct_users: { cardinality: { field: 'user_id' } }, total_new_users: { cumulative_cardinality: { buckets_path: 'distinct_users' } } } } } } ) puts response
const response = await client.search({ index: "user_hits", size: 0, aggs: { users_per_day: { date_histogram: { field: "timestamp", calendar_interval: "day", }, aggs: { distinct_users: { cardinality: { field: "user_id", }, }, total_new_users: { cumulative_cardinality: { buckets_path: "distinct_users", }, }, }, }, }, }); console.log(response);
GET /user_hits/_search { "size": 0, "aggs": { "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } } } } } }
响应可能如下所示:
{ "took": 11, "timed_out": false, "_shards": ..., "hits": ..., "aggregations": { "users_per_day": { "buckets": [ { "key_as_string": "2019-01-01T00:00:00.000Z", "key": 1546300800000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 2 } }, { "key_as_string": "2019-01-02T00:00:00.000Z", "key": 1546387200000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 3 } }, { "key_as_string": "2019-01-03T00:00:00.000Z", "key": 1546473600000, "doc_count": 3, "distinct_users": { "value": 3 }, "total_new_users": { "value": 4 } } ] } } }
请注意,第二天(2019-01-02)有两个不同的用户,但累积管道聚合生成的 total_new_users 指标仅递增到 3。这意味着当天的两个用户中只有一个是新用户,另一个用户在前一天已经出现过。第三天也出现了同样的情况:三个用户中只有一个是全新的。
增量累积基数
cumulative_cardinality 聚合显示的是自查询时间段开始以来的唯一值累计总数。但是,有时查看“增量”计数会很有用,即每天新增了多少用户,而不是累积总数。
这可以通过在查询中添加 derivative 聚合来实现:
resp = client.search( index="user_hits", size=0, aggs={ "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } }, "incremental_new_users": { "derivative": { "buckets_path": "total_new_users" } } } } }, ) print(resp)
response = client.search( index: 'user_hits', body: { size: 0, aggregations: { users_per_day: { date_histogram: { field: 'timestamp', calendar_interval: 'day' }, aggregations: { distinct_users: { cardinality: { field: 'user_id' } }, total_new_users: { cumulative_cardinality: { buckets_path: 'distinct_users' } }, incremental_new_users: { derivative: { buckets_path: 'total_new_users' } } } } } } ) puts response
const response = await client.search({ index: "user_hits", size: 0, aggs: { users_per_day: { date_histogram: { field: "timestamp", calendar_interval: "day", }, aggs: { distinct_users: { cardinality: { field: "user_id", }, }, total_new_users: { cumulative_cardinality: { buckets_path: "distinct_users", }, }, incremental_new_users: { derivative: { buckets_path: "total_new_users", }, }, }, }, }, }); console.log(response);
GET /user_hits/_search { "size": 0, "aggs": { "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } }, "incremental_new_users": { "derivative": { "buckets_path": "total_new_users" } } } } } }
响应可能如下所示:
{ "took": 11, "timed_out": false, "_shards": ..., "hits": ..., "aggregations": { "users_per_day": { "buckets": [ { "key_as_string": "2019-01-01T00:00:00.000Z", "key": 1546300800000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 2 } }, { "key_as_string": "2019-01-02T00:00:00.000Z", "key": 1546387200000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 3 }, "incremental_new_users": { "value": 1.0 } }, { "key_as_string": "2019-01-03T00:00:00.000Z", "key": 1546473600000, "doc_count": 3, "distinct_users": { "value": 3 }, "total_new_users": { "value": 4 }, "incremental_new_users": { "value": 1.0 } } ] } } }