累积基数聚合
编辑累积基数聚合
一个父管道聚合,它计算父直方图(或 date_histogram)聚合中的累积基数。指定的指标必须是基数聚合,并且外层直方图必须将 min_doc_count 设置为 0(histogram 聚合的默认值)。
cumulative_cardinality 聚合对于查找“新增项目总数”非常有用,例如每天访问您网站的新访客数量。常规的基数聚合会告诉您每天有多少唯一访客,但不会区分“新”访客和“回头”访客。累积基数聚合可用于确定每天的唯一访客中有多少是“新增”的。
语法
单独的 cumulative_cardinality 聚合如下所示:
{ "cumulative_cardinality": { "buckets_path": "my_cardinality_agg" } }
表 60. cumulative_cardinality 参数
参数名称 | 描述 | 必填 | 默认值 |
---|---|---|---|
buckets_path | 要计算累积基数的基数聚合的路径(更多详细信息请参阅 buckets_path 语法) | 必填 | |
format | 用于输出值的 DecimalFormat 模式。如果指定,格式化后的值将在聚合的 value_as_string 属性中返回 | 可选 | null |
以下代码片段计算每日 users 总数的累积基数:
resp = client.search( index="user_hits", size=0, aggs={ "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } } } } }, ) print(resp)
response = client.search( index: 'user_hits', body: { size: 0, aggregations: { users_per_day: { date_histogram: { field: 'timestamp', calendar_interval: 'day' }, aggregations: { distinct_users: { cardinality: { field: 'user_id' } }, total_new_users: { cumulative_cardinality: { buckets_path: 'distinct_users' } } } } } } ) puts response
const response = await client.search({ index: "user_hits", size: 0, aggs: { users_per_day: { date_histogram: { field: "timestamp", calendar_interval: "day", }, aggs: { distinct_users: { cardinality: { field: "user_id", }, }, total_new_users: { cumulative_cardinality: { buckets_path: "distinct_users", }, }, }, }, }, }); console.log(response);
GET /user_hits/_search { "size": 0, "aggs": { "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } } } } } }
以下可能是响应
{ "took": 11, "timed_out": false, "_shards": ..., "hits": ..., "aggregations": { "users_per_day": { "buckets": [ { "key_as_string": "2019-01-01T00:00:00.000Z", "key": 1546300800000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 2 } }, { "key_as_string": "2019-01-02T00:00:00.000Z", "key": 1546387200000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 3 } }, { "key_as_string": "2019-01-03T00:00:00.000Z", "key": 1546473600000, "doc_count": 3, "distinct_users": { "value": 3 }, "total_new_users": { "value": 4 } } ] } } }
请注意,第二天(2019-01-02)有两个不同的用户,但累积管道聚合生成的 total_new_users 指标仅增加到三个。这意味着当天的两个用户中只有一个是新用户,另一个用户在前一天就已经出现过。这种情况在第三天再次发生:当天的三个用户中只有一个是全新用户。
增量累积基数
cumulative_cardinality 聚合将显示自查询时间段开始以来的不同值(distinct)总数。但是,有时查看“增量”计数也很有用,即每天新增了多少用户,而不是累积总数。
这可以通过在查询中添加 derivative 聚合来实现:
resp = client.search( index="user_hits", size=0, aggs={ "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } }, "incremental_new_users": { "derivative": { "buckets_path": "total_new_users" } } } } }, ) print(resp)
response = client.search( index: 'user_hits', body: { size: 0, aggregations: { users_per_day: { date_histogram: { field: 'timestamp', calendar_interval: 'day' }, aggregations: { distinct_users: { cardinality: { field: 'user_id' } }, total_new_users: { cumulative_cardinality: { buckets_path: 'distinct_users' } }, incremental_new_users: { derivative: { buckets_path: 'total_new_users' } } } } } } ) puts response
const response = await client.search({ index: "user_hits", size: 0, aggs: { users_per_day: { date_histogram: { field: "timestamp", calendar_interval: "day", }, aggs: { distinct_users: { cardinality: { field: "user_id", }, }, total_new_users: { cumulative_cardinality: { buckets_path: "distinct_users", }, }, incremental_new_users: { derivative: { buckets_path: "total_new_users", }, }, }, }, }, }); console.log(response);
GET /user_hits/_search { "size": 0, "aggs": { "users_per_day": { "date_histogram": { "field": "timestamp", "calendar_interval": "day" }, "aggs": { "distinct_users": { "cardinality": { "field": "user_id" } }, "total_new_users": { "cumulative_cardinality": { "buckets_path": "distinct_users" } }, "incremental_new_users": { "derivative": { "buckets_path": "total_new_users" } } } } } }
以下可能是响应
{ "took": 11, "timed_out": false, "_shards": ..., "hits": ..., "aggregations": { "users_per_day": { "buckets": [ { "key_as_string": "2019-01-01T00:00:00.000Z", "key": 1546300800000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 2 } }, { "key_as_string": "2019-01-02T00:00:00.000Z", "key": 1546387200000, "doc_count": 2, "distinct_users": { "value": 2 }, "total_new_users": { "value": 3 }, "incremental_new_users": { "value": 1.0 } }, { "key_as_string": "2019-01-03T00:00:00.000Z", "key": 1546473600000, "doc_count": 3, "distinct_users": { "value": 3 }, "total_new_users": { "value": 4 }, "incremental_new_users": { "value": 1.0 } } ] } } }