桶相关性聚合
桶相关性聚合
一个兄弟管道聚合,它在所配置的兄弟多桶聚合上执行相关性函数。
参数
-
buckets_path
- (必需,字符串)包含一组要关联的值的桶的路径。有关语法,请参阅
buckets_path
语法。 -
function
-
(必需,对象)要执行的相关性函数。
function
的属性-
count_correlation
-
(必需*,对象)用于计算计数相关性的配置。此函数旨在确定词项(term)值与给定指标之间的相关性。因此,必须满足以下要求:
- buckets_path
必须指向 _count
指标。
- 所有
buckets_path
计数值的总和必须小于或等于 indicator.doc_count
。
- 使用此函数时,需要先进行一次初始计算,以收集所需的
indicator
值。
count_correlation
的属性-
indicator
-
(必需,对象)用于与所配置的
buckets_path
值进行关联的指标。
indicator
的属性-
doc_count
- (必需,整数)最初用于生成
expectations
的文档总数。它必须大于或等于 buckets_path
中所有值的总和,因为它是词项值与之关联的原始数据超集。 -
expectations
- (必需,数组)用于与所配置的
buckets_path
值进行关联的数字数组。该数组的长度必须始终等于 buckets_path
返回的桶数。 -
fractions
- (可选,数组)在求平均值和计算方差时使用的分数数组。如果预先计算的数据与
buckets_path
之间存在已知的缺口,则应使用此数组。如果提供了 fractions
,其长度必须等于 expectations
的长度。
-
-
语法
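单独来看,一个 bucket_correlation
聚合的结构如下所示:
{ "bucket_correlation": { "buckets_path": "range_values>_count", "function": { "count_correlation": { "indicator": { "expectations": [...], "doc_count": 10000 } } } } }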
示例
以下代码片段将字段 version
中的各个词项与 latency
指标相关联。此处未展示 latency
指标值的预计算,该预计算是使用百分位数聚合完成的。
此示例仅使用每隔 10 个百分点的百分位数(即第 10、20、30……百分位)。
resp = client.search( index="correlate_latency", size="0", filter_path="aggregations", aggs={ "buckets": { "terms": { "field": "version", "size": 2 }, "aggs": { "latency_ranges": { "range": { "field": "latency", "ranges": [ { "to": 0 }, { "from": 0, "to": 105 }, { "from": 105, "to": 225 }, { "from": 225, "to": 445 }, { "from": 445, "to": 665 }, { "from": 665, "to": 885 }, { "from": 885, "to": 1115 }, { "from": 1115, "to": 1335 }, { "from": 1335, "to": 1555 }, { "from": 1555, "to": 1775 }, { "from": 1775 } ] } }, "bucket_correlation": { "bucket_correlation": { "buckets_path": "latency_ranges>_count", "function": { "count_correlation": { "indicator": { "expectations": [ 0, 52.5, 165, 335, 555, 775, 1000, 1225, 1445, 1665, 1775 ], "doc_count": 200 } } } } } } } }, ) print(resp)
const response = await client.search({ index: "correlate_latency", size: 0, filter_path: "aggregations", aggs: { buckets: { terms: { field: "version", size: 2, }, aggs: { latency_ranges: { range: { field: "latency", ranges: [ { to: 0, }, { from: 0, to: 105, }, { from: 105, to: 225, }, { from: 225, to: 445, }, { from: 445, to: 665, }, { from: 665, to: 885, }, { from: 885, to: 1115, }, { from: 1115, to: 1335, }, { from: 1335, to: 1555, }, { from: 1555, to: 1775, }, { from: 1775, }, ], }, }, bucket_correlation: { bucket_correlation: { buckets_path: "latency_ranges>_count", function: { count_correlation: { indicator: { expectations: [ 0, 52.5, 165, 335, 555, 775, 1000, 1225, 1445, 1665, 1775, ], doc_count: 200, }, }, }, }, }, }, }, }, }); console.log(response);
POST correlate_latency/_search?size=0&filter_path=aggregations { "aggs": { "buckets": { "terms": { "field": "version", "size": 2 }, "aggs": { "latency_ranges": { "range": { "field": "latency", "ranges": [ { "to": 0.0 }, { "from": 0, "to": 105 }, { "from": 105, "to": 225 }, { "from": 225, "to": 445 }, { "from": 445, "to": 665 }, { "from": 665, "to": 885 }, { "from": 885, "to": 1115 }, { "from": 1115, "to": 1335 }, { "from": 1335, "to": 1555 }, { "from": 1555, "to": 1775 }, { "from": 1775 } ] } }, "bucket_correlation": { "bucket_correlation": { "buckets_path": "latency_ranges>_count", "function": { "count_correlation": { "indicator": { "expectations": [0, 52.5, 165, 335, 555, 775, 1000, 1225, 1445, 1665, 1775], "doc_count": 200 } } } } } } } } }
- 包含范围聚合和桶相关性聚合的词项桶。两者都用于计算词项值与延迟的相关性。
- 延迟字段上的范围聚合。这些范围是参照延迟字段的百分位数创建的。
- 桶相关性聚合,它计算每个范围内词项值的数量与预先计算的指标值之间的相关性。
响应可能如下所示:
{ "aggregations" : { "buckets" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "1.0", "doc_count" : 100, "latency_ranges" : { "buckets" : [ { "key" : "*-0.0", "to" : 0.0, "doc_count" : 0 }, { "key" : "0.0-105.0", "from" : 0.0, "to" : 105.0, "doc_count" : 1 }, { "key" : "105.0-225.0", "from" : 105.0, "to" : 225.0, "doc_count" : 9 }, { "key" : "225.0-445.0", "from" : 225.0, "to" : 445.0, "doc_count" : 0 }, { "key" : "445.0-665.0", "from" : 445.0, "to" : 665.0, "doc_count" : 0 }, { "key" : "665.0-885.0", "from" : 665.0, "to" : 885.0, "doc_count" : 0 }, { "key" : "885.0-1115.0", "from" : 885.0, "to" : 1115.0, "doc_count" : 10 }, { "key" : "1115.0-1335.0", "from" : 1115.0, "to" : 1335.0, "doc_count" : 20 }, { "key" : "1335.0-1555.0", "from" : 1335.0, "to" : 1555.0, "doc_count" : 20 }, { "key" : "1555.0-1775.0", "from" : 1555.0, "to" : 1775.0, "doc_count" : 20 }, { "key" : "1775.0-*", "from" : 1775.0, "doc_count" : 20 } ] }, "bucket_correlation" : { "value" : 0.8402398981360937 } }, { "key" : "2.0", "doc_count" : 100, "latency_ranges" : { "buckets" : [ { "key" : "*-0.0", "to" : 0.0, "doc_count" : 0 }, { "key" : "0.0-105.0", "from" : 0.0, "to" : 105.0, "doc_count" : 19 }, { "key" : "105.0-225.0", "from" : 105.0, "to" : 225.0, "doc_count" : 11 }, { "key" : "225.0-445.0", "from" : 225.0, "to" : 445.0, "doc_count" : 20 }, { "key" : "445.0-665.0", "from" : 445.0, "to" : 665.0, "doc_count" : 20 }, { "key" : "665.0-885.0", "from" : 665.0, "to" : 885.0, "doc_count" : 20 }, { "key" : "885.0-1115.0", "from" : 885.0, "to" : 1115.0, "doc_count" : 10 }, { "key" : "1115.0-1335.0", "from" : 1115.0, "to" : 1335.0, "doc_count" : 0 }, { "key" : "1335.0-1555.0", "from" : 1335.0, "to" : 1555.0, "doc_count" : 0 }, { "key" : "1555.0-1775.0", "from" : 1555.0, "to" : 1775.0, "doc_count" : 0 }, { "key" : "1775.0-*", "from" : 1775.0, "doc_count" : 0 } ] }, "bucket_correlation" : { "value" : -0.5759855613334943 } } ] } } }