Avg 聚合
Avg 聚合
一个单值指标聚合,用于计算从聚合文档中提取的数值的平均值。这些值可以从文档中特定的数值字段或直方图字段中提取。
假设数据由代表学生考试成绩(介于 0 和 100 之间)的文档组成,我们可以使用以下方式计算他们的平均分:
resp = client.search( index="exams", size="0", aggs={ "avg_grade": { "avg": { "field": "grade" } } }, ) print(resp)
response = client.search( index: 'exams', size: 0, body: { aggregations: { avg_grade: { avg: { field: 'grade' } } } } ) puts response
const response = await client.search({ index: "exams", size: 0, aggs: { avg_grade: { avg: { field: "grade", }, }, }, }); console.log(response);
POST /exams/_search?size=0 { "aggs": { "avg_grade": { "avg": { "field": "grade" } } } }
上述聚合计算所有文档的平均成绩。聚合类型为 avg,field 设置定义了文档中用于计算平均值的数值字段。上述操作将返回以下内容:
{ ... "aggregations": { "avg_grade": { "value": 75.0 } } }
聚合的名称(上面的 avg_grade)也用作从返回的响应中检索聚合结果的键。
脚本
假设考试非常困难,你需要对成绩进行更正。对运行时字段求平均值即可获得更正后的平均值:
resp = client.search( index="exams", size="0", runtime_mappings={ "grade.corrected": { "type": "double", "script": { "source": "emit(Math.min(100, doc['grade'].value * params.correction))", "params": { "correction": 1.2 } } } }, aggs={ "avg_corrected_grade": { "avg": { "field": "grade.corrected" } } }, ) print(resp)
response = client.search( index: 'exams', size: 0, body: { runtime_mappings: { 'grade.corrected' => { type: 'double', script: { source: "emit(Math.min(100, doc['grade'].value * params.correction))", params: { correction: 1.2 } } } }, aggregations: { avg_corrected_grade: { avg: { field: 'grade.corrected' } } } } ) puts response
const response = await client.search({ index: "exams", size: 0, runtime_mappings: { "grade.corrected": { type: "double", script: { source: "emit(Math.min(100, doc['grade'].value * params.correction))", params: { correction: 1.2, }, }, }, }, aggs: { avg_corrected_grade: { avg: { field: "grade.corrected", }, }, }, }); console.log(response);
POST /exams/_search?size=0 { "runtime_mappings": { "grade.corrected": { "type": "double", "script": { "source": "emit(Math.min(100, doc['grade'].value * params.correction))", "params": { "correction": 1.2 } } } }, "aggs": { "avg_corrected_grade": { "avg": { "field": "grade.corrected" } } } }
缺失值
missing 参数定义了应如何处理缺少值的文档。默认情况下,它们将被忽略,但也可以将它们视为具有某个指定的值。
resp = client.search( index="exams", size="0", aggs={ "grade_avg": { "avg": { "field": "grade", "missing": 10 } } }, ) print(resp)
response = client.search( index: 'exams', size: 0, body: { aggregations: { grade_avg: { avg: { field: 'grade', missing: 10 } } } } ) puts response
const response = await client.search({ index: "exams", size: 0, aggs: { grade_avg: { avg: { field: "grade", missing: 10, }, }, }, }); console.log(response);
直方图字段
当在直方图字段上计算平均值时,聚合的结果是 values 数组中所有元素的加权平均值,权重为 counts 数组中相同位置的数字。
例如,对于以下索引,该索引存储了不同网络的延迟指标的预聚合直方图:
resp = client.index( index="metrics_index", id="1", document={ "network.name": "net-1", "latency_histo": { "values": [ 0.1, 0.2, 0.3, 0.4, 0.5 ], "counts": [ 3, 7, 23, 12, 6 ] } }, ) print(resp) resp1 = client.index( index="metrics_index", id="2", document={ "network.name": "net-2", "latency_histo": { "values": [ 0.1, 0.2, 0.3, 0.4, 0.5 ], "counts": [ 8, 17, 8, 7, 6 ] } }, ) print(resp1) resp2 = client.search( index="metrics_index", size="0", aggs={ "avg_latency": { "avg": { "field": "latency_histo" } } }, ) print(resp2)
response = client.index( index: 'metrics_index', id: 1, body: { 'network.name' => 'net-1', latency_histo: { values: [ 0.1, 0.2, 0.3, 0.4, 0.5 ], counts: [ 3, 7, 23, 12, 6 ] } } ) puts response response = client.index( index: 'metrics_index', id: 2, body: { 'network.name' => 'net-2', latency_histo: { values: [ 0.1, 0.2, 0.3, 0.4, 0.5 ], counts: [ 8, 17, 8, 7, 6 ] } } ) puts response response = client.search( index: 'metrics_index', size: 0, body: { aggregations: { avg_latency: { avg: { field: 'latency_histo' } } } } ) puts response
const response = await client.index({ index: "metrics_index", id: 1, document: { "network.name": "net-1", latency_histo: { values: [0.1, 0.2, 0.3, 0.4, 0.5], counts: [3, 7, 23, 12, 6], }, }, }); console.log(response); const response1 = await client.index({ index: "metrics_index", id: 2, document: { "network.name": "net-2", latency_histo: { values: [0.1, 0.2, 0.3, 0.4, 0.5], counts: [8, 17, 8, 7, 6], }, }, }); console.log(response1); const response2 = await client.search({ index: "metrics_index", size: 0, aggs: { avg_latency: { avg: { field: "latency_histo", }, }, }, }); console.log(response2);
PUT metrics_index/_doc/1 { "network.name" : "net-1", "latency_histo" : { "values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 6] } } PUT metrics_index/_doc/2 { "network.name" : "net-2", "latency_histo" : { "values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [8, 17, 8, 7, 6] } } POST /metrics_index/_search?size=0 { "aggs": { "avg_latency": { "avg": { "field": "latency_histo" } } } }
对于每个直方图字段,avg 聚合会将 values 数组中的每个数字乘以其在 counts 数组中的关联计数,并将这些乘积累加。最终,它将所有直方图的累加和除以总计数,得到平均值并返回以下结果:
{ ... "aggregations": { "avg_latency": { "value": 0.29690721649 } } }