加权平均聚合
加权平均聚合
一个单值指标聚合,计算从聚合文档中提取的数值的加权平均值。这些值可以从文档中的特定数值字段中提取。
在计算常规平均值时,每个数据点都有相同的“权重”……它对最终值的贡献是相同的。另一方面,加权平均值对每个数据点赋予不同的权重。每个数据点对最终值的贡献量是从文档中提取的。
用公式表示,加权平均值是 ∑(值 * 权重) / ∑(权重)
可以把常规平均值看作是每个值都具有隐式权重 1
的加权平均值。
表 53. weighted_avg 参数

| 参数名称 | 描述 | 必需 | 默认值 |
|---|---|---|---|
| value | 用于提供值的字段或脚本的配置 | 必需 | |
| weight | 用于提供权重的字段或脚本的配置 | 必需 | |
| format | 数值响应格式化程序 | 可选 | |
value 和 weight 对象各自具有针对其字段的特定配置(例如 field、script 和 missing)。
示例
如果我们的文档有一个保存 0-100 数值分数的 "grade" 字段和一个保存任意数值权重的 "weight" 字段,我们可以使用以下方法计算加权平均值:
resp = client.search( index="exams", size=0, aggs={ "weighted_grade": { "weighted_avg": { "value": { "field": "grade" }, "weight": { "field": "weight" } } } }, ) print(resp)
response = client.search( index: 'exams', body: { size: 0, aggregations: { weighted_grade: { weighted_avg: { value: { field: 'grade' }, weight: { field: 'weight' } } } } } ) puts response
const response = await client.search({ index: "exams", size: 0, aggs: { weighted_grade: { weighted_avg: { value: { field: "grade", }, weight: { field: "weight", }, }, }, }, }); console.log(response);
POST /exams/_search { "size": 0, "aggs": { "weighted_grade": { "weighted_avg": { "value": { "field": "grade" }, "weight": { "field": "weight" } } } } }
这将产生如下响应
{ ... "aggregations": { "weighted_grade": { "value": 70.0 } } }
虽然允许每个字段有多个值,但只允许有一个权重。如果聚合遇到一个具有多个权重的文档(例如,权重字段是一个多值字段),它将中止搜索。如果遇到这种情况,应该构建一个 运行时字段 将这些值组合成一个权重。
这个单一权重将独立应用于从 value
字段中提取的每个值。
此示例展示了如何对具有多个值的单个文档使用单一权重进行平均
resp = client.index( index="exams", refresh=True, document={ "grade": [ 1, 2, 3 ], "weight": 2 }, ) print(resp) resp1 = client.search( index="exams", size=0, aggs={ "weighted_grade": { "weighted_avg": { "value": { "field": "grade" }, "weight": { "field": "weight" } } } }, ) print(resp1)
response = client.index( index: 'exams', refresh: true, body: { grade: [ 1, 2, 3 ], weight: 2 } ) puts response response = client.search( index: 'exams', body: { size: 0, aggregations: { weighted_grade: { weighted_avg: { value: { field: 'grade' }, weight: { field: 'weight' } } } } } ) puts response
const response = await client.index({ index: "exams", refresh: "true", document: { grade: [1, 2, 3], weight: 2, }, }); console.log(response); const response1 = await client.search({ index: "exams", size: 0, aggs: { weighted_grade: { weighted_avg: { value: { field: "grade", }, weight: { field: "weight", }, }, }, }, }); console.log(response1);
POST /exams/_doc?refresh { "grade": [1, 2, 3], "weight": 2 } POST /exams/_search { "size": 0, "aggs": { "weighted_grade": { "weighted_avg": { "value": { "field": "grade" }, "weight": { "field": "weight" } } } } }
三个值(1、2 和 3)将被作为独立值包括在内,所有值的权重均为 2:
{ ... "aggregations": { "weighted_grade": { "value": 2.0 } } }
聚合返回 2.0
作为结果,这与我们手动计算的预期结果一致:((1*2) + (2*2) + (3*2)) / (2+2+2) == 2
运行时字段
如果必须对与索引值不太匹配的值进行求和或加权,请在 运行时字段 上运行聚合。
resp = client.index( index="exams", refresh=True, document={ "grade": 100, "weight": [ 2, 3 ] }, ) print(resp) resp1 = client.index( index="exams", refresh=True, document={ "grade": 80, "weight": 3 }, ) print(resp1) resp2 = client.search( index="exams", filter_path="aggregations", size=0, runtime_mappings={ "weight.combined": { "type": "double", "script": "\n double s = 0;\n for (double w : doc['weight']) {\n s += w;\n }\n emit(s);\n " } }, aggs={ "weighted_grade": { "weighted_avg": { "value": { "script": "doc.grade.value + 1" }, "weight": { "field": "weight.combined" } } } }, ) print(resp2)
response = client.index( index: 'exams', refresh: true, body: { grade: 100, weight: [ 2, 3 ] } ) puts response response = client.index( index: 'exams', refresh: true, body: { grade: 80, weight: 3 } ) puts response response = client.search( index: 'exams', filter_path: 'aggregations', body: { size: 0, runtime_mappings: { 'weight.combined' => { type: 'double', script: "\n double s = 0;\n for (double w : doc['weight']) {\n s += w;\n }\n emit(s);\n " } }, aggregations: { weighted_grade: { weighted_avg: { value: { script: 'doc.grade.value + 1' }, weight: { field: 'weight.combined' } } } } } ) puts response
const response = await client.index({ index: "exams", refresh: "true", document: { grade: 100, weight: [2, 3], }, }); console.log(response); const response1 = await client.index({ index: "exams", refresh: "true", document: { grade: 80, weight: 3, }, }); console.log(response1); const response2 = await client.search({ index: "exams", filter_path: "aggregations", size: 0, runtime_mappings: { "weight.combined": { type: "double", script: "\n double s = 0;\n for (double w : doc['weight']) {\n s += w;\n }\n emit(s);\n ", }, }, aggs: { weighted_grade: { weighted_avg: { value: { script: "doc.grade.value + 1", }, weight: { field: "weight.combined", }, }, }, }, }); console.log(response2);
POST /exams/_doc?refresh { "grade": 100, "weight": [2, 3] } POST /exams/_doc?refresh { "grade": 80, "weight": 3 } POST /exams/_search?filter_path=aggregations { "size": 0, "runtime_mappings": { "weight.combined": { "type": "double", "script": """ double s = 0; for (double w : doc['weight']) { s += w; } emit(s); """ } }, "aggs": { "weighted_grade": { "weighted_avg": { "value": { "script": "doc.grade.value + 1" }, "weight": { "field": "weight.combined" } } } } }
响应应该如下所示:
{ "aggregations": { "weighted_grade": { "value": 93.5 } } }
缺失值
默认情况下,聚合会排除 value 或 weight 字段的值缺失或为 null 的文档。可以使用 missing 参数来为这些文档指定默认值。
resp = client.search( index="exams", size=0, aggs={ "weighted_grade": { "weighted_avg": { "value": { "field": "grade", "missing": 2 }, "weight": { "field": "weight", "missing": 3 } } } }, ) print(resp)
const response = await client.search({ index: "exams", size: 0, aggs: { weighted_grade: { weighted_avg: { value: { field: "grade", missing: 2, }, weight: { field: "weight", missing: 3, }, }, }, }, }); console.log(response);
POST /exams/_search { "size": 0, "aggs": { "weighted_grade": { "weighted_avg": { "value": { "field": "grade", "missing": 2 }, "weight": { "field": "weight", "missing": 3 } } } } }