Avg 聚合

编辑

一个 单值 指标聚合,用于计算从聚合文档中提取的数值的平均值。这些值可以从文档中特定的数值或 直方图 字段中提取。

假设数据由代表学生考试成绩(介于 0 和 100 之间)的文档组成,我们可以使用以下方式计算他们的平均分:

resp = client.search(
    index="exams",
    size="0",
    aggs={
        "avg_grade": {
            "avg": {
                "field": "grade"
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'exams',
  size: 0,
  body: {
    aggregations: {
      avg_grade: {
        avg: {
          field: 'grade'
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "exams",
  size: 0,
  aggs: {
    avg_grade: {
      avg: {
        field: "grade",
      },
    },
  },
});
console.log(response);
POST /exams/_search?size=0
{
  "aggs": {
    "avg_grade": { "avg": { "field": "grade" } }
  }
}

上述聚合计算所有文档的平均成绩。聚合类型为 avg,field 设置定义了文档中用于计算平均值的数值字段。上述操作将返回以下内容:

{
  ...
  "aggregations": {
    "avg_grade": {
      "value": 75.0
    }
  }
}

聚合的名称(上面的 avg_grade)也用作从返回的响应中检索聚合结果的键。

脚本

编辑

假设考试非常困难,你需要应用成绩更正。对 运行时字段 求平均值以获得更正后的平均值:

resp = client.search(
    index="exams",
    size="0",
    runtime_mappings={
        "grade.corrected": {
            "type": "double",
            "script": {
                "source": "emit(Math.min(100, doc['grade'].value * params.correction))",
                "params": {
                    "correction": 1.2
                }
            }
        }
    },
    aggs={
        "avg_corrected_grade": {
            "avg": {
                "field": "grade.corrected"
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'exams',
  size: 0,
  body: {
    runtime_mappings: {
      'grade.corrected' => {
        type: 'double',
        script: {
          source: "emit(Math.min(100, doc['grade'].value * params.correction))",
          params: {
            correction: 1.2
          }
        }
      }
    },
    aggregations: {
      avg_corrected_grade: {
        avg: {
          field: 'grade.corrected'
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "exams",
  size: 0,
  runtime_mappings: {
    "grade.corrected": {
      type: "double",
      script: {
        source: "emit(Math.min(100, doc['grade'].value * params.correction))",
        params: {
          correction: 1.2,
        },
      },
    },
  },
  aggs: {
    avg_corrected_grade: {
      avg: {
        field: "grade.corrected",
      },
    },
  },
});
console.log(response);
POST /exams/_search?size=0
{
  "runtime_mappings": {
    "grade.corrected": {
      "type": "double",
      "script": {
        "source": "emit(Math.min(100, doc['grade'].value * params.correction))",
        "params": {
          "correction": 1.2
        }
      }
    }
  },
  "aggs": {
    "avg_corrected_grade": {
      "avg": {
        "field": "grade.corrected"
      }
    }
  }
}

缺失值

编辑

missing 参数定义了应如何处理缺少值的文档。默认情况下,它们将被忽略,但也可以将它们视为具有一个值。

resp = client.search(
    index="exams",
    size="0",
    aggs={
        "grade_avg": {
            "avg": {
                "field": "grade",
                "missing": 10
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'exams',
  size: 0,
  body: {
    aggregations: {
      grade_avg: {
        avg: {
          field: 'grade',
          missing: 10
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "exams",
  size: 0,
  aggs: {
    grade_avg: {
      avg: {
        field: "grade",
        missing: 10,
      },
    },
  },
});
console.log(response);
POST /exams/_search?size=0
{
  "aggs": {
    "grade_avg": {
      "avg": {
        "field": "grade",
        "missing": 10     
      }
    }
  }
}

grade 字段中没有值的文档将与具有值 10 的文档放入相同的桶中。

直方图字段

编辑

当在 直方图字段 上计算平均值时,聚合的结果是 values 数组中所有元素的加权平均值,同时考虑了 counts 数组中相同位置的数字。

例如,对于以下索引,该索引存储了不同网络的延迟指标的预聚合直方图:

resp = client.index(
    index="metrics_index",
    id="1",
    document={
        "network.name": "net-1",
        "latency_histo": {
            "values": [
                0.1,
                0.2,
                0.3,
                0.4,
                0.5
            ],
            "counts": [
                3,
                7,
                23,
                12,
                6
            ]
        }
    },
)
print(resp)

resp1 = client.index(
    index="metrics_index",
    id="2",
    document={
        "network.name": "net-2",
        "latency_histo": {
            "values": [
                0.1,
                0.2,
                0.3,
                0.4,
                0.5
            ],
            "counts": [
                8,
                17,
                8,
                7,
                6
            ]
        }
    },
)
print(resp1)

resp2 = client.search(
    index="metrics_index",
    size="0",
    aggs={
        "avg_latency": {
            "avg": {
                "field": "latency_histo"
            }
        }
    },
)
print(resp2)
response = client.index(
  index: 'metrics_index',
  id: 1,
  body: {
    'network.name' => 'net-1',
    latency_histo: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        3,
        7,
        23,
        12,
        6
      ]
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 2,
  body: {
    'network.name' => 'net-2',
    latency_histo: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        8,
        17,
        8,
        7,
        6
      ]
    }
  }
)
puts response

response = client.search(
  index: 'metrics_index',
  size: 0,
  body: {
    aggregations: {
      avg_latency: {
        avg: {
          field: 'latency_histo'
        }
      }
    }
  }
)
puts response
const response = await client.index({
  index: "metrics_index",
  id: 1,
  document: {
    "network.name": "net-1",
    latency_histo: {
      values: [0.1, 0.2, 0.3, 0.4, 0.5],
      counts: [3, 7, 23, 12, 6],
    },
  },
});
console.log(response);

const response1 = await client.index({
  index: "metrics_index",
  id: 2,
  document: {
    "network.name": "net-2",
    latency_histo: {
      values: [0.1, 0.2, 0.3, 0.4, 0.5],
      counts: [8, 17, 8, 7, 6],
    },
  },
});
console.log(response1);

const response2 = await client.search({
  index: "metrics_index",
  size: 0,
  aggs: {
    avg_latency: {
      avg: {
        field: "latency_histo",
      },
    },
  },
});
console.log(response2);
PUT metrics_index/_doc/1
{
  "network.name" : "net-1",
  "latency_histo" : {
      "values" : [0.1, 0.2, 0.3, 0.4, 0.5], 
      "counts" : [3, 7, 23, 12, 6] 
   }
}

PUT metrics_index/_doc/2
{
  "network.name" : "net-2",
  "latency_histo" : {
      "values" :  [0.1, 0.2, 0.3, 0.4, 0.5], 
      "counts" : [8, 17, 8, 7, 6] 
   }
}

POST /metrics_index/_search?size=0
{
  "aggs": {
    "avg_latency":
      { "avg": { "field": "latency_histo" }
    }
  }
}

对于每个直方图字段,avg 聚合会将 values 数组中的每个数字乘以 counts 数组中相同位置的关联计数,并将这些乘积累加。最终,它会用所有直方图的乘积总和除以计数总和,得到加权平均值,并返回以下结果:

{
  ...
  "aggregations": {
    "avg_latency": {
      "value": 0.29690721649
    }
  }
}