求和聚合

编辑

一个单值指标聚合,对从被聚合文档中提取的数值求和。这些值可以从特定的数值字段或直方图字段中提取。

假设数据包含代表销售记录的文档,我们可以将所有帽子的销售价格求和,如下所示:

resp = client.search(
    index="sales",
    size="0",
    query={
        "constant_score": {
            "filter": {
                "match": {
                    "type": "hat"
                }
            }
        }
    },
    aggs={
        "hat_prices": {
            "sum": {
                "field": "price"
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  size: 0,
  body: {
    query: {
      constant_score: {
        filter: {
          match: {
            type: 'hat'
          }
        }
      }
    },
    aggregations: {
      hat_prices: {
        sum: {
          field: 'price'
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  size: 0,
  query: {
    constant_score: {
      filter: {
        match: {
          type: "hat",
        },
      },
    },
  },
  aggs: {
    hat_prices: {
      sum: {
        field: "price",
      },
    },
  },
});
console.log(response);
POST /sales/_search?size=0
{
  "query": {
    "constant_score": {
      "filter": {
        "match": { "type": "hat" }
      }
    }
  },
  "aggs": {
    "hat_prices": { "sum": { "field": "price" } }
  }
}

结果为:

{
  ...
  "aggregations": {
    "hat_prices": {
      "value": 450.0
    }
  }
}

聚合的名称(上面的 hat_prices)也作为从返回的响应中检索聚合结果的键。

脚本

编辑

如果求和的对象比单个字段更复杂,请在运行时字段上运行聚合。

resp = client.search(
    index="sales",
    size="0",
    runtime_mappings={
        "price.weighted": {
            "type": "double",
            "script": "\n        double price = doc['price'].value;\n        if (doc['promoted'].value) {\n          price *= 0.8;\n        }\n        emit(price);\n      "
        }
    },
    query={
        "constant_score": {
            "filter": {
                "match": {
                    "type": "hat"
                }
            }
        }
    },
    aggs={
        "hat_prices": {
            "sum": {
                "field": "price.weighted"
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  size: 0,
  body: {
    runtime_mappings: {
      'price.weighted' => {
        type: 'double',
        script: "\n        double price = doc['price'].value;\n        if (doc['promoted'].value) {\n          price *= 0.8;\n        }\n        emit(price);\n      "
      }
    },
    query: {
      constant_score: {
        filter: {
          match: {
            type: 'hat'
          }
        }
      }
    },
    aggregations: {
      hat_prices: {
        sum: {
          field: 'price.weighted'
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  size: 0,
  runtime_mappings: {
    "price.weighted": {
      type: "double",
      script:
        "\n        double price = doc['price'].value;\n        if (doc['promoted'].value) {\n          price *= 0.8;\n        }\n        emit(price);\n      ",
    },
  },
  query: {
    constant_score: {
      filter: {
        match: {
          type: "hat",
        },
      },
    },
  },
  aggs: {
    hat_prices: {
      sum: {
        field: "price.weighted",
      },
    },
  },
});
console.log(response);
POST /sales/_search?size=0
{
  "runtime_mappings": {
    "price.weighted": {
      "type": "double",
      "script": """
        double price = doc['price'].value;
        if (doc['promoted'].value) {
          price *= 0.8;
        }
        emit(price);
      """
    }
  },
  "query": {
    "constant_score": {
      "filter": {
        "match": { "type": "hat" }
      }
    }
  },
  "aggs": {
    "hat_prices": {
      "sum": {
        "field": "price.weighted"
      }
    }
  }
}

缺失值

编辑

missing 参数定义了如何处理缺少值的文档。默认情况下,缺少值的文档将被忽略,但也可以将它们视为具有某个值。例如,以下示例将所有没有价格的帽子销售记录的价格视为 100:

resp = client.search(
    index="sales",
    size="0",
    query={
        "constant_score": {
            "filter": {
                "match": {
                    "type": "hat"
                }
            }
        }
    },
    aggs={
        "hat_prices": {
            "sum": {
                "field": "price",
                "missing": 100
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  size: 0,
  body: {
    query: {
      constant_score: {
        filter: {
          match: {
            type: 'hat'
          }
        }
      }
    },
    aggregations: {
      hat_prices: {
        sum: {
          field: 'price',
          missing: 100
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  size: 0,
  query: {
    constant_score: {
      filter: {
        match: {
          type: "hat",
        },
      },
    },
  },
  aggs: {
    hat_prices: {
      sum: {
        field: "price",
        missing: 100,
      },
    },
  },
});
console.log(response);
POST /sales/_search?size=0
{
  "query": {
    "constant_score": {
      "filter": {
        "match": { "type": "hat" }
      }
    }
  },
  "aggs": {
    "hat_prices": {
      "sum": {
        "field": "price",
        "missing": 100 
      }
    }
  }
}

直方图字段

编辑

当在直方图字段上计算总和时,聚合的结果是 values 数组中每个元素与 counts 数组中相同位置的数字相乘后,所有乘积的总和。

例如,对于以下索引,它存储了不同网络的延迟指标的预聚合直方图:

resp = client.indices.create(
    index="metrics_index",
    mappings={
        "properties": {
            "latency_histo": {
                "type": "histogram"
            }
        }
    },
)
print(resp)

resp1 = client.index(
    index="metrics_index",
    id="1",
    refresh=True,
    document={
        "network.name": "net-1",
        "latency_histo": {
            "values": [
                0.1,
                0.2,
                0.3,
                0.4,
                0.5
            ],
            "counts": [
                3,
                7,
                23,
                12,
                6
            ]
        }
    },
)
print(resp1)

resp2 = client.index(
    index="metrics_index",
    id="2",
    refresh=True,
    document={
        "network.name": "net-2",
        "latency_histo": {
            "values": [
                0.1,
                0.2,
                0.3,
                0.4,
                0.5
            ],
            "counts": [
                8,
                17,
                8,
                7,
                6
            ]
        }
    },
)
print(resp2)

resp3 = client.search(
    index="metrics_index",
    size="0",
    filter_path="aggregations",
    aggs={
        "total_latency": {
            "sum": {
                "field": "latency_histo"
            }
        }
    },
)
print(resp3)
response = client.indices.create(
  index: 'metrics_index',
  body: {
    mappings: {
      properties: {
        latency_histo: {
          type: 'histogram'
        }
      }
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 1,
  refresh: true,
  body: {
    'network.name' => 'net-1',
    latency_histo: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        3,
        7,
        23,
        12,
        6
      ]
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 2,
  refresh: true,
  body: {
    'network.name' => 'net-2',
    latency_histo: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        8,
        17,
        8,
        7,
        6
      ]
    }
  }
)
puts response

response = client.search(
  index: 'metrics_index',
  size: 0,
  filter_path: 'aggregations',
  body: {
    aggregations: {
      total_latency: {
        sum: {
          field: 'latency_histo'
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "metrics_index",
  mappings: {
    properties: {
      latency_histo: {
        type: "histogram",
      },
    },
  },
});
console.log(response);

const response1 = await client.index({
  index: "metrics_index",
  id: 1,
  refresh: "true",
  document: {
    "network.name": "net-1",
    latency_histo: {
      values: [0.1, 0.2, 0.3, 0.4, 0.5],
      counts: [3, 7, 23, 12, 6],
    },
  },
});
console.log(response1);

const response2 = await client.index({
  index: "metrics_index",
  id: 2,
  refresh: "true",
  document: {
    "network.name": "net-2",
    latency_histo: {
      values: [0.1, 0.2, 0.3, 0.4, 0.5],
      counts: [8, 17, 8, 7, 6],
    },
  },
});
console.log(response2);

const response3 = await client.search({
  index: "metrics_index",
  size: 0,
  filter_path: "aggregations",
  aggs: {
    total_latency: {
      sum: {
        field: "latency_histo",
      },
    },
  },
});
console.log(response3);
PUT metrics_index
{
  "mappings": {
    "properties": {
      "latency_histo": { "type": "histogram" }
    }
  }
}

PUT metrics_index/_doc/1?refresh
{
  "network.name" : "net-1",
  "latency_histo" : {
      "values" : [0.1, 0.2, 0.3, 0.4, 0.5],
      "counts" : [3, 7, 23, 12, 6]
   }
}

PUT metrics_index/_doc/2?refresh
{
  "network.name" : "net-2",
  "latency_histo" : {
      "values" :  [0.1, 0.2, 0.3, 0.4, 0.5],
      "counts" : [8, 17, 8, 7, 6]
   }
}

POST /metrics_index/_search?size=0&filter_path=aggregations
{
  "aggs" : {
    "total_latency" : { "sum" : { "field" : "latency_histo" } }
  }
}

对于每个直方图字段,sum 聚合会将 values 数组中的每个数字乘以其在 counts 数组中对应的计数,然后将这些乘积相加。

最终,它会将所有直方图的所有值相加,并返回以下结果:

{
  "aggregations": {
    "total_latency": {
      "value": 28.8
    }
  }
}