范围聚合

编辑

一种基于值源的多桶聚合,允许用户定义一组范围,每个范围代表一个桶。在聚合过程中,会将从每个文档中提取的值与每个桶的范围进行比对,并将相关/匹配的文档放入相应的“桶”中。请注意,此聚合包含每个范围的 from 值,但不包含 to 值。

示例

resp = client.search(
    index="sales",
    aggs={
        "price_ranges": {
            "range": {
                "field": "price",
                "ranges": [
                    {
                        "to": 100
                    },
                    {
                        "from": 100,
                        "to": 200
                    },
                    {
                        "from": 200
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    aggregations: {
      price_ranges: {
        range: {
          field: 'price',
          ranges: [
            {
              to: 100
            },
            {
              from: 100,
              to: 200
            },
            {
              from: 200
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  aggs: {
    price_ranges: {
      range: {
        field: "price",
        ranges: [
          {
            to: 100,
          },
          {
            from: 100,
            to: 200,
          },
          {
            from: 200,
          },
        ],
      },
    },
  },
});
console.log(response);
GET sales/_search
{
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          { "to": 100.0 },
          { "from": 100.0, "to": 200.0 },
          { "from": 200.0 }
        ]
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "price_ranges": {
      "buckets": [
        {
          "key": "*-100.0",
          "to": 100.0,
          "doc_count": 2
        },
        {
          "key": "100.0-200.0",
          "from": 100.0,
          "to": 200.0,
          "doc_count": 2
        },
        {
          "key": "200.0-*",
          "from": 200.0,
          "doc_count": 3
        }
      ]
    }
  }
}

键控响应

编辑

将 keyed 标志设置为 true 后,每个桶都会关联一个唯一的字符串键,并且范围将以哈希形式而不是数组形式返回:

resp = client.search(
    index="sales",
    aggs={
        "price_ranges": {
            "range": {
                "field": "price",
                "keyed": True,
                "ranges": [
                    {
                        "to": 100
                    },
                    {
                        "from": 100,
                        "to": 200
                    },
                    {
                        "from": 200
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    aggregations: {
      price_ranges: {
        range: {
          field: 'price',
          keyed: true,
          ranges: [
            {
              to: 100
            },
            {
              from: 100,
              to: 200
            },
            {
              from: 200
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  aggs: {
    price_ranges: {
      range: {
        field: "price",
        keyed: true,
        ranges: [
          {
            to: 100,
          },
          {
            from: 100,
            to: 200,
          },
          {
            from: 200,
          },
        ],
      },
    },
  },
});
console.log(response);
GET sales/_search
{
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "keyed": true,
        "ranges": [
          { "to": 100 },
          { "from": 100, "to": 200 },
          { "from": 200 }
        ]
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "price_ranges": {
      "buckets": {
        "*-100.0": {
          "to": 100.0,
          "doc_count": 2
        },
        "100.0-200.0": {
          "from": 100.0,
          "to": 200.0,
          "doc_count": 2
        },
        "200.0-*": {
          "from": 200.0,
          "doc_count": 3
        }
      }
    }
  }
}

也可以为每个范围自定义键:

resp = client.search(
    index="sales",
    aggs={
        "price_ranges": {
            "range": {
                "field": "price",
                "keyed": True,
                "ranges": [
                    {
                        "key": "cheap",
                        "to": 100
                    },
                    {
                        "key": "average",
                        "from": 100,
                        "to": 200
                    },
                    {
                        "key": "expensive",
                        "from": 200
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    aggregations: {
      price_ranges: {
        range: {
          field: 'price',
          keyed: true,
          ranges: [
            {
              key: 'cheap',
              to: 100
            },
            {
              key: 'average',
              from: 100,
              to: 200
            },
            {
              key: 'expensive',
              from: 200
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  aggs: {
    price_ranges: {
      range: {
        field: "price",
        keyed: true,
        ranges: [
          {
            key: "cheap",
            to: 100,
          },
          {
            key: "average",
            from: 100,
            to: 200,
          },
          {
            key: "expensive",
            from: 200,
          },
        ],
      },
    },
  },
});
console.log(response);
GET sales/_search
{
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "keyed": true,
        "ranges": [
          { "key": "cheap", "to": 100 },
          { "key": "average", "from": 100, "to": 200 },
          { "key": "expensive", "from": 200 }
        ]
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "price_ranges": {
      "buckets": {
        "cheap": {
          "to": 100.0,
          "doc_count": 2
        },
        "average": {
          "from": 100.0,
          "to": 200.0,
          "doc_count": 2
        },
        "expensive": {
          "from": 200.0,
          "doc_count": 3
        }
      }
    }
  }
}

脚本

编辑

如果文档中的数据与您希望聚合的数据不完全匹配,请使用运行时字段。例如,当您需要应用特定的货币兑换汇率时:

resp = client.search(
    index="sales",
    runtime_mappings={
        "price.euros": {
            "type": "double",
            "script": {
                "source": "\n          emit(doc['price'].value * params.conversion_rate)\n        ",
                "params": {
                    "conversion_rate": 0.835526591
                }
            }
        }
    },
    aggs={
        "price_ranges": {
            "range": {
                "field": "price.euros",
                "ranges": [
                    {
                        "to": 100
                    },
                    {
                        "from": 100,
                        "to": 200
                    },
                    {
                        "from": 200
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    runtime_mappings: {
      'price.euros' => {
        type: 'double',
        script: {
          source: "\n          emit(doc['price'].value * params.conversion_rate)\n        ",
          params: {
            conversion_rate: 0.835526591
          }
        }
      }
    },
    aggregations: {
      price_ranges: {
        range: {
          field: 'price.euros',
          ranges: [
            {
              to: 100
            },
            {
              from: 100,
              to: 200
            },
            {
              from: 200
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  runtime_mappings: {
    "price.euros": {
      type: "double",
      script: {
        source:
          "\n          emit(doc['price'].value * params.conversion_rate)\n        ",
        params: {
          conversion_rate: 0.835526591,
        },
      },
    },
  },
  aggs: {
    price_ranges: {
      range: {
        field: "price.euros",
        ranges: [
          {
            to: 100,
          },
          {
            from: 100,
            to: 200,
          },
          {
            from: 200,
          },
        ],
      },
    },
  },
});
console.log(response);
GET sales/_search
{
  "runtime_mappings": {
    "price.euros": {
      "type": "double",
      "script": {
        "source": """
          emit(doc['price'].value * params.conversion_rate)
        """,
        "params": {
          "conversion_rate": 0.835526591
        }
      }
    }
  },
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price.euros",
        "ranges": [
          { "to": 100 },
          { "from": 100, "to": 200 },
          { "from": 200 }
        ]
      }
    }
  }
}

子聚合

编辑

以下示例不仅将文档“放入”不同的桶中,还会计算每个价格范围内的价格统计信息:

resp = client.search(
    index="sales",
    aggs={
        "price_ranges": {
            "range": {
                "field": "price",
                "ranges": [
                    {
                        "to": 100
                    },
                    {
                        "from": 100,
                        "to": 200
                    },
                    {
                        "from": 200
                    }
                ]
            },
            "aggs": {
                "price_stats": {
                    "stats": {
                        "field": "price"
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  body: {
    aggregations: {
      price_ranges: {
        range: {
          field: 'price',
          ranges: [
            {
              to: 100
            },
            {
              from: 100,
              to: 200
            },
            {
              from: 200
            }
          ]
        },
        aggregations: {
          price_stats: {
            stats: {
              field: 'price'
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  aggs: {
    price_ranges: {
      range: {
        field: "price",
        ranges: [
          {
            to: 100,
          },
          {
            from: 100,
            to: 200,
          },
          {
            from: 200,
          },
        ],
      },
      aggs: {
        price_stats: {
          stats: {
            field: "price",
          },
        },
      },
    },
  },
});
console.log(response);
GET sales/_search
{
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          { "to": 100 },
          { "from": 100, "to": 200 },
          { "from": 200 }
        ]
      },
      "aggs": {
        "price_stats": {
          "stats": { "field": "price" }
        }
      }
    }
  }
}

响应

{
  ...
  "aggregations": {
    "price_ranges": {
      "buckets": [
        {
          "key": "*-100.0",
          "to": 100.0,
          "doc_count": 2,
          "price_stats": {
            "count": 2,
            "min": 10.0,
            "max": 50.0,
            "avg": 30.0,
            "sum": 60.0
          }
        },
        {
          "key": "100.0-200.0",
          "from": 100.0,
          "to": 200.0,
          "doc_count": 2,
          "price_stats": {
            "count": 2,
            "min": 150.0,
            "max": 175.0,
            "avg": 162.5,
            "sum": 325.0
          }
        },
        {
          "key": "200.0-*",
          "from": 200.0,
          "doc_count": 3,
          "price_stats": {
            "count": 3,
            "min": 200.0,
            "max": 200.0,
            "avg": 200.0,
            "sum": 600.0
          }
        }
      ]
    }
  }
}

直方图字段

编辑

对直方图字段运行范围聚合会计算每个配置范围的总计数。

此计算不会在直方图字段的各个值之间进行插值。因此,可能出现某个范围恰好“介于”两个直方图值之间的情况,此时生成的范围桶的文档计数将为零。

以下示例对一个索引执行范围聚合,该索引存储了不同网络的延迟指标(以毫秒为单位)的预聚合直方图:

resp = client.indices.create(
    index="metrics_index",
    mappings={
        "properties": {
            "network": {
                "properties": {
                    "name": {
                        "type": "keyword"
                    }
                }
            },
            "latency_histo": {
                "type": "histogram"
            }
        }
    },
)
print(resp)

resp1 = client.index(
    index="metrics_index",
    id="1",
    refresh=True,
    document={
        "network.name": "net-1",
        "latency_histo": {
            "values": [
                1,
                3,
                8,
                12,
                15
            ],
            "counts": [
                3,
                7,
                23,
                12,
                6
            ]
        }
    },
)
print(resp1)

resp2 = client.index(
    index="metrics_index",
    id="2",
    refresh=True,
    document={
        "network.name": "net-2",
        "latency_histo": {
            "values": [
                1,
                6,
                8,
                12,
                14
            ],
            "counts": [
                8,
                17,
                8,
                7,
                6
            ]
        }
    },
)
print(resp2)

resp3 = client.search(
    index="metrics_index",
    size="0",
    filter_path="aggregations",
    aggs={
        "latency_ranges": {
            "range": {
                "field": "latency_histo",
                "ranges": [
                    {
                        "to": 2
                    },
                    {
                        "from": 2,
                        "to": 3
                    },
                    {
                        "from": 3,
                        "to": 10
                    },
                    {
                        "from": 10
                    }
                ]
            }
        }
    },
)
print(resp3)
response = client.indices.create(
  index: 'metrics_index',
  body: {
    mappings: {
      properties: {
        network: {
          properties: {
            name: {
              type: 'keyword'
            }
          }
        },
        latency_histo: {
          type: 'histogram'
        }
      }
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 1,
  refresh: true,
  body: {
    'network.name' => 'net-1',
    latency_histo: {
      values: [
        1,
        3,
        8,
        12,
        15
      ],
      counts: [
        3,
        7,
        23,
        12,
        6
      ]
    }
  }
)
puts response

response = client.index(
  index: 'metrics_index',
  id: 2,
  refresh: true,
  body: {
    'network.name' => 'net-2',
    latency_histo: {
      values: [
        1,
        6,
        8,
        12,
        14
      ],
      counts: [
        8,
        17,
        8,
        7,
        6
      ]
    }
  }
)
puts response

response = client.search(
  index: 'metrics_index',
  size: 0,
  filter_path: 'aggregations',
  body: {
    aggregations: {
      latency_ranges: {
        range: {
          field: 'latency_histo',
          ranges: [
            {
              to: 2
            },
            {
              from: 2,
              to: 3
            },
            {
              from: 3,
              to: 10
            },
            {
              from: 10
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "metrics_index",
  mappings: {
    properties: {
      network: {
        properties: {
          name: {
            type: "keyword",
          },
        },
      },
      latency_histo: {
        type: "histogram",
      },
    },
  },
});
console.log(response);

const response1 = await client.index({
  index: "metrics_index",
  id: 1,
  refresh: "true",
  document: {
    "network.name": "net-1",
    latency_histo: {
      values: [1, 3, 8, 12, 15],
      counts: [3, 7, 23, 12, 6],
    },
  },
});
console.log(response1);

const response2 = await client.index({
  index: "metrics_index",
  id: 2,
  refresh: "true",
  document: {
    "network.name": "net-2",
    latency_histo: {
      values: [1, 6, 8, 12, 14],
      counts: [8, 17, 8, 7, 6],
    },
  },
});
console.log(response2);

const response3 = await client.search({
  index: "metrics_index",
  size: 0,
  filter_path: "aggregations",
  aggs: {
    latency_ranges: {
      range: {
        field: "latency_histo",
        ranges: [
          {
            to: 2,
          },
          {
            from: 2,
            to: 3,
          },
          {
            from: 3,
            to: 10,
          },
          {
            from: 10,
          },
        ],
      },
    },
  },
});
console.log(response3);
PUT metrics_index
{
  "mappings": {
    "properties": {
      "network": {
        "properties": {
          "name": {
            "type": "keyword"
          }
        }
      },
      "latency_histo": {
         "type": "histogram"
      }
    }
  }
}

PUT metrics_index/_doc/1?refresh
{
  "network.name" : "net-1",
  "latency_histo" : {
      "values" : [1, 3, 8, 12, 15],
      "counts" : [3, 7, 23, 12, 6]
   }
}

PUT metrics_index/_doc/2?refresh
{
  "network.name" : "net-2",
  "latency_histo" : {
      "values" : [1, 6, 8, 12, 14],
      "counts" : [8, 17, 8, 7, 6]
   }
}

GET metrics_index/_search?size=0&filter_path=aggregations
{
  "aggs": {
    "latency_ranges": {
      "range": {
        "field": "latency_histo",
        "ranges": [
          {"to": 2},
          {"from": 2, "to": 3},
          {"from": 3, "to": 10},
          {"from": 10}
        ]
      }
    }
  }
}

range 聚合会根据 values 确定落入每个范围的条目,对相应的计数求和,并返回以下输出:

{
  "aggregations": {
    "latency_ranges": {
      "buckets": [
        {
          "key": "*-2.0",
          "to": 2.0,
          "doc_count": 11
        },
        {
          "key": "2.0-3.0",
          "from": 2.0,
          "to": 3.0,
          "doc_count": 0
        },
        {
          "key": "3.0-10.0",
          "from": 3.0,
          "to": 10.0,
          "doc_count": 55
        },
        {
          "key": "10.0-*",
          "from": 10.0,
          "doc_count": 31
        }
      ]
    }
  }
}

范围聚合是一种桶聚合,它将文档分区到不同的桶中,而不是像指标聚合那样计算字段的指标。每个桶代表一个文档集合,可以在其上运行子聚合。另一方面,直方图字段是一个预聚合字段,表示单个字段中的多个值:数值数据的桶和每个桶的项目/文档计数。范围聚合的预期输入(期望原始文档)和直方图字段(提供摘要信息)之间的这种不匹配将聚合的结果限制为每个桶的文档计数。

因此,当对直方图字段执行范围聚合时,不允许使用子聚合。