范围聚合
范围聚合
一种基于值源的多桶聚合,允许用户定义一组范围,每个范围代表一个桶。在聚合过程中,会将从每个文档中提取的值与各个桶的范围逐一比对,并将匹配的文档放入相应的“桶”中。请注意,此聚合包含每个范围的 `from` 值,但不包含 `to` 值。
示例
resp = client.search( index="sales", aggs={ "price_ranges": { "range": { "field": "price", "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] } } }, ) print(resp)
response = client.search( index: 'sales', body: { aggregations: { price_ranges: { range: { field: 'price', ranges: [ { to: 100 }, { from: 100, to: 200 }, { from: 200 } ] } } } } ) puts response
const response = await client.search({ index: "sales", aggs: { price_ranges: { range: { field: "price", ranges: [ { to: 100, }, { from: 100, to: 200, }, { from: 200, }, ], }, }, }, }); console.log(response);
GET sales/_search { "aggs": { "price_ranges": { "range": { "field": "price", "ranges": [ { "to": 100.0 }, { "from": 100.0, "to": 200.0 }, { "from": 200.0 } ] } } } }
响应
{ ... "aggregations": { "price_ranges": { "buckets": [ { "key": "*-100.0", "to": 100.0, "doc_count": 2 }, { "key": "100.0-200.0", "from": 100.0, "to": 200.0, "doc_count": 2 }, { "key": "200.0-*", "from": 200.0, "doc_count": 3 } ] } } }
键控响应
将 `keyed` 标志设置为 `true` 会为每个桶关联一个唯一的字符串键,并以哈希(对象)而不是数组的形式返回各个范围:
resp = client.search( index="sales", aggs={ "price_ranges": { "range": { "field": "price", "keyed": True, "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] } } }, ) print(resp)
response = client.search( index: 'sales', body: { aggregations: { price_ranges: { range: { field: 'price', keyed: true, ranges: [ { to: 100 }, { from: 100, to: 200 }, { from: 200 } ] } } } } ) puts response
const response = await client.search({ index: "sales", aggs: { price_ranges: { range: { field: "price", keyed: true, ranges: [ { to: 100, }, { from: 100, to: 200, }, { from: 200, }, ], }, }, }, }); console.log(response);
GET sales/_search { "aggs": { "price_ranges": { "range": { "field": "price", "keyed": true, "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] } } } }
响应
{ ... "aggregations": { "price_ranges": { "buckets": { "*-100.0": { "to": 100.0, "doc_count": 2 }, "100.0-200.0": { "from": 100.0, "to": 200.0, "doc_count": 2 }, "200.0-*": { "from": 200.0, "doc_count": 3 } } } } }
也可以为每个范围自定义键:
resp = client.search( index="sales", aggs={ "price_ranges": { "range": { "field": "price", "keyed": True, "ranges": [ { "key": "cheap", "to": 100 }, { "key": "average", "from": 100, "to": 200 }, { "key": "expensive", "from": 200 } ] } } }, ) print(resp)
response = client.search( index: 'sales', body: { aggregations: { price_ranges: { range: { field: 'price', keyed: true, ranges: [ { key: 'cheap', to: 100 }, { key: 'average', from: 100, to: 200 }, { key: 'expensive', from: 200 } ] } } } } ) puts response
const response = await client.search({ index: "sales", aggs: { price_ranges: { range: { field: "price", keyed: true, ranges: [ { key: "cheap", to: 100, }, { key: "average", from: 100, to: 200, }, { key: "expensive", from: 200, }, ], }, }, }, }); console.log(response);
GET sales/_search { "aggs": { "price_ranges": { "range": { "field": "price", "keyed": true, "ranges": [ { "key": "cheap", "to": 100 }, { "key": "average", "from": 100, "to": 200 }, { "key": "expensive", "from": 200 } ] } } } }
响应
{ ... "aggregations": { "price_ranges": { "buckets": { "cheap": { "to": 100.0, "doc_count": 2 }, "average": { "from": 100.0, "to": 200.0, "doc_count": 2 }, "expensive": { "from": 200.0, "doc_count": 3 } } } } }
脚本
如果文档中的数据与您想要聚合的内容不完全一致,请使用运行时字段。例如,需要应用特定的货币汇率时:
resp = client.search( index="sales", runtime_mappings={ "price.euros": { "type": "double", "script": { "source": "\n emit(doc['price'].value * params.conversion_rate)\n ", "params": { "conversion_rate": 0.835526591 } } } }, aggs={ "price_ranges": { "range": { "field": "price.euros", "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] } } }, ) print(resp)
response = client.search( index: 'sales', body: { runtime_mappings: { 'price.euros' => { type: 'double', script: { source: "\n emit(doc['price'].value * params.conversion_rate)\n ", params: { conversion_rate: 0.835526591 } } } }, aggregations: { price_ranges: { range: { field: 'price.euros', ranges: [ { to: 100 }, { from: 100, to: 200 }, { from: 200 } ] } } } } ) puts response
const response = await client.search({ index: "sales", runtime_mappings: { "price.euros": { type: "double", script: { source: "\n emit(doc['price'].value * params.conversion_rate)\n ", params: { conversion_rate: 0.835526591, }, }, }, }, aggs: { price_ranges: { range: { field: "price.euros", ranges: [ { to: 100, }, { from: 100, to: 200, }, { from: 200, }, ], }, }, }, }); console.log(response);
GET sales/_search { "runtime_mappings": { "price.euros": { "type": "double", "script": { "source": """ emit(doc['price'].value * params.conversion_rate) """, "params": { "conversion_rate": 0.835526591 } } } }, "aggs": { "price_ranges": { "range": { "field": "price.euros", "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] } } } }
子聚合
以下示例不仅将文档“分桶”到不同的桶中,还会计算每个价格范围内价格的统计信息:
resp = client.search( index="sales", aggs={ "price_ranges": { "range": { "field": "price", "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] }, "aggs": { "price_stats": { "stats": { "field": "price" } } } } }, ) print(resp)
response = client.search( index: 'sales', body: { aggregations: { price_ranges: { range: { field: 'price', ranges: [ { to: 100 }, { from: 100, to: 200 }, { from: 200 } ] }, aggregations: { price_stats: { stats: { field: 'price' } } } } } } ) puts response
const response = await client.search({ index: "sales", aggs: { price_ranges: { range: { field: "price", ranges: [ { to: 100, }, { from: 100, to: 200, }, { from: 200, }, ], }, aggs: { price_stats: { stats: { field: "price", }, }, }, }, }, }); console.log(response);
GET sales/_search { "aggs": { "price_ranges": { "range": { "field": "price", "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, { "from": 200 } ] }, "aggs": { "price_stats": { "stats": { "field": "price" } } } } } }
响应
{ ... "aggregations": { "price_ranges": { "buckets": [ { "key": "*-100.0", "to": 100.0, "doc_count": 2, "price_stats": { "count": 2, "min": 10.0, "max": 50.0, "avg": 30.0, "sum": 60.0 } }, { "key": "100.0-200.0", "from": 100.0, "to": 200.0, "doc_count": 2, "price_stats": { "count": 2, "min": 150.0, "max": 175.0, "avg": 162.5, "sum": 325.0 } }, { "key": "200.0-*", "from": 200.0, "doc_count": 3, "price_stats": { "count": 3, "min": 200.0, "max": 200.0, "avg": 200.0, "sum": 600.0 } } ] } } }
直方图字段
对直方图字段运行范围聚合时,会计算每个已配置范围的总计数。
这一计算不会在直方图字段的各个值之间进行插值。因此,某个范围可能恰好落在两个直方图值“之间”,此时生成的范围桶的文档计数为零。
下面的示例针对以下索引执行范围聚合,该索引存储了不同网络延迟指标(以毫秒为单位)的预聚合直方图:
resp = client.indices.create( index="metrics_index", mappings={ "properties": { "network": { "properties": { "name": { "type": "keyword" } } }, "latency_histo": { "type": "histogram" } } }, ) print(resp) resp1 = client.index( index="metrics_index", id="1", refresh=True, document={ "network.name": "net-1", "latency_histo": { "values": [ 1, 3, 8, 12, 15 ], "counts": [ 3, 7, 23, 12, 6 ] } }, ) print(resp1) resp2 = client.index( index="metrics_index", id="2", refresh=True, document={ "network.name": "net-2", "latency_histo": { "values": [ 1, 6, 8, 12, 14 ], "counts": [ 8, 17, 8, 7, 6 ] } }, ) print(resp2) resp3 = client.search( index="metrics_index", size="0", filter_path="aggregations", aggs={ "latency_ranges": { "range": { "field": "latency_histo", "ranges": [ { "to": 2 }, { "from": 2, "to": 3 }, { "from": 3, "to": 10 }, { "from": 10 } ] } } }, ) print(resp3)
response = client.indices.create( index: 'metrics_index', body: { mappings: { properties: { network: { properties: { name: { type: 'keyword' } } }, latency_histo: { type: 'histogram' } } } } ) puts response response = client.index( index: 'metrics_index', id: 1, refresh: true, body: { 'network.name' => 'net-1', latency_histo: { values: [ 1, 3, 8, 12, 15 ], counts: [ 3, 7, 23, 12, 6 ] } } ) puts response response = client.index( index: 'metrics_index', id: 2, refresh: true, body: { 'network.name' => 'net-2', latency_histo: { values: [ 1, 6, 8, 12, 14 ], counts: [ 8, 17, 8, 7, 6 ] } } ) puts response response = client.search( index: 'metrics_index', size: 0, filter_path: 'aggregations', body: { aggregations: { latency_ranges: { range: { field: 'latency_histo', ranges: [ { to: 2 }, { from: 2, to: 3 }, { from: 3, to: 10 }, { from: 10 } ] } } } } ) puts response
const response = await client.indices.create({ index: "metrics_index", mappings: { properties: { network: { properties: { name: { type: "keyword", }, }, }, latency_histo: { type: "histogram", }, }, }, }); console.log(response); const response1 = await client.index({ index: "metrics_index", id: 1, refresh: "true", document: { "network.name": "net-1", latency_histo: { values: [1, 3, 8, 12, 15], counts: [3, 7, 23, 12, 6], }, }, }); console.log(response1); const response2 = await client.index({ index: "metrics_index", id: 2, refresh: "true", document: { "network.name": "net-2", latency_histo: { values: [1, 6, 8, 12, 14], counts: [8, 17, 8, 7, 6], }, }, }); console.log(response2); const response3 = await client.search({ index: "metrics_index", size: 0, filter_path: "aggregations", aggs: { latency_ranges: { range: { field: "latency_histo", ranges: [ { to: 2, }, { from: 2, to: 3, }, { from: 3, to: 10, }, { from: 10, }, ], }, }, }, }); console.log(response3);
PUT metrics_index { "mappings": { "properties": { "network": { "properties": { "name": { "type": "keyword" } } }, "latency_histo": { "type": "histogram" } } } } PUT metrics_index/_doc/1?refresh { "network.name" : "net-1", "latency_histo" : { "values" : [1, 3, 8, 12, 15], "counts" : [3, 7, 23, 12, 6] } } PUT metrics_index/_doc/2?refresh { "network.name" : "net-2", "latency_histo" : { "values" : [1, 6, 8, 12, 14], "counts" : [8, 17, 8, 7, 6] } } GET metrics_index/_search?size=0&filter_path=aggregations { "aggs": { "latency_ranges": { "range": { "field": "latency_histo", "ranges": [ {"to": 2}, {"from": 2, "to": 3}, {"from": 3, "to": 10}, {"from": 10} ] } } } }
`range` 聚合会根据 `values` 确定每个范围所包含的值,对相应的 `counts` 求和,并返回以下输出:
{ "aggregations": { "latency_ranges": { "buckets": [ { "key": "*-2.0", "to": 2.0, "doc_count": 11 }, { "key": "2.0-3.0", "from": 2.0, "to": 3.0, "doc_count": 0 }, { "key": "3.0-10.0", "from": 3.0, "to": 10.0, "doc_count": 55 }, { "key": "10.0-*", "from": 10.0, "doc_count": 31 } ] } } }
范围聚合是一种桶聚合,它将文档划分到不同的桶中,而不是像指标聚合那样对字段计算指标。每个桶代表一个文档集合,可以在其上运行子聚合。而直方图字段是一种预聚合字段,在单个字段中表示多个值:数值数据的桶,以及每个桶的项目/文档计数。范围聚合期望的输入(原始文档)与直方图字段(提供汇总信息)之间的这种不匹配,使得聚合结果只能包含每个桶的文档计数。
因此,对直方图字段执行范围聚合时,不允许使用子聚合。