顶级指标聚合

编辑

top_metrics 聚合从具有最大或最小“排序”值的文档中选择指标。例如,这会获取 s 值最大的文档上 m 字段的值

resp = client.bulk(
    index="test",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "s": 1,
            "m": 3.1415
        },
        {
            "index": {}
        },
        {
            "s": 2,
            "m": 1
        },
        {
            "index": {}
        },
        {
            "s": 3,
            "m": 2.71828
        }
    ],
)
print(resp)

resp1 = client.search(
    index="test",
    filter_path="aggregations",
    aggs={
        "tm": {
            "top_metrics": {
                "metrics": {
                    "field": "m"
                },
                "sort": {
                    "s": "desc"
                }
            }
        }
    },
)
print(resp1)
response = client.bulk(
  index: 'test',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      s: 1,
      m: 3.1415
    },
    {
      index: {}
    },
    {
      s: 2,
      m: 1
    },
    {
      index: {}
    },
    {
      s: 3,
      m: 2.71828
    }
  ]
)
puts response

response = client.search(
  index: 'test',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      tm: {
        top_metrics: {
          metrics: {
            field: 'm'
          },
          sort: {
            s: 'desc'
          }
        }
      }
    }
  }
)
puts response
const response = await client.bulk({
  index: "test",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      s: 1,
      m: 3.1415,
    },
    {
      index: {},
    },
    {
      s: 2,
      m: 1,
    },
    {
      index: {},
    },
    {
      s: 3,
      m: 2.71828,
    },
  ],
});
console.log(response);

const response1 = await client.search({
  index: "test",
  filter_path: "aggregations",
  aggs: {
    tm: {
      top_metrics: {
        metrics: {
          field: "m",
        },
        sort: {
          s: "desc",
        },
      },
    },
  },
});
console.log(response1);
POST /test/_bulk?refresh
{"index": {}}
{"s": 1, "m": 3.1415}
{"index": {}}
{"s": 2, "m": 1.0}
{"index": {}}
{"s": 3, "m": 2.71828}
POST /test/_search?filter_path=aggregations
{
  "aggs": {
    "tm": {
      "top_metrics": {
        "metrics": {"field": "m"},
        "sort": {"s": "desc"}
      }
    }
  }
}

返回结果如下

{
  "aggregations": {
    "tm": {
      "top": [ {"sort": [3], "metrics": {"m": 2.718280076980591 } } ]
    }
  }
}

top_metrics 在设计思路上与 top_hits 非常相似,但由于它的功能更受限制,因此完成工作所需的内存更少,而且通常速度更快。

sort

编辑

指标请求中的 sort 字段的功能与 搜索 请求中的 sort 字段完全相同,除了

  • 它不能用于 binary、flattened、ip、keyword 或 text 字段。
  • 它只支持单个排序值,因此当排序值相同时,未指定哪个文档胜出。

聚合返回的指标是搜索请求将返回的第一个命中。所以,

"sort": {"s": "desc"}
获取具有最高 s 值的文档的指标
"sort": {"s": "asc"}
获取具有最低 s 值的文档的指标
"sort": {"_geo_distance": {"location": "POINT (-78.6382 35.7796)"}}
获取 location 最接近 35.7796, -78.6382 的文档的指标
"sort": "_score"
获取具有最高分数的文档的指标

metrics

编辑

metrics 选择要返回的“顶部”文档的字段。您可以使用类似 "metrics": {"field": "m"} 的内容请求单个指标,或者通过类似 "metrics": [{"field": "m"}, {"field": "i"}] 的指标列表来请求多个指标。

metrics.field 支持以下字段类型:boolean、ip、keyword 以及数值类型。

除了 keyword 之外,还支持相应类型的运行时字段。metrics.field 不支持具有数组值的字段。对数组值进行 top_metrics 聚合可能会返回不一致的结果。

以下示例在几种字段类型上运行 top_metrics 聚合。

resp = client.indices.create(
    index="test",
    mappings={
        "properties": {
            "d": {
                "type": "date"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="test",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "s": 1,
            "m": 3.1415,
            "i": 1,
            "d": "2020-01-01T00:12:12Z",
            "t": "cat"
        },
        {
            "index": {}
        },
        {
            "s": 2,
            "m": 1,
            "i": 6,
            "d": "2020-01-02T00:12:12Z",
            "t": "dog"
        },
        {
            "index": {}
        },
        {
            "s": 3,
            "m": 2.71828,
            "i": -12,
            "d": "2019-12-31T00:12:12Z",
            "t": "chicken"
        }
    ],
)
print(resp1)

resp2 = client.search(
    index="test",
    filter_path="aggregations",
    aggs={
        "tm": {
            "top_metrics": {
                "metrics": [
                    {
                        "field": "m"
                    },
                    {
                        "field": "i"
                    },
                    {
                        "field": "d"
                    },
                    {
                        "field": "t.keyword"
                    }
                ],
                "sort": {
                    "s": "desc"
                }
            }
        }
    },
)
print(resp2)
response = client.indices.create(
  index: 'test',
  body: {
    mappings: {
      properties: {
        d: {
          type: 'date'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'test',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      s: 1,
      m: 3.1415,
      i: 1,
      d: '2020-01-01T00:12:12Z',
      t: 'cat'
    },
    {
      index: {}
    },
    {
      s: 2,
      m: 1,
      i: 6,
      d: '2020-01-02T00:12:12Z',
      t: 'dog'
    },
    {
      index: {}
    },
    {
      s: 3,
      m: 2.71828,
      i: -12,
      d: '2019-12-31T00:12:12Z',
      t: 'chicken'
    }
  ]
)
puts response

response = client.search(
  index: 'test',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      tm: {
        top_metrics: {
          metrics: [
            {
              field: 'm'
            },
            {
              field: 'i'
            },
            {
              field: 'd'
            },
            {
              field: 't.keyword'
            }
          ],
          sort: {
            s: 'desc'
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "test",
  mappings: {
    properties: {
      d: {
        type: "date",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "test",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      s: 1,
      m: 3.1415,
      i: 1,
      d: "2020-01-01T00:12:12Z",
      t: "cat",
    },
    {
      index: {},
    },
    {
      s: 2,
      m: 1,
      i: 6,
      d: "2020-01-02T00:12:12Z",
      t: "dog",
    },
    {
      index: {},
    },
    {
      s: 3,
      m: 2.71828,
      i: -12,
      d: "2019-12-31T00:12:12Z",
      t: "chicken",
    },
  ],
});
console.log(response1);

const response2 = await client.search({
  index: "test",
  filter_path: "aggregations",
  aggs: {
    tm: {
      top_metrics: {
        metrics: [
          {
            field: "m",
          },
          {
            field: "i",
          },
          {
            field: "d",
          },
          {
            field: "t.keyword",
          },
        ],
        sort: {
          s: "desc",
        },
      },
    },
  },
});
console.log(response2);
PUT /test
{
  "mappings": {
    "properties": {
      "d": {"type": "date"}
    }
  }
}
POST /test/_bulk?refresh
{"index": {}}
{"s": 1, "m": 3.1415, "i": 1, "d": "2020-01-01T00:12:12Z", "t": "cat"}
{"index": {}}
{"s": 2, "m": 1.0, "i": 6, "d": "2020-01-02T00:12:12Z", "t": "dog"}
{"index": {}}
{"s": 3, "m": 2.71828, "i": -12, "d": "2019-12-31T00:12:12Z", "t": "chicken"}
POST /test/_search?filter_path=aggregations
{
  "aggs": {
    "tm": {
      "top_metrics": {
        "metrics": [
          {"field": "m"},
          {"field": "i"},
          {"field": "d"},
          {"field": "t.keyword"}
        ],
        "sort": {"s": "desc"}
      }
    }
  }
}

返回结果如下

{
  "aggregations": {
    "tm": {
      "top": [ {
        "sort": [3],
        "metrics": {
          "m": 2.718280076980591,
          "i": -12,
          "d": "2019-12-31T00:12:12.000Z",
          "t.keyword": "chicken"
        }
      } ]
    }
  }
}

missing

编辑

missing 参数定义了如何处理缺少值的文档。默认情况下,如果任何关键组件缺失,则会忽略整个文档。可以通过使用 missing 参数来将缺失的组件视为具有某个值。

resp = client.indices.create(
    index="my-index",
    mappings={
        "properties": {
            "nr": {
                "type": "integer"
            },
            "state": {
                "type": "keyword"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="my-index",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "nr": 1,
            "state": "started"
        },
        {
            "index": {}
        },
        {
            "nr": 2,
            "state": "stopped"
        },
        {
            "index": {}
        },
        {
            "nr": 3,
            "state": "N/A"
        },
        {
            "index": {}
        },
        {
            "nr": 4
        }
    ],
)
print(resp1)

resp2 = client.search(
    index="my-index",
    filter_path="aggregations",
    aggs={
        "my_top_metrics": {
            "top_metrics": {
                "metrics": {
                    "field": "state",
                    "missing": "N/A"
                },
                "sort": {
                    "nr": "desc"
                }
            }
        }
    },
)
print(resp2)
response = client.indices.create(
  index: 'my-index',
  body: {
    mappings: {
      properties: {
        nr: {
          type: 'integer'
        },
        state: {
          type: 'keyword'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'my-index',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      nr: 1,
      state: 'started'
    },
    {
      index: {}
    },
    {
      nr: 2,
      state: 'stopped'
    },
    {
      index: {}
    },
    {
      nr: 3,
      state: 'N/A'
    },
    {
      index: {}
    },
    {
      nr: 4
    }
  ]
)
puts response

response = client.search(
  index: 'my-index',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      my_top_metrics: {
        top_metrics: {
          metrics: {
            field: 'state',
            missing: 'N/A'
          },
          sort: {
            nr: 'desc'
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "my-index",
  mappings: {
    properties: {
      nr: {
        type: "integer",
      },
      state: {
        type: "keyword",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "my-index",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      nr: 1,
      state: "started",
    },
    {
      index: {},
    },
    {
      nr: 2,
      state: "stopped",
    },
    {
      index: {},
    },
    {
      nr: 3,
      state: "N/A",
    },
    {
      index: {},
    },
    {
      nr: 4,
    },
  ],
});
console.log(response1);

const response2 = await client.search({
  index: "my-index",
  filter_path: "aggregations",
  aggs: {
    my_top_metrics: {
      top_metrics: {
        metrics: {
          field: "state",
          missing: "N/A",
        },
        sort: {
          nr: "desc",
        },
      },
    },
  },
});
console.log(response2);
PUT /my-index
{
  "mappings": {
    "properties": {
      "nr":    { "type": "integer" },
      "state":  { "type": "keyword"  } 
    }
  }
}
POST /my-index/_bulk?refresh
{"index": {}}
{"nr": 1, "state": "started"}
{"index": {}}
{"nr": 2, "state": "stopped"}
{"index": {}}
{"nr": 3, "state": "N/A"}
{"index": {}}
{"nr": 4} 
POST /my-index/_search?filter_path=aggregations
{
  "aggs": {
    "my_top_metrics": {
      "top_metrics": {
        "metrics": {
          "field": "state",
          "missing": "N/A"}, 
        "sort": {"nr": "desc"}
      }
    }
  }
}

如果要在文本内容上使用聚合,则它必须是 keyword 类型字段,或者必须在该字段上启用 fielddata。

此文档缺少 state 字段值。

missing 参数定义了如果 state 字段缺少值,则应将其视为具有 N/A 值。

该请求导致以下响应

{
  "aggregations": {
    "my_top_metrics": {
      "top": [
        {
          "sort": [
            4
          ],
          "metrics": {
            "state": "N/A"
          }
        }
      ]
    }
  }
}

size

编辑

top_metrics 可以使用 size 参数返回前几个文档的指标

resp = client.bulk(
    index="test",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "s": 1,
            "m": 3.1415
        },
        {
            "index": {}
        },
        {
            "s": 2,
            "m": 1
        },
        {
            "index": {}
        },
        {
            "s": 3,
            "m": 2.71828
        }
    ],
)
print(resp)

resp1 = client.search(
    index="test",
    filter_path="aggregations",
    aggs={
        "tm": {
            "top_metrics": {
                "metrics": {
                    "field": "m"
                },
                "sort": {
                    "s": "desc"
                },
                "size": 3
            }
        }
    },
)
print(resp1)
response = client.bulk(
  index: 'test',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      s: 1,
      m: 3.1415
    },
    {
      index: {}
    },
    {
      s: 2,
      m: 1
    },
    {
      index: {}
    },
    {
      s: 3,
      m: 2.71828
    }
  ]
)
puts response

response = client.search(
  index: 'test',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      tm: {
        top_metrics: {
          metrics: {
            field: 'm'
          },
          sort: {
            s: 'desc'
          },
          size: 3
        }
      }
    }
  }
)
puts response
const response = await client.bulk({
  index: "test",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      s: 1,
      m: 3.1415,
    },
    {
      index: {},
    },
    {
      s: 2,
      m: 1,
    },
    {
      index: {},
    },
    {
      s: 3,
      m: 2.71828,
    },
  ],
});
console.log(response);

const response1 = await client.search({
  index: "test",
  filter_path: "aggregations",
  aggs: {
    tm: {
      top_metrics: {
        metrics: {
          field: "m",
        },
        sort: {
          s: "desc",
        },
        size: 3,
      },
    },
  },
});
console.log(response1);
POST /test/_bulk?refresh
{"index": {}}
{"s": 1, "m": 3.1415}
{"index": {}}
{"s": 2, "m": 1.0}
{"index": {}}
{"s": 3, "m": 2.71828}
POST /test/_search?filter_path=aggregations
{
  "aggs": {
    "tm": {
      "top_metrics": {
        "metrics": {"field": "m"},
        "sort": {"s": "desc"},
        "size": 3
      }
    }
  }
}

返回结果如下

{
  "aggregations": {
    "tm": {
      "top": [
        {"sort": [3], "metrics": {"m": 2.718280076980591 } },
        {"sort": [2], "metrics": {"m": 1.0 } },
        {"sort": [1], "metrics": {"m": 3.1414999961853027 } }
      ]
    }
  }
}

默认 size 为 1。size 的默认上限为 10,因为聚合的工作存储是“密集的”,也就是说我们会为每个存储桶分配 size 个槽。10 是一个 非常 保守的默认上限,如果需要,您可以通过更改 top_metrics_max_size 索引设置来提高它。但请注意,较大的 size 可能会占用相当多的内存,尤其是当它们位于会产生许多存储桶的聚合(例如大型 terms 聚合)内部时。如果您仍然想提高它,请使用类似下面的请求:

resp = client.indices.put_settings(
    index="test",
    settings={
        "top_metrics_max_size": 100
    },
)
print(resp)
response = client.indices.put_settings(
  index: 'test',
  body: {
    top_metrics_max_size: 100
  }
)
puts response
const response = await client.indices.putSettings({
  index: "test",
  settings: {
    top_metrics_max_size: 100,
  },
});
console.log(response);
PUT /test/_settings
{
  "top_metrics_max_size": 100
}

如果 size 大于 1,则 top_metrics 聚合不能作为排序的 目标

示例

编辑

与 terms 聚合一起使用

编辑

此聚合在 terms 聚合中应该非常有用,例如,查找每个服务器报告的最后一个值。

resp = client.indices.create(
    index="node",
    mappings={
        "properties": {
            "ip": {
                "type": "ip"
            },
            "date": {
                "type": "date"
            }
        }
    },
)
print(resp)

resp1 = client.bulk(
    index="node",
    refresh=True,
    operations=[
        {
            "index": {}
        },
        {
            "ip": "192.168.0.1",
            "date": "2020-01-01T01:01:01",
            "m": 1
        },
        {
            "index": {}
        },
        {
            "ip": "192.168.0.1",
            "date": "2020-01-01T02:01:01",
            "m": 2
        },
        {
            "index": {}
        },
        {
            "ip": "192.168.0.2",
            "date": "2020-01-01T02:01:01",
            "m": 3
        }
    ],
)
print(resp1)

resp2 = client.search(
    index="node",
    filter_path="aggregations",
    aggs={
        "ip": {
            "terms": {
                "field": "ip"
            },
            "aggs": {
                "tm": {
                    "top_metrics": {
                        "metrics": {
                            "field": "m"
                        },
                        "sort": {
                            "date": "desc"
                        }
                    }
                }
            }
        }
    },
)
print(resp2)
response = client.indices.create(
  index: 'node',
  body: {
    mappings: {
      properties: {
        ip: {
          type: 'ip'
        },
        date: {
          type: 'date'
        }
      }
    }
  }
)
puts response

response = client.bulk(
  index: 'node',
  refresh: true,
  body: [
    {
      index: {}
    },
    {
      ip: '192.168.0.1',
      date: '2020-01-01T01:01:01',
      m: 1
    },
    {
      index: {}
    },
    {
      ip: '192.168.0.1',
      date: '2020-01-01T02:01:01',
      m: 2
    },
    {
      index: {}
    },
    {
      ip: '192.168.0.2',
      date: '2020-01-01T02:01:01',
      m: 3
    }
  ]
)
puts response

response = client.search(
  index: 'node',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      ip: {
        terms: {
          field: 'ip'
        },
        aggregations: {
          tm: {
            top_metrics: {
              metrics: {
                field: 'm'
              },
              sort: {
                date: 'desc'
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "node",
  mappings: {
    properties: {
      ip: {
        type: "ip",
      },
      date: {
        type: "date",
      },
    },
  },
});
console.log(response);

const response1 = await client.bulk({
  index: "node",
  refresh: "true",
  operations: [
    {
      index: {},
    },
    {
      ip: "192.168.0.1",
      date: "2020-01-01T01:01:01",
      m: 1,
    },
    {
      index: {},
    },
    {
      ip: "192.168.0.1",
      date: "2020-01-01T02:01:01",
      m: 2,
    },
    {
      index: {},
    },
    {
      ip: "192.168.0.2",
      date: "2020-01-01T02:01:01",
      m: 3,
    },
  ],
});
console.log(response1);

const response2 = await client.search({
  index: "node",
  filter_path: "aggregations",
  aggs: {
    ip: {
      terms: {
        field: "ip",
      },
      aggs: {
        tm: {
          top_metrics: {
            metrics: {
              field: "m",
            },
            sort: {
              date: "desc",
            },
          },
        },
      },
    },
  },
});
console.log(response2);
PUT /node
{
  "mappings": {
    "properties": {
      "ip": {"type": "ip"},
      "date": {"type": "date"}
    }
  }
}
POST /node/_bulk?refresh
{"index": {}}
{"ip": "192.168.0.1", "date": "2020-01-01T01:01:01", "m": 1}
{"index": {}}
{"ip": "192.168.0.1", "date": "2020-01-01T02:01:01", "m": 2}
{"index": {}}
{"ip": "192.168.0.2", "date": "2020-01-01T02:01:01", "m": 3}
POST /node/_search?filter_path=aggregations
{
  "aggs": {
    "ip": {
      "terms": {
        "field": "ip"
      },
      "aggs": {
        "tm": {
          "top_metrics": {
            "metrics": {"field": "m"},
            "sort": {"date": "desc"}
          }
        }
      }
    }
  }
}

返回结果如下

{
  "aggregations": {
    "ip": {
      "buckets": [
        {
          "key": "192.168.0.1",
          "doc_count": 2,
          "tm": {
            "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 2 } } ]
          }
        },
        {
          "key": "192.168.0.2",
          "doc_count": 1,
          "tm": {
            "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 3 } } ]
          }
        }
      ],
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0
    }
  }
}

与 top_hits 不同,您可以通过此指标的结果对存储桶进行排序

resp = client.search(
    index="node",
    filter_path="aggregations",
    aggs={
        "ip": {
            "terms": {
                "field": "ip",
                "order": {
                    "tm.m": "desc"
                }
            },
            "aggs": {
                "tm": {
                    "top_metrics": {
                        "metrics": {
                            "field": "m"
                        },
                        "sort": {
                            "date": "desc"
                        }
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'node',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      ip: {
        terms: {
          field: 'ip',
          order: {
            'tm.m' => 'desc'
          }
        },
        aggregations: {
          tm: {
            top_metrics: {
              metrics: {
                field: 'm'
              },
              sort: {
                date: 'desc'
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "node",
  filter_path: "aggregations",
  aggs: {
    ip: {
      terms: {
        field: "ip",
        order: {
          "tm.m": "desc",
        },
      },
      aggs: {
        tm: {
          top_metrics: {
            metrics: {
              field: "m",
            },
            sort: {
              date: "desc",
            },
          },
        },
      },
    },
  },
});
console.log(response);
POST /node/_search?filter_path=aggregations
{
  "aggs": {
    "ip": {
      "terms": {
        "field": "ip",
        "order": {"tm.m": "desc"}
      },
      "aggs": {
        "tm": {
          "top_metrics": {
            "metrics": {"field": "m"},
            "sort": {"date": "desc"}
          }
        }
      }
    }
  }
}

返回结果如下

{
  "aggregations": {
    "ip": {
      "buckets": [
        {
          "key": "192.168.0.2",
          "doc_count": 1,
          "tm": {
            "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 3 } } ]
          }
        },
        {
          "key": "192.168.0.1",
          "doc_count": 2,
          "tm": {
            "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 2 } } ]
          }
        }
      ],
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0
    }
  }
}

混合排序类型

编辑

如果用于排序的字段在不同索引中具有不同的类型,对 top_metrics 进行排序会产生一些令人惊讶的结果:浮点数字段始终独立于整数字段进行排序。

resp = client.bulk(
    index="test",
    refresh=True,
    operations=[
        {
            "index": {
                "_index": "test1"
            }
        },
        {
            "s": 1,
            "m": 3.1415
        },
        {
            "index": {
                "_index": "test1"
            }
        },
        {
            "s": 2,
            "m": 1
        },
        {
            "index": {
                "_index": "test2"
            }
        },
        {
            "s": 3.1,
            "m": 2.71828
        }
    ],
)
print(resp)

resp1 = client.search(
    index="test*",
    filter_path="aggregations",
    aggs={
        "tm": {
            "top_metrics": {
                "metrics": {
                    "field": "m"
                },
                "sort": {
                    "s": "asc"
                }
            }
        }
    },
)
print(resp1)
response = client.bulk(
  index: 'test',
  refresh: true,
  body: [
    {
      index: {
        _index: 'test1'
      }
    },
    {
      s: 1,
      m: 3.1415
    },
    {
      index: {
        _index: 'test1'
      }
    },
    {
      s: 2,
      m: 1
    },
    {
      index: {
        _index: 'test2'
      }
    },
    {
      s: 3.1,
      m: 2.71828
    }
  ]
)
puts response

response = client.search(
  index: 'test*',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      tm: {
        top_metrics: {
          metrics: {
            field: 'm'
          },
          sort: {
            s: 'asc'
          }
        }
      }
    }
  }
)
puts response
const response = await client.bulk({
  index: "test",
  refresh: "true",
  operations: [
    {
      index: {
        _index: "test1",
      },
    },
    {
      s: 1,
      m: 3.1415,
    },
    {
      index: {
        _index: "test1",
      },
    },
    {
      s: 2,
      m: 1,
    },
    {
      index: {
        _index: "test2",
      },
    },
    {
      s: 3.1,
      m: 2.71828,
    },
  ],
});
console.log(response);

const response1 = await client.search({
  index: "test*",
  filter_path: "aggregations",
  aggs: {
    tm: {
      top_metrics: {
        metrics: {
          field: "m",
        },
        sort: {
          s: "asc",
        },
      },
    },
  },
});
console.log(response1);
POST /test/_bulk?refresh
{"index": {"_index": "test1"}}
{"s": 1, "m": 3.1415}
{"index": {"_index": "test1"}}
{"s": 2, "m": 1}
{"index": {"_index": "test2"}}
{"s": 3.1, "m": 2.71828}
POST /test*/_search?filter_path=aggregations
{
  "aggs": {
    "tm": {
      "top_metrics": {
        "metrics": {"field": "m"},
        "sort": {"s": "asc"}
      }
    }
  }
}

返回结果如下

{
  "aggregations": {
    "tm": {
      "top": [ {"sort": [3.0999999046325684], "metrics": {"m": 2.718280076980591 } } ]
    }
  }
}

虽然这比报错要好,但这 可能 不是您想要的结果。虽然会损失一些精度,但您可以使用类似下面的方法将整数字段显式转换为浮点数

resp = client.search(
    index="test*",
    filter_path="aggregations",
    aggs={
        "tm": {
            "top_metrics": {
                "metrics": {
                    "field": "m"
                },
                "sort": {
                    "s": {
                        "order": "asc",
                        "numeric_type": "double"
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'test*',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      tm: {
        top_metrics: {
          metrics: {
            field: 'm'
          },
          sort: {
            s: {
              order: 'asc',
              numeric_type: 'double'
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "test*",
  filter_path: "aggregations",
  aggs: {
    tm: {
      top_metrics: {
        metrics: {
          field: "m",
        },
        sort: {
          s: {
            order: "asc",
            numeric_type: "double",
          },
        },
      },
    },
  },
});
console.log(response);
POST /test*/_search?filter_path=aggregations
{
  "aggs": {
    "tm": {
      "top_metrics": {
        "metrics": {"field": "m"},
        "sort": {"s": {"order": "asc", "numeric_type": "double"}}
      }
    }
  }
}

返回更符合预期的结果

{
  "aggregations": {
    "tm": {
      "top": [ {"sort": [1.0], "metrics": {"m": 3.1414999961853027 } } ]
    }
  }
}

在管道聚合中使用

编辑

top_metrics 可用于管道聚合,该管道聚合消耗每个存储桶的单个值,例如 bucket_selector,它应用每个存储桶的过滤,类似于在 SQL 中使用 HAVING 子句。这需要将 size 设置为 1,并为要传递给包装聚合器的(单个)指标指定正确的路径。例如

resp = client.search(
    index="test*",
    filter_path="aggregations",
    aggs={
        "ip": {
            "terms": {
                "field": "ip"
            },
            "aggs": {
                "tm": {
                    "top_metrics": {
                        "metrics": {
                            "field": "m"
                        },
                        "sort": {
                            "s": "desc"
                        },
                        "size": 1
                    }
                },
                "having_tm": {
                    "bucket_selector": {
                        "buckets_path": {
                            "top_m": "tm[m]"
                        },
                        "script": "params.top_m < 1000"
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'test*',
  filter_path: 'aggregations',
  body: {
    aggregations: {
      ip: {
        terms: {
          field: 'ip'
        },
        aggregations: {
          tm: {
            top_metrics: {
              metrics: {
                field: 'm'
              },
              sort: {
                s: 'desc'
              },
              size: 1
            }
          },
          having_tm: {
            bucket_selector: {
              buckets_path: {
                top_m: 'tm[m]'
              },
              script: 'params.top_m < 1000'
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "test*",
  filter_path: "aggregations",
  aggs: {
    ip: {
      terms: {
        field: "ip",
      },
      aggs: {
        tm: {
          top_metrics: {
            metrics: {
              field: "m",
            },
            sort: {
              s: "desc",
            },
            size: 1,
          },
        },
        having_tm: {
          bucket_selector: {
            buckets_path: {
              top_m: "tm[m]",
            },
            script: "params.top_m < 1000",
          },
        },
      },
    },
  },
});
console.log(response);
POST /test*/_search?filter_path=aggregations
{
  "aggs": {
    "ip": {
      "terms": {
        "field": "ip"
      },
      "aggs": {
        "tm": {
          "top_metrics": {
            "metrics": {"field": "m"},
            "sort": {"s": "desc"},
            "size": 1
          }
        },
        "having_tm": {
          "bucket_selector": {
            "buckets_path": {
              "top_m": "tm[m]"
            },
            "script": "params.top_m < 1000"
          }
        }
      }
    }
  }
}

buckets_path 使用 top_metrics 聚合的名称 tm,以及提供聚合值的指标的关键字,即 m。