_doc_count 字段

编辑

桶聚合总是返回一个名为 doc_count 的字段,显示每个桶中聚合和分区的文档数量。doc_count 值的计算非常简单。对于每个桶中收集的每个文档,doc_count 都会增加 1。

虽然这种简单的方法在计算单个文档的聚合时很有效,但它无法准确表示存储预聚合数据(例如 histogram 或 aggregate_metric_double 字段)的文档,因为一个摘要字段可能代表多个文档。

为了在处理预聚合数据时能够正确计算文档数量,我们引入了一种名为 _doc_count 的元数据字段类型。_doc_count 必须始终是一个正整数,表示在单个摘要字段中聚合的文档数量。

当字段 _doc_count 被添加到文档时,所有桶聚合都将遵守其值,并将桶的 doc_count 增加该字段的值。如果一个文档不包含任何 _doc_count 字段,则默认情况下会隐式使用 _doc_count = 1。

  • _doc_count 字段每个文档只能存储一个正整数。不允许使用嵌套数组。
  • 如果文档不包含任何 _doc_count 字段,聚合器将增加 1,这是默认行为。

示例

编辑

以下 创建索引 API 请求创建一个新索引,其中包含以下字段映射

  • my_histogram,一个用于存储百分比数据的 histogram 字段
  • my_text,一个用于存储直方图标题的 keyword 字段
resp = client.indices.create(
    index="my_index",
    mappings={
        "properties": {
            "my_histogram": {
                "type": "histogram"
            },
            "my_text": {
                "type": "keyword"
            }
        }
    },
)
print(resp)
response = client.indices.create(
  index: 'my_index',
  body: {
    mappings: {
      properties: {
        my_histogram: {
          type: 'histogram'
        },
        my_text: {
          type: 'keyword'
        }
      }
    }
  }
)
puts response
const response = await client.indices.create({
  index: "my_index",
  mappings: {
    properties: {
      my_histogram: {
        type: "histogram",
      },
      my_text: {
        type: "keyword",
      },
    },
  },
});
console.log(response);
PUT my_index
{
  "mappings" : {
    "properties" : {
      "my_histogram" : {
        "type" : "histogram"
      },
      "my_text" : {
        "type" : "keyword"
      }
    }
  }
}

以下索引 API 请求存储两个直方图的预聚合数据:histogram_1 和 histogram_2。

resp = client.index(
    index="my_index",
    id="1",
    document={
        "my_text": "histogram_1",
        "my_histogram": {
            "values": [
                0.1,
                0.2,
                0.3,
                0.4,
                0.5
            ],
            "counts": [
                3,
                7,
                23,
                12,
                6
            ]
        },
        "_doc_count": 45
    },
)
print(resp)

resp1 = client.index(
    index="my_index",
    id="2",
    document={
        "my_text": "histogram_2",
        "my_histogram": {
            "values": [
                0.1,
                0.25,
                0.35,
                0.4,
                0.45,
                0.5
            ],
            "counts": [
                8,
                17,
                8,
                7,
                6,
                2
            ]
        },
        "_doc_count": 62
    },
)
print(resp1)
response = client.index(
  index: 'my_index',
  id: 1,
  body: {
    my_text: 'histogram_1',
    my_histogram: {
      values: [
        0.1,
        0.2,
        0.3,
        0.4,
        0.5
      ],
      counts: [
        3,
        7,
        23,
        12,
        6
      ]
    },
    _doc_count: 45
  }
)
puts response

response = client.index(
  index: 'my_index',
  id: 2,
  body: {
    my_text: 'histogram_2',
    my_histogram: {
      values: [
        0.1,
        0.25,
        0.35,
        0.4,
        0.45,
        0.5
      ],
      counts: [
        8,
        17,
        8,
        7,
        6,
        2
      ]
    },
    _doc_count: 62
  }
)
puts response
const response = await client.index({
  index: "my_index",
  id: 1,
  document: {
    my_text: "histogram_1",
    my_histogram: {
      values: [0.1, 0.2, 0.3, 0.4, 0.5],
      counts: [3, 7, 23, 12, 6],
    },
    _doc_count: 45,
  },
});
console.log(response);

const response1 = await client.index({
  index: "my_index",
  id: 2,
  document: {
    my_text: "histogram_2",
    my_histogram: {
      values: [0.1, 0.25, 0.35, 0.4, 0.45, 0.5],
      counts: [8, 17, 8, 7, 6, 2],
    },
    _doc_count: 62,
  },
});
console.log(response1);
PUT my_index/_doc/1
{
  "my_text" : "histogram_1",
  "my_histogram" : {
      "values" : [0.1, 0.2, 0.3, 0.4, 0.5],
      "counts" : [3, 7, 23, 12, 6]
   },
  "_doc_count": 45 
}

PUT my_index/_doc/2
{
  "my_text" : "histogram_2",
  "my_histogram" : {
      "values" : [0.1, 0.25, 0.35, 0.4, 0.45, 0.5],
      "counts" : [8, 17, 8, 7, 6, 2]
   },
  "_doc_count": 62 
}

字段 _doc_count 必须是一个正整数,表示为生成每个直方图而聚合的文档数量。

如果我们在 my_index 上运行以下词项聚合:

resp = client.search(
    aggs={
        "histogram_titles": {
            "terms": {
                "field": "my_text"
            }
        }
    },
)
print(resp)
response = client.search(
  body: {
    aggregations: {
      histogram_titles: {
        terms: {
          field: 'my_text'
        }
      }
    }
  }
)
puts response
const response = await client.search({
  aggs: {
    histogram_titles: {
      terms: {
        field: "my_text",
      },
    },
  },
});
console.log(response);
GET /_search
{
    "aggs" : {
        "histogram_titles" : {
            "terms" : { "field" : "my_text" }
        }
    }
}

我们将得到以下响应

{
    ...
    "aggregations" : {
        "histogram_titles" : {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets" : [
                {
                    "key" : "histogram_2",
                    "doc_count" : 62
                },
                {
                    "key" : "histogram_1",
                    "doc_count" : 45
                }
            ]
        }
    }
}