过滤器聚合

一种多桶聚合,其中每个桶包含与 查询 匹配的文档。

示例

response = client.bulk(
  index: 'logs',
  refresh: true,
  body: [
    {
      index: {
        _id: 1
      }
    },
    {
      body: 'warning: page could not be rendered'
    },
    {
      index: {
        _id: 2
      }
    },
    {
      body: 'authentication error'
    },
    {
      index: {
        _id: 3
      }
    },
    {
      body: 'warning: connection timed out'
    }
  ]
)
puts response

response = client.search(
  index: 'logs',
  body: {
    size: 0,
    aggregations: {
      messages: {
        filters: {
          filters: {
            errors: {
              match: {
                body: 'error'
              }
            },
            warnings: {
              match: {
                body: 'warning'
              }
            }
          }
        }
      }
    }
  }
)
puts response
PUT /logs/_bulk?refresh
{ "index" : { "_id" : 1 } }
{ "body" : "warning: page could not be rendered" }
{ "index" : { "_id" : 2 } }
{ "body" : "authentication error" }
{ "index" : { "_id" : 3 } }
{ "body" : "warning: connection timed out" }

GET logs/_search
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "filters" : {
          "errors" :   { "match" : { "body" : "error"   }},
          "warnings" : { "match" : { "body" : "warning" }}
        }
      }
    }
  }
}

在上面的示例中,我们分析日志消息。聚合将构建两个日志消息集合(桶):一个包含所有含有 error 的消息,另一个包含所有含有 warning 的消息。

响应

{
  "took": 9,
  "timed_out": false,
  "_shards": ...,
  "hits": ...,
  "aggregations": {
    "messages": {
      "buckets": {
        "errors": {
          "doc_count": 1
        },
        "warnings": {
          "doc_count": 2
        }
      }
    }
  }
}

匿名过滤器

filters 字段也可以作为过滤器数组提供,如以下请求所示

response = client.search(
  index: 'logs',
  body: {
    size: 0,
    aggregations: {
      messages: {
        filters: {
          filters: [
            {
              match: {
                body: 'error'
              }
            },
            {
              match: {
                body: 'warning'
              }
            }
          ]
        }
      }
    }
  }
)
puts response
GET logs/_search
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "filters" : [
          { "match" : { "body" : "error"   }},
          { "match" : { "body" : "warning" }}
        ]
      }
    }
  }
}

过滤后的桶按请求中提供的顺序返回。此示例的响应将是

{
  "took": 4,
  "timed_out": false,
  "_shards": ...,
  "hits": ...,
  "aggregations": {
    "messages": {
      "buckets": [
        {
          "doc_count": 1
        },
        {
          "doc_count": 2
        }
      ]
    }
  }
}

其他桶

可以设置 other_bucket 参数,以在响应中添加一个桶,该桶将包含与任何给定过滤器都不匹配的所有文档。此参数可以取以下值:

false
不计算 其他 桶
true
返回 其他 桶:如果使用命名过滤器,则它是一个具名的桶(默认名为 _other_);如果使用匿名过滤器,则它是最后一个桶

可以使用 other_bucket_key 参数将 其他 桶的键设置为除默认 _other_ 之外的其他值。设置此参数将隐式将 other_bucket 参数设置为 true

以下代码段展示了一个请求,它要求将 其他 桶命名为 other_messages。

response = client.index(
  index: 'logs',
  id: 4,
  refresh: true,
  body: {
    body: 'info: user Bob logged out'
  }
)
puts response

response = client.search(
  index: 'logs',
  body: {
    size: 0,
    aggregations: {
      messages: {
        filters: {
          other_bucket_key: 'other_messages',
          filters: {
            errors: {
              match: {
                body: 'error'
              }
            },
            warnings: {
              match: {
                body: 'warning'
              }
            }
          }
        }
      }
    }
  }
)
puts response
PUT logs/_doc/4?refresh
{
  "body": "info: user Bob logged out"
}

GET logs/_search
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "other_bucket_key": "other_messages",
        "filters" : {
          "errors" :   { "match" : { "body" : "error"   }},
          "warnings" : { "match" : { "body" : "warning" }}
        }
      }
    }
  }
}

响应将类似于以下内容

{
  "took": 3,
  "timed_out": false,
  "_shards": ...,
  "hits": ...,
  "aggregations": {
    "messages": {
      "buckets": {
        "errors": {
          "doc_count": 1
        },
        "warnings": {
          "doc_count": 2
        },
        "other_messages": {
          "doc_count": 1
        }
      }
    }
  }
}

非键控响应

默认情况下,命名过滤器聚合将桶作为对象返回。但在某些排序情况下,例如 桶排序,JSON 无法保证对象中元素的顺序。可以使用 keyed 参数指定将桶作为对象数组返回。此参数可以取以下值:

true
(默认) 将桶作为对象返回
false
将桶作为对象数组返回

此参数被 匿名过滤器 忽略。

示例

response = client.search(
  index: 'sales',
  size: 0,
  filter_path: 'aggregations',
  body: {
    aggregations: {
      the_filter: {
        filters: {
          keyed: false,
          filters: {
            "t-shirt": {
              term: {
                type: 't-shirt'
              }
            },
            hat: {
              term: {
                type: 'hat'
              }
            }
          }
        },
        aggregations: {
          avg_price: {
            avg: {
              field: 'price'
            }
          },
          sort_by_avg_price: {
            bucket_sort: {
              sort: {
                avg_price: 'asc'
              }
            }
          }
        }
      }
    }
  }
)
puts response
POST /sales/_search?size=0&filter_path=aggregations
{
  "aggs": {
    "the_filter": {
      "filters": {
        "keyed": false,
        "filters": {
          "t-shirt": { "term": { "type": "t-shirt" } },
          "hat": { "term": { "type": "hat" } }
        }
      },
      "aggs": {
        "avg_price": { "avg": { "field": "price" } },
        "sort_by_avg_price": {
          "bucket_sort": { "sort": { "avg_price": "asc" } }
        }
      }
    }
  }
}

响应

{
  "aggregations": {
    "the_filter": {
      "buckets": [
        {
          "key": "t-shirt",
          "doc_count": 3,
          "avg_price": { "value": 128.33333333333334 }
        },
        {
          "key": "hat",
          "doc_count": 3,
          "avg_price": { "value": 150.0 }
        }
      ]
    }
  }
}