过滤器聚合

编辑

一个多桶聚合,其中每个桶包含与查询匹配的文档。

示例

resp = client.bulk(
    index="logs",
    refresh=True,
    operations=[
        {
            "index": {
                "_id": 1
            }
        },
        {
            "body": "warning: page could not be rendered"
        },
        {
            "index": {
                "_id": 2
            }
        },
        {
            "body": "authentication error"
        },
        {
            "index": {
                "_id": 3
            }
        },
        {
            "body": "warning: connection timed out"
        }
    ],
)
print(resp)

resp1 = client.search(
    index="logs",
    size=0,
    aggs={
        "messages": {
            "filters": {
                "filters": {
                    "errors": {
                        "match": {
                            "body": "error"
                        }
                    },
                    "warnings": {
                        "match": {
                            "body": "warning"
                        }
                    }
                }
            }
        }
    },
)
print(resp1)
response = client.bulk(
  index: 'logs',
  refresh: true,
  body: [
    {
      index: {
        _id: 1
      }
    },
    {
      body: 'warning: page could not be rendered'
    },
    {
      index: {
        _id: 2
      }
    },
    {
      body: 'authentication error'
    },
    {
      index: {
        _id: 3
      }
    },
    {
      body: 'warning: connection timed out'
    }
  ]
)
puts response

response = client.search(
  index: 'logs',
  body: {
    size: 0,
    aggregations: {
      messages: {
        filters: {
          filters: {
            errors: {
              match: {
                body: 'error'
              }
            },
            warnings: {
              match: {
                body: 'warning'
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.bulk({
  index: "logs",
  refresh: "true",
  operations: [
    {
      index: {
        _id: 1,
      },
    },
    {
      body: "warning: page could not be rendered",
    },
    {
      index: {
        _id: 2,
      },
    },
    {
      body: "authentication error",
    },
    {
      index: {
        _id: 3,
      },
    },
    {
      body: "warning: connection timed out",
    },
  ],
});
console.log(response);

const response1 = await client.search({
  index: "logs",
  size: 0,
  aggs: {
    messages: {
      filters: {
        filters: {
          errors: {
            match: {
              body: "error",
            },
          },
          warnings: {
            match: {
              body: "warning",
            },
          },
        },
      },
    },
  },
});
console.log(response1);
PUT /logs/_bulk?refresh
{ "index" : { "_id" : 1 } }
{ "body" : "warning: page could not be rendered" }
{ "index" : { "_id" : 2 } }
{ "body" : "authentication error" }
{ "index" : { "_id" : 3 } }
{ "body" : "warning: connection timed out" }

GET logs/_search
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "filters" : {
          "errors" :   { "match" : { "body" : "error"   }},
          "warnings" : { "match" : { "body" : "warning" }}
        }
      }
    }
  }
}

在上面的例子中,我们分析日志消息。该聚合将构建两个日志消息集合(桶):一个包含所有含有 error 的日志消息,另一个包含所有含有 warning 的日志消息。

响应

{
  "took": 9,
  "timed_out": false,
  "_shards": ...,
  "hits": ...,
  "aggregations": {
    "messages": {
      "buckets": {
        "errors": {
          "doc_count": 1
        },
        "warnings": {
          "doc_count": 2
        }
      }
    }
  }
}

匿名过滤器

编辑

filters 字段也可以以过滤器数组的形式提供,如下面的请求所示:

resp = client.search(
    index="logs",
    size=0,
    aggs={
        "messages": {
            "filters": {
                "filters": [
                    {
                        "match": {
                            "body": "error"
                        }
                    },
                    {
                        "match": {
                            "body": "warning"
                        }
                    }
                ]
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'logs',
  body: {
    size: 0,
    aggregations: {
      messages: {
        filters: {
          filters: [
            {
              match: {
                body: 'error'
              }
            },
            {
              match: {
                body: 'warning'
              }
            }
          ]
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "logs",
  size: 0,
  aggs: {
    messages: {
      filters: {
        filters: [
          {
            match: {
              body: "error",
            },
          },
          {
            match: {
              body: "warning",
            },
          },
        ],
      },
    },
  },
});
console.log(response);
GET logs/_search
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "filters" : [
          { "match" : { "body" : "error"   }},
          { "match" : { "body" : "warning" }}
        ]
      }
    }
  }
}

过滤后的桶按照请求中提供的顺序返回。此示例的响应将是:

{
  "took": 4,
  "timed_out": false,
  "_shards": ...,
  "hits": ...,
  "aggregations": {
    "messages": {
      "buckets": [
        {
          "doc_count": 1
        },
        {
          "doc_count": 2
        }
      ]
    }
  }
}

其他桶

编辑

other_bucket 参数可以设置为在响应中添加一个桶,该桶将包含所有不匹配任何给定过滤器的文档。此参数的值可以如下所示:

false
不计算 other 桶
true
返回 other 桶:如果使用命名过滤器,则作为一个具名桶返回(默认名称为 _other_);如果使用匿名过滤器,则作为最后一个桶返回

other_bucket_key 参数可用于将 other 桶的键设置为除默认 _other_ 之外的值。设置此参数会隐式地将 other_bucket 参数设置为 true。

以下代码片段展示了一个将 other 桶命名为 other_messages 的请求。

resp = client.index(
    index="logs",
    id="4",
    refresh=True,
    document={
        "body": "info: user Bob logged out"
    },
)
print(resp)

resp1 = client.search(
    index="logs",
    size=0,
    aggs={
        "messages": {
            "filters": {
                "other_bucket_key": "other_messages",
                "filters": {
                    "errors": {
                        "match": {
                            "body": "error"
                        }
                    },
                    "warnings": {
                        "match": {
                            "body": "warning"
                        }
                    }
                }
            }
        }
    },
)
print(resp1)
response = client.index(
  index: 'logs',
  id: 4,
  refresh: true,
  body: {
    body: 'info: user Bob logged out'
  }
)
puts response

response = client.search(
  index: 'logs',
  body: {
    size: 0,
    aggregations: {
      messages: {
        filters: {
          other_bucket_key: 'other_messages',
          filters: {
            errors: {
              match: {
                body: 'error'
              }
            },
            warnings: {
              match: {
                body: 'warning'
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.index({
  index: "logs",
  id: 4,
  refresh: "true",
  document: {
    body: "info: user Bob logged out",
  },
});
console.log(response);

const response1 = await client.search({
  index: "logs",
  size: 0,
  aggs: {
    messages: {
      filters: {
        other_bucket_key: "other_messages",
        filters: {
          errors: {
            match: {
              body: "error",
            },
          },
          warnings: {
            match: {
              body: "warning",
            },
          },
        },
      },
    },
  },
});
console.log(response1);
PUT logs/_doc/4?refresh
{
  "body": "info: user Bob logged out"
}

GET logs/_search
{
  "size": 0,
  "aggs" : {
    "messages" : {
      "filters" : {
        "other_bucket_key": "other_messages",
        "filters" : {
          "errors" :   { "match" : { "body" : "error"   }},
          "warnings" : { "match" : { "body" : "warning" }}
        }
      }
    }
  }
}

响应将类似如下:

{
  "took": 3,
  "timed_out": false,
  "_shards": ...,
  "hits": ...,
  "aggregations": {
    "messages": {
      "buckets": {
        "errors": {
          "doc_count": 1
        },
        "warnings": {
          "doc_count": 2
        },
        "other_messages": {
          "doc_count": 1
        }
      }
    }
  }
}

非键控响应

编辑

默认情况下,命名过滤器聚合将桶作为对象返回。但是在某些需要排序的情况下(例如桶排序),JSON 不保证对象中元素的顺序。您可以使用 keyed 参数指定将桶以对象数组的形式返回。此参数的值可以如下所示:

true
(默认)将桶作为对象返回
false
将桶作为对象数组返回

匿名过滤器会忽略此参数。

示例

resp = client.search(
    index="sales",
    size="0",
    filter_path="aggregations",
    aggs={
        "the_filter": {
            "filters": {
                "keyed": False,
                "filters": {
                    "t-shirt": {
                        "term": {
                            "type": "t-shirt"
                        }
                    },
                    "hat": {
                        "term": {
                            "type": "hat"
                        }
                    }
                }
            },
            "aggs": {
                "avg_price": {
                    "avg": {
                        "field": "price"
                    }
                },
                "sort_by_avg_price": {
                    "bucket_sort": {
                        "sort": {
                            "avg_price": "asc"
                        }
                    }
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'sales',
  size: 0,
  filter_path: 'aggregations',
  body: {
    aggregations: {
      the_filter: {
        filters: {
          keyed: false,
          filters: {
            "t-shirt": {
              term: {
                type: 't-shirt'
              }
            },
            hat: {
              term: {
                type: 'hat'
              }
            }
          }
        },
        aggregations: {
          avg_price: {
            avg: {
              field: 'price'
            }
          },
          sort_by_avg_price: {
            bucket_sort: {
              sort: {
                avg_price: 'asc'
              }
            }
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "sales",
  size: 0,
  filter_path: "aggregations",
  aggs: {
    the_filter: {
      filters: {
        keyed: false,
        filters: {
          "t-shirt": {
            term: {
              type: "t-shirt",
            },
          },
          hat: {
            term: {
              type: "hat",
            },
          },
        },
      },
      aggs: {
        avg_price: {
          avg: {
            field: "price",
          },
        },
        sort_by_avg_price: {
          bucket_sort: {
            sort: {
              avg_price: "asc",
            },
          },
        },
      },
    },
  },
});
console.log(response);
POST /sales/_search?size=0&filter_path=aggregations
{
  "aggs": {
    "the_filter": {
      "filters": {
        "keyed": false,
        "filters": {
          "t-shirt": { "term": { "type": "t-shirt" } },
          "hat": { "term": { "type": "hat" } }
        }
      },
      "aggs": {
        "avg_price": { "avg": { "field": "price" } },
        "sort_by_avg_price": {
          "bucket_sort": { "sort": { "avg_price": "asc" } }
        }
      }
    }
  }
}

响应

{
  "aggregations": {
    "the_filter": {
      "buckets": [
        {
          "key": "t-shirt",
          "doc_count": 3,
          "avg_price": { "value": 128.33333333333334 }
        },
        {
          "key": "hat",
          "doc_count": 3,
          "avg_price": { "value": 150.0 }
        }
      ]
    }
  }
}