elasticsearch - Count buckets before sorting - Stack Overflow

admin2025-04-18  0

In Elasticsearch, I'm using a bucket_script to calculate the difference between two values, then a bucket_selector to keep only the buckets matching a particular condition, and finally a bucket_sort to pick the top 100 buckets.

All that works fine. But I would like to be able to get the total number of buckets after the selector, but before the sort. I know I can get a count using stats_bucket, but I don't know where I can put it to get the count before the sort.

To give a partial example:

"aggs": {
    "myCount": {
      "stats_bucket": {
        "buckets_path": "myAgg._count"
      }
    },
    "myAgg": {
      "aggs": {
        "start": {
          "filter": {
            "bool": {
              "must": [
                {
                  "range": {
                    "start": {
                      "format": "yyyy-MM-dd'T'HH:mm:ss",
                      "gte": "2016-10-01T00:00:00",
                      "lte": "2024-03-18T23:59:59"
                    }
                  }
                },
                {
                  "range": {
                    "end": {
                      "format": "yyyy-MM-dd'T'HH:mm:ss",
                      "gte": "2024-03-11T00:00:00"
                    }
                  }
                }
              ]
            }
          }
        },
        "end": {
          "filter": {
            "bool": {
              "must": [
                {
                  "range": {
                    "start": {
                      "format": "yyyy-MM-dd'T'HH:mm:ss",
                      "gte": "2016-10-01T00:00:00",
                      "lte": "2025-03-02T23:59:59"
                    }
                  }
                },
                {
                  "range": {
                    "end": {
                      "format": "yyyy-MM-dd'T'HH:mm:ss",
                      "gte": "2025-02-23T00:00:00"
                    }
                  }
                }
              ]
            }
          }
        },
        "diff": {
          "bucket_script": {
            "script": {
              "source": "params._end - params._start"
            },
            "buckets_path": {
              "_start": "start._count",
              "_end": "end._count"
            }
          }
        },
        "diff_filter": {
          "bucket_selector": {
            "buckets_path": {
              "diffBucket": "diff"
            },
            "script": "params.diffBucket > 0"
          }
        },
        "diffSort": {
          "bucket_sort": {
            "from": 0,
            "size": 100,
            "sort": [
              {
                "diff": {
                  "order": "desc"
                }
              }
            ]
          }
        }
      },
      "terms": {
        "field": "myField.label",
        "shard_size": 2000,
        "size": 1000
      }
    }
  }

Which will give you a result like:

    "myCount": {
      "count": 100,
      "min": 1189,
      "max": 270021,
      "avg": 31513.92,
      "sum": 3151392
    }

As you can see, the count is 100 because the bucket_sort keeps only the top 100 buckets. Is there a way to get the total number of buckets that pass the selector, before the sort truncates them?

转载请注明原文地址:http://conceptsofalgorithm.com/Algorithm/1744964808a277132.html

最新回复(0)