In Elasticsearch, I'm using a `bucket_script`
to calculate a difference between two values, then a `bucket_selector`
to keep only the buckets matching a particular condition, and finally a `bucket_sort`
to pick the top 100 buckets.
All of that works fine. What I would like, though, is to get the total number of buckets after the selector but before the sort. I know I can get a count using `stats_bucket`,
but I don't know where to put it so that it counts the buckets before the sort is applied.
To give a partial example:
"aggs": {
"myCount": {
"stats_bucket": {
"buckets_path": "myAgg._count"
}
},
"myAgg": {
"aggs": {
"start": {
"filter": {
"bool": {
"must": [
{
"range": {
"start": {
"format": "yyyy-MM-dd'T'HH:mm:ss",
"gte": "2016-10-01T00:00:00",
"lte": "2024-03-18T23:59:59"
}
}
},
{
"range": {
"end": {
"format": "yyyy-MM-dd'T'HH:mm:ss",
"gte": "2024-03-11T00:00:00"
}
}
}
]
}
}
},
"end": {
"filter": {
"bool": {
"must": [
{
"range": {
"start": {
"format": "yyyy-MM-dd'T'HH:mm:ss",
"gte": "2016-10-01T00:00:00",
"lte": "2025-03-02T23:59:59"
}
}
},
{
"range": {
"end": {
"format": "yyyy-MM-dd'T'HH:mm:ss",
"gte": "2025-02-23T00:00:00"
}
}
}
]
}
}
},
"diff": {
"bucket_script": {
"script": {
"source": "params._end - params._start"
},
"buckets_path": {
"_start": "start._count",
"_end": "end._count"
}
}
},
"diff_filter": {
"bucket_selector": {
"buckets_path": {
"diffBucket": "diff"
},
"script": "params.diffBucket > 0"
}
},
"diffSort": {
"bucket_sort": {
"from": 0,
"size": 100,
"sort": [
{
"diff": {
"order": "desc"
}
}
]
}
}
},
"terms": {
"field": "myField.label",
"shard_size": 2000,
"size": 1000
}
}
}
Which will give you a result like:
"myCount": {
"count": 100,
"min": 1189,
"max": 270021,
"avg": 31513.92,
"sum": 3151392
}
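As you can see, the count is 100 because the `bucket_sort` has already truncated the result to the top 100 buckets. Is there a way to get the total number of buckets that pass the selector, before the sort truncates them?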