Let’s say we have an Elasticsearch index called strings with a field pattern of type {"type": "keyword"}.
Now we want to do the equivalent of the MongoDB query db.getCollection('...').distinct('pattern'):
Solution:
In Python you can use the iterate_distinct_field() helper from this previous post on Elasticsearch distinct. Full example:
from elasticsearch import Elasticsearch

es = Elasticsearch()


def iterate_distinct_field(es, fieldname, pagesize=250, **kwargs):
    """
    Helper to iterate over all distinct values of a field in ElasticSearch.

    Uses a composite aggregation and pages through it, ``pagesize`` buckets
    per request, by feeding the ``after_key`` of each response back in as the
    ``after`` parameter of the next request.

    NOTE: composite aggregation buckets are paged in the order of the source
    values (i.e. by key), NOT by number of occurrences.

    :param es: ElasticSearch client; ``es.search()`` is called once per page.
    :param fieldname: Field to aggregate on. It is also used as the name of
        the composite source, so it is the key inside each bucket's ``"key"``.
    :param pagesize: Maximum number of buckets requested per page.
    :param kwargs: Passed through unchanged to ``es.search()``
        (e.g. ``index="strings"``).
    :return: Generator yielding one bucket dict per distinct value,
        e.g. ``{'key': {'pattern': 'mypattern'}, 'doc_count': 315}``.
    """
    composite_query = {
        "size": pagesize,
        "sources": [{
            fieldname: {
                "terms": {
                    "field": fieldname
                }
            }
        }]
    }
    # Iterate over pages
    while True:
        result = es.search(**kwargs, body={
            "aggs": {
                "values": {
                    "composite": composite_query
                }
            }
        })
        values = result["aggregations"]["values"]
        # Yield each bucket of the current page
        yield from values["buckets"]
        # No "after_key" in the response means there are no more pages
        if "after_key" not in values:
            break
        # Resume the next request right after the last bucket of this page
        composite_query["after"] = values["after_key"]


# Usage example.
# "pattern" is mapped as {"type": "keyword"} itself, so we aggregate on it
# directly - there is no "pattern.keyword" sub-field in this index.
for result in iterate_distinct_field(es, fieldname="pattern", index="strings"):
    print(result)  # e.g. {'key': {'pattern': 'mypattern'}, 'doc_count': 315}