使用 可以用 SQL 进行 elasticsearch 的查询。按已有字段来分桶是最简单的一种分桶的方式。很多时候我们希望用于分桶的key是需要先经过计算而来的。其中最简单的一种计算方式是按区间段来算histogram。用于计算的字段可以是时间戳,也可能是数值。
GROUP BY DATE_TRUNC('year',"date")
SQL(其中"date"是指一个列的名字,之所以要加引号是因为date是关键字)
$ cat << EOF | ./es_query.py http://127.0.0.1:9200select year, max(adj_close) from quote where symbol='AAPL' group by date_trunc('year',"date") as yearEOF
{"max(adj_close)": 50.0, "year": "1981-01-01T00:00:00.000+08:00"}{"max(adj_close)": 51.0, "year": "1982-01-01T00:00:00.000+08:00"}{"max(adj_close)": 94.0, "year": "1983-01-01T00:00:00.000+08:00"}{"max(adj_close)": 50.0, "year": "1984-01-01T00:00:00.000+08:00"}{"max(adj_close)": 46.0, "year": "1985-01-01T00:00:00.000+08:00"}{"max(adj_close)": 66.0, "year": "1986-01-01T00:00:00.000+08:00"}{"max(adj_close)": 179.0, "year": "1987-01-01T00:00:00.000+08:00"}{"max(adj_close)": 143.0, "year": "1988-01-01T00:00:00.000+08:00"}{"max(adj_close)": 152.0, "year": "1989-01-01T00:00:00.000+08:00"}{"max(adj_close)": 147.0, "year": "1990-01-01T00:00:00.000+08:00"}{"max(adj_close)": 228.0, "year": "1991-01-01T00:00:00.000+08:00"}{"max(adj_close)": 221.0, "year": "1992-01-01T00:00:00.000+08:00"}{"max(adj_close)": 207.0, "year": "1993-01-01T00:00:00.000+08:00"}{"max(adj_close)": 141.0, "year": "1994-01-01T00:00:00.000+08:00"}{"max(adj_close)": 162.0, "year": "1995-01-01T00:00:00.000+08:00"}{"max(adj_close)": 115.0, "year": "1996-01-01T00:00:00.000+08:00"}{"max(adj_close)": 96.0, "year": "1997-01-01T00:00:00.000+08:00"}{"max(adj_close)": 142.0, "year": "1998-01-01T00:00:00.000+08:00"}{"max(adj_close)": 389.0, "year": "1999-01-01T00:00:00.000+08:00"}{"max(adj_close)": 476.0, "year": "2000-01-01T00:00:00.000+08:00"}{"max(adj_close)": 175.0, "year": "2001-01-01T00:00:00.000+08:00"}{"max(adj_close)": 172.0, "year": "2002-01-01T00:00:00.000+08:00"}{"max(adj_close)": 164.0, "year": "2003-01-01T00:00:00.000+08:00"}{"max(adj_close)": 452.0, "year": "2004-01-01T00:00:00.000+08:00"}{"max(adj_close)": 991.0, "year": "2005-01-01T00:00:00.000+08:00"}{"max(adj_close)": 1214.0, "year": "2006-01-01T00:00:00.000+08:00"}{"max(adj_close)": 2643.0, "year": "2007-01-01T00:00:00.000+08:00"}{"max(adj_close)": 2578.0, "year": "2008-01-01T00:00:00.000+08:00"}{"max(adj_close)": 2799.0, "year": "2009-01-01T00:00:00.000+08:00"}{"max(adj_close)": 4305.0, "year": "2010-01-01T00:00:00.000+08:00"}{"max(adj_close)": 5586.0, "year": "2011-01-01T00:00:00.000+08:00"}{"max(adj_close)": 9328.0, "year": "2012-01-01T00:00:00.000+08:00"}{"max(adj_close)": 7800.0, "year": "2013-01-01T00:00:00.000+08:00"}{"max(adj_close)": 11637.0, "year": "2014-01-01T00:00:00.000+08:00"}{"max(adj_close)": 13067.0, "year": "2015-01-01T00:00:00.000+08:00"}{"max(adj_close)": 10478.0, "year": "2016-01-01T00:00:00.000+08:00"}
Elasticsearch
{ "query": { "term": { "symbol": "AAPL" } }, "aggs": { "year": { "date_histogram": { "field": "date", "interval": "year", "time_zone": "+08:00" }, "aggs": { "max(adj_close)": { "max": { "field": "adj_close" } } } } }, "size": 0}
{ "hits": { "hits": [], "total": 8790, "max_score": 0.0 }, "_shards": { "successful": 1, "failed": 0, "total": 1 }, "took": 6, "aggregations": { "year": { "buckets": [ { "max(adj_close)": { "value": 50.0 }, "key_as_string": "1981-01-01T00:00:00.000+08:00", "key": 347126400000, "doc_count": 185 }, { "max(adj_close)": { "value": 51.0 }, "key_as_string": "1982-01-01T00:00:00.000+08:00", "key": 378662400000, "doc_count": 253 }, { "max(adj_close)": { "value": 94.0 }, "key_as_string": "1983-01-01T00:00:00.000+08:00", "key": 410198400000, "doc_count": 252 }, { "max(adj_close)": { "value": 50.0 }, "key_as_string": "1984-01-01T00:00:00.000+08:00", "key": 441734400000, "doc_count": 253 }, { "max(adj_close)": { "value": 46.0 }, "key_as_string": "1985-01-01T00:00:00.000+08:00", "key": 473356800000, "doc_count": 253 }, { "max(adj_close)": { "value": 66.0 }, "key_as_string": "1986-01-01T00:00:00.000+08:00", "key": 504892800000, "doc_count": 253 }, { "max(adj_close)": { "value": 179.0 }, "key_as_string": "1987-01-01T00:00:00.000+08:00", "key": 536428800000, "doc_count": 253 }, { "max(adj_close)": { "value": 143.0 }, "key_as_string": "1988-01-01T00:00:00.000+08:00", "key": 567964800000, "doc_count": 253 }, { "max(adj_close)": { "value": 152.0 }, "key_as_string": "1989-01-01T00:00:00.000+08:00", "key": 599587200000, "doc_count": 252 }, { "max(adj_close)": { "value": 147.0 }, "key_as_string": "1990-01-01T00:00:00.000+08:00", "key": 631123200000, "doc_count": 253 }, { "max(adj_close)": { "value": 228.0 }, "key_as_string": "1991-01-01T00:00:00.000+08:00", "key": 662659200000, "doc_count": 253 }, { "max(adj_close)": { "value": 221.0 }, "key_as_string": "1992-01-01T00:00:00.000+08:00", "key": 694195200000, "doc_count": 254 }, { "max(adj_close)": { "value": 207.0 }, "key_as_string": "1993-01-01T00:00:00.000+08:00", "key": 725817600000, "doc_count": 253 }, { "max(adj_close)": { "value": 141.0 }, "key_as_string": "1994-01-01T00:00:00.000+08:00", "key": 757353600000, "doc_count": 252 }, { "max(adj_close)": { "value": 162.0 }, "key_as_string": "1995-01-01T00:00:00.000+08:00", "key": 788889600000, "doc_count": 252 }, { "max(adj_close)": { "value": 115.0 }, "key_as_string": "1996-01-01T00:00:00.000+08:00", "key": 820425600000, "doc_count": 254 }, { "max(adj_close)": { "value": 96.0 }, "key_as_string": "1997-01-01T00:00:00.000+08:00", "key": 852048000000, "doc_count": 253 }, { "max(adj_close)": { "value": 142.0 }, "key_as_string": "1998-01-01T00:00:00.000+08:00", "key": 883584000000, "doc_count": 252 }, { "max(adj_close)": { "value": 389.0 }, "key_as_string": "1999-01-01T00:00:00.000+08:00", "key": 915120000000, "doc_count": 252 }, { "max(adj_close)": { "value": 476.0 }, "key_as_string": "2000-01-01T00:00:00.000+08:00", "key": 946656000000, "doc_count": 252 }, { "max(adj_close)": { "value": 175.0 }, "key_as_string": "2001-01-01T00:00:00.000+08:00", "key": 978278400000, "doc_count": 248 }, { "max(adj_close)": { "value": 172.0 }, "key_as_string": "2002-01-01T00:00:00.000+08:00", "key": 1009814400000, "doc_count": 252 }, { "max(adj_close)": { "value": 164.0 }, "key_as_string": "2003-01-01T00:00:00.000+08:00", "key": 1041350400000, "doc_count": 252 }, { "max(adj_close)": { "value": 452.0 }, "key_as_string": "2004-01-01T00:00:00.000+08:00", "key": 1072886400000, "doc_count": 252 }, { "max(adj_close)": { "value": 991.0 }, "key_as_string": "2005-01-01T00:00:00.000+08:00", "key": 1104508800000, "doc_count": 252 }, { "max(adj_close)": { "value": 1214.0 }, "key_as_string": "2006-01-01T00:00:00.000+08:00", "key": 1136044800000, "doc_count": 251 }, { "max(adj_close)": { "value": 2643.0 }, "key_as_string": "2007-01-01T00:00:00.000+08:00", "key": 1167580800000, "doc_count": 251 }, { "max(adj_close)": { "value": 2578.0 }, "key_as_string": "2008-01-01T00:00:00.000+08:00", "key": 1199116800000, "doc_count": 253 }, { "max(adj_close)": { "value": 2799.0 }, "key_as_string": "2009-01-01T00:00:00.000+08:00", "key": 1230739200000, "doc_count": 252 }, { "max(adj_close)": { "value": 4305.0 }, "key_as_string": "2010-01-01T00:00:00.000+08:00", "key": 1262275200000, "doc_count": 252 }, { "max(adj_close)": { "value": 5586.0 }, "key_as_string": "2011-01-01T00:00:00.000+08:00", "key": 1293811200000, "doc_count": 252 }, { "max(adj_close)": { "value": 9328.0 }, "key_as_string": "2012-01-01T00:00:00.000+08:00", "key": 1325347200000, "doc_count": 250 }, { "max(adj_close)": { "value": 7800.0 }, "key_as_string": "2013-01-01T00:00:00.000+08:00", "key": 1356969600000, "doc_count": 252 }, { "max(adj_close)": { "value": 11637.0 }, "key_as_string": "2014-01-01T00:00:00.000+08:00", "key": 1388505600000, "doc_count": 252 }, { "max(adj_close)": { "value": 13067.0 }, "key_as_string": "2015-01-01T00:00:00.000+08:00", "key": 1420041600000, "doc_count": 252 }, { "max(adj_close)": { "value": 10478.0 }, "key_as_string": "2016-01-01T00:00:00.000+08:00", "key": 1451577600000, "doc_count": 30 } ] } }, "timed_out": false}
Profile
[ { "query": [ { "query_type": "TermQuery", "lucene": "symbol:AAPL", "time": "1.496726000ms", "breakdown": { "score": 0, "create_weight": 725453, "next_doc": 380669, "match": 0, "build_scorer": 390604, "advance": 0 } } ], "rewrite_time": 2705, "collector": [ { "name": "MultiCollector", "reason": "search_multi", "time": "6.185877000ms", "children": [ { "name": "TotalHitCountCollector", "reason": "search_count", "time": "0.4641090000ms" }, { "name": "HistogramAggregator: [year]", "reason": "aggregation", "time": "4.456138000ms" } ] } ] }]
TO_CHAR(DATE_TRUNC('year',"date"), 'yyyy')
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200select year, max(adj_close) from quote where symbol='AAPL' group by to_char(date_trunc('year',"date"), 'yyyy') as yearEOF
{"max(adj_close)": 50.0, "year": "1981"}{"max(adj_close)": 51.0, "year": "1982"}{"max(adj_close)": 94.0, "year": "1983"}{"max(adj_close)": 50.0, "year": "1984"}{"max(adj_close)": 46.0, "year": "1985"}{"max(adj_close)": 66.0, "year": "1986"}{"max(adj_close)": 179.0, "year": "1987"}{"max(adj_close)": 143.0, "year": "1988"}{"max(adj_close)": 152.0, "year": "1989"}{"max(adj_close)": 147.0, "year": "1990"}{"max(adj_close)": 228.0, "year": "1991"}{"max(adj_close)": 221.0, "year": "1992"}{"max(adj_close)": 207.0, "year": "1993"}{"max(adj_close)": 141.0, "year": "1994"}{"max(adj_close)": 162.0, "year": "1995"}{"max(adj_close)": 115.0, "year": "1996"}{"max(adj_close)": 96.0, "year": "1997"}{"max(adj_close)": 142.0, "year": "1998"}{"max(adj_close)": 389.0, "year": "1999"}{"max(adj_close)": 476.0, "year": "2000"}{"max(adj_close)": 175.0, "year": "2001"}{"max(adj_close)": 172.0, "year": "2002"}{"max(adj_close)": 164.0, "year": "2003"}{"max(adj_close)": 452.0, "year": "2004"}{"max(adj_close)": 991.0, "year": "2005"}{"max(adj_close)": 1214.0, "year": "2006"}{"max(adj_close)": 2643.0, "year": "2007"}{"max(adj_close)": 2578.0, "year": "2008"}{"max(adj_close)": 2799.0, "year": "2009"}{"max(adj_close)": 4305.0, "year": "2010"}{"max(adj_close)": 5586.0, "year": "2011"}{"max(adj_close)": 9328.0, "year": "2012"}{"max(adj_close)": 7800.0, "year": "2013"}{"max(adj_close)": 11637.0, "year": "2014"}{"max(adj_close)": 13067.0, "year": "2015"}{"max(adj_close)": 10478.0, "year": "2016"}
Elasticsearch
{ "query": { "term": { "symbol": "AAPL" } }, "aggs": { "year": { "date_histogram": { "field": "date", "interval": "year", "time_zone": "+08:00", "format": "yyyy" }, "aggs": { "max(adj_close)": { "max": { "field": "adj_close" } } } } }, "size": 0}
GROUP BY HISTOGRAM(ipo_year, 5)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200select ipo_year_range, count(*) from symbol group by histogram(ipo_year, 5) as ipo_year_rangeEOF
{"ipo_year_range": 1970, "count(*)": 5}{"ipo_year_range": 1975, "count(*)": 0}{"ipo_year_range": 1980, "count(*)": 31}{"ipo_year_range": 1985, "count(*)": 124}{"ipo_year_range": 1990, "count(*)": 283}{"ipo_year_range": 1995, "count(*)": 315}{"ipo_year_range": 2000, "count(*)": 358}{"ipo_year_range": 2005, "count(*)": 387}{"ipo_year_range": 2010, "count(*)": 1055}{"ipo_year_range": 2015, "count(*)": 340}
Elasticsearch
{ "aggs": { "ipo_year_range": { "aggs": {}, "histogram": { "field": "ipo_year", "interval": 5 } } }, "size": 0}
{ "hits": { "hits": [], "total": 6714, "max_score": 0.0 }, "_shards": { "successful": 1, "failed": 0, "total": 1 }, "took": 2, "aggregations": { "ipo_year_range": { "buckets": [ { "key": 1970, "doc_count": 5 }, { "key": 1975, "doc_count": 0 }, { "key": 1980, "doc_count": 31 }, { "key": 1985, "doc_count": 124 }, { "key": 1990, "doc_count": 283 }, { "key": 1995, "doc_count": 315 }, { "key": 2000, "doc_count": 358 }, { "key": 2005, "doc_count": 387 }, { "key": 2010, "doc_count": 1055 }, { "key": 2015, "doc_count": 340 } ] } }, "timed_out": false}
Profile
[ { "query": [ { "query_type": "MatchAllDocsQuery", "lucene": "*:*", "time": "0.2565110000ms", "breakdown": { "score": 0, "create_weight": 7913, "next_doc": 220517, "match": 0, "build_scorer": 28081, "advance": 0 } } ], "rewrite_time": 2524, "collector": [ { "name": "MultiCollector", "reason": "search_multi", "time": "2.350590000ms", "children": [ { "name": "TotalHitCountCollector", "reason": "search_count", "time": "0.2413570000ms" }, { "name": "HistogramAggregator: [ipo_year_range]", "reason": "aggregation", "time": "1.112294000ms" } ] } ] }]
从profile结果可以看出来,两种histogram在底层实现是一样的。