把 Elasticsearch 當數據庫使
來自: https://segmentfault.com/a/1190000004454399
今天需要做一些最簡單的聚合查詢
COUNT(*)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 select count(*) from quote EOF
{"count(*)": 20994400}
Elasticsearch
{
"aggs": {},
"size": 0
}
{
"hits": {
"hits": [],
"total": 20994400,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 26,
"timed_out": false
}
這個就不算聚合,只是看了一下最終滿足過濾條件的 total hits count。
COUNT(ipo_year)
這個和 COUNT(*) 的區別是 COUNT(ipo_year) 要求字段必須有值才算一個。
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 select count(ipo_year) from symbol EOF
{"count(ipo_year)": 2898}
Elasticsearch
{
"aggs": {
"count(ipo_year)": {
"value_count": {
"field": "ipo_year"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 55,
"aggregations": {
"count(ipo_year)": {
"value": 2898
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.3204170000ms",
"breakdown": {
"score": 0,
"create_weight": 10688,
"next_doc": 278660,
"match": 0,
"build_scorer": 31069,
"advance": 0
}
}
],
"rewrite_time": 2279,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.957183000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2319240000ms"
},
{
"name": "ValueCountAggregator: [count(ipo_year)]",
"reason": "aggregation",
"time": "1.999916000ms"
}
]
}
]
}
]
這是我們的第一個聚合例子。可以從profile結果看出來,其實現方式在采集文檔的時候加上了ValueCountAggregator統計了字段非空的文檔數量。
COUNT(DISTINCT ipo_year)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 select count(distinct ipo_year) from symbol EOF
{"count(distinct ipo_year)": 39}
Elasticsearch
{
"aggs": {
"count(distinct ipo_year)": {
"cardinality": {
"field": "ipo_year"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 24,
"aggregations": {
"count(distinct ipo_year)": {
"value": 39
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2033600000ms",
"breakdown": {
"score": 0,
"create_weight": 7501,
"next_doc": 162905,
"match": 0,
"build_scorer": 32954,
"advance": 0
}
}
],
"rewrite_time": 2300,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.438386000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2240230000ms"
},
{
"name": "CardinalityAggregator: [count(distinct ipo_year)]",
"reason": "aggregation",
"time": "1.471620000ms"
}
]
}
]
}
]
這個例子里 ValueCountAggregator 變成了 CardinalityAggregator
SUM(market_cap)
MIN/MAX/AVG/SUM 這幾個簡單的聚合也是支持的
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 select sum(market_cap) from symbol EOF
{"sum(market_cap)": 11454155180142.0}
Elasticsearch
{
"aggs": {
"sum(market_cap)": {
"sum": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 15,
"aggregations": {
"sum(market_cap)": {
"value": 11454155180142.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2026870000ms",
"breakdown": {
"score": 0,
"create_weight": 8097,
"next_doc": 163069,
"match": 0,
"build_scorer": 31521,
"advance": 0
}
}
],
"rewrite_time": 2151,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.461247000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.3302140000ms"
},
{
"name": "SumAggregator: [sum(market_cap)]",
"reason": "aggregation",
"time": "1.102363000ms"
}
]
}
]
}
]
過濾 + 聚合
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 select sum(market_cap) from symbol where ipo_year=1998 EOF
{"sum(market_cap)": 107049150786.0}
Elasticsearch
{
"query": {
"term": {
"ipo_year": 1998
}
},
"aggs": {
"sum(market_cap)": {
"sum": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 56,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 11,
"aggregations": {
"sum(market_cap)": {
"value": 107049150786.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "TermQuery",
"lucene": "ipo_year:`N",
"time": "0.4526400000ms",
"breakdown": {
"score": 0,
"create_weight": 220579,
"next_doc": 159412,
"match": 0,
"build_scorer": 72649,
"advance": 0
}
}
],
"rewrite_time": 3750,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "0.2203470000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.009478000000ms"
},
{
"name": "SumAggregator: [sum(market_cap)]",
"reason": "aggregation",
"time": "0.1557820000ms"
}
]
}
]
}
]
query 過濾完,然后再計算 aggs
</div> 本文由用戶 wymv5244 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!