Skip to content

ElasticSearch数据检索实战

基础查询

查询聊天记录

查询:

  • 指定发送者(from.keyword):TongXiaoEr
  • 聊天类型(chatType): 单聊
  • 消息类型(msgType): 文本
  • 关键词匹配(text.content):好的

排序:

  • 消息时间(msgTime):倒序
bash
POST /gupaoedu-wxcp-msg-2024-06-*/_search
{
  "size": 1,
  "query": {
    "bool": {
      "must": [
        { "match": { "from.keyword": "TongXiaoEr" } },
        { "term": { "chatType": "1" } },
        { "term": { "msgType": "text" } },
        { "match_phrase": { "text.content": "好的" } }
      ]
    }
  },
  "sort": [
    { "msgTime": { "order": "desc" } }
  ]
}
java
// The four mandatory conditions of the search, one named clause each.
// Sender: match on the keyword sub-field of "from".
Query senderClause = Query.of(c -> c.match(m -> m
        .field("from.keyword")
        .query(v -> v.stringValue("TongXiaoEr"))));
// Chat type "1" (the single-chat type, per the criteria listed for this example).
Query chatTypeClause = Query.of(c -> c.term(t -> t
        .field("chatType")
        .value(FieldValue.of("1"))));
// Message type: text messages only.
Query msgTypeClause = Query.of(c -> c.term(t -> t
        .field("msgType")
        .value(FieldValue.of("text"))));
// Keyword: phrase match on the message content.
Query phraseClause = Query.of(c -> c.matchPhrase(p -> p
        .field("text.content")
        .query("好的")));

// Combine all clauses under bool/must so every one of them has to match.
Query q = Query.of(root -> root.bool(bool -> bool.must(
        senderClause, chatTypeClause, msgTypeClause, phraseClause)));

// Sort by message time, newest first.
SortOptions newestFirst = SortOptions.of(so -> so
        .field(f -> f.field("msgTime").order(SortOrder.Desc)));

// Build the search request: June 2024 indices, return a single hit.
SearchRequest searchRequest = SearchRequest.of(s -> s
        .index("gupaoedu-wxcp-msg-2024-06-*")
        .size(1)
        .query(q)
        .sort(newestFirst));

SearchResponse<Map> searchResponse = client.search(searchRequest, Map.class);
// Serialize the SearchResponse object to a JSON string and print it.
String jsonResponse = serializeResponseToJson(searchResponse);
System.out.println(jsonResponse);
json
{
  "took" : 80,
  "timed_out" : false,
  "_shards" : {
    "total" : 19,
    "successful" : 19,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 83,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "gupaoedu-wxcp-msg-2024-06-19",
        "_type" : "_doc",
        "_id" : "1012599982130127612_1718777967324_external",
        "_score" : null,
        "_source" : {
          "cropId" : "wwe2d76b4a3b7df43e",
          "msgType" : "text",
          "msgId" : "1012599982130127612_1718777967324_external",
          "roomId" : "",
          "@timestamp" : "2024-06-19T14:19:23.144+08:00",
          "tolist" : [
            "wmuppIEAAAV9D4H5gODlL1qiV5gZwPTQ"
          ],
          "action" : "send",
          "msgTime" : 1718777963144,
          "from" : "TongXiaoEr",
          "text" : {
            "content" : "好的哦"
          },
          "chatType" : "1"
        },
        "sort" : [
          1718777963144
        ]
      }
    ]
  }
}

聚合排序

根据消息接收者(tolist.keyword)聚合,并将最近有聊天记录的接收者排在最前面

查询:

  • 指定发送者(from.keyword):TongXiaoEr(注意:下方 DSL 示例中使用的是另一个发送者 ID)

排序:

  • 消息时间(@timestamp):倒序

注意:聚合返回的桶数量(size)上限为 10000,这里仅作演示,只设置为 3。最外层的 size 设置为 0,表示不返回命中的文档(hits 为空),只返回 aggregations。

bash
POST /gupaoedu-wxcp-msg-2024-07-*/_search
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        { "match": { "from.keyword": "3587fc1a14b67eac0a3a3c39d322f9a7" } }
      ]
    }
  },
  "aggs": {
    "user_list": {
      "terms": {
        "field": "tolist.keyword",
        "size": 3,
        "order": { "max_timestamp": "desc" }
      },
      "aggs": {
        "max_timestamp": {
          "max": { "field": "@timestamp" }
        }
      }
    }
  }
}
java
// Only the sender is filtered here: this example aggregates every message sent by
// the given user, so the chatType / msgType / phrase clauses of the basic-query
// example do not belong in this query.
Query q = Query.of(b -> b.bool(b1 -> b1.must(
                Query.of(b2 -> b2.match(m -> m.field("from.keyword").query(t -> t.stringValue("TongXiaoEr"))))
        )
));

// Name of the terms aggregation. Kept in one place so that the request and the
// response lookup cannot drift apart; "user_list" is the name used by the DSL request.
String aggName = "user_list";

// Group by receiver and order the buckets by the "max_timestamp" sub-aggregation
// (latest @timestamp per receiver), newest first.
// Note: once the buckets are ordered by time after aggregating, paging is no longer supported.
Aggregation aggregation = new Aggregation.Builder()
        .terms(t -> {
            Map<String, SortOrder> orderMap = new HashMap<>();
            orderMap.put("max_timestamp", SortOrder.Desc);
            t.field("tolist.keyword").size(3).order(orderMap);
            return t;
        }).aggregations("max_timestamp", agg -> agg.max(m -> m.field("@timestamp"))).build();


// Build the search request; size(0) suppresses the document hits so that only
// the aggregation result is returned.
SearchRequest searchRequest = SearchRequest.of(s -> s
        .index("gupaoedu-wxcp-msg-2024-07-*")
        .size(0)
        .query(q)
        .aggregations(aggName, aggregation)
);

SearchResponse<Void> searchResponse = client.search(searchRequest, Void.class);
Map<String, Aggregate> aggregations = searchResponse.aggregations();
if (aggregations == null) {
    return Result.ok();
}

Aggregate aggregate = aggregations.get(aggName);
// sterms() throws if the aggregate holds a different variant, so treat anything
// that is not a string-terms result the same way as a missing aggregation.
if (aggregate == null || !aggregate.isSterms()) {
    return Result.ok();
}

// Collect the bucket keys (receiver ids) in the order returned by Elasticsearch.
// (A composite aggregation would be read through .composite() instead of .sterms().)
List<String> toList = aggregate
                .sterms()
                .buckets()
                .array()
                .stream()
                .map(StringTermsBucket::key)
                .collect(Collectors.toList());
json
{
  "took" : 259,
  "timed_out" : false,
  "_shards" : {
    "total" : 8,
    "successful" : 8,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 10000,
      "relation" : "gte"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "user_list" : {
      "doc_count_error_upper_bound" : -1,
      "sum_other_doc_count" : 98104,
      "buckets" : [
        {
          "key" : "wmuppIEAAAx5WOt1Nxlujqq4qkR7rRUQ",
          "doc_count" : 2,
          "max_timestamp" : {
            "value" : 1.720430363476E12,
            "value_as_string" : "2024-07-08T09:19:23.476Z"
          }
        },
        {
          "key" : "wmuppIEAAANvj-6wJYkpQ_NmXlpokZJg",
          "doc_count" : 2,
          "max_timestamp" : {
            "value" : 1.720430363472E12,
            "value_as_string" : "2024-07-08T09:19:23.472Z"
          }
        },
        {
          "key" : "wmuppIEAAAuqIsOl5xAUWEJD22Syuhjg",
          "doc_count" : 2,
          "max_timestamp" : {
            "value" : 1.720430363424E12,
            "value_as_string" : "2024-07-08T09:19:23.424Z"
          }
        }
      ]
    }
  }
}

业务场景应用,对应左侧的最近联系人列表,支持指定范围查询。

image-20240708200139414

人生感悟