MySQL优化记录 
简介 
背景介绍
前同事遇到一个比较棘手的问题,由于是遗留系统,没有设计统计表,现实需求又需要根据众多条件进行统计,关联到多个大表200W+。经过几轮方案调整,最终优化效果是没有建统计表,查询速度从之前的20秒+,减少到2秒以内。基本满足业务需求。
复盘 
最开始怀疑是对方自建数据库和磁盘导致查询缓慢。经对方同意,将数据导入到本地SSD进行,顺便升级数据库到8.0进行测试,发现同样查询缓慢。遂开始进行索引层优化面再确认(因为之前已经确认过),索引基本都走了。没有多大优化空间了。
方案一 
建立统计表来实现单表查询,历史数据按天处理。经过测算,发现跑一个项目、一天的数据需要2秒,系统有8000个项目,算下来就是16000秒,折算下来就是4个小时。系统有500天的存量数据。简单估算下,需要83天。发现该方案不太合理。
方案二 
由于最近正在处理大数据相关业务,也做过基于的RDD算子的spark程序优化。受到了一些启发,想想是否可以按照RDD优化的思路来解决慢查询的问题。先将大表数据进行过滤,统计出每个维度的关键数据,然后根据项目ID组合叠加多个维度的数据(行转列),最后再关联上项目的基础数据,就完成了业务查询。
相关截图记录 
数据量概览
慢查询结果耗时
慢查询对应的执行计划
优化后查询耗时
优化后查询执行计划
SQL记录 
优化前查询
sql
select tt1.id, tt1.department_name ,tt1.hospital_name,tt1.province, tt1.city 
,(
select count(1) as num
         from interaction_message t left join user u 
         on t.sender_id = u.id
      where 1=1
      and date(t.created_time) between '2020-05-01'
    and '2020-05-31'
          and u.project_id = tt1.id
          and u.user_type in (1,4)
          and u.name != '科室小助手'
          and t.content_type in ('I', 'T', 'VOICE')
) messages_yhry
, 0 messages_nums_yhry
,(
select count(*) as num
         from interaction_message t left join user u 
         on t.receiver_id = u.id
      where 1=1
      and date(t.created_time) between '2020-05-01'
    and '2020-05-31'
          and u.project_id = tt1.id
          and u.user_type in (1,4)
          and t.content_type in ('I', 'T', 'VOICE')
          and u.name != '科室小助手'
) messages_hz
,0 messages_nums_hz
,(
select  count(1) as send_num
      from health_education_instance t ,user u
      where t.creator = u.id
      and u.name != '科室小助手'
      and 1=1 
      and date(t.created_time) between '2020-05-01'
    and '2020-05-31'
      and t.project_id = tt1.id
) edu_send_nums
,(
select count(*) as read_num
      from health_education_instance t
      left join user u on t.creator = u.id
      where 1=1 
      and date(t.created_time) between '2020-05-01'
    and '2020-05-31'
      and t.project_id = tt1.id
      and u.name != '科室小助手'
      and t.read_time is not null
) edu_read_nums
,(
select count(*) as num
         from q_questionnaire_send_info t
         INNER JOIN q_questionnaire_info info on t.questionnaire_id=info.id and info.type=0 and info.deleted=0
          left join user u 
         on t.send_id = u.id
      where  1=1
and date(t.send_time) between '2020-05-01'
    and '2020-05-31'
          and u.project_id = tt1.id
          and u.user_type = 1
          and u.name != '科室小助手'
) questions_send_nums
,(
select count(1) as num
         from q_questionnaire_send_info t
         INNER JOIN q_questionnaire_info info on t.questionnaire_id=info.id and info.type=0 and info.deleted=0
          left join user u 
         on t.send_id = u.id
      where  1=1
and date(t.send_time) between '2020-05-01'
    and '2020-05-31'
      and t.answer_time is not null
          and u.project_id = tt1.id
          and u.user_type = 1
) questions_receive_nums
from project tt1 where  1=1 and tt1.id != 1 and tt1.deleted = 0 and tt1.type = 2
limit 1, 10;优化后查询
sql
select 
--   '2017-08-10' date,
  t.project_id,
  p.department_name,
  p.hospital_name,
  p.province,
  p.city,
  sum(messages_yhry) messages_yhry,
  sum(messages_hz) messages_hz,
  sum(edu_send_nums) edu_send_nums,
  sum(edu_read_nums) edu_read_nums,
  sum(questions_send_nums) questions_send_nums,
  sum(questions_receive_nums) questions_receive_nums
from (
  select '2017-08-10' date, t.project_id project_id, count(1) 'messages_yhry', 0 'messages_hz', 0 'edu_send_nums', 0 'edu_read_nums', 0 'questions_send_nums', 0 'questions_receive_nums'
  from interaction_message t left join user u on t.sender_id = u.id
  where 1=1
  and t.created_time between '2020-05-01 00:00:00' and '2020-05-31 23:59:59'
  and u.user_type in (1,4)
  and u.name != '科室小助手'
  and t.content_type in ('I', 'T', 'VOICE')
  group by t.project_id
          
  union all
  select '2017-08-10' date, t.project_id project_id, 0 'messages_yhry', count(1) 'messages_hz', 0 'edu_send_nums', 0 'edu_read_nums', 0 'questions_send_nums', 0 'questions_receive_nums'
  from interaction_message t left join user u on t.receiver_id = u.id
  where 1=1
  and t.created_time between '2020-05-01 00:00:00' and '2020-05-31 23:59:59'
  and u.user_type in (1,4)
  and u.name != '科室小助手'
  and t.content_type in ('I', 'T', 'VOICE')
  group by t.project_id
          
  union all
  select '2017-08-10' date, u.project_id project_id, 0 'messages_yhry', 0 'messages_hz', count(1) 'edu_send_nums', 0 'edu_read_nums', 0 'questions_send_nums', 0 'questions_receive_nums'
  from health_education_instance t ,user u
  where t.creator = u.id
  and u.name != '科室小助手'
  and t.created_time between '2020-05-01 00:00:00' and '2020-05-31 23:59:59'
  group by u.project_id
          
          
  union all
  select '2017-08-10' date, u.project_id project_id, 0 'messages_yhry', 0 'messages_hz', 0 'edu_send_nums', count(1) 'edu_read_nums', 0 'questions_send_nums', 0 'questions_receive_nums'
  from health_education_instance t ,user u
  where t.creator = u.id
  and u.name != '科室小助手'
  and t.created_time between '2020-05-01 00:00:00' and '2020-05-31 23:59:59'
  and t.read_time is not null
  group by u.project_id
          
          
  union all
  select '2017-08-10' date, u.project_id project_id, 0 'messages_yhry', 0 'messages_hz', 0 'edu_send_nums', 0 'edu_read_nums', count(1) 'questions_send_nums', 0 'questions_receive_nums'
  from q_questionnaire_send_info t
  INNER JOIN q_questionnaire_info info 
  on t.questionnaire_id=info.id and info.type=0 and info.deleted=0 
  left join user u  on t.send_id = u.id
  where  1=1
  and t.send_time between '2020-05-01 00:00:00' and '2020-05-31 23:59:59'
  and u.user_type = 1
  and u.name != '科室小助手'
  group by u.project_id
          
  union all
  select '2017-08-10' date, u.project_id project_id, 0 'messages_yhry', 0 'messages_hz', 0 'edu_send_nums', 0 'edu_read_nums', 0 'questions_send_nums', count(1) 'questions_receive_nums'
  from q_questionnaire_send_info t
  INNER JOIN q_questionnaire_info info 
  on t.questionnaire_id=info.id and info.type=0 and info.deleted=0 
  left join user u  on t.send_id = u.id
  where  1=1
  and t.send_time between '2020-05-01 00:00:00' and '2020-05-31 23:59:59'
  and t.answer_time is not null
  and u.user_type = 1
  and u.name != '科室小助手'
  group by u.project_id
) t, project p
where 1=1
and t.project_id = p.id
group by t.project_id