题解 140 | #未完成率较高的50%用户近三个月答卷情#
【场景】:等级值百分比、近几个月
【分类】:专用窗口函数、嵌套子查询
分析思路
难点:
统计SQL试卷用户未完成率中的百分比排位使用percent_rank()
(1)统计SQL试卷未完成率
- [条件]:tag = 'SQL'
- [使用]:join ;count(); if()
(2)统计SQL试卷用户未完成率中的百分比排位
- [使用]:percent_rank()
(3)统计SQL试卷用户未完成率中的百分比排位中大于50%,并且是6级和7级用户有试卷作答记录的近三个月
- [使用]:dense_rank();date_format(start_time,'%Y%m')
(4)最终统计每个用户每个月的答卷数目和完成数目
- [使用]:where ranking <= 3;group by uid,start_month;order by uid,start_month
最终结果
select 查询结果 [用户ID;月份;答题数目;完成数目]
from 从哪张表中查询数据[多个join连接的表]
where 查询条件 [百分比排位中大于50%]
group by 分组条件 [用户ID;月份]
order by 对查询结果排序 [用户ID、月份升序];
扩展:
保留日期中年月的方法
- substring(start_time,1,7);
- replace(substring(start_time,1,7),'-','');
- date_format(start_time,'%Y%m')
- date(start_time)
前往查看: MySQL 窗口函数
求解代码
方法一
with子句
with
temp as(
#统计SQL试卷未完成率
select
uid,
count(if(submit_time is null,1,null)) as incomplete_cnt,#未完成数
count(start_time) as total_cnt, #作答总数
count(if(submit_time is null,1,null))/count(start_time) as incomplete_rate #未完成率
from user_info
join exam_record using(uid)
join examination_info using(exam_id)
where tag = 'SQL'
group by uid
)
,temp1 as(
#统计SQL试卷用户未完成率中的百分比排位
select
uid,
percent_rank() over(order by incomplete_rate) as nt_complete
from temp
)
,temp2 as(
#6级和7级用户并且有试卷作答记录的近三个月
select
uid,
start_time,
submit_time,
date_format(start_time,%Y%m) as start_month,
dense_rank() over(partition by uid order by (date_format(start_time,'%y%m')) desc) as ranking
from exam_record
where uid in (
select
uid
from temp1
join user_info using(uid)
join exam_record using(uid)
where nt_complete >= 0.5
and level between 6 and 7
)
)
#最终统计每个用户每个月的答卷数目和完成数目
select
uid,
start_month,
count(start_time) as total_cnt,
count(if(submit_time is null,null,1)) as complete_cnt
from temp2
where ranking <= 3
group by uid,start_month
order by uid,start_month
优化代码
同时进行未完成率的筛选和百分比排位
with
temp as(
#统计SQL试卷用户未完成率中的百分比排位
select
uid,
percent_rank() over(order by count(if(submit_time is null,1,null))/count(start_time)) as nt_complete
from user_info
join exam_record using(uid)
join examination_info using(exam_id)
where tag = 'SQL'
group by uid
)
,temp1 as(
#6级和7级用户并且有试卷作答记录的近三个月
select
uid,
start_time,
submit_time,
date_format(start_time,'%Y%m') as start_month,
dense_rank() over(partition by uid order by (date_format(start_time,'%Y%m')) desc) as ranking
from exam_record
where uid in (
select
uid
from temp
join user_info using(uid)
join exam_record using(uid)
where nt_complete >= 0.5
and level between 6 and 7
)
)
#最终统计每个用户每个月的答卷数目和完成数目
select
uid,
start_month,
count(start_time) as total_cnt,
count(if(submit_time is null,null,1)) as complete_cnt
from temp1
where ranking <= 3
group by uid,start_month
order by uid,start_month
temp 表输出结果:
1 1001|0.000
2 1002|0.500
3 1003|1.000
temp1 表输出结果:
1 1002|2020-05-06 12:01:01|None|202005|1
2 1002|2020-05-02 19:01:01|2020-05-02 19:32:00|202005|1
3 1002|2020-03-02 12:11:01|None|202003|2
4 1002|2020-03-01 12:01:01|2020-03-01 12:41:01|202003|2
5 1002|2020-02-02 12:01:01|2020-02-02 12:43:01|202002|3
6 1002|2020-02-02 12:01:01|None|202002|3
7 1002|2020-02-01 12:11:01|None|202002|3
8 1002|2020-01-01 18:01:01|2020-01-01 18:59:02|202001|4
9 1002|2020-01-20 10:01:01|None|202001|4
方法二
嵌套子查询
#最终统计每个用户每个月的答卷数目和完成数目
select
uid,
start_month,
count(start_time) as total_cnt,
count(if(submit_time is null,null,1)) as complete_cnt
from(
#6级和7级用户并且有试卷作答记录的近三个月
select
uid,
start_time,
submit_time,
date_format(start_time,'%Y%m') as start_month,
dense_rank() over(partition by uid order by (date_format(start_time,'%Y%m')) desc) as ranking
from exam_record
where uid in (
select
uid
from(
#统计SQL试卷用户未完成率中的百分比排位
select
uid,
percent_rank() over(order by count(if(submit_time is null,1,null))/count(start_time)) as nt_complete
from user_info
join exam_record using(uid)
join examination_info using(exam_id)
where tag = 'SQL'
group by uid
) temp
join user_info using(uid)
join exam_record using(uid)
where nt_complete >= 0.5
and level between 6 and 7
)
) temp1
where ranking <= 3
group by uid,start_month
order by uid,start_month