Soul-大厂必考连续登录经典问题答案
--自己整理的sqlintern.com 代码答案
--1.2020年6月的活跃用户数为?(1分)
select
substr(load_dt, 1, 7) load_month --题干要求6月份,但是实际场景中,都是取更长周期,比如近6个月、近1年等
, count(distinct usr_id) cst_cnt --活跃用户数是去重用户数,初学者容易写成count(1)
from
td_load_rcd
group by
substr(load_dt, 1, 7)
;
--2.7月份工作日期间,各时间段的月活分布,通勤(7:00-9:00、18:00-20:00),午休(11:00-13:00),临睡(22:00-1:00),哪段时间的活跃用户数最高?(1分)
select
case when hour(load_tm) between 7 and 8 or hour(load_tm) between 18 and 19 then 'commute'
when hour(load_tm) between 11 and 12 then 'lunch'
when hour(load_tm) in (22, 23, 0) then 'before_sleep'
end as time_prd
,count(distinct usr_id) as cst_dt
from
td_load_rcd where load_dt in ('2020-07-01', '2020-07-02',
'2020-07-03', '2020-07-06', '2020-07-07', --题干要求工作日,这里通过枚举
'2020-07-08', '2020-07-09', '2020-07-10') --大家想想有没有其他方式呢?提示(mysql提取星期函数,大家可自行百度)
group by
case when hour(load_tm) between 7 and 8 or hour(load_tm) between 18 and 19 then 'commute'
when hour(load_tm) between 11 and 12 then 'lunch'
when hour(load_tm) in (22, 23, 0) then 'before_sleep'
end --注意两处case when的区别,此处没有 as time_prd
;
--3.单日登录次数大于等于5次的用户数(1分)
select count(distinct usr_id) as usr_cnt
from
(
select usr_id, load_dt, count(1) as load_times --本题的思路是子查询,保存每个用户在每天的登录次数
from td_load_rcd
group by usr_id, load_dt
having count(1) >=5 --把count(1) 改成load_times 行不行?大家可以试一下
)t
;
-4.6月12日的T+1日留存、6月15日的T+3日留存、6月20日的T+7日留存分别为:(2分)
--经典题,T+N留存率查询,同学们需要反复琢磨
--首先明确留存率的定义:T日新增用户中,在第n日(即T+n日)再次活跃的用户,占T日新增用户的比例。
--谷歌的官方说法更简洁,叫:Percentage of new users who return each day
--总之,一定得是新用户
create view td_distinct_load_rcd_min as
select usr_id,min(load_dt) load_dt
from td_load_rcd
group by usr_id --既然是新用户,首先得找到每天新增用户
;
create view td_distinct_load_rcd as
select load_dt, usr_id
from td_load_rcd
group by load_dt, usr_id --这是sql代码优化的一种思路。建中间表,减少代码量,提升查询速度。中间表保存了用户在每天的去重登录情况
;
select
t0.load_dt
, count(t0.usr_id) as cst_dt_0
, count(t1.usr_id) as cst_dt_1
, count(t1.usr_id)/count(t0.usr_id) as cst_dt_pct_1
, count(t2.usr_id) as cst_dt_2
, count(t2.usr_id)/count(t0.usr_id) as cst_dt_pct_2
, count(t3.usr_id) as cst_dt_3
, count(t3.usr_id)/count(t0.usr_id) as cst_dt_pct_7
from
td_distinct_load_rcd_min t0
left join
td_distinct_load_rcd t1
on t0.usr_id=t1.usr_id and t0.load_dt=date_sub(t1.load_dt, interval 1 day) --修改本处的1,3,7即可得到任意一天的任意N日留存率
left join
td_distinct_load_rcd t2
on t0.usr_id=t2.usr_id and t0.load_dt=date_sub(t2.load_dt, interval 3 day)
left join
td_distinct_load_rcd t3
on t0.usr_id=t3.usr_id and t0.load_dt=date_sub(t3.load_dt, interval 7 day)
group by
t0.load_dt
order by
t0.load_dt
;
--5.6月份连续7天登录的用户数为(2分)
select count(distinct usr_id)
from
(
select usr_id, load_dt2, count(1) load_days
from
(
select usr_id, load_dt, rnk, date_sub(load_dt, interval rnk day) as load_dt2
from
(
select
a.usr_id
, a.load_dt
, row_number()
over(partition by a.usr_id order by a.load_dt) rnk
from
(
select
usr_id
, load_dt
from td_load_rcd where substr(load_dt, 1, 7)='2020-06'
group by
usr_id, load_dt
)a
)b
)c
group by usr_id,load_dt2
having load_days >= 7
)t
;