题解 | #统计活跃间隔对用户分级结果#
统计活跃间隔对用户分级结果
https://www.nowcoder.com/practice/6765b4a4f260455bae513a60b6eed0af
# 画个时间轴方便理解: # 1.最早登陆时间在7天内,就是新用户;最晚登陆时间在7跟30之外,活跃时间决定“沉睡or流失” # 2.一个用户会有多个登陆时间,可以排序:ROW_NUMBER()OVER(PARTITION BY uid ORDER BY in_time); # 也可以取出首尾,最早登陆时间: MIN(in_time) first_dt GROUP BY uid;(即最远的登陆时间) # 最晚登陆时间: MAX(out_time) last_dt GROUP BY uid;(即最近的登陆时间) SELECT CASE WHEN first_diff < 7 THEN "新晋用户" WHEN last_diff >= 30 THEN "流失用户" WHEN last_diff < 30 AND last_diff >= 7 THEN "沉睡用户" ELSE "忠实用户" END user_grade, ROUND(COUNT(uid)/ MAX(uid_cnt),2) ratio # case when分组,可以在同一层聚合,因为select执行顺序在group by之后 # 巧妙的是:用户数uid_cnt在每一组都相同,用一个max变成聚合列避免语法错误,可以做同级计算 FROM ( SELECT uid ,uid_cnt ,TIMESTAMPDIFF(DAY,first_dt,current_dt) first_diff #首次登陆至今时间差(求时间差不能直接相减) ,TIMESTAMPDIFF(DAY,last_dt,current_dt) last_diff #最后一次登陆至今时间差 FROM ( SELECT uid ,MIN(in_time) first_dt ,MAX(out_time) last_dt FROM tb_user_log GROUP BY uid ) a JOIN ( SELECT COUNT(DISTINCT uid) uid_cnt # 总用户数 ,MAX(out_time) current_dt #当前时间 FROM tb_user_log ) b ON 1 # 直接 on 1的操作:追加信息到前一个表的每一行上 ) t GROUP BY CASE WHEN first_diff < 7 THEN "新晋用户" WHEN last_diff >= 30 THEN "流失用户" WHEN last_diff < 30 AND last_diff >= 7 THEN "沉睡用户" ELSE "忠实用户" END ORDER BY ratio DESC