题解 | #连续两次作答试卷的最大时间窗#
连续两次作答试卷的最大时间窗
https://www.nowcoder.com/practice/9dcc0eebb8394e79ada1d4d4e979d73c
select tc2.uid, tc2.days_window, round((ta.avg_exam * tc2.days_window), 2) as avg_exam_cnt from ( select uid, max(days_diff) as days_window from ( select tc.uid, tc.start_time, tc.start_time_lag, (datediff (tc.start_time, tc.start_time_lag) + 1) as days_diff, tc.ranking from ( SELECT uid, DATE_FORMAT (start_time, '%Y-%m-%d') AS start_time, LAG (DATE_FORMAT (start_time, '%Y-%m-%d'), 1) OVER ( partition by uid ORDER BY uid, start_time ) AS start_time_lag, ROW_NUMBER() OVER ( partition by uid ORDER BY uid, start_time ) AS ranking FROM exam_record where year (start_time) = 2021 ) tc where tc.ranking != 1 and uid in ( select uid from exam_record where year (start_time) = 2021 group by uid having count(distinct date_format (start_time, '%Y-%m-%d')) > 1 ) and year (tc.start_time) = 2021 ) tc1 group by uid ) tc2 left join ( select uid, count(exam_id) / ( datediff ( max(DATE_FORMAT (start_time, '%Y-%m-%d')), min(DATE_FORMAT (start_time, '%Y-%m-%d')) ) + 1 ) as avg_exam from exam_record where year (start_time) = 2021 group by uid ) ta on tc2.uid = ta.uid order by days_window desc, avg_exam_cnt desc;
题目解析:
#01先找出满足条件的uid
select uid from exam_record where year(start_time) =2021 group by uid having count(distinct date_format(start_time,'%Y-%m-%d'))>1;
用到了year()等日期函数
获取指定时间部分合集 from_unixtime(ts,fmt) 根据指定的fmt格式,格式化unix时间戳ts last_day(date) 给定日期所在月份的最后一天 hour(time) 返回time的小时值(0~23) minute(time) 返回time的分钟值(0~59) quarter(date) 返回date在一年中的季度(1~4),如select quarter(current_date); month(date) 返回date的月份值(1~12) week(date) 返回日期date为一年中第几周(0~53) year(date) 返回日期date的年份(1000~9999) monthname(date) 返回date的月份名,如:select monthname(current_date); extract()函数 select extract(year_month from current_date); select extract(day_second from current_date); select extract(hour_minute from current_date);
#02之后我们需要计算试卷比例
select uid, count(exam_id)/ (datediff(max(DATE_FORMAT(start_time, '%Y-%m-%d')),min(DATE_FORMAT(start_time, '%Y-%m-%d')))+1) as avg_exam_cnt from exam_record group by uid
用到了聚合函数count(),max(),min(),日期差函数datediff(大日期-小日期)
聚合函数
avg(col)返回指定列的平均值 min(col)返回指定列的最小值 max(col)返回指定列的最大值 sum(col)返回指定列的所有值之和 count(col)返回指定列中非null值的个数 group_concat()函数:聚合字符串的 #group_concat(列名 separator '指定的分隔符')函数 select department,group_concat(emp_name order by salary separator ';') from emp group
时间日期常见的操作
#时间加减函数 date_add(date,interval int keyword)函数 date_sub(date,interval int keyword) 函数 例子: SELECT DATE_ADD('2022-01-01', INTERVAL 2 YEAR); select date_sub(current_date,interval 6 month); #时间格式化函数 date_format(date,fmt) 函数 例子: 在 MySQL 中,DATE_FORMAT() 函数用于将日期转换为指定格式的字符串。你可以使用不同的格式代码来定义想要显示的日期格式。以下是一些常见的格式代码: %Y:四位数的年份(例如:2022); %y:两位数的年份(例如:22); %m:两位数的月份(01-12); %c:没有前导零的月份(1-12); %d:两位数的日期(01-31); %H:24小时制的小时数(00-23); %h:12小时制的小时数(01-12); %i:两位数的分钟数(00-59); %s:两位数的秒数(00-59); %p:AM 或 PM; %W:星期的英文全名(例如:Sunday); %a:星期的英文缩写(例如:Sun); %M:月份的英文全名(例如:January); %b:月份的英文缩写(例如:Jan)。 以下是 DATE_FORMAT() 函数的一些示例: SELECT DATE_FORMAT('2022-01-31', '%Y-%m-%d'); -- 输出:2022-01-31 SELECT DATE_FORMAT('2022-01-31', '%d/%m/%Y'); -- 输出:31/01/2022 SELECT DATE_FORMAT('2022-01-31 15:30:45', '%H:%i:%s'); -- 输出:15:30:45 SELECT DATE_FORMAT('2022-01-31 15:30:45', '%h:%i:%s %p'); -- 输出:03:30:45 PM #时间差函数 date_diff函数 例子: select datediff(current_date(),'2008-08-08') select time_diff(timestamp(),'2008-08-08 10:18:56') TIMESTAMPDIFF(unit, datetime_expr1, datetime_expr2)函数 用于计算两个时间段之差 例子: unit:要计算的时间单位,可以是YEAR、QUARTER、MONTH、WEEK、DAY、HOUR、MINUTE或SECOND。 datetime_expr1:第一个日期或时间表达式。 datetime_expr2:第二个日期或时间表达式。 以下是一些使用TIMESTAMPDIFF函数的例子: 计算两个日期之间的天数差异: SELECT TIMESTAMPDIFF(DAY, '2022-01-01', '2022-01-10'); 结果为:9,表示两个日期之间相差9天。 #其他格式转为日期格式函数 例子: str_to_date(string,format)函数将字符串的函数转换为指定日期格式 select str_to_date('August 10 2017','%M%d%Y') UNIX_TIMESTAMP() 函数: 该函数将一个日期时间表示的字符串转换为 UNIX 时间戳。 示例: SELECT UNIX_TIMESTAMP('2024-02-27 12:34:56') AS unix_timestamp;
#在找我们的最大间隔数
select uid, max(days_diff) as days_window from( select tc.uid,tc.start_time,tc.start_time_lag,(datediff(tc.start_time,tc.start_time_lag)+1) as days_diff,tc.ranking from ( SELECT uid, DATE_FORMAT(start_time, '%Y-%m-%d') AS start_time, LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag, ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking FROM exam_record where year(start_time) =2021 ) tc where tc.ranking !=1 and uid in (select uid from exam_record where year(start_time) =2021 group by uid having count(distinct date_format(start_time,'%Y-%m-%d'))>1) and year(tc.start_time) =2021 ) tc1 group by uid
这块主要用到了窗口函数lag()和row_number()函数
LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag,
ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking
#序号函数 window_function ( expr ) OVER ( PARTITION BY ... ORDER BY ... frame_clause ) PARTITION BY类似于group by,不加就是正常的全局排序 ORDER BY 就是对分组进行排序 frame_clause窗口大小 例子 #序号函数,row_number() 按照顺序排序 | 唯一标识 rank() 同名排名相同,但是遇见同名会出现跳一位之后在进行排序,1 1 3 | dense_rank() 同名排名相同,且连续 1 1 2 select tid, uid,ranking from ( select b.uid,a.tag tid, row_number() over (partition by a.tag order by max(b.score) desc,min(b.score) desc,uid desc) as ranking from examination_info a left join exam_record b on a.exam_id=b.exam_id group by a.tag,b.uid ) tc where ranking<=3; #分布函数 #cume_dist()函数,小于等于当前行的比例,用于计算所占比例的。 select dname, ename salary, cume_dist() over(order by salary) as rn1,-- 没有partition语句 所有的数据位于一组 cume_dist() over(partition by dname order by salary) as rn2 from employee; #前后函数 lead()函数的用法(提前一期), lag(滞后一期)函数的用法 -- lead的用法是提前一期的意思,lag表示滞后一期的意思 select dname, ename, hiredate, salary, lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time, lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time, lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time from employee; #头尾函数 返回截至当前行,开头的第一个first_value(expr)的值;返回最后一个值last_value(expr)的值 select dname, ename, hiredate, salary, lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time, lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time, lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time, first_value(salary) over(partition by dname order by hiredate) as first_1_salary, last_value(salary) over(partition by dname order by hiredate) as last_1_salary from employee; #分组聚合函数 #分组按照时间或者序号累加效果 select dname,ename,salary,sum(salary) over (partition by dname order by hiredate desc) as cn from employee; 或者 SELECT dname, ename, salary, SUM(salary) OVER ( PARTITION BY dname ORDER BY hiredate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS cn FROM employee; #3 PRECEDING表示当前行的前第几行加和 #1 following 表示当前行的后第几行加和 # UNBOUNDED 表示边缘,UNBOUNDED PRECEDING第一行,UNBOUNDED following 最后一行,实现任意区间段的统计 #不分组直接汇总 select dname,ename,salary,sum(salary) over (partition by dname ) as cn from employee; #其他聚合函数 nth_value(expr,n)函数:指定分组列或者表达式的第几个数据 ntile(2)函数:为指定分组列进行平均分组 select dname, ename, hiredate, salary, nth_value(salary,2) over(partition by dname order by hiredate) as last_1_time, ntile(2) over(partition by dname order by hiredate) as nt from employee;
#最后把他们通过left join 进行连接即可。