题解 | #连续两次作答试卷的最大时间窗#
连续两次作答试卷的最大时间窗
https://www.nowcoder.com/practice/9dcc0eebb8394e79ada1d4d4e979d73c
select
tc2.uid,
tc2.days_window,
round((ta.avg_exam * tc2.days_window), 2) as avg_exam_cnt
from
(
select
uid,
max(days_diff) as days_window
from
(
select
tc.uid,
tc.start_time,
tc.start_time_lag,
(datediff (tc.start_time, tc.start_time_lag) + 1) as days_diff,
tc.ranking
from
(
SELECT
uid,
DATE_FORMAT (start_time, '%Y-%m-%d') AS start_time,
LAG (DATE_FORMAT (start_time, '%Y-%m-%d'), 1) OVER (
partition by
uid
ORDER BY
uid,
start_time
) AS start_time_lag,
ROW_NUMBER() OVER (
partition by
uid
ORDER BY
uid,
start_time
) AS ranking
FROM
exam_record
where
year (start_time) = 2021
) tc
where
tc.ranking != 1
and uid in (
select
uid
from
exam_record
where
year (start_time) = 2021
group by
uid
having
count(distinct date_format (start_time, '%Y-%m-%d')) > 1
)
and year (tc.start_time) = 2021
) tc1
group by
uid
) tc2
left join (
select
uid,
count(exam_id) / (
datediff (
max(DATE_FORMAT (start_time, '%Y-%m-%d')),
min(DATE_FORMAT (start_time, '%Y-%m-%d'))
) + 1
) as avg_exam
from
exam_record
where
year (start_time) = 2021
group by
uid
) ta on tc2.uid = ta.uid
order by
days_window desc,
avg_exam_cnt desc;
题目解析:
#01先找出满足条件的uid
select uid from exam_record where year(start_time) =2021 group by uid having count(distinct date_format(start_time,'%Y-%m-%d'))>1;
用到了year()等日期函数
获取指定时间部分合集 from_unixtime(ts,fmt) 根据指定的fmt格式,格式化unix时间戳ts last_day(date) 给定日期所在月份的最后一天 hour(time) 返回time的小时值(0~23) minute(time) 返回time的分钟值(0~59) quarter(date) 返回date在一年中的季度(1~4),如select quarter(current_date); month(date) 返回date的月份值(1~12) week(date) 返回日期date为一年中第几周(0~53) year(date) 返回日期date的年份(1000~9999) monthname(date) 返回date的月份名,如:select monthname(current_date); extract()函数 select extract(year_month from current_date); select extract(day_second from current_date); select extract(hour_minute from current_date);
#02之后我们需要计算试卷比例
select uid, count(exam_id)/ (datediff(max(DATE_FORMAT(start_time, '%Y-%m-%d')),min(DATE_FORMAT(start_time, '%Y-%m-%d')))+1) as avg_exam_cnt from exam_record group by uid
用到了聚合函数count(),max(),min(),日期差函数datediff(大日期-小日期)
聚合函数
avg(col)返回指定列的平均值 min(col)返回指定列的最小值 max(col)返回指定列的最大值 sum(col)返回指定列的所有值之和 count(col)返回指定列中非null值的个数 group_concat()函数:聚合字符串的 #group_concat(列名 separator '指定的分隔符')函数 select department,group_concat(emp_name order by salary separator ';') from emp group
时间日期常见的操作
#时间加减函数
date_add(date,interval int keyword)函数
date_sub(date,interval int keyword) 函数
例子:
SELECT DATE_ADD('2022-01-01', INTERVAL 2 YEAR);
select date_sub(current_date,interval 6 month);
#时间格式化函数
date_format(date,fmt) 函数
例子:
在 MySQL 中,DATE_FORMAT() 函数用于将日期转换为指定格式的字符串。你可以使用不同的格式代码来定义想要显示的日期格式。以下是一些常见的格式代码:
%Y:四位数的年份(例如:2022);
%y:两位数的年份(例如:22);
%m:两位数的月份(01-12);
%c:没有前导零的月份(1-12);
%d:两位数的日期(01-31);
%H:24小时制的小时数(00-23);
%h:12小时制的小时数(01-12);
%i:两位数的分钟数(00-59);
%s:两位数的秒数(00-59);
%p:AM 或 PM;
%W:星期的英文全名(例如:Sunday);
%a:星期的英文缩写(例如:Sun);
%M:月份的英文全名(例如:January);
%b:月份的英文缩写(例如:Jan)。
以下是 DATE_FORMAT() 函数的一些示例:
SELECT DATE_FORMAT('2022-01-31', '%Y-%m-%d');
-- 输出:2022-01-31
SELECT DATE_FORMAT('2022-01-31', '%d/%m/%Y');
-- 输出:31/01/2022
SELECT DATE_FORMAT('2022-01-31 15:30:45', '%H:%i:%s');
-- 输出:15:30:45
SELECT DATE_FORMAT('2022-01-31 15:30:45', '%h:%i:%s %p');
-- 输出:03:30:45 PM
#时间差函数
date_diff函数
例子:
select datediff(current_date(),'2008-08-08')
select time_diff(timestamp(),'2008-08-08 10:18:56')
TIMESTAMPDIFF(unit, datetime_expr1, datetime_expr2)函数
用于计算两个时间段之差
例子:
unit:要计算的时间单位,可以是YEAR、QUARTER、MONTH、WEEK、DAY、HOUR、MINUTE或SECOND。
datetime_expr1:第一个日期或时间表达式。
datetime_expr2:第二个日期或时间表达式。
以下是一些使用TIMESTAMPDIFF函数的例子:
计算两个日期之间的天数差异:
SELECT TIMESTAMPDIFF(DAY, '2022-01-01', '2022-01-10');
结果为:9,表示两个日期之间相差9天。
#其他格式转为日期格式函数
例子:
str_to_date(string,format)函数将字符串的函数转换为指定日期格式
select str_to_date('August 10 2017','%M%d%Y')
UNIX_TIMESTAMP() 函数: 该函数将一个日期时间表示的字符串转换为 UNIX 时间戳。
示例:
SELECT UNIX_TIMESTAMP('2024-02-27 12:34:56') AS unix_timestamp;
#在找我们的最大间隔数
select uid, max(days_diff) as days_window from( select tc.uid,tc.start_time,tc.start_time_lag,(datediff(tc.start_time,tc.start_time_lag)+1) as days_diff,tc.ranking from ( SELECT uid, DATE_FORMAT(start_time, '%Y-%m-%d') AS start_time, LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag, ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking FROM exam_record where year(start_time) =2021 ) tc where tc.ranking !=1 and uid in (select uid from exam_record where year(start_time) =2021 group by uid having count(distinct date_format(start_time,'%Y-%m-%d'))>1) and year(tc.start_time) =2021 ) tc1 group by uid
这块主要用到了窗口函数lag()和row_number()函数
LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag,
ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking
#序号函数
window_function ( expr ) OVER (
PARTITION BY ...
ORDER BY ...
frame_clause
)
PARTITION BY类似于group by,不加就是正常的全局排序
ORDER BY 就是对分组进行排序
frame_clause窗口大小
例子
#序号函数,row_number() 按照顺序排序 | 唯一标识
rank() 同名排名相同,但是遇见同名会出现跳一位之后在进行排序,1 1 3 |
dense_rank() 同名排名相同,且连续 1 1 2
select tid, uid,ranking
from
(
select b.uid,a.tag tid,
row_number() over (partition by a.tag order by max(b.score) desc,min(b.score) desc,uid desc) as ranking
from examination_info a
left join exam_record b
on a.exam_id=b.exam_id
group by a.tag,b.uid
) tc
where ranking<=3;
#分布函数
#cume_dist()函数,小于等于当前行的比例,用于计算所占比例的。
select
dname,
ename
salary,
cume_dist() over(order by salary) as rn1,-- 没有partition语句 所有的数据位于一组
cume_dist() over(partition by dname order by salary) as rn2
from employee;
#前后函数
lead()函数的用法(提前一期), lag(滞后一期)函数的用法
-- lead的用法是提前一期的意思,lag表示滞后一期的意思
select
dname,
ename,
hiredate,
salary,
lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time,
lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time,
lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time
from employee;
#头尾函数
返回截至当前行,开头的第一个first_value(expr)的值;返回最后一个值last_value(expr)的值
select
dname,
ename,
hiredate,
salary,
lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time,
lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time,
lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time,
first_value(salary) over(partition by dname order by hiredate) as first_1_salary,
last_value(salary) over(partition by dname order by hiredate) as last_1_salary
from employee;
#分组聚合函数
#分组按照时间或者序号累加效果
select dname,ename,salary,sum(salary) over (partition by dname order by hiredate desc) as cn from employee;
或者
SELECT dname, ename, salary, SUM(salary) OVER (
PARTITION BY dname
ORDER BY hiredate
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cn
FROM employee;
#3 PRECEDING表示当前行的前第几行加和
#1 following 表示当前行的后第几行加和
# UNBOUNDED 表示边缘,UNBOUNDED PRECEDING第一行,UNBOUNDED following 最后一行,实现任意区间段的统计
#不分组直接汇总
select dname,ename,salary,sum(salary) over (partition by dname ) as cn from employee;
#其他聚合函数
nth_value(expr,n)函数:指定分组列或者表达式的第几个数据
ntile(2)函数:为指定分组列进行平均分组
select
dname,
ename,
hiredate,
salary,
nth_value(salary,2) over(partition by dname order by hiredate) as last_1_time,
ntile(2) over(partition by dname order by hiredate) as nt
from employee;
#最后把他们通过left join 进行连接即可。