题解 | #连续两次作答试卷的最大时间窗#

连续两次作答试卷的最大时间窗

https://www.nowcoder.com/practice/9dcc0eebb8394e79ada1d4d4e979d73c

select
    tc2.uid,
    tc2.days_window,
    round((ta.avg_exam * tc2.days_window), 2) as avg_exam_cnt
from
    (
        select
            uid,
            max(days_diff) as days_window
        from
            (
                select
                    tc.uid,
                    tc.start_time,
                    tc.start_time_lag,
                    (datediff (tc.start_time, tc.start_time_lag) + 1) as days_diff,
                    tc.ranking
                from
                    (
                        SELECT
                            uid,
                            DATE_FORMAT (start_time, '%Y-%m-%d') AS start_time,
                            LAG (DATE_FORMAT (start_time, '%Y-%m-%d'), 1) OVER (
                                partition by
                                    uid
                                ORDER BY
                                    uid,
                                    start_time
                            ) AS start_time_lag,
                            ROW_NUMBER() OVER (
                                partition by
                                    uid
                                ORDER BY
                                    uid,
                                    start_time
                            ) AS ranking
                        FROM
                            exam_record
                        where
                            year (start_time) = 2021
                    ) tc
                where
                    tc.ranking != 1
                    and uid in (
                        select
                            uid
                        from
                            exam_record
                        where
                            year (start_time) = 2021
                        group by
                            uid
                        having
                            count(distinct date_format (start_time, '%Y-%m-%d')) > 1
                    )
                    and year (tc.start_time) = 2021
            ) tc1
        group by
            uid
    ) tc2
    left join (
        select
            uid,
            count(exam_id) / (
                datediff (
                    max(DATE_FORMAT (start_time, '%Y-%m-%d')),
                    min(DATE_FORMAT (start_time, '%Y-%m-%d'))
                ) + 1
            ) as avg_exam
        from
            exam_record
        where
            year (start_time) = 2021
        group by
            uid
    ) ta on tc2.uid = ta.uid
order by
    days_window desc,
    avg_exam_cnt desc;

题目解析：

#01先找出满足条件的uid

select uid
from exam_record 
where year(start_time) =2021
group by uid
having count(distinct date_format(start_time,'%Y-%m-%d'))>1;

用到了year()等日期函数

获取指定时间部分合集

from_unixtime(ts,fmt) 根据指定的fmt格式，格式化unix时间戳ts

last_day(date) 给定日期所在月份的最后一天

hour(time) 返回time的小时值(0~23)

minute(time) 返回time的分钟值(0~59)

quarter(date) 返回date在一年中的季度(1~4)，如select quarter(current_date);

month(date) 返回date的月份值(1~12)

week(date) 返回日期date为一年中第几周(0~53)

year(date) 返回日期date的年份(1000~9999)

monthname(date) 返回date的月份名，如：select monthname(current_date);

extract（）函数

select extract(year_month from current_date);
select extract(day_second from current_date);
select extract(hour_minute from current_date);

#02之后我们需要计算试卷比例

select uid,
count(exam_id)/
(datediff(max(DATE_FORMAT(start_time, '%Y-%m-%d')),min(DATE_FORMAT(start_time, '%Y-%m-%d')))+1)  as avg_exam_cnt
from exam_record
group by uid

用到了聚合函数count(),max(),min(),日期差函数datediff(大日期-小日期）

聚合函数

avg(col)返回指定列的平均值

min(col)返回指定列的最小值

max(col)返回指定列的最大值

sum(col)返回指定列的所有值之和

count(col)返回指定列中非null值的个数

group_concat()函数：聚合字符串的

#group_concat(列名 separator '指定的分隔符')函数
select department,group_concat(emp_name order by salary separator ';') from emp group

时间日期常见的操作

#时间加减函数
date_add(date,interval int keyword)函数
date_sub(date,interval int keyword) 函数
例子：
SELECT DATE_ADD('2022-01-01', INTERVAL 2 YEAR);
select date_sub(current_date,interval 6 month);




#时间格式化函数
date_format(date,fmt) 函数
例子：
在 MySQL 中，DATE_FORMAT() 函数用于将日期转换为指定格式的字符串。你可以使用不同的格式代码来定义想要显示的日期格式。以下是一些常见的格式代码：

%Y：四位数的年份（例如：2022）；
%y：两位数的年份（例如：22）；
%m：两位数的月份（01-12）；
%c：没有前导零的月份（1-12）；
%d：两位数的日期（01-31）；
%H：24小时制的小时数（00-23）；
%h：12小时制的小时数（01-12）；
%i：两位数的分钟数（00-59）；
%s：两位数的秒数（00-59）；
%p：AM 或 PM；
%W：星期的英文全名（例如：Sunday）；
%a：星期的英文缩写（例如：Sun）；
%M：月份的英文全名（例如：January）；
%b：月份的英文缩写（例如：Jan）。
以下是 DATE_FORMAT() 函数的一些示例：

SELECT DATE_FORMAT('2022-01-31', '%Y-%m-%d');
-- 输出：2022-01-31

SELECT DATE_FORMAT('2022-01-31', '%d/%m/%Y');
-- 输出：31/01/2022

SELECT DATE_FORMAT('2022-01-31 15:30:45', '%H:%i:%s');
-- 输出：15:30:45

SELECT DATE_FORMAT('2022-01-31 15:30:45', '%h:%i:%s %p');
-- 输出：03:30:45 PM




#时间差函数
date_diff函数
例子：
select datediff(current_date(),'2008-08-08')
select time_diff(timestamp(),'2008-08-08 10:18:56')

TIMESTAMPDIFF(unit, datetime_expr1, datetime_expr2)函数
用于计算两个时间段之差
例子：
unit：要计算的时间单位，可以是YEAR、QUARTER、MONTH、WEEK、DAY、HOUR、MINUTE或SECOND。
datetime_expr1：第一个日期或时间表达式。
datetime_expr2：第二个日期或时间表达式。
以下是一些使用TIMESTAMPDIFF函数的例子：
计算两个日期之间的天数差异：
SELECT TIMESTAMPDIFF(DAY, '2022-01-01', '2022-01-10');
结果为：9，表示两个日期之间相差9天。




#其他格式转为日期格式函数
例子：
str_to_date(string,format)函数将字符串的函数转换为指定日期格式
select str_to_date('August 10 2017','%M%d%Y')

UNIX_TIMESTAMP() 函数： 该函数将一个日期时间表示的字符串转换为 UNIX 时间戳。
示例：
SELECT UNIX_TIMESTAMP('2024-02-27 12:34:56') AS unix_timestamp;

#在找我们的最大间隔数

			select 
				uid,
				max(days_diff) as days_window	
			from(
						select 
							tc.uid,tc.start_time,tc.start_time_lag,(datediff(tc.start_time,tc.start_time_lag)+1) as days_diff,tc.ranking
						from 
						(
							SELECT
									uid,
									DATE_FORMAT(start_time, '%Y-%m-%d') AS start_time,
									LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag,
									ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking
							FROM
									exam_record
							where year(start_time) =2021
									) tc
						where tc.ranking !=1
				  and uid in 
					(select uid
from exam_record 
where year(start_time) =2021
group by uid
having count(distinct date_format(start_time,'%Y-%m-%d'))>1)
and year(tc.start_time) =2021
				  ) tc1
				group by uid

这块主要用到了窗口函数lag()和row_number()函数

LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag,

ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking

#序号函数
window_function ( expr ) OVER ( 
  PARTITION BY ... 
  ORDER BY ... 
  frame_clause 
)
PARTITION BY类似于group by，不加就是正常的全局排序
ORDER BY 就是对分组进行排序
frame_clause窗口大小
例子
#序号函数，row_number() 按照顺序排序 | 唯一标识
rank() 同名排名相同,但是遇见同名会出现跳一位之后在进行排序，1 1  3 | 
dense_rank() 同名排名相同，且连续 1 1 2
select tid,	uid,ranking
from
(
    select b.uid,a.tag tid,
    row_number() over (partition by a.tag order by max(b.score) desc,min(b.score) desc,uid desc) as ranking
    from examination_info a
    left join exam_record b
    on a.exam_id=b.exam_id
    group by a.tag,b.uid
) tc
where ranking<=3;




#分布函数
#cume_dist()函数，小于等于当前行的比例，用于计算所占比例的。
select  
 dname,
 ename
 salary,
 cume_dist() over(order by salary) as rn1,-- 没有partition语句 所有的数据位于一组
 cume_dist() over(partition by dname order by salary) as rn2 
from employee;




#前后函数
lead()函数的用法（提前一期）, lag（滞后一期）函数的用法

-- lead的用法是提前一期的意思,lag表示滞后一期的意思
select 
 dname,
 ename,
 hiredate,
 salary,
  lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time,
 lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time,
 lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time 
from employee;




#头尾函数
返回截至当前行，开头的第一个first_value(expr)的值；返回最后一个值last_value(expr)的值

select 
 dname,
 ename,
 hiredate,
 salary,
  lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time,
 lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time,
 lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time,
 first_value(salary) over(partition by dname order by hiredate) as first_1_salary,
 last_value(salary) over(partition by dname order by hiredate) as last_1_salary
from employee;




#分组聚合函数
#分组按照时间或者序号累加效果
select dname,ename,salary,sum(salary) over (partition by dname order by hiredate desc) as cn from employee;
或者
SELECT dname, ename, salary, SUM(salary) OVER (
  PARTITION BY dname
  ORDER BY hiredate
  ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cn
FROM employee;

#3 PRECEDING表示当前行的前第几行加和
#1 following 表示当前行的后第几行加和
# UNBOUNDED 表示边缘，UNBOUNDED PRECEDING第一行，UNBOUNDED  following 最后一行，实现任意区间段的统计

#不分组直接汇总
select dname,ename,salary,sum(salary) over (partition by dname ) as cn from employee;




#其他聚合函数
nth_value(expr,n)函数：指定分组列或者表达式的第几个数据

ntile(2)函数：为指定分组列进行平均分组

select 
 dname,
 ename,
 hiredate,
 salary,
  nth_value(salary,2) over(partition by dname order by hiredate) as last_1_time,
  ntile(2) over(partition by dname order by hiredate) as nt
from employee;

#最后把他们通过left join 进行连接即可。