题解 | #牛客网连续练习题目3天及以上的用户#
牛客网连续练习题目3天及以上的用户
https://www.nowcoder.com/practice/4d77709a0ea2482282ee86852fe32f06
# Users who practised on 3 or more consecutive days in December 2021.
#
# Reads 'nowcoder.csv' (columns used: 'user_id', 'date') and prints, for every
# qualifying user, the length in days of that user's longest run of
# consecutive practice days within the period.

import datetime

import pandas as pd

# Reporting window (inclusive on both ends) and minimum qualifying run length.
PERIOD_START = datetime.date(2021, 12, 1)
PERIOD_END = datetime.date(2021, 12, 31)
MIN_STREAK_DAYS = 3


def longest_streaks(records, start=PERIOD_START, end=PERIOD_END,
                    min_days=MIN_STREAK_DAYS):
    """Return each user's longest run of consecutive practice days.

    Args:
        records: DataFrame with a 'user_id' column and a 'date' column whose
            values can be parsed by ``pandas.to_datetime``. Several rows per
            user per day are allowed; they count as a single practice day.
        start: First calendar day (inclusive) of the window to consider.
        end: Last calendar day (inclusive) of the window to consider.
        min_days: Minimum run length a user needs to be included.

    Returns:
        A Series named 'date', indexed by 'user_id', holding the length in
        days of the user's longest consecutive run inside the window. Users
        whose longest run is shorter than ``min_days`` are omitted.
    """
    days = records[['user_id', 'date']].copy()

    # Truncate timestamps to whole days *before* de-duplicating; otherwise two
    # sessions on the same day survive as separate rows and inflate counts.
    days['date'] = pd.to_datetime(days['date']).dt.normalize()
    days = days.drop_duplicates(['user_id', 'date'])

    in_period = (
        (days['date'] >= pd.Timestamp(start))
        & (days['date'] <= pd.Timestamp(end))
    )
    days = days[in_period].sort_values(['user_id', 'date'])

    # For one user's sorted distinct days, (date - position) is constant
    # exactly while the days are consecutive, so it labels each streak.
    position = days.groupby('user_id').cumcount()
    days['streak_anchor'] = days['date'] - pd.to_timedelta(position, unit='D')

    streak_lengths = days.groupby(['user_id', 'streak_anchor'])['date'].count()
    longest = streak_lengths.groupby(level='user_id').max()
    return longest[longest >= min_days]


def main():
    """Load the practice log from 'nowcoder.csv' and print the result."""
    records = pd.read_csv('nowcoder.csv')
    print(longest_streaks(records))


if __name__ == '__main__':
    main()