题解 | #筛选某店铺最有价值用户中消费最多前5名#
筛选某店铺最有价值用户中消费最多前5名
https://www.nowcoder.com/practice/58655010a7c34e9fb2b7b491c3f79ca4
import pandas as pd

# Columns shown in both printed reports.
SHOW_COLUMNS = ['user_id', 'recency', 'frequency', 'monetary', 'RFMClass']


def score_feature(series: pd.Series, reverse: bool = False) -> list:
    """Score every value of *series* from 1 to 4 by quartile.

    Args:
        series: Numeric pandas Series to score.
        reverse: When False (default) smaller values score higher
            (value <= Q1 scores 4). When True larger values score higher
            (value >= Q3 scores 4).

    Returns:
        A list of integer scores (1-4), one per element of *series*,
        in the same order.
    """
    # Series.quantile gives the quartile cut points used as thresholds.
    q1 = series.quantile(0.25)
    q2 = series.quantile(0.50)
    q3 = series.quantile(0.75)

    if not reverse:
        return [
            4 if x <= q1 else 3 if x <= q2 else 2 if x <= q3 else 1
            for x in series
        ]
    return [
        4 if x >= q3 else 3 if x >= q2 else 2 if x >= q1 else 1
        for x in series
    ]


def add_rfm_class(df: pd.DataFrame) -> pd.DataFrame:
    """Add quartile score columns and the combined ``RFMClass`` column.

    Reads the ``recency``, ``frequency`` and ``monetary`` columns and
    writes ``R_Quartile``, ``F_Quartile``, ``M_Quartile`` and ``RFMClass``
    into *df* in place. The same DataFrame is returned for convenience.
    """
    # Recency is scored with the default direction (smaller is better);
    # frequency and monetary are scored reversed (larger is better).
    df['R_Quartile'] = score_feature(df['recency'])
    df['F_Quartile'] = score_feature(df['frequency'], reverse=True)
    df['M_Quartile'] = score_feature(df['monetary'], reverse=True)
    # Concatenate the three digits into a class label such as '444'.
    df['RFMClass'] = (
        df['R_Quartile'].astype(str)
        + df['F_Quartile'].astype(str)
        + df['M_Quartile'].astype(str)
    )
    return df


def top_valuable_users(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
    """Return the *top_n* rows with ``RFMClass == '444'`` by ``monetary``.

    The rows are sorted by ``monetary`` in descending order. *df* itself
    is not modified.
    """
    # Must be a comparison (==): a plain `=` here would overwrite the whole
    # RFMClass column instead of building a boolean mask.
    is_most_valuable = df['RFMClass'] == '444'
    ranked = df[is_most_valuable].sort_values(by='monetary', ascending=False)
    return ranked.head(top_n)


def main() -> None:
    """Read ``sales.csv``, print the first scored rows and the top '444' users."""
    df = add_rfm_class(pd.read_csv('sales.csv'))
    print(df[SHOW_COLUMNS].head())
    # Print the filtered/sorted result, not the full frame again.
    print(top_valuable_users(df)[SHOW_COLUMNS])


if __name__ == "__main__":
    main()