2019独角兽企业重金招聘Python工程师标准>>>

留存用户:前面的 blog 在介绍指标时已经说明,前一个周期新增的用户,如果在当前周期仍然活跃,则称为留存用户。

本文给出三种实现:第一种用 .NET + SQL 处理数据,第二种利用 Hadoop 平台(MapReduce)处理数据,第三种基于 Hive 实现。

.net + sql

                    #region Build the table of original (cohort) periods
                    // Payload for the @dt table-valued parameter: one row per earlier period,
                    // with that period's start and end dates stored as yyyyMMdd integers.
                    DataTable originalDates = new DataTable();
                    DataColumn col = new DataColumn("OriginalStartDate", typeof(Int32));
                    originalDates.Columns.Add(col);
                    col = new DataColumn("OriginalEndDate", typeof(Int32));
                    originalDates.Columns.Add(col);

                    // Length of the current period in days; not referenced again in this excerpt.
                    int step = endDate.Subtract(startDate).Days + 1;

                    for (int i = 1; i <= 30; i++)
                    {
                        // Daily statistics look back 30 periods; every other period type stops after 6.
                        if (period != PeriodOptions.Daily && i > 6) break;

                        DataRow newRow = originalDates.NewRow();
                        DateTime pStartDate;
                        // NOTE(review): presumably returns the end date of the period i steps back
                        // and outputs its start date - confirm against Utility.GetNextStatDate.
                        DateTime pEndDate = Utility.GetNextStatDate(period, startDate, endDate, -i, out pStartDate);
                        newRow["OriginalStartDate"] = int.Parse(pStartDate.ToString("yyyyMMdd"));
                        newRow["OriginalEndDate"] = int.Parse(pEndDate.ToString("yyyyMMdd"));
                        originalDates.Rows.Add(newRow);
                    }
                    #endregion

                    // Run the aggregation: the stored procedure receives the period table plus
                    // the current period's bounds and the period type.
                    SqlParameter[] paramters = new SqlParameter[]
                    {
                        SqlParamHelper.MakeInParam("@dt", SqlDbType.Structured),
                        SqlParamHelper.MakeInParam("@StartDate", SqlDbType.Int, 4, startDate.ToString("yyyyMMdd")),
                        SqlParamHelper.MakeInParam("@EndDate", SqlDbType.Int, 4, endDate.ToString("yyyyMMdd")),
                        SqlParamHelper.MakeInParam("@Period", SqlDbType.TinyInt, 1, (int)period)
                    };
                    // A structured parameter must name its user-defined table type.
                    paramters[0].TypeName = "dbo.OriginalDatesType";
                    paramters[0].Value = originalDates;
                    DataSet ds = SqlHelper.ExecuteDataset(ComputingDB_ConnString, CommandType.StoredProcedure, "PR_StatRetainedUsers", paramters);
-- Counts retained users for the period [@StartDate, @EndDate].
--
-- Parameters:
--   @dt        one row per original (cohort) period: OriginalStartDate / OriginalEndDate
--   @StartDate first day of the current period, as a yyyyMMdd integer
--   @EndDate   last day of the current period, as a yyyyMMdd integer
--   @Period    period type; 1 takes the single-day path, any other value the ranged path
--
-- Returns two result sets built from the same intermediate counts:
--   1. per SoftID / Platform / original period            (ID2 = -1)
--   2. per SoftID / Platform / ChannelID / original period (ID2 = ChannelID)
--
-- NOTE(review): the login table name prefix is elided ('...' / '....') in this
-- listing; only the yyyy suffix derived from the date parameters is visible.
-- The elided prefix must be restored before this procedure can run.
ALTER PROCEDURE [dbo].[PR_StatRetainedUsers] (
    @dt OriginalDatesType readonly,
    @StartDate int,
    @EndDate int,
    @Period tinyint
)
AS
begin
    create table #RetainedUsers (
        SoftID int,
        Platform tinyint,
        ChannelID int,
        OriginalStatDate int,
        RetainedUserCount int
    );

    declare @sql nvarchar(max);

    if (@Period = 1)
    begin
        -- Single-day period: one pass over the yearly table selected by @StartDate.
        -- The table suffix is derived from an int parameter, so the concatenation
        -- cannot carry arbitrary text; every other value is bound as a parameter.
        set @sql = N'insert into #RetainedUsers(SoftID,Platform,ChannelID,OriginalStatDate,RetainedUserCount)
select A.SoftID,A.PLATFORM,A.FirstChannelID,B.OriginalEndDate,COUNT(distinct A.IMEI)
from ...' + CAST((@StartDate / 10000) as nvarchar(10)) + N' A with(nolock)
inner join @dt B
on A.LoginDate=@StartDate and A.FirstLoginDate between B.OriginalStartDate and B.OriginalEndDate and (FromCache=0 or FromCache is null)
group by A.SoftID,A.PLATFORM,A.FirstChannelID,B.OriginalEndDate;';

        exec sp_executesql @sql, N'@StartDate int,@dt OriginalDatesType readonly', @StartDate, @dt;
    end
    else
    begin
        if (@StartDate / 10000 = @EndDate / 10000)
        begin
            -- The whole period lies in one calendar year: a single yearly table.
            set @sql = N'insert into #RetainedUsers(SoftID,Platform,ChannelID,OriginalStatDate,RetainedUserCount)
select A.SoftID,A.PLATFORM,A.FirstChannelID,B.OriginalEndDate,COUNT(distinct A.IMEI)
from ....' + CAST((@StartDate / 10000) as nvarchar(10)) + N' A with(nolock)
inner join @dt B
on A.Part=@part and A.LoginDate between @StartDate and @EndDate and A.FirstLoginDate between B.OriginalStartDate and B.OriginalEndDate and (FromCache=0 or FromCache is null)
group by A.SoftID,A.PLATFORM,A.FirstChannelID,B.OriginalEndDate;';
        end
        else
        begin
            -- The period spans a year boundary: read the start-year and end-year tables.
            set @sql = N'insert into #RetainedUsers(SoftID,Platform,ChannelID,OriginalStatDate,RetainedUserCount)
select A.SoftID,A.PLATFORM,A.FirstChannelID,B.OriginalEndDate,COUNT(distinct A.IMEI)
from (
select * from ....' + CAST((@StartDate / 10000) as nvarchar(10)) + N' with(nolock) where Part=@part and LoginDate between @StartDate and @EndDate and (FromCache=0 or FromCache is null)
union all
select * from ....' + CAST((@EndDate / 10000) as nvarchar(10)) + N' with(nolock) where Part=@part and LoginDate between @StartDate and @EndDate and (FromCache=0 or FromCache is null)
) A
inner join @dt B
on A.FirstLoginDate between B.OriginalStartDate and B.OriginalEndDate
group by A.SoftID,A.PLATFORM,A.FirstChannelID,B.OriginalEndDate;';
        end

        -- Run the statement once per Part value 0..127; each run appends its
        -- counts to #RetainedUsers and the final selects sum them.
        -- NOTE(review): summing per-Part distinct counts is only exact if one IMEI
        -- never appears under two Part values - confirm against the table design.
        declare @part tinyint = 0;
        while @part < 128
        begin
            exec sp_executesql @sql,
                N'@part tinyint,@StartDate int,@EndDate int,@dt OriginalDatesType readonly',
                @part, @StartDate, @EndDate, @dt;
            set @part = @part + 1;
        end
    end

    -- Result set 1: totals across all channels.
    select
        @Period as Period,
        OriginalStatDate,
        @EndDate as StatDate,
        SoftID,
        Platform,
        -1 as ID2,
        -1 as ID1,
        0 as OriginalNewUserCount,
        SUM(RetainedUserCount) as RetainedUserCount
    from #RetainedUsers
    group by SoftID, Platform, OriginalStatDate;

    -- Result set 2: totals per first channel.
    select
        @Period as Period,
        OriginalStatDate,
        @EndDate as StatDate,
        SoftID,
        Platform,
        ChannelID as ID2,
        -1 as ID1,
        0 as OriginalNewUserCount,
        SUM(RetainedUserCount) as RetainedUserCount
    from #RetainedUsers
    group by SoftID, Platform, ChannelID, OriginalStatDate;

    drop table #RetainedUsers;
end

hadoop

@MapConfig
public static class MapTask extends Mapper<LongWritable, Text, Text, Text> {private Text mKey = new Text();private Text mValue = new Text();private StringBuilder sb = new StringBuilder();private Map<String,Integer> map=new HashMap<String,Integer>();@Overrideprotected void setup(Context context) throws IOException, InterruptedException {String enddate =context.getConfiguration().get("key_enddate");int period =Integer.parseInt(context.getConfiguration().get("period"));int step=Integer.parseInt(context.getConfiguration().get("step"));DateTime curstatdate=DateTime.parseToDateTime(enddate,"yyyyMMdd");//设置要计算留存的时间if (period==PeriodOptions.GetValueByEnum(PeriodOptions.Daily)){for (DateTime startdate=curstatdate.addDays(-30);new Double(DateTime.minusDay(curstatdate,startdate)).intValue()>0;startdate=startdate.addDays(1)){Integer tmp=Integer.parseInt(startdate.toString("yyyyMMdd"));map.put(startdate.toString("yyyyMMdd"),tmp);}}else if(period==PeriodOptions.GetValueByEnum(PeriodOptions.Weekly)){for (DateTime startdate=curstatdate.addDays(-48);DateTime.minusDay(curstatdate,startdate)>0;startdate=startdate.addDays(1)){if (WeekOptions.GetEnumByValue(startdate.getDayOfWeek())== WeekOptions.SUNDAY){Integer tmp=Integer.parseInt(startdate.toString("yyyyMMdd"));for (DateTime substartdate=startdate.addDays(-step);new Double(DateTime.minusDay(startdate,substartdate)).intValue()>=0;substartdate=substartdate.addDays(1)){map.put(substartdate.toString("yyyyMMdd"),tmp);}}}}else if(period==PeriodOptions.GetValueByEnum(PeriodOptions.NaturalMonth)){for (DateTime startdate=curstatdate.addMonths(-7).addDays(1);DateTime.minusDay(curstatdate,startdate)>0;startdate=startdate.addDays(1)){if (startdate.addDays(1).day()==1){Integer tmp=Integer.parseInt(startdate.toString("yyyyMMdd"));for (DateTime substartdate=startdate.addMonths(-1).addDays(1);new 
Double(DateTime.minusDay(startdate,substartdate)).intValue()>0;substartdate=substartdate.addDays(1)){map.put(substartdate.toString("yyyyMMdd"),tmp);}}}}}@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String str = value.toString();String[] params = StringUtils.splitByWholeSeparatorPreserveAllTokens(str, "\t");if (!map.containsKey(params[23]) || (Integer.parseInt(params[15])&1)!=0 ){return;}int firstlogintime=map.get(params[23]);String enddate =context.getConfiguration().get("key_enddate");sb.delete(0, sb.length());//key<0:softid,2:platform,22 firstchannelid>//value<23 firstlogindate,4:logindate,3:imei>sb.append(params[0]).append("\t").append(params[2]).append("\t").append(params[22]);mKey.set(sb.toString());mValue.set(firstlogintime + "\t" + enddate + "\t" + params[3]);context.write(mKey, mValue);sb.delete(0, sb.length());sb.append(params[0]).append("\t").append(params[2]).append("\t").append(-1);mKey.set(sb.toString());mValue.set(firstlogintime + "\t" + enddate + "\t" + params[3]);context.write(mKey, mValue);}@Overrideprotected void cleanup(Context context) throws IOException, InterruptedException {map.clear();}
}//key<0:softid,2:platform,22 firstchannelid>
//value<23 firstlogindate,4:logindate,3:imei>
@CombineConfig
public static class CombineTask extends Reducer<Text, Text, Text, Text> {Text mvalue=new Text();//留存用户private Multiset<String> multiset=HashMultiset.create();@Overrideprotected void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException {multiset.clear();for (Text item:values){multiset.add(item.toString());}for (String item:multiset.elementSet()){mvalue.set(item);context.write(key,mvalue);}}}//key<0:softid,2:platform,22 firstchannelid>
//value<23 firstlogindate,4:logindate,3:imei>
@ReduceConfig
public static class ReduceTask extends Reducer<Text, Text, Text, Text> {private Text mValue = new Text();//留存用户private Map<String, Multiset<Object>> MRetained = new HashMap<>();@Overrideprotected void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException {for (Map.Entry<String, Multiset<Object>> map : MRetained.entrySet()) {MRetained.get(map.getKey()).clear();}MRetained.clear();for (Text item : values) {String[] params = StringUtils.splitByWholeSeparatorPreserveAllTokens(item.toString(), "\t");String mapkey = params[0] + "\t" + params[1];if (!params[0].equals(params[1])) {if (!MRetained.containsKey(mapkey)) {MRetained.put(mapkey, HashMultiset.create());}MRetained.get(mapkey).add(params[2]);}}String period = context.getConfiguration().get("period");for (Map.Entry<String, Multiset<Object>> map : MRetained.entrySet()) {if (map.getKey().split("\t") == null ) {continue;}mValue.set(period+ "\t" + map.getKey()+ "\t" + 0+ "\t" + map.getValue().elementSet().size());//softid,platform,channelid,period,originaldate,statdate,OriginalNewUserCount,RetainedUserCountcontext.write(key, mValue);}}}

hive

-- Retained-user statistics for one period type, written to partition (p, dt).
--
-- Subquery a: new users per soft_id / platform / channel / soft_version / country
--             and cohort period, read from computing_db.softloginusers.
-- Subquery b: distinct devices per the same dimensions (first_* columns) and cohort
--             period, read from computing_db.softloginlog with from_cache = 0.
-- The cohort period is represented by a yyyyMMdd integer derived from the date column:
--   period = 1   the date itself
--   period = 3   originalenddate2 minus a whole number of 7-day steps
--   period = 12  the last day of the calendar month containing the date
-- The period expression is evaluated once per row in an inner select and reused by
-- the aggregation above it, so SELECT and GROUP BY cannot drift apart.
-- Cohorts with no matching row in b are reported with a retained count of 0.
insert overwrite table temp_db.temp_U_StatRetainedUsers partition(p=${period},dt='${dir}')
select
    a.soft_id,
    a.platform,
    a.channel,
    a.soft_version,
    a.country,
    ${period},
    a.first_login_date,
    ${date},
    a.newusercount,
    COALESCE(b.retainedusercount,0)
from (
    select
        soft_id,
        platform,
        channel,
        soft_version,
        country,
        period_end_date as first_login_date,
        count(1) as newusercount
    from (
        select
            soft_id,
            platform,
            channel,
            soft_version,
            country,
            case
                when ${period}=1 then login_date
                when ${period}=3 then cast(from_unixtime(unix_timestamp(date_sub('${originalenddate2}',cast(floor(datediff('${originalenddate2}',from_unixtime(unix_timestamp(cast(login_date as string),'yyyyMMdd'),'yyyy-MM-dd'))/7)*7 as int)),'yyyy-MM-dd'),'yyyyMMdd') as int)
                when ${period}=12 then cast(from_unixtime(unix_timestamp(date_sub(from_unixtime(unix_timestamp(cast((floor(cast(from_unixtime(unix_timestamp(date_add(from_unixtime(unix_timestamp(cast((floor(login_date/100)*100+1) as string),'yyyyMMdd'),'yyyy-MM-dd'),31),'yyyy-MM-dd'),'yyyyMMdd') as int)/100)*100+1) as string),'yyyyMMdd'),'yyyy-MM-dd'),1),'yyyy-MM-dd'),'yyyyMMdd') as int)
            end as period_end_date
        from computing_db.softloginusers
        where dt>='${datestart2}' and dt<='${dateend2}'
    ) new_users
    group by soft_id, platform, channel, soft_version, country, period_end_date
) a
left outer join (
    select
        soft_id,
        platform,
        first_channel,
        first_soft_version,
        first_country,
        period_end_date as first_login_date,
        count(distinct dev_id) as retainedusercount
    from (
        select
            soft_id,
            platform,
            first_channel,
            first_soft_version,
            first_country,
            dev_id,
            case
                when ${period}=1 then first_login_date
                when ${period}=3 then cast(from_unixtime(unix_timestamp(date_sub('${originalenddate2}',cast(floor(datediff('${originalenddate2}',from_unixtime(unix_timestamp(cast(first_login_date as string),'yyyyMMdd'),'yyyy-MM-dd'))/7)*7 as int)),'yyyy-MM-dd'),'yyyyMMdd') as int)
                when ${period}=12 then cast(from_unixtime(unix_timestamp(date_sub(from_unixtime(unix_timestamp(cast((floor(cast(from_unixtime(unix_timestamp(date_add(from_unixtime(unix_timestamp(cast((floor(first_login_date/100)*100+1) as string),'yyyyMMdd'),'yyyy-MM-dd'),31),'yyyy-MM-dd'),'yyyyMMdd') as int)/100)*100+1) as string),'yyyyMMdd'),'yyyy-MM-dd'),1),'yyyy-MM-dd'),'yyyyMMdd') as int)
            end as period_end_date
        from computing_db.softloginlog
        where dt>='${datestart}' and dt<='${dateend}'
          and first_login_date between ${originalstartdate} and ${originalenddate}
          and from_cache=0
    ) active_users
    group by soft_id, platform, first_channel, first_soft_version, first_country, period_end_date
) b
on a.soft_id=b.soft_id
   and a.platform=b.platform
   and a.channel=b.first_channel
   and a.country=b.first_country
   and a.soft_version=b.first_soft_version
   and a.first_login_date=b.first_login_date;

转载于:https://my.oschina.net/osenlin/blog/523099

移动分发端 基础统计指标经典业务代码节选--留存用户统计相关推荐

  1. js基础代码大全_关于前端业务代码的一些见解

    前言 如何写出可维护和可读性高的代码,这一直是一个困扰很多人的问题.关于变量如何起名.如何优化 if...else 之类的小技巧,这里就不做介绍了,推荐去看<代码大全2>,千书万书,都不如 ...

  2. 分库分表基础知识总结

    为什么要分区,分表和分库? 随着互联网产品在体量和规模上日益膨胀,无论是Oracle还是MySQL,都会第一时间面临来自磁盘.CPU和内存等单机瓶颈,为此,产品方除了需要不断购买成本难以控制的高规格服 ...

  3. 业务代码解构利器--SWAK

    简介 业务的不断发展.商品类型的不断增多.不断添加的业务需求使得闲鱼的代码出现"bad smell"--平台代码和业务代码耦合严重难以分离:业务和业务之间代码交织缺少拆解.这也是行 ...

  4. 图像处理与计算机视觉基础、经典以及最近发展

    图像处理与计算机视觉基础,经典以及最近发展 By xdyang(杨晓冬xdyang.ustc@gmail.com) 一. 绪论 1. 为什么要写这篇文章 从2002年到现在,接触图像快十年了.虽然没有 ...

  5. 业务代码解构利器--SWAK 1

    简介 业务的不断发展.商品类型的不断增多.不断添加的业务需求使得闲鱼的代码出现"bad smell"--平台代码和业务代码耦合严重难以分离:业务和业务之间代码交织缺少拆解.这也是行 ...

  6. 天天写业务代码,如何成为技术大牛

    程序员平时的日常编码工作中,大多数人都只是编写业务代码,各种if else以及数据库操作等.针对于不同的产品去实现功能时,也只是重复性的搬砖工作.此时会有很多人认为天天写业务代码,感觉没有什么长进,也 ...

  7. 大牛养成指南(3):天天写业务代码,如何成为技术大牛?

    2019独角兽企业重金招聘Python工程师标准>>> 几个典型的误区 拜大牛为师 知乎上有人认为想成为技术大牛最简单直接.快速有效的方式是"拜团队技术大牛为师" ...

  8. 朱晔的互联网架构实践心得S2E2:写业务代码最容易掉的8种坑

    我承认,本文的标题有一点标题党,特别是写业务代码,大家因为没有足够重视一些细节最容易调的坑(侧重Java,当然,本文说的这些点很多是不限制于语言的). 1.客户端的使用 我们在使用Redis.Elas ...

  9. 朱晔的互联网架构实践心得S2E1:业务代码究竟难不难写? | 掘金年度征文

    注意,这是我的架构实践心得的第二季的系列文章,第一季有10篇你也可以回顾. 最近我一直在思考几个问题: 业务代码究竟难不难写? 一直开发业务代码是不是完全学不到东西? 5年+开发经验的老程序员的价值在 ...

最新文章

  1. 如何为ccflow工作流引擎增加一个优先级PRI?
  2. 学长毕业日记 :本科毕业论文写成博士论文的神操作20170328
  3. Bicolored RBS
  4. WLAN产品形态之分层架构
  5. 安卓入门系列-07常用UI控件(长文)
  6. 解除服务器端口号占用及服务器端口号的修改
  7. java定义js函数_JS中可以先使用函数,然后再定义.
  8. linux系统报警怎么办,常见Linux系统故障和解决方法
  9. python contextlib
  10. 关于Java里try/catch/finally/有return时执行过程
  11. 区块链如何赋能车联网-Higgs Chain
  12. 陕西神木市一煤矿发生冒顶事故 约20人被困井下
  13. 图像语义分割(7)-PSPNet:金字塔型场景解析网络
  14. 《Python》 字典
  15. 高阶技术管理岗空降落地实践指南
  16. Unity3d 人物跳跃后落地悬空问题
  17. 将全部视频画面水平或者垂直翻转的实例教程分享
  18. 芴基噁二唑铱配合物|阳离子型铱配合物Ir(F_2ppy)_2(Br_2bpy)+PF-6
  19. 寒武纪芯片创始人:要让AI芯片计算效率提高一万倍
  20. Maplab系列15:Inverted File

热门文章

  1. 记一次订单号重复的事故,快看看你的 uuid 在并发下还正确吗?
  2. RabbitMQ 简介以及使用场景
  3. 【收藏】万字综述,核心开发者全面解读PyTorch内部机制
  4. Jupyter官方神器:可视化 Debug 工具!
  5. 告别痛苦,快乐学习Pandas!开源教程《Joyful-Pandas》发布
  6. PyTorch 深度剖析:如何保存和加载PyTorch模型?
  7. 14岁清华本科新生免试直博!高校新生大数据陆续曝光,男女比例、年龄差距各异......
  8. 有bug!PyTorch在AMD CPU的计算机上卡死了
  9. AI自动评审论文,CMU这个工具可行吗?
  10. 一边动,一边画,自己就变二次元!华人小哥参与的黑科技:实时交互式视频风格化...