一、dwt 层介绍

dws 层按天汇总各主题对象的当日行为数据,并作为 dwt 层的数据来源;dwt 层在 dws 层的基础上,统计各主题对象从起始至今的累积数据(以及最近一段时间,例如最近 30 日的累积数据)。

二、数据表

2.1 设备主题宽表 dwt_uv_topic

1、数据来源
dwt_uv_topic、dws_uv_detail_daycount。

2、建表

-- Device-topic wide table: an unpartitioned external Parquet table (LZO-compressed).
-- Carries the device attributes plus first/last active date, the active count of
-- the current day and the accumulated number of active days.
DROP TABLE IF EXISTS dwt_uv_topic;
CREATE EXTERNAL TABLE dwt_uv_topic
(
    `mid_id`           STRING COMMENT '设备唯一标识',
    `user_id`          STRING COMMENT '用户标识',
    `version_code`     STRING COMMENT '程序版本号',
    `version_name`     STRING COMMENT '程序版本名',
    `lang`             STRING COMMENT '系统语言',
    `source`           STRING COMMENT '渠道号',
    `os`               STRING COMMENT '安卓系统版本',
    `area`             STRING COMMENT '区域',
    `model`            STRING COMMENT '手机型号',
    `brand`            STRING COMMENT '手机品牌',
    `sdk_version`      STRING COMMENT 'sdkVersion',
    `gmail`            STRING COMMENT 'gmail',
    `height_width`     STRING COMMENT '屏幕宽高',
    `app_time`         STRING COMMENT '客户端日志产生时的时间',
    `network`          STRING COMMENT '网络模式',
    `lng`              STRING COMMENT '经度',
    `lat`              STRING COMMENT '纬度',
    `login_date_first` STRING COMMENT '首次活跃时间',
    `login_date_last`  STRING COMMENT '末次活跃时间',
    `login_day_count`  BIGINT COMMENT '当日活跃次数',
    `login_count`      BIGINT COMMENT '累积活跃天数'
)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_uv_topic'
TBLPROPERTIES ("parquet.compression"="lzo");

3、加载数据

-- Rebuild dwt_uv_topic by full-outer-joining its current contents (hist) with the
-- 2020-03-10 partition of dws_uv_detail_daycount (today), keyed on mid_id.
-- Device attributes prefer today's values and fall back to the stored ones.
INSERT OVERWRITE TABLE dwt_uv_topic
SELECT
    nvl(today.mid_id,       hist.mid_id),
    nvl(today.user_id,      hist.user_id),
    nvl(today.version_code, hist.version_code),
    nvl(today.version_name, hist.version_name),
    nvl(today.lang,         hist.lang),
    nvl(today.source,       hist.source),
    nvl(today.os,           hist.os),
    nvl(today.area,         hist.area),
    nvl(today.model,        hist.model),
    nvl(today.brand,        hist.brand),
    nvl(today.sdk_version,  hist.sdk_version),
    nvl(today.gmail,        hist.gmail),
    nvl(today.height_width, hist.height_width),
    nvl(today.app_time,     hist.app_time),
    nvl(today.network,      hist.network),
    nvl(today.lng,          hist.lng),
    nvl(today.lat,          hist.lat),
    -- first active date: a device absent from the stored table is new today
    CASE WHEN hist.mid_id IS NULL THEN '2020-03-10' ELSE hist.login_date_first END,
    -- last active date: moves to today whenever the device shows up in today's slice
    CASE WHEN today.mid_id IS NOT NULL THEN '2020-03-10' ELSE hist.login_date_last END,
    -- active count of the current day (0 when the device was not seen today)
    CASE WHEN today.mid_id IS NOT NULL THEN today.login_count ELSE 0 END,
    -- accumulated active days: +1 only when the device was active today
    nvl(hist.login_count, 0) + CASE WHEN today.login_count > 0 THEN 1 ELSE 0 END
FROM dwt_uv_topic hist
FULL OUTER JOIN
(
    SELECT *
    FROM dws_uv_detail_daycount
    WHERE dt = '2020-03-10'
) today
    ON hist.mid_id = today.mid_id;

2.2 会员主题宽表 dwt_user_topic

1、数据来源
dwt_user_topic、dws_user_action_daycount。

2、创建表

-- User-topic wide table: an unpartitioned external Parquet table (LZO-compressed).
-- Type fixes compared with the previous definition:
--   * order_last_30d_amount was bigint, which drops the fractional part of a
--     monetary sum; it is now decimal(16,2) like the other amount columns.
--   * payment_count / payment_last_30d_count were decimal(16,2) although they hold
--     counts; they are now bigint like the other count columns.
-- NOTE(review): if Parquet files written with the old column types already exist
-- under the table location, reload the table after recreating it.
drop table if exists dwt_user_topic;
create external table dwt_user_topic
(
    user_id                 string         comment '用户id',
    login_date_first        string         comment '首次登录时间',
    login_date_last         string         comment '末次登录时间',
    login_count             bigint         comment '累积登录天数',
    login_last_30d_count    bigint         comment '最近30日登录天数',
    order_date_first        string         comment '首次下单时间',
    order_date_last         string         comment '末次下单时间',
    order_count             bigint         comment '累积下单次数',
    order_amount            decimal(16,2)  comment '累积下单金额',
    order_last_30d_count    bigint         comment '最近30日下单次数',
    order_last_30d_amount   decimal(16,2)  comment '最近30日下单金额',
    payment_date_first      string         comment '首次支付时间',
    payment_date_last       string         comment '末次支付时间',
    payment_count           bigint         comment '累积支付次数',
    payment_amount          decimal(16,2)  comment '累积支付金额',
    payment_last_30d_count  bigint         comment '最近30日支付次数',
    payment_last_30d_amount decimal(16,2)  comment '最近30日支付金额'
) COMMENT '用户主题宽表'
stored as parquet
location '/warehouse/gmall/dwt/dwt_user_topic/'
tblproperties ("parquet.compression"="lzo");

3、加载数据

-- Rebuild dwt_user_topic by full-outer-joining its current contents (old) with a
-- per-user aggregate of dws_user_action_daycount (new), keyed on user_id.
--   * "Today" measures are taken from the dt = '2020-03-10' rows only.
--   * "Last 30 days" measures are summed over the whole scanned window.
-- Window fix: the scan is now the closed range [2020-03-10 - 29 days, 2020-03-10],
-- i.e. exactly 30 daily partitions. The previous filter (dt >= date - 30, no upper
-- bound) covered 31 days and also picked up later partitions on a backfill run.
insert overwrite table dwt_user_topic
select
    nvl(new.user_id, old.user_id),
    -- login: first date is set once, last date follows the latest active day
    if(old.login_date_first is null and new.login_count > 0, '2020-03-10', old.login_date_first),
    if(new.login_count > 0, '2020-03-10', old.login_date_last),
    nvl(old.login_count, 0) + if(new.login_count > 0, 1, 0),
    nvl(new.login_last_30d_count, 0),
    -- order
    if(old.order_date_first is null and new.order_count > 0, '2020-03-10', old.order_date_first),
    if(new.order_count > 0, '2020-03-10', old.order_date_last),
    nvl(old.order_count, 0) + nvl(new.order_count, 0),
    nvl(old.order_amount, 0) + nvl(new.order_amount, 0),
    nvl(new.order_last_30d_count, 0),
    nvl(new.order_last_30d_amount, 0),
    -- payment
    if(old.payment_date_first is null and new.payment_count > 0, '2020-03-10', old.payment_date_first),
    if(new.payment_count > 0, '2020-03-10', old.payment_date_last),
    nvl(old.payment_count, 0) + nvl(new.payment_count, 0),
    nvl(old.payment_amount, 0) + nvl(new.payment_amount, 0),
    nvl(new.payment_last_30d_count, 0),
    nvl(new.payment_last_30d_amount, 0)
from dwt_user_topic old
full outer join
(
    select
        user_id,
        sum(if(dt = '2020-03-10', login_count, 0))    login_count,
        sum(if(dt = '2020-03-10', order_count, 0))    order_count,
        sum(if(dt = '2020-03-10', order_amount, 0))   order_amount,
        sum(if(dt = '2020-03-10', payment_count, 0))  payment_count,
        sum(if(dt = '2020-03-10', payment_amount, 0)) payment_amount,
        sum(if(login_count > 0, 1, 0))                login_last_30d_count,
        sum(order_count)                              order_last_30d_count,
        sum(order_amount)                             order_last_30d_amount,
        sum(payment_count)                            payment_last_30d_count,
        sum(payment_amount)                           payment_last_30d_amount
    from dws_user_action_daycount
    where dt >= date_add('2020-03-10', -29)
      and dt <= '2020-03-10'
    group by user_id
) new
on old.user_id = new.user_id;

2.3 商品主题宽表 dwt_sku_topic

1、数据来源
dwt_sku_topic、dws_sku_action_daycount、dwd_dim_sku_info。

2、创建表

-- SKU-topic wide table: an unpartitioned external Parquet table (LZO-compressed).
-- For each business process (order, payment, refund, cart, favor, appraise) it
-- holds last-30-day measures and accumulated measures.
-- NOTE(review): the refund amount columns are decimal(10,2) while every other
-- amount column is decimal(16,2) - confirm whether that is intentional.
DROP TABLE IF EXISTS dwt_sku_topic;
CREATE EXTERNAL TABLE dwt_sku_topic
(
    sku_id                          STRING        COMMENT 'sku_id',
    spu_id                          STRING        COMMENT 'spu_id',
    order_last_30d_count            BIGINT        COMMENT '最近30日被下单次数',
    order_last_30d_num              BIGINT        COMMENT '最近30日被下单件数',
    order_last_30d_amount           DECIMAL(16,2) COMMENT '最近30日被下单金额',
    order_count                     BIGINT        COMMENT '累积被下单次数',
    order_num                       BIGINT        COMMENT '累积被下单件数',
    order_amount                    DECIMAL(16,2) COMMENT '累积被下单金额',
    payment_last_30d_count          BIGINT        COMMENT '最近30日被支付次数',
    payment_last_30d_num            BIGINT        COMMENT '最近30日被支付件数',
    payment_last_30d_amount         DECIMAL(16,2) COMMENT '最近30日被支付金额',
    payment_count                   BIGINT        COMMENT '累积被支付次数',
    payment_num                     BIGINT        COMMENT '累积被支付件数',
    payment_amount                  DECIMAL(16,2) COMMENT '累积被支付金额',
    refund_last_30d_count           BIGINT        COMMENT '最近三十日退款次数',
    refund_last_30d_num             BIGINT        COMMENT '最近三十日退款件数',
    refund_last_30d_amount          DECIMAL(10,2) COMMENT '最近三十日退款金额',
    refund_count                    BIGINT        COMMENT '累积退款次数',
    refund_num                      BIGINT        COMMENT '累积退款件数',
    refund_amount                   DECIMAL(10,2) COMMENT '累积退款金额',
    cart_last_30d_count             BIGINT        COMMENT '最近30日被加入购物车次数',
    cart_last_30d_num               BIGINT        COMMENT '最近30日被加入购物车件数',
    cart_count                      BIGINT        COMMENT '累积被加入购物车次数',
    cart_num                        BIGINT        COMMENT '累积被加入购物车件数',
    favor_last_30d_count            BIGINT        COMMENT '最近30日被收藏次数',
    favor_count                     BIGINT        COMMENT '累积被收藏次数',
    appraise_last_30d_good_count    BIGINT        COMMENT '最近30日好评数',
    appraise_last_30d_mid_count     BIGINT        COMMENT '最近30日中评数',
    appraise_last_30d_bad_count     BIGINT        COMMENT '最近30日差评数',
    appraise_last_30d_default_count BIGINT        COMMENT '最近30日默认评价数',
    appraise_good_count             BIGINT        COMMENT '累积好评数',
    appraise_mid_count              BIGINT        COMMENT '累积中评数',
    appraise_bad_count              BIGINT        COMMENT '累积差评数',
    appraise_default_count          BIGINT        COMMENT '累积默认评价数'
) COMMENT '商品主题宽表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_sku_topic/'
TBLPROPERTIES ("parquet.compression"="lzo");

3、加载数据

-- Rebuild dwt_sku_topic by full-outer-joining its current contents (old) with a
-- per-SKU aggregate of dws_sku_action_daycount (new), then attaching spu_id from
-- the 2020-03-10 partition of dwd_dim_sku_info.
--   * Columns without a suffix in "new" are the dt = '2020-03-10' values and are
--     added onto the accumulated totals.
--   * Columns suffixed with 30 in "new" are sums over the scanned window.
-- Fixes compared with the previous statement:
--   * accumulated payment_num / payment_amount added new.payment_count by mistake;
--     they now add new.payment_num / new.payment_amount.
--   * the scan is the closed range [2020-03-10 - 29 days, 2020-03-10], i.e. exactly
--     30 daily partitions (was 31 days with no upper bound).
--   * spu_id falls back to the stored value when the SKU is missing from the
--     dimension partition of the day, instead of becoming NULL.
insert overwrite table dwt_sku_topic
select
    nvl(new.sku_id, old.sku_id),
    nvl(sku_info.spu_id, old.spu_id),
    -- order
    nvl(new.order_count30, 0),
    nvl(new.order_num30, 0),
    nvl(new.order_amount30, 0),
    nvl(old.order_count, 0) + nvl(new.order_count, 0),
    nvl(old.order_num, 0) + nvl(new.order_num, 0),
    nvl(old.order_amount, 0) + nvl(new.order_amount, 0),
    -- payment
    nvl(new.payment_count30, 0),
    nvl(new.payment_num30, 0),
    nvl(new.payment_amount30, 0),
    nvl(old.payment_count, 0) + nvl(new.payment_count, 0),
    nvl(old.payment_num, 0) + nvl(new.payment_num, 0),
    nvl(old.payment_amount, 0) + nvl(new.payment_amount, 0),
    -- refund
    nvl(new.refund_count30, 0),
    nvl(new.refund_num30, 0),
    nvl(new.refund_amount30, 0),
    nvl(old.refund_count, 0) + nvl(new.refund_count, 0),
    nvl(old.refund_num, 0) + nvl(new.refund_num, 0),
    nvl(old.refund_amount, 0) + nvl(new.refund_amount, 0),
    -- cart
    nvl(new.cart_count30, 0),
    nvl(new.cart_num30, 0),
    nvl(old.cart_count, 0) + nvl(new.cart_count, 0),
    nvl(old.cart_num, 0) + nvl(new.cart_num, 0),
    -- favor
    nvl(new.favor_count30, 0),
    nvl(old.favor_count, 0) + nvl(new.favor_count, 0),
    -- appraise
    nvl(new.appraise_good_count30, 0),
    nvl(new.appraise_mid_count30, 0),
    nvl(new.appraise_bad_count30, 0),
    nvl(new.appraise_default_count30, 0),
    nvl(old.appraise_good_count, 0) + nvl(new.appraise_good_count, 0),
    nvl(old.appraise_mid_count, 0) + nvl(new.appraise_mid_count, 0),
    nvl(old.appraise_bad_count, 0) + nvl(new.appraise_bad_count, 0),
    nvl(old.appraise_default_count, 0) + nvl(new.appraise_default_count, 0)
from dwt_sku_topic old
full outer join
(
    select
        sku_id,
        sum(if(dt = '2020-03-10', order_count, 0))            order_count,
        sum(if(dt = '2020-03-10', order_num, 0))              order_num,
        sum(if(dt = '2020-03-10', order_amount, 0))           order_amount,
        sum(if(dt = '2020-03-10', payment_count, 0))          payment_count,
        sum(if(dt = '2020-03-10', payment_num, 0))            payment_num,
        sum(if(dt = '2020-03-10', payment_amount, 0))         payment_amount,
        sum(if(dt = '2020-03-10', refund_count, 0))           refund_count,
        sum(if(dt = '2020-03-10', refund_num, 0))             refund_num,
        sum(if(dt = '2020-03-10', refund_amount, 0))          refund_amount,
        sum(if(dt = '2020-03-10', cart_count, 0))             cart_count,
        sum(if(dt = '2020-03-10', cart_num, 0))               cart_num,
        sum(if(dt = '2020-03-10', favor_count, 0))            favor_count,
        sum(if(dt = '2020-03-10', appraise_good_count, 0))    appraise_good_count,
        sum(if(dt = '2020-03-10', appraise_mid_count, 0))     appraise_mid_count,
        sum(if(dt = '2020-03-10', appraise_bad_count, 0))     appraise_bad_count,
        sum(if(dt = '2020-03-10', appraise_default_count, 0)) appraise_default_count,
        sum(order_count)            order_count30,
        sum(order_num)              order_num30,
        sum(order_amount)           order_amount30,
        sum(payment_count)          payment_count30,
        sum(payment_num)            payment_num30,
        sum(payment_amount)         payment_amount30,
        sum(refund_count)           refund_count30,
        sum(refund_num)             refund_num30,
        sum(refund_amount)          refund_amount30,
        sum(cart_count)             cart_count30,
        sum(cart_num)               cart_num30,
        sum(favor_count)            favor_count30,
        sum(appraise_good_count)    appraise_good_count30,
        sum(appraise_mid_count)     appraise_mid_count30,
        sum(appraise_bad_count)     appraise_bad_count30,
        sum(appraise_default_count) appraise_default_count30
    from dws_sku_action_daycount
    where dt >= date_add('2020-03-10', -29)
      and dt <= '2020-03-10'
    group by sku_id
) new
on new.sku_id = old.sku_id
left join
(
    select id, spu_id
    from dwd_dim_sku_info
    where dt = '2020-03-10'
) sku_info
on nvl(new.sku_id, old.sku_id) = sku_info.id;

2.4 优惠券主题宽表 dwt_coupon_topic (预留)

1、数据来源
dwt_coupon_topic、dws_coupon_use_daycount。

2、创建表

-- Coupon-topic wide table: an unpartitioned external Parquet table (LZO-compressed)
-- holding, per coupon, the counts of the current day and the accumulated counts.
DROP TABLE IF EXISTS dwt_coupon_topic;
CREATE EXTERNAL TABLE dwt_coupon_topic
(
    `coupon_id`       STRING COMMENT '优惠券ID',
    `get_day_count`   BIGINT COMMENT '当日领用次数',
    `using_day_count` BIGINT COMMENT '当日使用(下单)次数',
    `used_day_count`  BIGINT COMMENT '当日使用(支付)次数',
    `get_count`       BIGINT COMMENT '累积领用次数',
    `using_count`     BIGINT COMMENT '累积使用(下单)次数',
    `used_count`      BIGINT COMMENT '累积使用(支付)次数'
) COMMENT '购物券主题宽表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_coupon_topic/'
TBLPROPERTIES ("parquet.compression"="lzo");

3、加载数据

-- Rebuild dwt_coupon_topic by full-outer-joining its current contents (hist) with
-- the 2020-03-10 partition of dws_coupon_use_daycount (today), keyed on coupon_id.
INSERT OVERWRITE TABLE dwt_coupon_topic
SELECT
    nvl(today.coupon_id, hist.coupon_id),
    -- counts of the current day (0 when the coupon has no row today)
    nvl(today.get_count, 0),
    nvl(today.using_count, 0),
    nvl(today.used_count, 0),
    -- accumulated counts: stored total plus today's counts
    nvl(hist.get_count, 0)   + nvl(today.get_count, 0),
    nvl(hist.using_count, 0) + nvl(today.using_count, 0),
    nvl(hist.used_count, 0)  + nvl(today.used_count, 0)
FROM dwt_coupon_topic hist
FULL OUTER JOIN
(
    SELECT
        coupon_id,
        get_count,
        using_count,
        used_count
    FROM dws_coupon_use_daycount
    WHERE dt = '2020-03-10'
) today
    ON hist.coupon_id = today.coupon_id;

2.5 活动主题宽表 dwt_activity_topic (预留)

1、数据来源
dwt_activity_topic、dws_activity_info_daycount。

2、创建表

-- Activity-topic wide table: an unpartitioned external Parquet table
-- (LZO-compressed) holding, per activity, the order/payment counts of the current
-- day and the accumulated order/payment counts.
DROP TABLE IF EXISTS dwt_activity_topic;
CREATE EXTERNAL TABLE dwt_activity_topic
(
    `id`                STRING COMMENT '活动id',
    `activity_name`     STRING COMMENT '活动名称',
    `order_day_count`   BIGINT COMMENT '当日日下单次数',
    `payment_day_count` BIGINT COMMENT '当日支付次数',
    `order_count`       BIGINT COMMENT '累积下单次数',
    `payment_count`     BIGINT COMMENT '累积支付次数'
) COMMENT '活动主题宽表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_activity_topic/'
TBLPROPERTIES ("parquet.compression"="lzo");

3、加载数据

-- Rebuild dwt_activity_topic by full-outer-joining its current contents (hist)
-- with the 2020-03-10 partition of dws_activity_info_daycount (today), keyed on id.
INSERT OVERWRITE TABLE dwt_activity_topic
SELECT
    nvl(today.id, hist.id),
    nvl(today.activity_name, hist.activity_name),
    -- counts of the current day (0 when the activity has no row today)
    nvl(today.order_count, 0),
    nvl(today.payment_count, 0),
    -- accumulated counts: stored total plus today's counts
    nvl(hist.order_count, 0)   + nvl(today.order_count, 0),
    nvl(hist.payment_count, 0) + nvl(today.payment_count, 0)
FROM dwt_activity_topic hist
FULL OUTER JOIN
(
    SELECT
        id,
        activity_name,
        order_count,
        payment_count
    FROM dws_activity_info_daycount
    WHERE dt = '2020-03-10'
) today
    ON hist.id = today.id;

三、dwt 层数据导入脚本 dws_to_dwt.sh

#!/bin/bash
# dws_to_dwt.sh - daily load of the five DWT topic tables from the DWS layer.
#
# Usage: dws_to_dwt.sh [yyyy-MM-dd]
#   With an argument, that date is loaded; without one, yesterday is loaded.
#
# Each statement overwrites a DWT table with the full outer join of its current
# contents and the DWS data of the load date.
#
# Fixes compared with the previous version of this script:
#   * the shebang, the closing "fi" and the final hive call were fused onto
#     neighbouring lines, so the script could not run as written;
#   * dwt_uv_topic.login_count (accumulated active days) now grows by 1 per active
#     day instead of by the number of activations of the day;
#   * dwt_user_topic.login_last_30d_count is computed from login_count
#     (it was computed from order_count);
#   * dwt_sku_topic accumulated payment_num / payment_amount add the matching
#     daily measure (both added payment_count);
#   * the 30-day scans use the closed range [do_date - 29 days, do_date], i.e.
#     exactly 30 partitions, so re-running for a past date ignores later partitions;
#   * spu_id falls back to the stored value when the SKU is missing from the
#     dimension partition of the load date.

APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date passed as the first argument; otherwise default to yesterday.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

# do_date is spliced into the HiveQL text below, so accept only yyyy-MM-dd.
if ! [[ "$do_date" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    echo "Usage: $0 [yyyy-MM-dd]" >&2
    exit 1
fi

# No SQL comments are placed inside the string: it is handed to hive -e verbatim.
sql="
insert overwrite table ${APP}.dwt_uv_topic
select
    nvl(new.mid_id,old.mid_id),
    nvl(new.user_id,old.user_id),
    nvl(new.version_code,old.version_code),
    nvl(new.version_name,old.version_name),
    nvl(new.lang,old.lang),
    nvl(new.source,old.source),
    nvl(new.os,old.os),
    nvl(new.area,old.area),
    nvl(new.model,old.model),
    nvl(new.brand,old.brand),
    nvl(new.sdk_version,old.sdk_version),
    nvl(new.gmail,old.gmail),
    nvl(new.height_width,old.height_width),
    nvl(new.app_time,old.app_time),
    nvl(new.network,old.network),
    nvl(new.lng,old.lng),
    nvl(new.lat,old.lat),
    nvl(old.login_date_first,'$do_date'),
    if(new.login_count>0,'$do_date',old.login_date_last),
    nvl(new.login_count,0),
    nvl(old.login_count,0)+if(new.login_count>0,1,0)
from
(
    select *
    from ${APP}.dwt_uv_topic
)old
full outer join
(
    select *
    from ${APP}.dws_uv_detail_daycount
    where dt='$do_date'
)new
on old.mid_id=new.mid_id;

insert overwrite table ${APP}.dwt_user_topic
select
    nvl(new.user_id,old.user_id),
    if(old.login_date_first is null and new.login_count>0,'$do_date',old.login_date_first),
    if(new.login_count>0,'$do_date',old.login_date_last),
    nvl(old.login_count,0)+if(new.login_count>0,1,0),
    nvl(new.login_last_30d_count,0),
    if(old.order_date_first is null and new.order_count>0,'$do_date',old.order_date_first),
    if(new.order_count>0,'$do_date',old.order_date_last),
    nvl(old.order_count,0)+nvl(new.order_count,0),
    nvl(old.order_amount,0)+nvl(new.order_amount,0),
    nvl(new.order_last_30d_count,0),
    nvl(new.order_last_30d_amount,0),
    if(old.payment_date_first is null and new.payment_count>0,'$do_date',old.payment_date_first),
    if(new.payment_count>0,'$do_date',old.payment_date_last),
    nvl(old.payment_count,0)+nvl(new.payment_count,0),
    nvl(old.payment_amount,0)+nvl(new.payment_amount,0),
    nvl(new.payment_last_30d_count,0),
    nvl(new.payment_last_30d_amount,0)
from
(
    select *
    from ${APP}.dwt_user_topic
)old
full outer join
(
    select
        user_id,
        sum(if(dt='$do_date',login_count,0)) login_count,
        sum(if(dt='$do_date',order_count,0)) order_count,
        sum(if(dt='$do_date',order_amount,0)) order_amount,
        sum(if(dt='$do_date',payment_count,0)) payment_count,
        sum(if(dt='$do_date',payment_amount,0)) payment_amount,
        sum(if(login_count>0,1,0)) login_last_30d_count,
        sum(order_count) order_last_30d_count,
        sum(order_amount) order_last_30d_amount,
        sum(payment_count) payment_last_30d_count,
        sum(payment_amount) payment_last_30d_amount
    from ${APP}.dws_user_action_daycount
    where dt>=date_add('$do_date',-29)
      and dt<='$do_date'
    group by user_id
)new
on old.user_id=new.user_id;

with
sku_act as
(
    select
        sku_id,
        sum(if(dt='$do_date',order_count,0)) order_count,
        sum(if(dt='$do_date',order_num,0)) order_num,
        sum(if(dt='$do_date',order_amount,0)) order_amount,
        sum(if(dt='$do_date',payment_count,0)) payment_count,
        sum(if(dt='$do_date',payment_num,0)) payment_num,
        sum(if(dt='$do_date',payment_amount,0)) payment_amount,
        sum(if(dt='$do_date',refund_count,0)) refund_count,
        sum(if(dt='$do_date',refund_num,0)) refund_num,
        sum(if(dt='$do_date',refund_amount,0)) refund_amount,
        sum(if(dt='$do_date',cart_count,0)) cart_count,
        sum(if(dt='$do_date',cart_num,0)) cart_num,
        sum(if(dt='$do_date',favor_count,0)) favor_count,
        sum(if(dt='$do_date',appraise_good_count,0)) appraise_good_count,
        sum(if(dt='$do_date',appraise_mid_count,0)) appraise_mid_count,
        sum(if(dt='$do_date',appraise_bad_count,0)) appraise_bad_count,
        sum(if(dt='$do_date',appraise_default_count,0)) appraise_default_count,
        sum(order_count) order_count30,
        sum(order_num) order_num30,
        sum(order_amount) order_amount30,
        sum(payment_count) payment_count30,
        sum(payment_num) payment_num30,
        sum(payment_amount) payment_amount30,
        sum(refund_count) refund_count30,
        sum(refund_num) refund_num30,
        sum(refund_amount) refund_amount30,
        sum(cart_count) cart_count30,
        sum(cart_num) cart_num30,
        sum(favor_count) favor_count30,
        sum(appraise_good_count) appraise_good_count30,
        sum(appraise_mid_count) appraise_mid_count30,
        sum(appraise_bad_count) appraise_bad_count30,
        sum(appraise_default_count) appraise_default_count30
    from ${APP}.dws_sku_action_daycount
    where dt>=date_add('$do_date',-29)
      and dt<='$do_date'
    group by sku_id
),
sku_topic as
(
    select
        sku_id,
        spu_id,
        order_last_30d_count,
        order_last_30d_num,
        order_last_30d_amount,
        order_count,
        order_num,
        order_amount,
        payment_last_30d_count,
        payment_last_30d_num,
        payment_last_30d_amount,
        payment_count,
        payment_num,
        payment_amount,
        refund_last_30d_count,
        refund_last_30d_num,
        refund_last_30d_amount,
        refund_count,
        refund_num,
        refund_amount,
        cart_last_30d_count,
        cart_last_30d_num,
        cart_count,
        cart_num,
        favor_last_30d_count,
        favor_count,
        appraise_last_30d_good_count,
        appraise_last_30d_mid_count,
        appraise_last_30d_bad_count,
        appraise_last_30d_default_count,
        appraise_good_count,
        appraise_mid_count,
        appraise_bad_count,
        appraise_default_count
    from ${APP}.dwt_sku_topic
)
insert overwrite table ${APP}.dwt_sku_topic
select
    nvl(sku_act.sku_id,sku_topic.sku_id),
    nvl(sku_info.spu_id,sku_topic.spu_id),
    nvl(sku_act.order_count30,0),
    nvl(sku_act.order_num30,0),
    nvl(sku_act.order_amount30,0),
    nvl(sku_topic.order_count,0)+nvl(sku_act.order_count,0),
    nvl(sku_topic.order_num,0)+nvl(sku_act.order_num,0),
    nvl(sku_topic.order_amount,0)+nvl(sku_act.order_amount,0),
    nvl(sku_act.payment_count30,0),
    nvl(sku_act.payment_num30,0),
    nvl(sku_act.payment_amount30,0),
    nvl(sku_topic.payment_count,0)+nvl(sku_act.payment_count,0),
    nvl(sku_topic.payment_num,0)+nvl(sku_act.payment_num,0),
    nvl(sku_topic.payment_amount,0)+nvl(sku_act.payment_amount,0),
    nvl(sku_act.refund_count30,0),
    nvl(sku_act.refund_num30,0),
    nvl(sku_act.refund_amount30,0),
    nvl(sku_topic.refund_count,0)+nvl(sku_act.refund_count,0),
    nvl(sku_topic.refund_num,0)+nvl(sku_act.refund_num,0),
    nvl(sku_topic.refund_amount,0)+nvl(sku_act.refund_amount,0),
    nvl(sku_act.cart_count30,0),
    nvl(sku_act.cart_num30,0),
    nvl(sku_topic.cart_count,0)+nvl(sku_act.cart_count,0),
    nvl(sku_topic.cart_num,0)+nvl(sku_act.cart_num,0),
    nvl(sku_act.favor_count30,0),
    nvl(sku_topic.favor_count,0)+nvl(sku_act.favor_count,0),
    nvl(sku_act.appraise_good_count30,0),
    nvl(sku_act.appraise_mid_count30,0),
    nvl(sku_act.appraise_bad_count30,0),
    nvl(sku_act.appraise_default_count30,0),
    nvl(sku_topic.appraise_good_count,0)+nvl(sku_act.appraise_good_count,0),
    nvl(sku_topic.appraise_mid_count,0)+nvl(sku_act.appraise_mid_count,0),
    nvl(sku_topic.appraise_bad_count,0)+nvl(sku_act.appraise_bad_count,0),
    nvl(sku_topic.appraise_default_count,0)+nvl(sku_act.appraise_default_count,0)
from sku_act
full outer join sku_topic
on sku_act.sku_id=sku_topic.sku_id
left join
(
    select id, spu_id
    from ${APP}.dwd_dim_sku_info
    where dt='$do_date'
)sku_info
on nvl(sku_topic.sku_id,sku_act.sku_id)=sku_info.id;

insert overwrite table ${APP}.dwt_coupon_topic
select
    nvl(new.coupon_id,old.coupon_id),
    nvl(new.get_count,0),
    nvl(new.using_count,0),
    nvl(new.used_count,0),
    nvl(old.get_count,0)+nvl(new.get_count,0),
    nvl(old.using_count,0)+nvl(new.using_count,0),
    nvl(old.used_count,0)+nvl(new.used_count,0)
from
(
    select *
    from ${APP}.dwt_coupon_topic
)old
full outer join
(
    select coupon_id, get_count, using_count, used_count
    from ${APP}.dws_coupon_use_daycount
    where dt='$do_date'
)new
on old.coupon_id=new.coupon_id;

insert overwrite table ${APP}.dwt_activity_topic
select
    nvl(new.id,old.id),
    nvl(new.activity_name,old.activity_name),
    nvl(new.order_count,0),
    nvl(new.payment_count,0),
    nvl(old.order_count,0)+nvl(new.order_count,0),
    nvl(old.payment_count,0)+nvl(new.payment_count,0)
from
(
    select *
    from ${APP}.dwt_activity_topic
)old
full outer join
(
    select id, activity_name, order_count, payment_count
    from ${APP}.dws_activity_info_daycount
    where dt='$do_date'
)new
on old.id=new.id;
"

$hive -e "$sql"

四、dwt 层总结

1、数据采用 parquet 存储 + lzo 压缩的方式。
2、dwt 层不是分区表。
3、宽表字段怎么来?以维度为主题,关联相关事实表的度量值,并为每类度量补充:首次发生时间(开头)、末次发生时间(结尾)、从起始至今的累积值,以及最近一段时间(如最近 30 日)的累积值。
4、dwt 层共五张表。

电商数仓(dwt 层)相关推荐

  1. 电商数仓DWD层用户行为日志解析

    文章目录 前言 一.页面埋点日志.启动日志结构 二.日志解析的流程 2.1 启动日志表解析(包括注意事项) 2.1.1 解析思路 2.1.2 建表语句 2.1.3 数据导入 2.1.4 注意事项 2. ...

  2. 电商数仓(dwd 层)

    一.dwd 层介绍 1.对用户行为数据解析. 2.对核心数据进行判空过滤. 3.对业务数据采用维度模型重新建模,即维度退化. 二.dwd 层用户行为数据 2.1 用户行为启动表 dwd_start_l ...

  3. 数据仓库之电商数仓-- 3.3、电商数据仓库系统(DWT层)

    目录 八.数仓搭建-DWT层 8.1 访客主题 8.2 用户主题 8.3 商品主题 8.4 优惠券主题 8.5 活动主题 8.6 地区主题 8.7 DWT层首日数据导入脚本 8.8 DWT层每日数据导 ...

  4. 数据仓库之电商数仓-- 3.4、电商数据仓库系统(ADS层)

    目录 九.数仓搭建-ADS层 9.1 建表说明 9.2 访客主题 9.2.1 访客统计 9.2.2 路径分析 9.3 用户主题 9.3.1 用户统计 9.3.2 用户变动统计 9.3.3 用户行为漏斗 ...

  5. 数据仓库之电商数仓-- 3.2、电商数据仓库系统(DWS层)

    目录 七.数仓搭建-DWS层 7.1 系统函数 7.1.1 nvl函数 7.1.2 日期处理函数 7.1.3 复杂数据类型定义 7.2 DWS层 7.2.1 访客主题 7.2.2 用户主题 7.2.3 ...

  6. 数据仓库之电商数仓-- 4、可视化报表Superset

    目录 一.Superset入门 1.1 Superset概述 1.2 Superset应用场景 二.Superset安装及使用 2.1 安装Python环境 2.1.1 安装Miniconda 2.1 ...

  7. 数据仓库之电商数仓-- 2、业务数据采集平台

    目录 一.电商业务简介 1.1 电商业务流程 1.2 电商常识(SKU.SPU) 1.3 电商系统表结构 1.3.1 活动信息表(activity_info) 1.3.2 活动规则表(activity ...

  8. 数据仓库之电商数仓-- 1、用户行为数据采集

    目录 一.数据仓库概念 二.项目需求及架构设计 2.1 项目需求分析 2.2 项目框架 2.2.1 技术选型 2.2.2 系统数据流程设计 2.2.3 框架版本选型 2.2.4 服务器选型 2.2.5 ...

  9. 电商数仓描述_笔记-尚硅谷大数据项目数据仓库-电商数仓V1.2新版

    架构 项目框架 数仓架构 存储压缩 Snappy与LZO LZO安装: 读取LZO文件时,需要先创建索引,才可以进行切片. 框架版本选型Apache:运维麻烦,需要自己调研兼容性. CDH:国内使用最 ...

  10. 复盘离线电商数仓3.0项目–数据开发梳理

    复盘离线电商数仓项目–数据开发梳理 业务数据 数仓分层 ods层到ads层的开发 开源BI工具Superset ODS层业务数据&日志数据 ods层业务数据 使用Sqoop脚本从Mysql数据 ...

最新文章

  1. SQL2008-分页显示3种方法
  2. python九十八类_Python领域最伟大工程师Kenneth Reitz,教你写代码
  3. linux上设置了log4j没有产生日志文件_关于 log4j 升级到 log4j2 的小结
  4. python3.7 pip安装_python3.7安装, 解决pip is configured with locations that require TLS/SSL问题...
  5. 就业模拟试题_Net
  6. MySQL常用存储引擎之Archive
  7. 12行代码AC_Leecode 495. 提莫攻击——Leecode每日一题系列
  8. php 直播服务器搭建,基于Nginx搭建RTMP/HLS视频直播服务器
  9. mysql业务繁忙时能建索引吗_MySQL DBA面试高频三十问
  10. 人受失败后多久可以做第二次_做完皮秒多久可以用自己的护肤产品、过来人分享皮秒后怎么护肤?...
  11. bzoj2243 树链剖分
  12. Java配置文件找不到指定_转载:Java项目读取配置文件时,FileNotFoundException 系统找不到指定的文件,System.getProperty(user.dir)的理解...
  13. JS学习总结(9)——String
  14. cenos各个版本下载地址
  15. 密码生成器c语言程序,C++全密码生成的实现代码
  16. 江苏高考新方案定了!总分750分,科目“3+1+2”
  17. 买“背包“吗?送“手臂”的那种!卡耐基梅隆大学可穿戴机械臂问世
  18. 输入框限制规则 只能输入数字 只能输入字母数字 等等
  19. 淘宝API upload_img - 上传图片到淘宝
  20. php微信支付mch_id参数格式错误,在.net core上,Web网站调用微信支付-统一下单接口(xml传参)一直返回错误:mch_id参数格式错误...

热门文章

  1. mas6a801 sw tree disp
  2. [luoguT30208]太极剑
  3. 亿级视频内容如何实时更新?
  4. 服务器硬盘常用的阵列方式有几种,三种常见磁盘阵列设置
  5. 【迁移学习】Self Paced Adversarial Training for Multimodal Few-shot Learning论文解读
  6. Windows网络笔记-台式机通过网线直连笔记本,台式机通过笔记本上网(win10)
  7. 1988年图灵奖--伊万·萨瑟兰简介
  8. shell中用grep查找并且不输出_grep无法查找shell传过来的变量?先注意一下文本格式吧!...
  9. Discuz集思街淘宝客模板 程序源代码
  10. Android专业DJ,著名音乐游戏《DJ英雄》登陆Android Market