电商数仓(dwt 层)
一、dwt 层介绍
dws 层统计的是各主题对象的每日数据,并为 dwt 层提供数据来源;dwt 层在此基础上统计各主题的累积数据。
二、数据表
2.1 设备主题宽表 dwt_uv_topic
1、数据来源
dwt_uv_topic、dws_uv_detail_daycount。
2、建表
-- Device-topic wide table, keyed by mid_id. Holds the device attributes plus
-- first/last active dates, the load day's activity count and the cumulative
-- number of active days (see the per-column COMMENT texts).
-- The table is dropped and re-created as an EXTERNAL Parquet table with LZO compression.
DROP TABLE IF EXISTS dwt_uv_topic;
CREATE EXTERNAL TABLE dwt_uv_topic (
    `mid_id`           STRING COMMENT '设备唯一标识',
    `user_id`          STRING COMMENT '用户标识',
    `version_code`     STRING COMMENT '程序版本号',
    `version_name`     STRING COMMENT '程序版本名',
    `lang`             STRING COMMENT '系统语言',
    `source`           STRING COMMENT '渠道号',
    `os`               STRING COMMENT '安卓系统版本',
    `area`             STRING COMMENT '区域',
    `model`            STRING COMMENT '手机型号',
    `brand`            STRING COMMENT '手机品牌',
    `sdk_version`      STRING COMMENT 'sdkVersion',
    `gmail`            STRING COMMENT 'gmail',
    `height_width`     STRING COMMENT '屏幕宽高',
    `app_time`         STRING COMMENT '客户端日志产生时的时间',
    `network`          STRING COMMENT '网络模式',
    `lng`              STRING COMMENT '经度',
    `lat`              STRING COMMENT '纬度',
    `login_date_first` STRING COMMENT '首次活跃时间',
    `login_date_last`  STRING COMMENT '末次活跃时间',
    `login_day_count`  BIGINT COMMENT '当日活跃次数',
    `login_count`      BIGINT COMMENT '累积活跃天数'
)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_uv_topic'
TBLPROPERTIES ("parquet.compression"="lzo");
3、加载数据
-- Daily merge for the device topic: the existing cumulative rows (hist) are
-- full-outer-joined on mid_id with the 2020-03-10 partition of
-- dws_uv_detail_daycount (daily), so devices present on only one side are kept.
-- Device attributes prefer the daily value and fall back to the stored one.
INSERT OVERWRITE TABLE dwt_uv_topic
SELECT
    nvl(daily.mid_id, hist.mid_id),
    nvl(daily.user_id, hist.user_id),
    nvl(daily.version_code, hist.version_code),
    nvl(daily.version_name, hist.version_name),
    nvl(daily.lang, hist.lang),
    nvl(daily.source, hist.source),
    nvl(daily.os, hist.os),
    nvl(daily.area, hist.area),
    nvl(daily.model, hist.model),
    nvl(daily.brand, hist.brand),
    nvl(daily.sdk_version, hist.sdk_version),
    nvl(daily.gmail, hist.gmail),
    nvl(daily.height_width, hist.height_width),
    nvl(daily.app_time, hist.app_time),
    nvl(daily.network, hist.network),
    nvl(daily.lng, hist.lng),
    nvl(daily.lat, hist.lat),
    -- first active date: stamped with the load date when no stored row exists
    if(hist.mid_id IS NULL, '2020-03-10', hist.login_date_first),
    -- last active date: moves to the load date when the device is in the daily data
    if(daily.mid_id IS NOT NULL, '2020-03-10', hist.login_date_last),
    -- load-day activity count; 0 when the device is absent from the daily data
    if(daily.mid_id IS NOT NULL, daily.login_count, 0),
    -- cumulative active days: +1 when the daily count is positive
    nvl(hist.login_count, 0) + if(daily.login_count > 0, 1, 0)
FROM (
    SELECT * FROM dwt_uv_topic
) hist
FULL OUTER JOIN (
    SELECT * FROM dws_uv_detail_daycount WHERE dt = '2020-03-10'
) daily
    ON hist.mid_id = daily.mid_id;
2.2 会员主题宽表 dwt_user_topic
1、数据来源
dwt_user_topic、dws_user_action_daycount。
2、创建表
-- Member (user) topic wide table, keyed by user_id: first/last dates, cumulative
-- totals and last-30-day totals for login, order and payment activity.
-- Column types were corrected so that they match what each column holds:
--   * order_last_30d_amount is an amount: decimal(16,2) like order_amount
--     (it was bigint, which drops the fractional part).
--   * payment_count / payment_last_30d_count are counts: bigint like order_count
--     (they were decimal(16,2)).
-- NOTE(review): the table is EXTERNAL, so Parquet files already present at the
-- location keep their old column types; confirm the data is reloaded after
-- applying this definition.
drop table if exists dwt_user_topic;
create external table dwt_user_topic (
    user_id                 string        comment '用户id',
    login_date_first        string        comment '首次登录时间',
    login_date_last         string        comment '末次登录时间',
    login_count             bigint        comment '累积登录天数',
    login_last_30d_count    bigint        comment '最近30日登录天数',
    order_date_first        string        comment '首次下单时间',
    order_date_last         string        comment '末次下单时间',
    order_count             bigint        comment '累积下单次数',
    order_amount            decimal(16,2) comment '累积下单金额',
    order_last_30d_count    bigint        comment '最近30日下单次数',
    order_last_30d_amount   decimal(16,2) comment '最近30日下单金额',
    payment_date_first      string        comment '首次支付时间',
    payment_date_last       string        comment '末次支付时间',
    payment_count           bigint        comment '累积支付次数',
    payment_amount          decimal(16,2) comment '累积支付金额',
    payment_last_30d_count  bigint        comment '最近30日支付次数',
    payment_last_30d_amount decimal(16,2) comment '最近30日支付金额'
)COMMENT '用户主题宽表'
stored as parquet
location '/warehouse/gmall/dwt/dwt_user_topic/'
tblproperties ("parquet.compression"="lzo");
3、加载数据
-- Daily merge for the user topic. The stored cumulative rows (old) are
-- full-outer-joined on user_id with an aggregate over dws_user_action_daycount (new)
-- that yields both the load day's values (dt='2020-03-10') and the window totals.
-- The window is now bounded above by the load date: with only a lower bound, any
-- partition later than the load date would leak into the *_last_30d_* columns.
-- NOTE(review): date_add(d,-30) .. d inclusive spans 31 calendar days; confirm
-- whether -29 is what "last 30 days" should mean.
insert overwrite table dwt_user_topic
select
    nvl(new.user_id,old.user_id),
    -- login: first/last date, cumulative days (+1 when active on the load day), 30-day days
    if(old.login_date_first is null and new.login_count>0,'2020-03-10',old.login_date_first),
    if(new.login_count>0,'2020-03-10',old.login_date_last),
    nvl(old.login_count,0)+if(new.login_count>0,1,0),
    nvl(new.login_last_30d_count,0),
    -- order: first/last date, cumulative count and amount, 30-day count and amount
    if(old.order_date_first is null and new.order_count>0,'2020-03-10',old.order_date_first),
    if(new.order_count>0,'2020-03-10',old.order_date_last),
    nvl(old.order_count,0)+nvl(new.order_count,0),
    nvl(old.order_amount,0)+nvl(new.order_amount,0),
    nvl(new.order_last_30d_count,0),
    nvl(new.order_last_30d_amount,0),
    -- payment: first/last date, cumulative count and amount, 30-day count and amount
    if(old.payment_date_first is null and new.payment_count>0,'2020-03-10',old.payment_date_first),
    if(new.payment_count>0,'2020-03-10',old.payment_date_last),
    nvl(old.payment_count,0)+nvl(new.payment_count,0),
    nvl(old.payment_amount,0)+nvl(new.payment_amount,0),
    nvl(new.payment_last_30d_count,0),
    nvl(new.payment_last_30d_amount,0)
from dwt_user_topic old
full outer join
(
    select
        user_id,
        -- load-day values only
        sum(if(dt='2020-03-10',login_count,0)) login_count,
        sum(if(dt='2020-03-10',order_count,0)) order_count,
        sum(if(dt='2020-03-10',order_amount,0)) order_amount,
        sum(if(dt='2020-03-10',payment_count,0)) payment_count,
        sum(if(dt='2020-03-10',payment_amount,0)) payment_amount,
        -- totals over the whole window
        sum(if(login_count>0,1,0)) login_last_30d_count,
        sum(order_count) order_last_30d_count,
        sum(order_amount) order_last_30d_amount,
        sum(payment_count) payment_last_30d_count,
        sum(payment_amount) payment_last_30d_amount
    from dws_user_action_daycount
    where dt>=date_add('2020-03-10',-30)
      and dt<='2020-03-10'
    group by user_id
)new on old.user_id=new.user_id;
2.3 商品主题宽表 dwt_sku_topic
1、数据来源
dwt_sku_topic、dws_sku_action_daycount、dwd_dim_sku_info。
2、创建表
-- SKU (product) topic wide table, keyed by sku_id: last-30-day and cumulative
-- figures for orders, payments, refunds, cart additions, favorites and appraisals.
-- The table is dropped and re-created as an EXTERNAL Parquet table with LZO compression.
-- NOTE(review): the two refund amount columns are DECIMAL(10,2) while the order
-- and payment amounts are DECIMAL(16,2); confirm the narrower precision is intended.
DROP TABLE IF EXISTS dwt_sku_topic;
CREATE EXTERNAL TABLE dwt_sku_topic (
    sku_id                          STRING        COMMENT 'sku_id',
    spu_id                          STRING        COMMENT 'spu_id',
    order_last_30d_count            BIGINT        COMMENT '最近30日被下单次数',
    order_last_30d_num              BIGINT        COMMENT '最近30日被下单件数',
    order_last_30d_amount           DECIMAL(16,2) COMMENT '最近30日被下单金额',
    order_count                     BIGINT        COMMENT '累积被下单次数',
    order_num                       BIGINT        COMMENT '累积被下单件数',
    order_amount                    DECIMAL(16,2) COMMENT '累积被下单金额',
    payment_last_30d_count          BIGINT        COMMENT '最近30日被支付次数',
    payment_last_30d_num            BIGINT        COMMENT '最近30日被支付件数',
    payment_last_30d_amount         DECIMAL(16,2) COMMENT '最近30日被支付金额',
    payment_count                   BIGINT        COMMENT '累积被支付次数',
    payment_num                     BIGINT        COMMENT '累积被支付件数',
    payment_amount                  DECIMAL(16,2) COMMENT '累积被支付金额',
    refund_last_30d_count           BIGINT        COMMENT '最近三十日退款次数',
    refund_last_30d_num             BIGINT        COMMENT '最近三十日退款件数',
    refund_last_30d_amount          DECIMAL(10,2) COMMENT '最近三十日退款金额',
    refund_count                    BIGINT        COMMENT '累积退款次数',
    refund_num                      BIGINT        COMMENT '累积退款件数',
    refund_amount                   DECIMAL(10,2) COMMENT '累积退款金额',
    cart_last_30d_count             BIGINT        COMMENT '最近30日被加入购物车次数',
    cart_last_30d_num               BIGINT        COMMENT '最近30日被加入购物车件数',
    cart_count                      BIGINT        COMMENT '累积被加入购物车次数',
    cart_num                        BIGINT        COMMENT '累积被加入购物车件数',
    favor_last_30d_count            BIGINT        COMMENT '最近30日被收藏次数',
    favor_count                     BIGINT        COMMENT '累积被收藏次数',
    appraise_last_30d_good_count    BIGINT        COMMENT '最近30日好评数',
    appraise_last_30d_mid_count     BIGINT        COMMENT '最近30日中评数',
    appraise_last_30d_bad_count     BIGINT        COMMENT '最近30日差评数',
    appraise_last_30d_default_count BIGINT        COMMENT '最近30日默认评价数',
    appraise_good_count             BIGINT        COMMENT '累积好评数',
    appraise_mid_count              BIGINT        COMMENT '累积中评数',
    appraise_bad_count              BIGINT        COMMENT '累积差评数',
    appraise_default_count          BIGINT        COMMENT '累积默认评价数'
)COMMENT '商品主题宽表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_sku_topic/'
TBLPROPERTIES ("parquet.compression"="lzo");
3、加载数据
-- Daily merge for the SKU topic. The stored cumulative rows (old) are
-- full-outer-joined on sku_id with an aggregate over dws_sku_action_daycount (new)
-- that yields the load day's values (dt='2020-03-10') and the window totals
-- (*30 aliases); spu_id is looked up in the 2020-03-10 partition of dwd_dim_sku_info.
-- Corrections:
--   * cumulative payment_num / payment_amount now add new.payment_num /
--     new.payment_amount (both previously added new.payment_count).
--   * the window is bounded above by the load date so that later partitions
--     cannot leak into the *_last_30d_* columns.
-- NOTE(review): date_add(d,-30) .. d inclusive spans 31 calendar days; confirm
-- whether -29 is what "last 30 days" should mean.
insert overwrite table dwt_sku_topic
select
    nvl(new.sku_id,old.sku_id),
    sku_info.spu_id,
    -- orders: window totals, then cumulative
    nvl(new.order_count30,0),
    nvl(new.order_num30,0),
    nvl(new.order_amount30,0),
    nvl(old.order_count,0) + nvl(new.order_count,0),
    nvl(old.order_num,0) + nvl(new.order_num,0),
    nvl(old.order_amount,0) + nvl(new.order_amount,0),
    -- payments: window totals, then cumulative
    nvl(new.payment_count30,0),
    nvl(new.payment_num30,0),
    nvl(new.payment_amount30,0),
    nvl(old.payment_count,0) + nvl(new.payment_count,0),
    nvl(old.payment_num,0) + nvl(new.payment_num,0),
    nvl(old.payment_amount,0) + nvl(new.payment_amount,0),
    -- refunds: window totals, then cumulative
    nvl(new.refund_count30,0),
    nvl(new.refund_num30,0),
    nvl(new.refund_amount30,0),
    nvl(old.refund_count,0) + nvl(new.refund_count,0),
    nvl(old.refund_num,0) + nvl(new.refund_num,0),
    nvl(old.refund_amount,0) + nvl(new.refund_amount,0),
    -- cart additions: window totals, then cumulative
    nvl(new.cart_count30,0),
    nvl(new.cart_num30,0),
    nvl(old.cart_count,0) + nvl(new.cart_count,0),
    nvl(old.cart_num,0) + nvl(new.cart_num,0),
    -- favorites: window total, then cumulative
    nvl(new.favor_count30,0),
    nvl(old.favor_count,0) + nvl(new.favor_count,0),
    -- appraisals: window totals, then cumulative
    nvl(new.appraise_good_count30,0),
    nvl(new.appraise_mid_count30,0),
    nvl(new.appraise_bad_count30,0),
    nvl(new.appraise_default_count30,0),
    nvl(old.appraise_good_count,0) + nvl(new.appraise_good_count,0),
    nvl(old.appraise_mid_count,0) + nvl(new.appraise_mid_count,0),
    nvl(old.appraise_bad_count,0) + nvl(new.appraise_bad_count,0),
    nvl(old.appraise_default_count,0) + nvl(new.appraise_default_count,0)
from dwt_sku_topic old
full outer join
(
    select
        sku_id,
        -- load-day values only
        sum(if(dt='2020-03-10',order_count,0)) order_count,
        sum(if(dt='2020-03-10',order_num,0)) order_num,
        sum(if(dt='2020-03-10',order_amount,0)) order_amount,
        sum(if(dt='2020-03-10',payment_count,0)) payment_count,
        sum(if(dt='2020-03-10',payment_num,0)) payment_num,
        sum(if(dt='2020-03-10',payment_amount,0)) payment_amount,
        sum(if(dt='2020-03-10',refund_count,0)) refund_count,
        sum(if(dt='2020-03-10',refund_num,0)) refund_num,
        sum(if(dt='2020-03-10',refund_amount,0)) refund_amount,
        sum(if(dt='2020-03-10',cart_count,0)) cart_count,
        sum(if(dt='2020-03-10',cart_num,0)) cart_num,
        sum(if(dt='2020-03-10',favor_count,0)) favor_count,
        sum(if(dt='2020-03-10',appraise_good_count,0)) appraise_good_count,
        sum(if(dt='2020-03-10',appraise_mid_count,0)) appraise_mid_count,
        sum(if(dt='2020-03-10',appraise_bad_count,0)) appraise_bad_count,
        sum(if(dt='2020-03-10',appraise_default_count,0)) appraise_default_count,
        -- totals over the whole window
        sum(order_count) order_count30,
        sum(order_num) order_num30,
        sum(order_amount) order_amount30,
        sum(payment_count) payment_count30,
        sum(payment_num) payment_num30,
        sum(payment_amount) payment_amount30,
        sum(refund_count) refund_count30,
        sum(refund_num) refund_num30,
        sum(refund_amount) refund_amount30,
        sum(cart_count) cart_count30,
        sum(cart_num) cart_num30,
        sum(favor_count) favor_count30,
        sum(appraise_good_count) appraise_good_count30,
        sum(appraise_mid_count) appraise_mid_count30,
        sum(appraise_bad_count) appraise_bad_count30,
        sum(appraise_default_count) appraise_default_count30
    from dws_sku_action_daycount
    where dt >= date_add('2020-03-10', -30)
      and dt <= '2020-03-10'
    group by sku_id
)new
on new.sku_id = old.sku_id
left join
(
    select * from dwd_dim_sku_info where dt='2020-03-10'
) sku_info
on nvl(new.sku_id,old.sku_id) = sku_info.id;
2.4 优惠券主题宽表 dwt_coupon_topic (预留)
1、数据来源
dwt_coupon_topic、dws_coupon_use_daycount。
2、创建表
-- Coupon topic wide table, keyed by coupon_id: load-day and cumulative counts of
-- coupons received, used at order time and used at payment time.
-- The table is dropped and re-created as an EXTERNAL Parquet table with LZO compression.
DROP TABLE IF EXISTS dwt_coupon_topic;
CREATE EXTERNAL TABLE dwt_coupon_topic (
    `coupon_id`       STRING COMMENT '优惠券ID',
    `get_day_count`   BIGINT COMMENT '当日领用次数',
    `using_day_count` BIGINT COMMENT '当日使用(下单)次数',
    `used_day_count`  BIGINT COMMENT '当日使用(支付)次数',
    `get_count`       BIGINT COMMENT '累积领用次数',
    `using_count`     BIGINT COMMENT '累积使用(下单)次数',
    `used_count`      BIGINT COMMENT '累积使用(支付)次数'
)COMMENT '购物券主题宽表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_coupon_topic/'
TBLPROPERTIES ("parquet.compression"="lzo");
3、加载数据
-- Daily merge for the coupon topic: stored cumulative rows (hist) are
-- full-outer-joined on coupon_id with the 2020-03-10 partition of
-- dws_coupon_use_daycount (daily). The *_day_count columns take the daily values
-- (0 when absent); the cumulative columns add the daily values to the stored totals.
INSERT OVERWRITE TABLE dwt_coupon_topic
SELECT
    nvl(daily.coupon_id, hist.coupon_id),
    nvl(daily.get_count, 0),
    nvl(daily.using_count, 0),
    nvl(daily.used_count, 0),
    nvl(hist.get_count, 0) + nvl(daily.get_count, 0),
    nvl(hist.using_count, 0) + nvl(daily.using_count, 0),
    nvl(hist.used_count, 0) + nvl(daily.used_count, 0)
FROM dwt_coupon_topic hist
FULL OUTER JOIN (
    SELECT
        coupon_id,
        get_count,
        using_count,
        used_count
    FROM dws_coupon_use_daycount
    WHERE dt = '2020-03-10'
) daily
    ON hist.coupon_id = daily.coupon_id;
2.5 活动主题宽表 dwt_activity_topic (预留)
1、数据来源
dwt_activity_topic、dws_activity_info_daycount。
2、创建表
-- Activity topic wide table, keyed by activity id: activity name plus load-day
-- and cumulative order / payment counts.
-- The table is dropped and re-created as an EXTERNAL Parquet table with LZO compression.
DROP TABLE IF EXISTS dwt_activity_topic;
CREATE EXTERNAL TABLE dwt_activity_topic (
    `id`                STRING COMMENT '活动id',
    `activity_name`     STRING COMMENT '活动名称',
    `order_day_count`   BIGINT COMMENT '当日日下单次数',
    `payment_day_count` BIGINT COMMENT '当日支付次数',
    `order_count`       BIGINT COMMENT '累积下单次数',
    `payment_count`     BIGINT COMMENT '累积支付次数'
) COMMENT '活动主题宽表'
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_activity_topic/'
TBLPROPERTIES ("parquet.compression"="lzo");
3、加载数据
-- Daily merge for the activity topic: stored cumulative rows (hist) are
-- full-outer-joined on id with the 2020-03-10 partition of
-- dws_activity_info_daycount (daily). The *_day_count columns take the daily values
-- (0 when absent); the cumulative columns add the daily values to the stored totals.
INSERT OVERWRITE TABLE dwt_activity_topic
SELECT
    nvl(daily.id, hist.id),
    nvl(daily.activity_name, hist.activity_name),
    nvl(daily.order_count, 0),
    nvl(daily.payment_count, 0),
    nvl(hist.order_count, 0) + nvl(daily.order_count, 0),
    nvl(hist.payment_count, 0) + nvl(daily.payment_count, 0)
FROM dwt_activity_topic hist
FULL OUTER JOIN (
    SELECT
        id,
        activity_name,
        order_count,
        payment_count
    FROM dws_activity_info_daycount
    WHERE dt = '2020-03-10'
) daily
    ON hist.id = daily.id;
三、dwt 层数据导入脚本 dws_to_dwt.sh
#!/bin/bash
# dws_to_dwt.sh - daily DWS -> DWT load.
# Merges one day of DWS data into the five cumulative DWT topic tables
# (uv, user, sku, coupon, activity) with a single hive invocation.
# Usage: dws_to_dwt.sh [yyyy-MM-dd]

APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date passed as the first argument; otherwise default to yesterday.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# Corrections made to the statements below:
#   * dwt_uv_topic.login_count is documented as cumulative active days, so it is
#     incremented by 1 on an active day (it previously added the day's count).
#   * dwt_user_topic.login_last_30d_count counts days with login_count>0
#     (it previously tested order_count>0).
#   * dwt_sku_topic cumulative payment_num / payment_amount add the day's
#     payment_num / payment_amount (both previously added payment_count).
#   * refund_count30 is qualified with sku_act like its neighbours.
#   * The 30-day windows are bounded above by do_date, so running the script for
#     a past date does not pull later partitions into the *_last_30d_* columns.
# NOTE(review): date_add(d,-30) .. d inclusive spans 31 calendar days; confirm
# whether -29 is what "last 30 days" should mean.
# No SQL comments are placed inside the string so the text passed to hive -e
# contains statements only.
sql="
insert overwrite table ${APP}.dwt_uv_topic
select
    nvl(new.mid_id,old.mid_id),
    nvl(new.user_id,old.user_id),
    nvl(new.version_code,old.version_code),
    nvl(new.version_name,old.version_name),
    nvl(new.lang,old.lang),
    nvl(new.source,old.source),
    nvl(new.os,old.os),
    nvl(new.area,old.area),
    nvl(new.model,old.model),
    nvl(new.brand,old.brand),
    nvl(new.sdk_version,old.sdk_version),
    nvl(new.gmail,old.gmail),
    nvl(new.height_width,old.height_width),
    nvl(new.app_time,old.app_time),
    nvl(new.network,old.network),
    nvl(new.lng,old.lng),
    nvl(new.lat,old.lat),
    nvl(old.login_date_first,'$do_date'),
    if(new.login_count>0,'$do_date',old.login_date_last),
    nvl(new.login_count,0),
    nvl(old.login_count,0)+if(new.login_count>0,1,0)
from
(
    select *
    from ${APP}.dwt_uv_topic
)old
full outer join
(
    select *
    from ${APP}.dws_uv_detail_daycount
    where dt='$do_date'
)new
on old.mid_id=new.mid_id;

insert overwrite table ${APP}.dwt_user_topic
select
    nvl(new.user_id,old.user_id),
    if(old.login_date_first is null and new.login_count>0,'$do_date',old.login_date_first),
    if(new.login_count>0,'$do_date',old.login_date_last),
    nvl(old.login_count,0)+if(new.login_count>0,1,0),
    nvl(new.login_last_30d_count,0),
    if(old.order_date_first is null and new.order_count>0,'$do_date',old.order_date_first),
    if(new.order_count>0,'$do_date',old.order_date_last),
    nvl(old.order_count,0)+nvl(new.order_count,0),
    nvl(old.order_amount,0)+nvl(new.order_amount,0),
    nvl(new.order_last_30d_count,0),
    nvl(new.order_last_30d_amount,0),
    if(old.payment_date_first is null and new.payment_count>0,'$do_date',old.payment_date_first),
    if(new.payment_count>0,'$do_date',old.payment_date_last),
    nvl(old.payment_count,0)+nvl(new.payment_count,0),
    nvl(old.payment_amount,0)+nvl(new.payment_amount,0),
    nvl(new.payment_last_30d_count,0),
    nvl(new.payment_last_30d_amount,0)
from
(
    select *
    from ${APP}.dwt_user_topic
)old
full outer join
(
    select
        user_id,
        sum(if(dt='$do_date',login_count,0)) login_count,
        sum(if(dt='$do_date',order_count,0)) order_count,
        sum(if(dt='$do_date',order_amount,0)) order_amount,
        sum(if(dt='$do_date',payment_count,0)) payment_count,
        sum(if(dt='$do_date',payment_amount,0)) payment_amount,
        sum(if(login_count>0,1,0)) login_last_30d_count,
        sum(order_count) order_last_30d_count,
        sum(order_amount) order_last_30d_amount,
        sum(payment_count) payment_last_30d_count,
        sum(payment_amount) payment_last_30d_amount
    from ${APP}.dws_user_action_daycount
    where dt>=date_add('$do_date',-30)
      and dt<='$do_date'
    group by user_id
)new
on old.user_id=new.user_id;

with
sku_act as
(
    select
        sku_id,
        sum(if(dt='$do_date',order_count,0)) order_count,
        sum(if(dt='$do_date',order_num,0)) order_num,
        sum(if(dt='$do_date',order_amount,0)) order_amount,
        sum(if(dt='$do_date',payment_count,0)) payment_count,
        sum(if(dt='$do_date',payment_num,0)) payment_num,
        sum(if(dt='$do_date',payment_amount,0)) payment_amount,
        sum(if(dt='$do_date',refund_count,0)) refund_count,
        sum(if(dt='$do_date',refund_num,0)) refund_num,
        sum(if(dt='$do_date',refund_amount,0)) refund_amount,
        sum(if(dt='$do_date',cart_count,0)) cart_count,
        sum(if(dt='$do_date',cart_num,0)) cart_num,
        sum(if(dt='$do_date',favor_count,0)) favor_count,
        sum(if(dt='$do_date',appraise_good_count,0)) appraise_good_count,
        sum(if(dt='$do_date',appraise_mid_count,0)) appraise_mid_count,
        sum(if(dt='$do_date',appraise_bad_count,0)) appraise_bad_count,
        sum(if(dt='$do_date',appraise_default_count,0)) appraise_default_count,
        sum(order_count) order_count30,
        sum(order_num) order_num30,
        sum(order_amount) order_amount30,
        sum(payment_count) payment_count30,
        sum(payment_num) payment_num30,
        sum(payment_amount) payment_amount30,
        sum(refund_count) refund_count30,
        sum(refund_num) refund_num30,
        sum(refund_amount) refund_amount30,
        sum(cart_count) cart_count30,
        sum(cart_num) cart_num30,
        sum(favor_count) favor_count30,
        sum(appraise_good_count) appraise_good_count30,
        sum(appraise_mid_count) appraise_mid_count30,
        sum(appraise_bad_count) appraise_bad_count30,
        sum(appraise_default_count) appraise_default_count30
    from ${APP}.dws_sku_action_daycount
    where dt>=date_add('$do_date',-30)
      and dt<='$do_date'
    group by sku_id
),
sku_topic as
(
    select
        sku_id, spu_id,
        order_last_30d_count, order_last_30d_num, order_last_30d_amount,
        order_count, order_num, order_amount,
        payment_last_30d_count, payment_last_30d_num, payment_last_30d_amount,
        payment_count, payment_num, payment_amount,
        refund_last_30d_count, refund_last_30d_num, refund_last_30d_amount,
        refund_count, refund_num, refund_amount,
        cart_last_30d_count, cart_last_30d_num,
        cart_count, cart_num,
        favor_last_30d_count, favor_count,
        appraise_last_30d_good_count, appraise_last_30d_mid_count,
        appraise_last_30d_bad_count, appraise_last_30d_default_count,
        appraise_good_count, appraise_mid_count,
        appraise_bad_count, appraise_default_count
    from ${APP}.dwt_sku_topic
)
insert overwrite table ${APP}.dwt_sku_topic
select
    nvl(sku_act.sku_id,sku_topic.sku_id),
    sku_info.spu_id,
    nvl(sku_act.order_count30,0),
    nvl(sku_act.order_num30,0),
    nvl(sku_act.order_amount30,0),
    nvl(sku_topic.order_count,0)+nvl(sku_act.order_count,0),
    nvl(sku_topic.order_num,0)+nvl(sku_act.order_num,0),
    nvl(sku_topic.order_amount,0)+nvl(sku_act.order_amount,0),
    nvl(sku_act.payment_count30,0),
    nvl(sku_act.payment_num30,0),
    nvl(sku_act.payment_amount30,0),
    nvl(sku_topic.payment_count,0)+nvl(sku_act.payment_count,0),
    nvl(sku_topic.payment_num,0)+nvl(sku_act.payment_num,0),
    nvl(sku_topic.payment_amount,0)+nvl(sku_act.payment_amount,0),
    nvl(sku_act.refund_count30,0),
    nvl(sku_act.refund_num30,0),
    nvl(sku_act.refund_amount30,0),
    nvl(sku_topic.refund_count,0)+nvl(sku_act.refund_count,0),
    nvl(sku_topic.refund_num,0)+nvl(sku_act.refund_num,0),
    nvl(sku_topic.refund_amount,0)+nvl(sku_act.refund_amount,0),
    nvl(sku_act.cart_count30,0),
    nvl(sku_act.cart_num30,0),
    nvl(sku_topic.cart_count,0)+nvl(sku_act.cart_count,0),
    nvl(sku_topic.cart_num,0)+nvl(sku_act.cart_num,0),
    nvl(sku_act.favor_count30,0),
    nvl(sku_topic.favor_count,0)+nvl(sku_act.favor_count,0),
    nvl(sku_act.appraise_good_count30,0),
    nvl(sku_act.appraise_mid_count30,0),
    nvl(sku_act.appraise_bad_count30,0),
    nvl(sku_act.appraise_default_count30,0),
    nvl(sku_topic.appraise_good_count,0)+nvl(sku_act.appraise_good_count,0),
    nvl(sku_topic.appraise_mid_count,0)+nvl(sku_act.appraise_mid_count,0),
    nvl(sku_topic.appraise_bad_count,0)+nvl(sku_act.appraise_bad_count,0),
    nvl(sku_topic.appraise_default_count,0)+nvl(sku_act.appraise_default_count,0)
from sku_act
full outer join sku_topic
on sku_act.sku_id=sku_topic.sku_id
left join
(
    select *
    from ${APP}.dwd_dim_sku_info
    where dt='$do_date'
)sku_info
on nvl(sku_topic.sku_id,sku_act.sku_id)=sku_info.id;

insert overwrite table ${APP}.dwt_coupon_topic
select
    nvl(new.coupon_id,old.coupon_id),
    nvl(new.get_count,0),
    nvl(new.using_count,0),
    nvl(new.used_count,0),
    nvl(old.get_count,0)+nvl(new.get_count,0),
    nvl(old.using_count,0)+nvl(new.using_count,0),
    nvl(old.used_count,0)+nvl(new.used_count,0)
from
(
    select *
    from ${APP}.dwt_coupon_topic
)old
full outer join
(
    select coupon_id, get_count, using_count, used_count
    from ${APP}.dws_coupon_use_daycount
    where dt='$do_date'
)new
on old.coupon_id=new.coupon_id;

insert overwrite table ${APP}.dwt_activity_topic
select
    nvl(new.id,old.id),
    nvl(new.activity_name,old.activity_name),
    nvl(new.order_count,0),
    nvl(new.payment_count,0),
    nvl(old.order_count,0)+nvl(new.order_count,0),
    nvl(old.payment_count,0)+nvl(new.payment_count,0)
from
(
    select *
    from ${APP}.dwt_activity_topic
)old
full outer join
(
    select id, activity_name, order_count, payment_count
    from ${APP}.dws_activity_info_daycount
    where dt='$do_date'
)new
on old.id=new.id;
"

$hive -e "$sql"
四、dwt 层总结
1、数据采用 parquet 存储 + lzo 压缩的方式。
2、dwt 层不是分区表。
3、宽表字段怎么来?以维度对象为主题,关联相关事实表的度量值,再派生出首次(开头)时间、末次(结尾)时间、累积值,以及某一时间段(如最近30日)内的累积值。
4、dwt 层共五张表。
电商数仓(dwt 层)相关推荐
- 电商数仓DWD层用户行为日志解析
文章目录 前言 一.页面埋点日志.启动日志结构 二.日志解析的流程 2.1 启动日志表解析(包括注意事项) 2.1.1 解析思路 2.1.2 建表语句 2.1.3 数据导入 2.1.4 注意事项 2. ...
- 电商数仓(dwd 层)
一.dwd 层介绍 1.对用户行为数据解析. 2.对核心数据进行判空过滤. 3.对业务数据采用维度模型重新建模,即维度退化. 二.dwd 层用户行为数据 2.1 用户行为启动表 dwd_start_l ...
- 数据仓库之电商数仓-- 3.3、电商数据仓库系统(DWT层)
目录 八.数仓搭建-DWT层 8.1 访客主题 8.2 用户主题 8.3 商品主题 8.4 优惠券主题 8.5 活动主题 8.6 地区主题 8.7 DWT层首日数据导入脚本 8.8 DWT层每日数据导 ...
- 数据仓库之电商数仓-- 3.4、电商数据仓库系统(ADS层)
目录 九.数仓搭建-ADS层 9.1 建表说明 9.2 访客主题 9.2.1 访客统计 9.2.2 路径分析 9.3 用户主题 9.3.1 用户统计 9.3.2 用户变动统计 9.3.3 用户行为漏斗 ...
- 数据仓库之电商数仓-- 3.2、电商数据仓库系统(DWS层)
目录 七.数仓搭建-DWS层 7.1 系统函数 7.1.1 nvl函数 7.1.2 日期处理函数 7.1.3 复杂数据类型定义 7.2 DWS层 7.2.1 访客主题 7.2.2 用户主题 7.2.3 ...
- 数据仓库之电商数仓-- 4、可视化报表Superset
目录 一.Superset入门 1.1 Superset概述 1.2 Superset应用场景 二.Superset安装及使用 2.1 安装Python环境 2.1.1 安装Miniconda 2.1 ...
- 数据仓库之电商数仓-- 2、业务数据采集平台
目录 一.电商业务简介 1.1 电商业务流程 1.2 电商常识(SKU.SPU) 1.3 电商系统表结构 1.3.1 活动信息表(activity_info) 1.3.2 活动规则表(activity ...
- 数据仓库之电商数仓-- 1、用户行为数据采集
目录 一.数据仓库概念 二.项目需求及架构设计 2.1 项目需求分析 2.2 项目框架 2.2.1 技术选型 2.2.2 系统数据流程设计 2.2.3 框架版本选型 2.2.4 服务器选型 2.2.5 ...
- 电商数仓描述_笔记-尚硅谷大数据项目数据仓库-电商数仓V1.2新版
架构 项目框架 数仓架构 存储压缩 Snappy与LZO LZO安装: 读取LZO文件时,需要先创建索引,才可以进行切片. 框架版本选型Apache:运维麻烦,需要自己调研兼容性. CDH:国内使用最 ...
- 复盘离线电商数仓3.0项目–数据开发梳理
复盘离线电商数仓项目–数据开发梳理 业务数据 数仓分层 ods层到ads层的开发 开源BI工具Superset ODS层业务数据&日志数据 ods层业务数据 使用Sqoop脚本从Mysql数据 ...
最新文章
- SQL2008-分页显示3种方法
- python九十八类_Python领域最伟大工程师Kenneth Reitz,教你写代码
- linux上设置了log4j没有产生日志文件_关于 log4j 升级到 log4j2 的小结
- python3.7 pip安装_python3.7安装, 解决pip is configured with locations that require TLS/SSL问题...
- 就业模拟试题_Net
- MySQL常用存储引擎之Archive
- 12行代码AC_Leecode 495. 提莫攻击——Leecode每日一题系列
- php 直播服务器搭建,基于Nginx搭建RTMP/HLS视频直播服务器
- mysql业务繁忙时能建索引吗_MySQL DBA面试高频三十问
- 人受失败后多久可以做第二次_做完皮秒多久可以用自己的护肤产品、过来人分享皮秒后怎么护肤?...
- bzoj2243 树链剖分
- Java配置文件找不到指定_转载:Java项目读取配置文件时,FileNotFoundException 系统找不到指定的文件,System.getProperty(user.dir)的理解...
- JS学习总结(9)——String
- cenos各个版本下载地址
- 密码生成器c语言程序,C++全密码生成的实现代码
- 江苏高考新方案定了!总分750分,科目“3+1+2”
- 买“背包“吗?送“手臂”的那种!卡耐基梅隆大学可穿戴机械臂问世
- 输入框限制规则 只能输入数字 只能输入字母数字 等等
- 淘宝API upload_img - 上传图片到淘宝
- php微信支付mch_id参数格式错误,在.net core上,Web网站调用微信支付-统一下单接口(xml传参)一直返回错误:mch_id参数格式错误...
热门文章
- mas6a801 sw tree disp
- [luoguT30208]太极剑
- 亿级视频内容如何实时更新?
- 服务器硬盘常用的阵列方式有几种,三种常见磁盘阵列设置
- 【迁移学习】Self Paced Adversarial Training for Multimodal Few-shot Learning论文解读
- Windows网络笔记-台式机通过网线直连笔记本,台式机通过笔记本上网(win10)
- 1988年图灵奖--伊万·萨瑟兰简介
- shell中用grep查找并且不输出_grep无法查找shell传过来的变量?先注意一下文本格式吧!...
- Discuz集思街淘宝客模板 程序源代码
- Android专业DJ,著名音乐游戏《DJ英雄》登陆Android Market