天池大数据比赛,菜鸟仓库比赛,御膳房操作
御膳房--操作
表名 描述 来源 所属包 所属项目 操作
item_feature 商品粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
config 每个商品在全国和分仓区域的补少... 天池 查看包 tianchi_data (tianchi_data) 已授权
item_store_feature 商品和分仓区域粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
数据开发-工作流-工作流节点
-- Sanity-check the config table: total row count first, then a 10-row sample.
SELECT COUNT(*)
FROM tianchi_data.config;

SELECT *
FROM tianchi_data.config
LIMIT 10;
创建结果输出的表target
-- Result output table: one value (`target`) per item_id / store_code pair.
CREATE TABLE IF NOT EXISTS target (
    item_id    BIGINT,
    store_code STRING,
    target     DOUBLE
);
表管理-筛选-输入target-
创建分仓结果输出的测试表target2
-- Test table for per-warehouse results; same layout as `target`.
CREATE TABLE IF NOT EXISTS target2 (
    item_id    BIGINT,
    store_code STRING,
    target     DOUBLE
);
把查询结果保存到表
-- Replace the contents of target2 with a 10-row sample taken from
-- item_store_feature (qty_alipay_njhs lands in the `target` column).
INSERT OVERWRITE TABLE target2
SELECT
    item_id,
    store_code,
    qty_alipay_njhs
FROM tianchi_data.item_store_feature
LIMIT 10;
查询表结构
-- Show the column layout of the two source feature tables.
-- Each statement needs its own terminator; without it the two lines are
-- read as one malformed statement.
DESC tianchi_data.item_feature;
DESC tianchi_data.item_store_feature;
查询新建的表
-- Read back everything currently stored in target2.
SELECT item_id, store_code, target
FROM target2;
删除表中所有数据
-- Remove every row from target2 (the table itself is kept).
TRUNCATE TABLE target2;
创建全国结果输出的测试表target3
-- Test table for nationwide results: one value per item, no store_code.
CREATE TABLE IF NOT EXISTS target3 (
    item_id BIGINT,
    target  DOUBLE
);
统计前两周的和——查询
-- Nationwide: total qty_alipay_njhs per item over 2015-12-14 .. 2015-12-27
-- (both bounds inclusive, i.e. the last 14 days of data).
INSERT OVERWRITE TABLE target3
SELECT
    item_id,
    SUM(qty_alipay_njhs)
FROM tianchi_data.item_feature
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY item_id;

-- Per warehouse: the same 14-day total, split by store_code.
INSERT OVERWRITE TABLE target2
SELECT
    item_id,
    store_code,
    SUM(qty_alipay_njhs)
FROM tianchi_data.item_store_feature
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY
    item_id,
    store_code;
实验数据操作表
-- Scratch table for experiments; same layout as `target`.
CREATE TABLE IF NOT EXISTS test1 (
    item_id    BIGINT,
    store_code STRING,
    target     DOUBLE
);
Case when 的使用方法
-- Simple CASE: one expression compared against a list of values.
CASE sex
    WHEN '1' THEN '男'
    WHEN '2' THEN '女'
    ELSE '其他'
END
-- Searched CASE: every WHEN carries its own boolean condition.
CASE
    WHEN sex = '1' THEN '男'
    WHEN sex = '2' THEN '女'
    ELSE '其他'
END
-- "CASE a WHEN b": when a = b the THEN value is returned.
-- Here 100 = 100 always holds, so every nationwide row from target3 is
-- written to test1 with the constant store_code 'all'.
INSERT OVERWRITE TABLE test1
SELECT
    item_id,
    CASE 100 WHEN 100 THEN 'all' END,
    target
FROM target3;
查看
-- Row count of the final result table.
SELECT COUNT(*) FROM target;
1389442
1189442
将test1数据追加到target2
-- Append (not overwrite) the rows of test1 to target2.
INSERT INTO TABLE target2
SELECT
    item_id,
    store_code,
    target
FROM test1;
测试
-- Spot check: all target2 rows for item 4.
SELECT
    item_id,
    store_code,
    target
FROM target2
WHERE item_id = 4;
生成最终结果
-- Produce the final result: append every target2 row to target.
INSERT INTO TABLE target
SELECT
    item_id,
    store_code,
    target
FROM target2;
UNION 操作符用于合并两个或多个 SELECT 语句的结果集。如果允许重复的值,请使用 UNION ALL
-- Syntax template only: column_name(s), table_name1 and table_name2 are
-- placeholders to be replaced, so this is not runnable as written.
-- UNION ALL keeps duplicate rows coming from the two SELECTs.
SELECT column_name(s) FROM table_name1
UNION ALL
SELECT column_name(s) FROM table_name2
mvn打包:进入项目目录 D:\eclipseHadoop1x\hadoop-test-indigoVersion-space\TianChiMapreduce 后,执行以下命令
mvn clean package
配置请全部在base.mapred.xml中完成
MapOnly程序在Map阶段直接针对每一条输入记录进行处理并输出,后续无Combiner和Reducer。要编写MapOnly程序,只要做如下两点即可:
将base.mapred.xml中的Combiner和Reducer注释或删除
Mapper中要直接输出outputRecord,不要像普通MR程序输出Map的中间结果
如何读取本地文件
ODPS的MR程序不允许直接使用本地IO,如有资源文件,可放在src/main/resources/下,然后通过TaskContext的readResourceFileAsStream方法读取。
示例:
// 假设资源文件为src/main/resources/data.txt
String resourceFileName = "src/main/resources/data.txt";
BufferedInputStream bis = new BufferedInputStream(this.getClass().getClassLoader().getResourceAsStream(resourceFileName));
BufferedReader = new BufferedReader(new InputStreamReader(bis));
实验数据操作表
-- Second scratch table for experiments; same layout as `target`.
CREATE TABLE IF NOT EXISTS test2 (
    item_id    BIGINT,
    store_code STRING,
    target     DOUBLE
);
增加测试数据
-- Seed test2 with 20 rows taken from target (no ORDER BY, so which 20 rows
-- are picked is not specified).
INSERT INTO TABLE test2
SELECT
    item_id,
    store_code,
    target
FROM target
LIMIT 20;
输出结果
-- Output table made of two free-form string columns.
CREATE TABLE IF NOT EXISTS result (
    str1 STRING,
    str2 STRING
);
参考示例
-- Reference example: rebuild `result` from scratch, then fill it with the
-- output of the user-defined function myudf applied to the item-4 rows of test2.
-- NOTE(review): result declares two columns (str1, str2) while only one
-- output alias is listed for myudf — confirm the column counts match.
DROP TABLE IF EXISTS result;

CREATE TABLE IF NOT EXISTS result (
    str1 STRING,
    str2 STRING
);

INSERT INTO TABLE result
SELECT myudf(item_id) AS (item_id)
FROM (
    SELECT *
    FROM test2 tb2
    WHERE item_id = 4
) tb1;
参考示例
-- Reference example: feed sorted, per-(user_id, item_id) partitioned rows into myudf.
-- The original annotations used "//", which is not a SQL comment marker and
-- breaks parsing; they are rewritten as "--" comments.
SELECT
    -- Rename the returned columns so they line up with the fields of the table they are inserted into.
    myudf(user_id, item_id, behavior_type, time, '2014-12-18', 1) AS (user_id, item_id, time, feature)
FROM (
    SELECT *
    FROM t_mj_p_user tb2  -- the tb2 alias must be present, otherwise an error is raised
    WHERE time < '2014-12-18'
    DISTRIBUTE BY user_id, item_id
    SORT BY user_id, item_id, time DESC
) tb1;  -- the tb1 alias must be present, otherwise an error is raised
-- Submission table; same layout as `target`.
CREATE TABLE IF NOT EXISTS cn_submit (
    item_id    BIGINT,
    store_code STRING,
    target     DOUBLE
);

-- Append every row of target to the submission table.
INSERT INTO TABLE cn_submit
SELECT *
FROM target;
-- Per-item sales features over several look-back windows. The column
-- meanings below follow the positional order of the INSERT that fills this table.
-- NOTE(review): the feature column names start with a digit; some parsers read
-- tokens such as 1d as numeric literals rather than identifiers — confirm the
-- engine treats them as column names wherever they are referenced.
create table if not exists all_feature2 (
item_id bigint,
store_code string,
1w Double, -- sum of qty_alipay_njhs for 20151221..20151227
2w Double, -- sum of qty_alipay_njhs for 20151214..20151227
3w Double, -- sum of qty_alipay_njhs for 20151207..20151227
1m Double, -- sum of qty_alipay_njhs for 20151130..20151227
2m Double, -- sum of qty_alipay_njhs for 20151102..20151227
1d Double, -- qty_alipay_njhs on 20151227
2d Double, -- qty_alipay_njhs on 20151226
3d Double); -- qty_alipay_njhs on 20151225
-- Fill all_feature2 with nationwide (store_code = 'all') window sums per item.
--
-- The original built this from eight full scans of item_feature glued together
-- with UNION ALL and zero-padded columns. Every window is a sub-range of
-- 20151102..20151227, so a single scan with conditional aggregation yields the
-- same per-item sums: a row contributes its quantity to each window it falls
-- in and 0 to the others.
-- The upper bound 20151227 is enforced once in WHERE, so the window
-- conditions only need their lower bound.
-- COALESCE keeps a NULL quantity contributing 0, as the zero-padded UNION did.
INSERT INTO TABLE all_feature2
SELECT
    item_id,
    'all' AS store_code,
    SUM(CASE WHEN thedate >= '20151221' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END),  -- 1w
    SUM(CASE WHEN thedate >= '20151214' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END),  -- 2w
    SUM(CASE WHEN thedate >= '20151207' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END),  -- 3w
    SUM(CASE WHEN thedate >= '20151130' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END),  -- 1m
    SUM(COALESCE(qty_alipay_njhs, 0)),                                                  -- 2m (whole scanned range)
    SUM(CASE WHEN thedate = '20151227' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END),   -- 1d
    SUM(CASE WHEN thedate = '20151226' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END),   -- 2d
    SUM(CASE WHEN thedate = '20151225' THEN COALESCE(qty_alipay_njhs, 0) ELSE 0 END)    -- 3d
FROM tianchi_data.item_feature
WHERE thedate >= '20151102'
  AND thedate <= '20151227'
GROUP BY item_id;
线性回归预测模型存放的地址
-- Table that receives the prediction-model output; same layout as `target`.
CREATE TABLE IF NOT EXISTS test3 (
    item_id    BIGINT,
    store_code STRING,
    target     DOUBLE
);
ceil 向上取整函数,floor 向下取整函数,log 取对数
-- Append the rounded-down prediction_score values from test4.
INSERT INTO TABLE test3
SELECT
    item_id,
    store_code,
    FLOOR(prediction_score)
FROM test4;

-- Append the per-warehouse 14-day totals (20151214..20151227 inclusive).
INSERT INTO TABLE test3
SELECT
    item_id,
    store_code,
    SUM(qty_alipay_njhs)
FROM tianchi_data.item_store_feature
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY
    item_id,
    store_code;

-- Rebuild the submission table from test3.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM test3;
创建时间列表
-- Distinct dates present in item_feature between 20141020 and 20151227,
-- in ascending order, capped at 500 rows.
CREATE TABLE IF NOT EXISTS datalist AS
SELECT DISTINCT (thedate)
FROM tianchi_data.item_feature
WHERE thedate >= '20141020'
  AND thedate <= '20151227'
ORDER BY thedate
LIMIT 500;
清空表后重新生成
-- Empty datalist and load the same distinct-date list again.
TRUNCATE TABLE datalist;

INSERT INTO TABLE datalist
SELECT DISTINCT (thedate)
FROM tianchi_data.item_feature
WHERE thedate >= '20141020'
  AND thedate <= '20151227'
ORDER BY thedate
LIMIT 500;
查询
-- Inspect the date list.
SELECT *
FROM datalist;

-- Pair every date with the value returned by myudf(). The second column is
-- left unaliased; the join that builds item_feature2 reads it as a._c1.
CREATE TABLE IF NOT EXISTS datalist2 AS
SELECT
    thedate,
    myudf()
FROM datalist;

SELECT *
FROM datalist2;
创建要连接的表
-- Local copy of the three item_feature columns needed for the join, limited
-- to 20141020..20151227 (inclusive).
CREATE TABLE IF NOT EXISTS item_feature AS
SELECT
    item_id,
    qty_alipay_njhs,
    thedate
FROM tianchi_data.item_feature
WHERE thedate >= '20141020'
  AND thedate <= '20151227';
表连接
-- Attach to every item_feature row the per-date value stored in datalist2
-- (its auto-named second column _c1, exposed here as `tab`).
-- The hint must be written "/*+" with no space: "/* + ... */" is an ordinary
-- comment, so the map-join request on the small table `a` was silently ignored.
CREATE TABLE IF NOT EXISTS item_feature2 AS
SELECT /*+ mapjoin(a) */
    a._c1 AS tab,
    b.item_id,
    b.thedate,
    b.qty_alipay_njhs
FROM datalist2 a
JOIN item_feature b
    ON a.thedate = b.thedate;

-- Spot check one item in date order.
-- NOTE(review): some MaxCompute configurations reject ORDER BY without LIMIT — confirm.
SELECT *
FROM item_feature2
WHERE item_id = '222784'
ORDER BY thedate;
按ceiling(tab/14)分组--31维度的特征
-- Bucket rows by ceil(tab / 14) and total the quantity per item and bucket.
CREATE TABLE IF NOT EXISTS item_feature4 AS
SELECT
    item_id,
    ceil(tab/14) AS tab1,
    SUM(qty_alipay_njhs) AS sum1
FROM item_feature2
GROUP BY
    item_id,
    ceil(tab/14);
特征建立组
-- TODO: the SELECT for item_feature5 was never written. A bare
-- "create table ... as" with no query does not parse and would swallow the
-- statement that follows, so it is kept only as a comment.
-- create table if not exists item_feature5 as

-- Copy of test3 ordered by item_id.
-- NOTE(review): some MaxCompute configurations reject ORDER BY without LIMIT — confirm.
CREATE TABLE IF NOT EXISTS test3_9 AS
SELECT
    item_id,
    store_code,
    target
FROM test3
ORDER BY item_id;

-- Rebuild the submission: test3_9 rows plus the per-warehouse 14-day totals
-- (20151214..20151227 inclusive).
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM test3_9;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    SUM(qty_alipay_njhs)
FROM tianchi_data.item_store_feature
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY
    item_id,
    store_code;
模型组合
-- Nationwide values scaled to 90% and rounded down, tagged store_code = 'all'.
-- The computed column is now named `target`; unaliased it would get an
-- auto-generated name.
CREATE TABLE IF NOT EXISTS test3_91 AS
SELECT
    item_id,
    CASE 100 WHEN 100 THEN 'all' END AS store_code,
    FLOOR(0.9*target) AS target
FROM target3;

-- Rebuild the submission.
-- NOTE(review): test3_91 is created above but the submission below still reads
-- test3_9 — confirm which table was meant to be submitted.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM test3_9;

-- Per-warehouse 14-day totals (20151214..20151227 inclusive).
INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    SUM(qty_alipay_njhs)
FROM tianchi_data.item_store_feature
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY
    item_id,
    store_code;
将全国最大值缩小200 总结果1389442个 max1结果1389435个
少7个值
-- Everything in target2 except the rows of item 985273.
CREATE TABLE IF NOT EXISTS max1 AS
SELECT *
FROM target2
WHERE item_id NOT IN ('985273');
将七个值取出
-- Only the target2 rows that belong to item 985273.
CREATE TABLE IF NOT EXISTS max1_7 AS
SELECT *
FROM target2
WHERE item_id = '985273';
将其中6个放入max1 结果1389441个
-- Put the per-warehouse rows of item 985273 back into max1; only its
-- store_code = 'all' row stays out.
INSERT INTO TABLE max1
SELECT *
FROM max1_7
WHERE store_code NOT IN ('all');

-- Verify the resulting row count.
SELECT COUNT(*) FROM max1;
将最大值减去200
-- Preview the nationwide ('all') row of item 985273 with 200 subtracted from its value.
SELECT
    item_id,
    store_code,
    target-200 AS target
FROM max1_7
WHERE item_id = '985273'
  AND store_code = 'all';
提交结果
-- Rebuild the submission: all of max1, plus the nationwide row of item 985273
-- lowered by 200.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM max1;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    target-200 AS target
FROM max1_7
WHERE item_id = '985273'
  AND store_code = 'all';
查看特征得知受12.25影响特别大
'985273'
将全国最大值0.97 总结果1389442个 max1结果1389435个
少7个值
-- Everything in target2 except the rows of item 985273.
CREATE TABLE IF NOT EXISTS max2 AS
SELECT *
FROM target2
WHERE item_id NOT IN ('985273');
将七个值取出
-- Only the target2 rows that belong to item 985273.
CREATE TABLE IF NOT EXISTS max2_7 AS
SELECT *
FROM target2
WHERE item_id = '985273';
提交结果
-- Rebuild the submission: max2 unchanged, plus every row of item 985273
-- scaled to 97% and rounded down.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM max2;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    FLOOR(0.97*target) AS target
FROM max2_7;
查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)
-- The 50 rows of all_feature2 with the largest 2w value among those where
-- 3d exceeds 1d + 2d. The first statement lacked its terminator and ran
-- into the next CREATE.
CREATE TABLE IF NOT EXISTS top50x AS
SELECT
    item_id,
    store_code,
    2w
FROM all_feature2
WHERE 3d > (1d+2d)
ORDER BY 2w DESC
LIMIT 50;

-- target2 without any row whose item appears in top50x.
CREATE TABLE IF NOT EXISTS max50 AS
SELECT *
FROM target2
WHERE item_id NOT IN (
    SELECT item_id
    FROM top50x
);
50x7=350 1389442-350=1389092
将350个值取出
-- The target2 rows whose item appears in top50x.
CREATE TABLE IF NOT EXISTS max50_7 AS
SELECT *
FROM target2
WHERE item_id IN (
    SELECT item_id
    FROM top50x
);
提交结果
-- Rebuild the submission: max50 unchanged, plus the top50x items scaled to
-- 97% and rounded down.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM max50;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    FLOOR(0.97*target) AS target
FROM max50_7;
查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)
-- The 1000 rows of all_feature2 with the largest 2w value among those where
-- 3d exceeds 1d + 2d.
CREATE TABLE IF NOT EXISTS top1000x AS
SELECT
    item_id,
    store_code,
    2w
FROM all_feature2
WHERE 3d > (1d+2d)
ORDER BY 2w DESC
LIMIT 1000;

-- target2 without any row whose item appears in top1000x.
CREATE TABLE IF NOT EXISTS max1000 AS
SELECT *
FROM target2
WHERE item_id NOT IN (
    SELECT item_id
    FROM top1000x
);
将值取出
-- The target2 rows whose item appears in top1000x.
CREATE TABLE IF NOT EXISTS max1000_7 AS
SELECT *
FROM target2
WHERE item_id IN (
    SELECT item_id
    FROM top1000x
);
提交结果
-- Rebuild the submission: max1000 unchanged, plus the top1000x items scaled
-- to 95% and rounded down.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT *
FROM max1000;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    FLOOR(0.95*target) AS target
FROM max1000_7;
平滑12.25后提交前两周和
-- Smoothing of 2015-12-25, nationwide: copy item_feature without its 20151225
-- rows, then re-add the 20151226 rows relabelled as 20151225.
CREATE TABLE IF NOT EXISTS pinghua3 AS
SELECT
    item_id,
    qty_alipay_njhs,
    thedate
FROM tianchi_data.item_feature
WHERE thedate NOT IN ('20151225');

INSERT INTO TABLE pinghua3
SELECT
    item_id,
    qty_alipay_njhs,
    CASE WHEN 1=1 THEN '20151225' END AS thedate
FROM tianchi_data.item_feature
WHERE thedate = '20151226';

-- Same smoothing for the per-warehouse feature table.
CREATE TABLE IF NOT EXISTS pinghua2 AS
SELECT
    item_id,
    store_code,
    qty_alipay_njhs,
    thedate
FROM tianchi_data.item_store_feature
WHERE thedate NOT IN ('20151225');

INSERT INTO TABLE pinghua2
SELECT
    item_id,
    store_code,
    qty_alipay_njhs,
    CASE WHEN 1=1 THEN '20151225' END AS thedate
FROM tianchi_data.item_store_feature
WHERE thedate = '20151226';

-- Rebuild the submission from the smoothed data: 14-day totals
-- (20151214..20151227 inclusive) per warehouse, then nationwide as 'all'.
TRUNCATE TABLE cn_submit;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    store_code,
    SUM(qty_alipay_njhs)
FROM pinghua2
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY
    item_id,
    store_code;

INSERT INTO TABLE cn_submit
SELECT
    item_id,
    CASE WHEN 1=1 THEN 'all' END AS store_code,
    SUM(qty_alipay_njhs)
FROM pinghua3
WHERE thedate >= '20151214'
  AND thedate <= '20151227'
GROUP BY item_id;
天池大数据比赛,菜鸟仓库比赛,御膳房操作相关推荐
- 天池大数据比赛-菜鸟仓库比赛-第二赛季记录
统计全国仓库预测的前两周 商品_仓库_个数 rm(list=ls()) w=read.table("F:/笔记学习/天池比赛/菜鸟需求预测与分仓规划/data2/item_feature2. ...
- 天池大数据比赛-菜鸟仓库比赛-第一赛季记录
赛题说明链接 http://download.csdn.net/detail/q383700092/9538252 R语言 1379640 918539 2021961 1365166 5个 ...
- 不是你不会做菜,你只是缺个好厨房:深谈御膳房架构演进
本文根据阿里巴巴高级技术专家朱震杰在大流量高并发互联网应用实践在线峰会上题为<御膳房架构演进>的分享整体而成.在分享中重现了御膳房在探索大数据开放处理平台的道路上应对用户迫切需求和技术架构 ...
- AI比赛-NER:“万创杯”中医药天池大数据竞赛——中药说明书实体识别挑战
大赛概况 疫情催化下,人工智能正在持续助力中医药传承创新加速发展,其中中医用药知识体系沉淀挖掘是一个基础工作.通过挖掘中药说明书构建中药合理用药的知识图谱,将为中医规范诊疗奠定较好基础.挑战旨在通过抽 ...
- kaggle和天池大数据比赛的区别
kaggle和天池大数据的比赛给我的感觉完全不一样,天池上面的比赛给我感觉更像一场考试,大家都是自己埋头做自己的东西交流很少,以拟合线上验证集为最大的目标.kaggle上的比赛给人感觉更像一场交流分享 ...
- 阿里巴巴天池大数据竞赛黄金联赛全面开战,全球同步报名,只为寻找最聪明的你!...
阿里巴巴天池大数据竞赛黄金联赛全面开战,全球同步报名,只为寻找最聪明的你! 天池大数据竞赛是由阿里巴巴集团主办,面向全球新生代力量的高端算法竞赛.通过开放海量数据和"天池& ...
- 天池大数据竞赛第一名,上海交通大学人工智能实验室如何用AI定位肺结节
癌症,犹如黑暗中的魔鬼,带给人们恐惧与绝望.而肺癌,在我国作为发病率.死亡率最高的一类癌症,伤害着无数家庭.在我国每年都有近60万人死于肺癌.然而,癌症的死亡率与首次发现癌症的时期紧密相关,早期肺结节 ...
- 阿里天池大数据竞赛——口碑商家客流量预测 A2
阿里天池大赛koubeiyuce1 2017年二月份,天池大数据比赛,口碑商家客流量预测,参赛地址及详情: https://tianchi.shuju.aliyun.com/competition/i ...
- 新人 天池大数据初涉水
天池精准医疗大赛--人工智能辅助糖尿病遗传风险预测 一直想做一个天池大数据方面的比赛,以前一直没时间,正逢糖尿病风险预测比赛,阿里举办的,课题比较喜欢,哈哈 进入天池大数据竞赛中心,就可以看到相关信息 ...
最新文章
- Relay外部库使用
- 从管道中飞出的不一定是炮弹,也可能是无人机
- 洛谷 1359 租用游艇
- 2019河北高职计算机专科学校录取分数线,2019河北高考专科院校录取分数线_专科各院校投档线_一品高考网...
- 【若依(ruoyi)】按钮样式
- 使用python 下载_使用python下载大量文件
- 进程编译连接动态库,需要将动态库改为lib***.so
- 前端防xss攻击(去掉空格等能影响和攻击数据库的字段)
- 作为神经网络的输入_MATLAB实战|基于神经网络河南省降水量预测
- String path = request.getContextPath(....拼装当前网页的相对路径
- linux中文件的合并、归档、和压缩
- 元气骑士机器人旁边建筑_元气骑士:锤落谁家?锤子更适合机器人还是能双持的骑士呢?...
- 面试必知的25个经典回答 ,最全的面试干货,没有之一
- 解决使用redis作为session缓存 报错 Error: no such key 的问题
- 第一款无代码应用平台搭建的设备管理系统
- 计算机显示器模糊,电脑显示器模糊了怎么办?
- 【DPD数字预失真】射频功放的Volterra级数数字预失真系统开发
- 【大数据实战】苏宁大数据离线任务开发调度平台实践:设计与开发过程中的要点
- 洛谷试炼场 动态规划TG.lv(2)
- 计算机系统中所有实际物理装置的,计算机系统中所有实际物理装置的总称是计算机________件...
热门文章
- 移动终端课程设计——校园淘二手交易APP
- crc 校验错误_信道编码之循环冗余校验(CRC)
- java实现-强智教务系统API文档-全部java封装
- 探索分布式服务框架Dubbo开篇:牛逼哄哄的RPC
- 《Beyond Part Models: Person Retrieval with Refined Part Pooling 》PCB论文解读
- 用java下载apk解析包出错_教大家解析包时出现问题怎么解决
- Playing Atari with Deep Reinforcement Learning 学习笔记
- 条信息流oCPC调研报告
- linux下为php添加GD库(重新编译php)
- 高德地图API定位失败 浏览器定位 IP定位