电信信息日志使用mapreduce统计的两种方式
信息准备:
数据信息(注:原始日志以制表符分隔,其中“域名”和“网站类型”两列可能为空;下面的展示把分隔符显示成了空格并丢失了空列,程序中的下标 6~9 是按原始的制表符分隔格式计算的):
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 4 0 264 0 200
1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 2 4 132 1512 200
1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 4 0 240 0 200
1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 15 12 1527 2106 200
1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 20 16 4116 1432 200
1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 18 15 1116 954 200
1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.360.cn 信息安全 20 20 3156 2936 200
1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 4 0 240 0 200
1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 24 9 6960 690 200
1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 28 27 3659 3538 200
1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 3 3 1938 180 200
1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 15 9 918 4938 200
1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 3 3 180 180 200
1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 15 12 1938 2910 200
1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 img.qfc.cn 12 12 3008 3720 200
1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 57 102 7335 110349 200
1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 21 18 9531 2412 200
1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 69 63 11058 48243 200
1363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 2 2 120 120 200
1363157985079 13823070001 20-7C-8F-70-68-1F:CMCC 120.196.100.99 6 3 360 180 200
1363157985069 13600217502 00-1F-64-E2-E8-B1:CMCC 120.196.100.55 18 138 1080 186852 200
问题提出:按手机号统计上行、下行数据包数,以及上行、下行总流量。
1. 使用 Hadoop 的序列化(自定义 Writable)【注:标题原提到“按手机号是否匹配进行分区”,但下面的代码并未设置自定义 Partitioner,如需分区须另行实现】
package Hadoop;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;public class TrafficCountApp {public static class TrafficWritable implements Writable {private String phoneNo;private int upPackNo;private int downPackNo;private int upPayLoad;private int downPayLoad;public TrafficWritable(){}public TrafficWritable(String[] split){this(split[1], Integer.parseInt(split[6]), Integer.parseInt(split[7]),Integer.parseInt(split[8]),Integer.parseInt(split[9]));}public TrafficWritable(String phoneNo, int upPackNo, int downPackNo, int upPayLoad, int downPayLoad) {this.phoneNo = phoneNo;this.upPackNo = upPackNo;this.downPackNo = downPackNo;this.upPayLoad = upPayLoad;this.downPayLoad = downPayLoad;}public String getPhoneNo() {return phoneNo;}public void setPhoneNo(String phoneNo) {this.phoneNo = phoneNo;}public int getUpPackNo() {return upPackNo;}public void setUpPackNo(int upPackNo) {this.upPackNo = upPackNo;}public int getDownPackNo() {return downPackNo;}public void setDownPackNo(int downPackNo) {this.downPackNo = downPackNo;}public int getUpPayLoad() {return upPayLoad;}public void setUpPayLoad(int upPayLoad) {this.upPayLoad = upPayLoad;}public int getDownPayLoad() {return downPayLoad;}public void setDownPayLoad(int downPayLoad) {this.downPayLoad = downPayLoad;}@Overridepublic void write(DataOutput out) throws IOException {out.writeUTF(phoneNo);out.writeInt(upPackNo);out.writeInt(downPackNo);out.writeInt(upPayLoad);out.writeInt(downPayLoad);}@Overridepublic void readFields(DataInput in) throws IOException {phoneNo = in.readUTF();upPackNo = in.readInt();downPackNo = in.readInt();upPayLoad = in.readInt();downPayLoad = in.readInt();}@Overridepublic String toString() {return upPackNo+"\t"+downPackNo+"\t"+upPayLoad+"\t"+downPayLoad;}}//1.自定义mapperpublic static class TrafficCountMapper extends Mapper<LongWritable, Text, Text, TrafficWritable> {@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {System.out.println(value.toString());String[] split = 
value.toString().split("\\s");System.out.println(Arrays.toString(split));context.write(new Text(split[1]), new TrafficWritable(split));}}//2.自定义reducerpublic static class TrafficCountReducer extends Reducer<Text, TrafficWritable, Text, TrafficWritable>{@Overrideprotected void reduce(Text key, Iterable<TrafficWritable> values, Context context) throws IOException, InterruptedException {int upPackNo =0;int downPackNo =0;int upPayLoad =0;int downPayLoad =0;for (TrafficWritable tw:values) {upPackNo+=tw.getUpPackNo();downPackNo += tw.getDownPackNo();upPayLoad += tw.getUpPayLoad();downPayLoad += tw.getDownPayLoad();}TrafficWritable v3 = new TrafficWritable(key.toString(), upPackNo, downPackNo, upPayLoad, downPayLoad);context.write(key, v3);}}//3.写一个驱动方法public static void main(String[] args) throws Exception {//使用一个job类的实例Configuration conf = new Configuration();Job job = Job.getInstance(conf);//下面一行很重要job.setJarByClass(TrafficCountApp.class);//自定义的mapper、reducerjob.setMapperClass(TrafficCountMapper.class);job.setReducerClass(TrafficCountReducer.class);//mapper的输出k、v类型job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(TrafficWritable.class);//reducer的输出k、v的类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(TrafficWritable.class);//job的输入hdfs、输出hdfsFileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));//提交jobjob.waitForCompletion(true);}
}
2.使用Text
【注】hadoop中的字符串表示形式:Text
package com.henu;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;/*** @author George* @description 不使用序列化**/
public class DianXinCount {public static class DianXinMap extends Mapper<LongWritable, Text,Text, Text>{Text t1 = new Text();Text v1 = new Text();String str1 = "";@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] strings = line.split("\\s");str1 = strings[6]+"\t"+strings[7]+"\t"+strings[8]+"\t"+strings[9];t1.set(strings[1]);v1.set(str1);context.write(t1,v1);}}public static class DianXinReduce extends Reducer<Text,Text,Text,Text>{int upPackNo =0;int downPackNo =0;int upPayLoad =0;int downPayLoad =0;Text v2 = new Text();@Overrideprotected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {for (Text s : values) {//注:\t并不识别 //s+String[] strings = s.toString().split("\t");upPackNo+=Integer.parseInt(strings[0]);downPackNo+=Integer.parseInt(strings[1]);upPayLoad+=Integer.parseInt(strings[2]);downPayLoad+=Integer.parseInt(strings[3]);v2.set(upPackNo+"\t"+downPackNo+"\t"+upPayLoad+"\t"+downPayLoad);}upPackNo =0;downPackNo =0;upPayLoad =0;downPayLoad =0;context.write(key,v2);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(DianXinCount.class);job.setMapperClass(DianXinMap.class);job.setReducerClass(DianXinReduce.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(Text.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));job.waitForCompletion(true);}
}
【注】在整个过程中,特别是字符串的拆分上,建议多使用一下测试类:
package com.henu;/*** @author George* @description**/
/**
 * Scratch program for checking how {@link String#split(String)} behaves before
 * relying on it in a job: it splits a tab-joined string and prints the piece
 * count, every piece, a separator line, and then the pieces at index 1 and 2.
 */
public class Test {

    public static void main(String[] args) {
        String joined = String.join("\t", "aaa", "bbb", "ccc", "aaa");
        String[] pieces = joined.split("\t");

        System.out.println(pieces.length);
        for (int i = 0; i < pieces.length; i++) {
            System.out.println(pieces[i]);
        }

        System.out.println("----------------");
        System.out.println(pieces[1]);
        System.out.println(pieces[2]);

        // A second experiment that used to live here split a full log record,
        //   "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC\t120.196.100.82\ti02.c.aliimg.com\t\t24\t27\t2481\t24681\t200"
        // with split("\\s") and printed every piece plus the ones at index 6 and 1.
        // The two consecutive tabs yield an empty piece at index 5, so index 6 is "24".
    }
}
结果展示:
电信信息日志使用mapreduce统计的两种方式相关推荐
- log4j控制日志输出文件名称的两种方式
1. 第一种方式 在类对象中用如下方式定义logger变量 private static Logger logger = Logger.getLogger("lemmaXml"); ...
- 杀掉僵尸 MapReduce 任务的两种方式
方法一 1.使用命令获取 job_id hadoop job -list 2. 杀掉进程 hadoop job -kill $JobId 方法二 使用 yarn 来管理: 1.通过 web 界面(80 ...
- [汇总信息] Laravel 上使用 phpexcel的两种方式
文章采集与网上 方式1.使用原生的phpexcel , http://blog.csdn.net/CSwfe/article/details/52748046?locationNum=1 1.在app ...
- centos6配置日志外发_CentOS6下记录后台操作日志的两种方式
CentOS6下记录后台操作日志的两种方式 平时为了记录登录CentOS Linux系统的操作命令,需要将操作日志记录下来,下面介绍两种方式 1.利用script以及scriptreplay工具 sc ...
- 实现日志管理的两种方式:aop、拦截器
一.Spring aop 实现 AOP概念: 切面(Aspect):一个关注点的模块化,这个关注点可能会横切多个对象.事务管理是Java应用程序中一个关于横切关注点的很好的例子.在Spring AOP ...
- 两种方式设置SVN提交代码时必须填写日志
两种方式设置SVN提交代码时必须填写日志 咱们在使用SVN的时候,团队中难免有同事提交代码时忘记填写日志而直接提交,这样会导致后期维护极不方便,这并不是我们想看到的.于是下面给出两种方式来解决这个问题 ...
- 如何让BERT拥有视觉感知能力?两种方式将视频信息注入BERT
一只小狐狸带你解锁NLP/ML/DL秘籍 老板老板,听说BERT是个瞎子 此话怎讲? 它能理解语言,但是理解不了小夕的自拍! video-BERT了解一下 喵喵喵? AI的三大核心板块(CV/Spee ...
- fragment与activity之间的信息传递的两种方式
因为Fragment和Activity一样是具有生命周期,不是一般的bean通过构造函数传值,会造成异常. fragment与activity之间的信息传递的两种方式: 1.第一种方式,也是最常用的方 ...
- Ubuntu查看IP信息的两种方式
无论使用什么系统,都有用到ip地址的时候,习惯了windows系统的人很容易就能查找出系统的ip,但是在linux系统如何查看ip呢?作为Linux新手,以Ubuntu的使用经验,我知道Ubuntu查 ...
最新文章
- PostgreSQL中的数据库实例、模式、用户(角色)、表空间
- C++入门经典-例9.4-默认模板参数
- [Android L]SEAndroid开放设备文件结点权限(读或写)方法(涵盖常用操作:sys/xxx、proc/xxx、SystemProperties)热门干货
- ubuntu php fpm.conf,ubuntu下nginx+PHP-FPM安装配置
- 单例模式示例_单例设计模式示例
- Python抓取豆瓣电影详情并提取信息
- Mac终于有大动作了!WWDC 2019苹果将公布全新系统
- javascript中定时器interval的使用
- 仿QQ锁屏界面消息提示
- python控制安捷伦频谱仪_通过 python 对罗德施瓦茨矢网、信号源、频谱仪的控制...
- 安卓使用MediaPlayer播放视频
- ajax去掉session,PHP中解决ajax请求session过时退出登陆问题
- Linux入门基本命令的使用。
- Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must
- 短文本匹配模型-ESIM
- 微库:微信互动管理平台
- Voron 3d打印机 Klipper双挤出配置方式
- tomcat用c语言开发服务,apache tomcat是什么语言开发的
- Java 可变参 Object...objects 方法的陷进
- 淘宝教育的视频打不开
热门文章
- 牛客 - 导航系统(最小生成树+Floyd)
- HDU - 3987 Harry Potter and the Forbidden Forest(最小割最少边数)
- uva 10305拓扑排序
- 开源oracle client,oracle client安装与配置
- im4java profile_GraphicsMagick+im4java
- java plt_matplotlib 画动态图以及plt.ion()和plt.ioff()的使用详解
- HDU3929(容斥原理)
- 8.OD--函数参考
- Delphi之virtual,dynamic,abstract
- 深度探索I/O完成端口