信息准备:

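数据信息(每行一条记录,字段以制表符分隔,依次大致为:时间戳、手机号、MAC地址:网络名、IP、访问域名、网站类别、上行数据包数、下行数据包数、上行流量、下行流量、状态码;其中域名和类别可能为空。下文代码使用下标 1 和 6~9 的字段):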

1363157985066    13726230503 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
1363157995052   13826544101 5C-0E-8B-C7-F1-E0:CMCC  120.197.40.4            4   0   264 0   200
1363157991076   13926435656 20-10-7A-28-CC-0A:CMCC  120.196.100.99          2   4   132 1512    200
1363154400022   13926251106 5C-0E-8B-8B-B1-50:CMCC  120.197.40.4            4   0   240 0   200
1363157993044   18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99  iface.qiyi.com  视频网站    15  12  1527    2106    200
1363157995074   84138413    5C-0E-8B-8C-E8-20:7DaysInn  120.197.40.4    122.72.52.12        20  16  4116    1432    200
1363157993055   13560439658 C4-17-FE-BA-DE-D9:CMCC  120.196.100.99          18  15  1116    954 200
1363157995033   15920133257 5C-0E-8B-C7-BA-20:CMCC  120.197.40.4    sug.so.360.cn   信息安全    20  20  3156    2936    200
1363157983019   13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82          4   0   240 0   200
1363157984041   13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4    s19.cnzz.com    站点统计    24  9   6960    690 200
1363157973098   15013685858 5C-0E-8B-C7-F7-90:CMCC  120.197.40.4    rank.ie.sogou.com   搜索引擎    28  27  3659    3538    200
1363157986029   15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99  www.umeng.com   站点统计    3   3   1938    180 200
1363157992093   13560439658 C4-17-FE-BA-DE-D9:CMCC  120.196.100.99          15  9   918 4938    200
1363157986041   13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4            3   3   180 180 200
1363157984040   13602846565 5C-0E-8B-8B-B6-00:CMCC  120.197.40.4    2052.flash2-http.qq.com 综合门户    15  12  1938    2910    200
1363157995093   13922314466 00-FD-07-A2-EC-BA:CMCC  120.196.100.82  img.qfc.cn      12  12  3008    3720    200
1363157982040   13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99  y0.ifengimg.com 综合门户    57  102 7335    110349  200
1363157986072   18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99  input.shouji.sogou.com  搜索引擎    21  18  9531    2412    200
1363157990043   13925057413 00-1F-64-E1-E6-9A:CMCC  120.196.100.55  t3.baidu.com    搜索引擎    69  63  11058   48243   200
1363157988072   13760778710 00-FD-07-A4-7B-08:CMCC  120.196.100.82          2   2   120 120 200
1363157985079   13823070001 20-7C-8F-70-68-1F:CMCC  120.196.100.99          6   3   360 180 200
1363157985069   13600217502 00-1F-64-E2-E8-B1:CMCC  120.196.100.55          18  138 1080    186852  200

问题提出:按手机号统计上行/下行数据包数,以及上行/下行总流量。

1.使用hadoop的序列化【进行了手机号是否匹配的分区】(注:下方代码只包含自定义 Writable、Mapper、Reducer 和驱动方法,并未包含分区器 Partitioner 的实现)

package Hadoop;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;public class TrafficCountApp {public static class TrafficWritable implements Writable {private String phoneNo;private int upPackNo;private int downPackNo;private int upPayLoad;private int downPayLoad;public TrafficWritable(){}public TrafficWritable(String[] split){this(split[1], Integer.parseInt(split[6]), Integer.parseInt(split[7]),Integer.parseInt(split[8]),Integer.parseInt(split[9]));}public TrafficWritable(String phoneNo, int upPackNo, int downPackNo, int upPayLoad, int downPayLoad) {this.phoneNo = phoneNo;this.upPackNo = upPackNo;this.downPackNo = downPackNo;this.upPayLoad = upPayLoad;this.downPayLoad = downPayLoad;}public String getPhoneNo() {return phoneNo;}public void setPhoneNo(String phoneNo) {this.phoneNo = phoneNo;}public int getUpPackNo() {return upPackNo;}public void setUpPackNo(int upPackNo) {this.upPackNo = upPackNo;}public int getDownPackNo() {return downPackNo;}public void setDownPackNo(int downPackNo) {this.downPackNo = downPackNo;}public int getUpPayLoad() {return upPayLoad;}public void setUpPayLoad(int upPayLoad) {this.upPayLoad = upPayLoad;}public int getDownPayLoad() {return downPayLoad;}public void setDownPayLoad(int downPayLoad) {this.downPayLoad = downPayLoad;}@Overridepublic void write(DataOutput out) throws IOException {out.writeUTF(phoneNo);out.writeInt(upPackNo);out.writeInt(downPackNo);out.writeInt(upPayLoad);out.writeInt(downPayLoad);}@Overridepublic void readFields(DataInput in) throws IOException {phoneNo = in.readUTF();upPackNo = in.readInt();downPackNo = in.readInt();upPayLoad = in.readInt();downPayLoad = in.readInt();}@Overridepublic String toString() {return upPackNo+"\t"+downPackNo+"\t"+upPayLoad+"\t"+downPayLoad;}}//1.自定义mapperpublic static class TrafficCountMapper extends Mapper<LongWritable, Text, Text, TrafficWritable> {@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {System.out.println(value.toString());String[] split = 
value.toString().split("\\s");System.out.println(Arrays.toString(split));context.write(new Text(split[1]), new TrafficWritable(split));}}//2.自定义reducerpublic static class TrafficCountReducer extends Reducer<Text, TrafficWritable, Text, TrafficWritable>{@Overrideprotected void reduce(Text key, Iterable<TrafficWritable> values, Context context) throws IOException, InterruptedException {int upPackNo =0;int downPackNo =0;int upPayLoad =0;int downPayLoad =0;for (TrafficWritable tw:values) {upPackNo+=tw.getUpPackNo();downPackNo += tw.getDownPackNo();upPayLoad += tw.getUpPayLoad();downPayLoad += tw.getDownPayLoad();}TrafficWritable v3 = new TrafficWritable(key.toString(), upPackNo, downPackNo, upPayLoad, downPayLoad);context.write(key, v3);}}//3.写一个驱动方法public static void main(String[] args) throws Exception {//使用一个job类的实例Configuration conf = new Configuration();Job job = Job.getInstance(conf);//下面一行很重要job.setJarByClass(TrafficCountApp.class);//自定义的mapper、reducerjob.setMapperClass(TrafficCountMapper.class);job.setReducerClass(TrafficCountReducer.class);//mapper的输出k、v类型job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(TrafficWritable.class);//reducer的输出k、v的类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(TrafficWritable.class);//job的输入hdfs、输出hdfsFileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));//提交jobjob.waitForCompletion(true);}
}

2.使用Text

【注】hadoop中的字符串表示形式:Text

package com.henu;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;/*** @author George* @description   不使用序列化**/
public class DianXinCount {public static class DianXinMap extends Mapper<LongWritable, Text,Text, Text>{Text t1 = new Text();Text v1 = new Text();String str1 = "";@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] strings = line.split("\\s");str1 = strings[6]+"\t"+strings[7]+"\t"+strings[8]+"\t"+strings[9];t1.set(strings[1]);v1.set(str1);context.write(t1,v1);}}public static class DianXinReduce extends Reducer<Text,Text,Text,Text>{int upPackNo =0;int downPackNo =0;int upPayLoad =0;int downPayLoad =0;Text v2 = new Text();@Overrideprotected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {for (Text s : values) {//注:\t并不识别 //s+String[] strings = s.toString().split("\t");upPackNo+=Integer.parseInt(strings[0]);downPackNo+=Integer.parseInt(strings[1]);upPayLoad+=Integer.parseInt(strings[2]);downPayLoad+=Integer.parseInt(strings[3]);v2.set(upPackNo+"\t"+downPackNo+"\t"+upPayLoad+"\t"+downPayLoad);}upPackNo =0;downPackNo =0;upPayLoad =0;downPayLoad =0;context.write(key,v2);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(DianXinCount.class);job.setMapperClass(DianXinMap.class);job.setReducerClass(DianXinReduce.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(Text.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));job.waitForCompletion(true);}
}

【注】在整个过程中,特别是字符串的拆分上,建议多使用一下测试类:

package com.henu;/*** @author George* @description**/
/**
 * Scratch program for checking how {@link String#split(String)} behaves on
 * tab-separated text before relying on it inside a mapper or reducer.
 */
public class Test {

    /**
     * Joins four tokens with tabs, splits them again on the tab, and prints the
     * token count, every token, a separator line, and then tokens 1 and 2.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String tab = "\t";
        final String record = "aaa" + tab + "bbb" + tab + "ccc" + tab + "aaa";
        final String[] parts = record.split(tab);

        System.out.println(parts.length);
        for (int i = 0; i < parts.length; i++) {
            System.out.println(parts[i]);
        }

        System.out.println("----------------");
        System.out.println(parts[1]);
        System.out.println(parts[2]);

        // Second experiment, kept disabled: split a full log record on "\\s" and
        // look at fields 6 and 1.
        //
        // String str = "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC\t120.196.100.82\ti02.c.aliimg.com\t\t24\t27\t2481\t24681\t200";
        // String[] strings = str.split("\\s");
        // for (String s : strings) {
        //     System.out.println(s);
        // }
        // System.out.println("-------------");
        // System.out.println(strings[6]);
        // System.out.println(strings[1]);
    }
}

结果展示:

相关推荐(与《电信信息日志使用 MapReduce 统计的两种方式》相关的文章):

  1. log4j控制日志输出文件名称的两种方式

    1. 第一种方式 在类对象中用如下方式定义logger变量 private static Logger logger = Logger.getLogger("lemmaXml"); ...

  2. 杀掉僵尸 MapReduce 任务的两种方式

    方法一 1.使用命令获取 job_id hadoop job -list 2. 杀掉进程 hadoop job -kill $JobId 方法二 使用 yarn 来管理: 1.通过 web 界面(80 ...

  3. [汇总信息] Laravel 上使用 phpexcel的两种方式

    文章采集与网上 方式1.使用原生的phpexcel , http://blog.csdn.net/CSwfe/article/details/52748046?locationNum=1 1.在app ...

  4. centos6配置日志外发_CentOS6下记录后台操作日志的两种方式

    CentOS6下记录后台操作日志的两种方式 平时为了记录登录CentOS Linux系统的操作命令,需要将操作日志记录下来,下面介绍两种方式 1.利用script以及scriptreplay工具 sc ...

  5. 实现日志管理的两种方式:aop、拦截器

    一.Spring aop 实现 AOP概念: 切面(Aspect):一个关注点的模块化,这个关注点可能会横切多个对象.事务管理是Java应用程序中一个关于横切关注点的很好的例子.在Spring AOP ...

  6. 两种方式设置SVN提交代码时必须填写日志

    两种方式设置SVN提交代码时必须填写日志 咱们在使用SVN的时候,团队中难免有同事提交代码时忘记填写日志而直接提交,这样会导致后期维护极不方便,这并不是我们想看到的.于是下面给出两种方式来解决这个问题 ...

  7. 如何让BERT拥有视觉感知能力?两种方式将视频信息注入BERT

    一只小狐狸带你解锁NLP/ML/DL秘籍 老板老板,听说BERT是个瞎子 此话怎讲? 它能理解语言,但是理解不了小夕的自拍! video-BERT了解一下 喵喵喵? AI的三大核心板块(CV/Spee ...

  8. fragment与activity之间的信息传递的两种方式

    因为Fragment和Activity一样是具有生命周期,不是一般的bean通过构造函数传值,会造成异常. fragment与activity之间的信息传递的两种方式: 1.第一种方式,也是最常用的方 ...

  9. Ubuntu查看IP信息的两种方式

    无论使用什么系统,都有用到ip地址的时候,习惯了windows系统的人很容易就能查找出系统的ip,但是在linux系统如何查看ip呢?作为Linux新手,以Ubuntu的使用经验,我知道Ubuntu查 ...

最新文章

  1. PostgreSQL中的数据库实例、模式、用户(角色)、表空间
  2. C++入门经典-例9.4-默认模板参数
  3. [Android L]SEAndroid开放设备文件结点权限(读或写)方法(涵盖常用操作:sys/xxx、proc/xxx、SystemProperties)热门干货
  4. ubuntu php fpm.conf,ubuntu下nginx+PHP-FPM安装配置
  5. 单例模式示例_单例设计模式示例
  6. Python抓取豆瓣电影详情并提取信息
  7. Mac终于有大动作了!WWDC 2019苹果将公布全新系统
  8. javascript中定时器interval的使用
  9. 仿QQ锁屏界面消息提示
  10. python控制安捷伦频谱仪_通过 python 对罗德施瓦茨矢网、信号源、频谱仪的控制...
  11. 安卓使用MediaPlayer播放视频
  12. ajax去掉session,PHP中解决ajax请求session过时退出登陆问题
  13. Linux入门基本命令的使用。
  14. Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must
  15. 短文本匹配模型-ESIM
  16. 微库:微信互动管理平台
  17. Voron 3d打印机 Klipper双挤出配置方式
  18. tomcat用c语言开发服务,apache tomcat是什么语言开发的
  19. Java 可变参 Object...objects 方法的陷进
  20. 淘宝教育的视频打不开

热门文章

  1. 牛客 - 导航系统(最小生成树+Floyd)
  2. HDU - 3987 Harry Potter and the Forbidden Forest(最小割最少边数)
  3. uva 10305拓扑排序
  4. 开源oracle client,oracle client安装与配置
  5. im4java profile_GraphicsMagick+im4java
  6. java plt_matplotlib 画动态图以及plt.ion()和plt.ioff()的使用详解
  7. HDU3929(容斥原理)
  8. 8.OD--函数参考
  9. Delphi之virtual,dynamic,abstract
  10. 深度探索I/O完成端口