idea下mapreduce的wordcount
idea下mapreduce的wordcount
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- FIX: the original had xmlns:xsi="..."xsi:schemaLocation="..." with no
     whitespace between the two attributes; XML attributes must be separated
     by whitespace, so the original POM was not well-formed. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.henu</groupId>
  <artifactId>henu</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>henu</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- Pinned: "RELEASE" is a deprecated floating metaversion that makes
         builds non-reproducible and is no longer resolved by Maven 3.x. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.13.2</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.2</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
          <encoding>utf-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
WordCount
package com.henu;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/*** @author George* @description**/
public class WC {
public static class WCMapper extends Mapper<LongWritable, Text,Text, IntWritable>{Text k1 = new Text();IntWritable v1 = new IntWritable(1);
@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] strings = line.split("\\s+");for (String s : strings) {k1.set(s);context.write(k1,v1);}}}
public static class WCReducer extends Reducer<Text, IntWritable,Text, IntWritable> {int count;IntWritable v2 = new IntWritable();
@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {count = 0;for (IntWritable value : values) {count += value.get();}v2.set(count);context.write(key,v2);}}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();Job job = Job.getInstance(conf);
job.setJarByClass(WC.class);
job.setMapperClass(WCMapper.class);job.setReducerClass(WCReducer.class);
job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.waitForCompletion(true);}
}
进行分区:
package com.henu;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/*** @author George* @description**/
public class WC {
public static class WCMapper extends Mapper<LongWritable, Text,Text, IntWritable>{Text k1 = new Text();IntWritable v1 = new IntWritable(1);
@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] strings = line.split("\\s+");for (String s : strings) {k1.set(s);context.write(k1,v1);}}}
public static class WCReducer extends Reducer<Text, IntWritable,Text, IntWritable> {int count;IntWritable v2 = new IntWritable();
@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {count = 0;for (IntWritable value : values) {count += value.get();}v2.set(count);context.write(key,v2);}}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();Job job = Job.getInstance(conf);
job.setJarByClass(WC.class);
job.setMapperClass(WCMapper.class);job.setReducerClass(WCReducer.class);
job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);
//map阶段设置分区job.setPartitionerClass(MyPartitoner.class);job.setNumReduceTasks(2);
FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.waitForCompletion(true);}
private static class MyPartitoner extends Partitioner<Text,IntWritable> {@Overridepublic int getPartition(Text text, IntWritable intWritable, int i) {String kStr = text.toString();return kStr.equalsIgnoreCase("hello")?0:1;}}
}
发送到linux上运行:
yarn jar henu-1.0-SNAPSHOT.jar com.henu.WC /hello /abc
idea下mapreduce的wordcount相关推荐
- linux hadoop 运行jar,Linux下执行Hadoop WordCount.jar
Linux执行 Hadoop WordCount Ubuntu 终端进入快捷键 :ctrl + Alt +t hadoop启动命令:start-all.sh 正常执行效果如下: hadoop@HADO ...
- 第一个MapReduce程序-------WordCount
本关任务 词频统计是最能体现MapReduce思想的程序,结构简单,上手容易. 词频统计的大致功能是:统计单个或者多个文本文件中每个单词出现的次数,并将每个单词及其出现频率按照<k,v>键 ...
- Hadoop之图解MapReduce与WordCount示例分析
Hadoop的框架最核心的设计就是:HDFS和MapReduce.HDFS为海量的数据提供了存储,MapReduce则为海量的数据提供了计算. HDFS是Google File System(GFS) ...
- ubuntu下hadoop运行wordcount程序
本机环境 ubuntu 12 hadoop 1.1.2 首先保证hadoop配置成功 1.在Hadoop的解压目录的如下位置可以找到WordCount.java的源文件 src/examples/or ...
- MapReduce示例——WordCount(统计单词)
MapReduce示例--WordCount(统计单词) 过程分析 统计单词,把数据中的单词分别统计出出现的次数 过程图(图片源自网络): 实现Mapper.Reducer.Driver WordCo ...
- 初学Hadoop之图解MapReduce与WordCount示例分析
Hadoop的框架最核心的设计就是:HDFS和MapReduce.HDFS为海量的数据提供了存储,MapReduce则为海量的数据提供了计算. HDFS是Google File System(GFS) ...
- Hadoop系列二:Hadoop单节点伪分布部署并执行mapreduce示例wordcount
HDFS在集群上实现分布式文件系统,MapReduce在集群上实现了分布式计算和任务处理.HDFS在MapReduce任务处理过程中提供了文件操作和存储等支持,MapReduce在HDFS的基础上实现 ...
- Hadoop实例之利用MapReduce实现Wordcount单词统计 (附源代码)
大致思路是将hdfs上的文本作为输入,MapReduce通过InputFormat会将文本进行切片处理,并将每行的首字母相对于文本文件的首地址的偏移量作为输入键值对的key,文本内容作为输入键值对的v ...
- MapReduce执行WordCount操作
MapReduce 数字统计例子WordCount 在单机环境上运行WordCount.java程序 简单配置为:/etc/hosts: Core-site.xml 在这两个配置文件下运行WordCo ...
最新文章
- 将Android实例导入project
- Repeater的使用
- Linux学习之嵌入式Linux编程文件IO(C语言版)
- UpdataPanel学习之 RenderMode
- oopc——0.概念及为何要学习oopc
- pandownload网页服务器维护,PanDownload
- 避免野指针的方法及结构体小细节
- Hive启动的三种方式
- (转)ASP.NET MVC 3.0:基于Ajax的表单提交,A页面认证失败后页面被强转至登录页面,待登录成功将如何回到A页面?...
- iOS开发之错误码国际化
- [转] 电子技术*笔记4【2013-03】
- Android ProGuard 还原堆栈
- 病毒提示广告调查:投放色情网站 每日千人中招
- 数字 IC 笔试面试必考点(1)FPGA 芯片架构
- 洛谷-P1427-小鱼的数字游戏
- c语言pow函数原型_c语言pow的用法
- odi12配置mysql_通过ODI 12c同步PostgreSQL数据到FusionInsight LibrA
- 超人视觉怎么样/机器视觉培训适合报培训班吗
- 视频插件VideoJS5介绍
- 重学Java设计模式-创建者模式-工厂方法模式
热门文章
- CodeForces - 1345E Quantifier Question(dfs实现拓扑序)
- POJ - 2828 Buy Tickets(线段树+思维/Splay+模拟)
- uva1347Tour
- 广度优先遍历算法-02合法的括号问题
- JAVA异常处理正常的逻辑_JAVA异常的思考与总结
- sockaddr与sockaddr_in的区别
- Harfbuzz API 基本用法
- Python的re.match()和re.search()的使用和区别
- 设计模式--装饰模式
- Linux网络新技术基石 |​eBPF and XDP