idea下mapreduce的wordcount
idea下mapreduce的wordcount
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- FIX: the original had xmlns:xsi="..."xsi:schemaLocation="..." with no
     whitespace between the two attributes; XML attributes must be separated
     by whitespace, so the original POM was not well-formed. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.henu</groupId>
  <artifactId>henu</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>henu</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- Pinned: "RELEASE" is a deprecated floating metaversion that makes
         builds non-reproducible and is no longer resolved by Maven 3.x. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.13.2</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.2</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
          <encoding>utf-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
WordCount
package com.henu;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/*** @author George* @description**/
public class WC {
public static class WCMapper extends Mapper<LongWritable, Text,Text, IntWritable>{Text k1 = new Text();IntWritable v1 = new IntWritable(1);
@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] strings = line.split("\\s+");for (String s : strings) {k1.set(s);context.write(k1,v1);}}}
public static class WCReducer extends Reducer<Text, IntWritable,Text, IntWritable> {int count;IntWritable v2 = new IntWritable();
@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {count = 0;for (IntWritable value : values) {count += value.get();}v2.set(count);context.write(key,v2);}}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();Job job = Job.getInstance(conf);
job.setJarByClass(WC.class);
job.setMapperClass(WCMapper.class);job.setReducerClass(WCReducer.class);
job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.waitForCompletion(true);}
}
进行分区:
package com.henu;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/*** @author George* @description**/
public class WC {
public static class WCMapper extends Mapper<LongWritable, Text,Text, IntWritable>{Text k1 = new Text();IntWritable v1 = new IntWritable(1);
@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] strings = line.split("\\s+");for (String s : strings) {k1.set(s);context.write(k1,v1);}}}
public static class WCReducer extends Reducer<Text, IntWritable,Text, IntWritable> {int count;IntWritable v2 = new IntWritable();
@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {count = 0;for (IntWritable value : values) {count += value.get();}v2.set(count);context.write(key,v2);}}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();Job job = Job.getInstance(conf);
job.setJarByClass(WC.class);
job.setMapperClass(WCMapper.class);job.setReducerClass(WCReducer.class);
job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);
//map阶段设置分区job.setPartitionerClass(MyPartitoner.class);job.setNumReduceTasks(2);
FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.waitForCompletion(true);}
private static class MyPartitoner extends Partitioner<Text,IntWritable> {@Overridepublic int getPartition(Text text, IntWritable intWritable, int i) {String kStr = text.toString();return kStr.equalsIgnoreCase("hello")?0:1;}}
}
发送到linux上运行:
yarn jar henu-1.0-SNAPSHOT.jar com.henu.WC /hello /abc
idea下mapreduce的wordcount相关推荐
- linux hadoop 运行jar,Linux下执行Hadoop WordCount.jar
Linux执行 Hadoop WordCount Ubuntu 终端进入快捷键 :ctrl + Alt +t hadoop启动命令:start-all.sh 正常执行效果如下: hadoop@HADO ...
- 第一个MapReduce程序-------WordCount
本关任务 词频统计是最能体现MapReduce思想的程序,结构简单,上手容易. 词频统计的大致功能是:统计单个或者多个文本文件中每个单词出现的次数,并将每个单词及其出现频率按照<k,v>键 ...
- Hadoop之图解MapReduce与WordCount示例分析
Hadoop的框架最核心的设计就是:HDFS和MapReduce.HDFS为海量的数据提供了存储,MapReduce则为海量的数据提供了计算. HDFS是Google File System(GFS) ...
- ubuntu下hadoop运行wordcount程序
本机环境 ubuntu 12 hadoop 1.1.2 首先保证hadoop配置成功 1.在Hadoop的解压目录的如下位置可以找到WordCount.java的源文件 src/examples/or ...
- MapReduce示例——WordCount(统计单词)
MapReduce示例--WordCount(统计单词) 过程分析 统计单词,把数据中的单词分别统计出出现的次数 过程图(图片源自网络): 实现Mapper.Reducer.Driver WordCo ...
- 初学Hadoop之图解MapReduce与WordCount示例分析
Hadoop的框架最核心的设计就是:HDFS和MapReduce.HDFS为海量的数据提供了存储,MapReduce则为海量的数据提供了计算. HDFS是Google File System(GFS) ...
- Hadoop系列二:Hadoop单节点伪分布部署并执行mapreduce示例wordcount
HDFS在集群上实现分布式文件系统,MapReduce在集群上实现了分布式计算和任务处理.HDFS在MapReduce任务处理过程中提供了文件操作和存储等支持,MapReduce在HDFS的基础上实现 ...
- Hadoop实例之利用MapReduce实现Wordcount单词统计 (附源代码)
大致思路是将hdfs上的文本作为输入,MapReduce通过InputFormat会将文本进行切片处理,并将每行的首字母相对于文本文件的首地址的偏移量作为输入键值对的key,文本内容作为输入键值对的v ...
- MapReduce执行WordCount操作
MapReduce 数字统计例子WordCount 在单机环境上运行WordCount.java程序 简单配置为:/etc/hosts: Core-site.xml 在这两个配置文件下运行WordCo ...
最新文章
- 将Android实例导入project
- Repeater的使用
- Linux学习之嵌入式Linux编程文件IO(C语言版)
- UpdataPanel学习之 RenderMode
- oopc——0.概念及为何要学习oopc
- pandownload网页服务器维护,PanDownload
- 避免野指针的方法及结构体小细节
- Hive启动的三种方式
- (转)ASP.NET MVC 3.0:基于Ajax的表单提交,A页面认证失败后页面被强转至登录页面,待登录成功将如何回到A页面?...
- iOS开发之错误码国际化
- [转] 电子技术*笔记4【2013-03】
- Android ProGuard 还原堆栈
- 病毒提示广告调查:投放色情网站 每日千人中招
- 数字 IC 笔试面试必考点(1)FPGA 芯片架构
- 洛谷-P1427-小鱼的数字游戏
- c语言pow函数原型_c语言pow的用法
- odi12配置mysql_通过ODI 12c同步PostgreSQL数据到FusionInsight LibrA
- 超人视觉怎么样/机器视觉培训适合报培训班吗
- 视频插件VideoJS5介绍
- 重学Java设计模式-创建者模式-工厂方法模式
热门文章
- CodeForces - 1345E Quantifier Question(dfs实现拓扑序)
- POJ - 2828 Buy Tickets(线段树+思维/Splay+模拟)
- uva1347Tour
- 广度优先遍历算法-02合法的括号问题
- JAVA异常处理正常的逻辑_JAVA异常的思考与总结
- sockaddr与sockaddr_in的区别
- Harfbuzz API 基本用法
- Python的re.match()和re.search()的使用和区别
- 设计模式--装饰模式
- Linux网络新技术基石 |​eBPF and XDP