FileSplit简单使用

Hadoop 的 FileSplit 简单使用

FileSplit类继承关系：

FileSplit类中的属性和方法：

作业输入：

[java] view plaincopy

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt
hadoop a
spark a
hive a
hbase a
tachyon a
storm a
redis a
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt
hadoop b
spark b
kafka b
tachyon b
oozie b
flume b
sqoop b
solr b
hadoop@hadoop:/home/hadoop/blb$

[java] view plaincopy

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt
hadoop a
spark a
hive a
hbase a
tachyon a
storm a
redis a
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt
hadoop b
spark b
kafka b
tachyon b
oozie b
flume b
sqoop b
solr b
hadoop@hadoop:/home/hadoop/blb$

代码：

[java] view plaincopy

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Map-only MapReduce job that prints the metadata of the {@link FileSplit}
 * each input record belongs to.
 *
 * <p>For every input line the mapper writes a separator followed by the split's
 * file name, path depth, runtime class, length, location info, locations,
 * start offset and string form. No reducer runs, so the mapper output is the
 * job output.
 */
public class GetSplitMapReduce {

    /**
     * Configures and runs the job, then exits the JVM with the job's status.
     *
     * @param args generic Hadoop options followed by {@code <inputpath> <outputpath>}
     * @throws IOException if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: GetSplitMapReduce <inputpath> <outputpath>");
            // Stop here: continuing would index past the end of otherArgs below.
            System.exit(2);
        }

        Job job = Job.getInstance(conf, GetSplitMapReduce.class.getSimpleName() + "1");
        job.setJarByClass(GetSplitMapReduce.class);

        job.setMapperClass(MyMapper1.class);
        // Must agree with the mapper's declared output types (Text, NullWritable).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Map-only job: mapper output is written straight to the output format.
        job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Propagate job success/failure to the caller through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Mapper that ignores the record content and emits one block of
     * {@link FileSplit} metadata lines per input record.
     */
    public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable> {

        /**
         * Writes the metadata of the current input split as a series of text lines.
         *
         * @param key     offset key supplied by the input format (unused)
         * @param value   the input line (unused)
         * @param context task context used to obtain the split and emit output
         * @throws IOException if reading split locations or writing output fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // This job uses TextInputFormat, whose splits are FileSplit instances.
            FileSplit fileSplit = (FileSplit) context.getInputSplit();

            String pathname = fileSplit.getPath().getName();   // last path component, i.e. the file name
            int depth = fileSplit.getPath().depth();            // number of components in the path
            Class<? extends FileSplit> class1 = fileSplit.getClass(); // runtime class of the split
            long length = fileSplit.getLength();                // length of this split in bytes
            SplitLocationInfo[] locationInfo = fileSplit.getLocationInfo(); // may be null
            String[] locations = fileSplit.getLocations();      // locations reported for the split
            long start = fileSplit.getStart();                  // position of the first byte in the file to process
            String string = fileSplit.toString();               // printed as "<path>:<start>+<length>" in the sample output

            context.write(new Text("===================================================================================="), NullWritable.get());
            context.write(new Text("pathname--" + pathname), NullWritable.get());
            context.write(new Text("depth--" + depth), NullWritable.get());
            context.write(new Text("class1--" + class1), NullWritable.get());
            context.write(new Text("length--" + length), NullWritable.get());
            // NOTE: concatenating an array prints its identity string
            // (e.g. "[Ljava.lang.String;@4ff41ba0"), not its elements. Wrap the value in
            // java.util.Arrays.toString(...) if the actual contents are wanted.
            context.write(new Text("locationInfo--" + locationInfo), NullWritable.get());
            context.write(new Text("locations--" + locations), NullWritable.get());
            context.write(new Text("start--" + start), NullWritable.get());
            context.write(new Text("string--" + string), NullWritable.get());
        }
    }
}

[java] view plaincopy

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Map-only MapReduce job that prints the metadata of the {@link FileSplit}
 * each input record belongs to.
 *
 * <p>For every input line the mapper writes a separator followed by the split's
 * file name, path depth, runtime class, length, location info, locations,
 * start offset and string form. No reducer runs, so the mapper output is the
 * job output.
 */
public class GetSplitMapReduce {

    /**
     * Configures and runs the job, then exits the JVM with the job's status.
     *
     * @param args generic Hadoop options followed by {@code <inputpath> <outputpath>}
     * @throws IOException if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: GetSplitMapReduce <inputpath> <outputpath>");
            // Stop here: continuing would index past the end of otherArgs below.
            System.exit(2);
        }

        Job job = Job.getInstance(conf, GetSplitMapReduce.class.getSimpleName() + "1");
        job.setJarByClass(GetSplitMapReduce.class);

        job.setMapperClass(MyMapper1.class);
        // Must agree with the mapper's declared output types (Text, NullWritable).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Map-only job: mapper output is written straight to the output format.
        job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Propagate job success/failure to the caller through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Mapper that ignores the record content and emits one block of
     * {@link FileSplit} metadata lines per input record.
     */
    public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable> {

        /**
         * Writes the metadata of the current input split as a series of text lines.
         *
         * @param key     offset key supplied by the input format (unused)
         * @param value   the input line (unused)
         * @param context task context used to obtain the split and emit output
         * @throws IOException if reading split locations or writing output fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // This job uses TextInputFormat, whose splits are FileSplit instances.
            FileSplit fileSplit = (FileSplit) context.getInputSplit();

            String pathname = fileSplit.getPath().getName();   // last path component, i.e. the file name
            int depth = fileSplit.getPath().depth();            // number of components in the path
            Class<? extends FileSplit> class1 = fileSplit.getClass(); // runtime class of the split
            long length = fileSplit.getLength();                // length of this split in bytes
            SplitLocationInfo[] locationInfo = fileSplit.getLocationInfo(); // may be null
            String[] locations = fileSplit.getLocations();      // locations reported for the split
            long start = fileSplit.getStart();                  // position of the first byte in the file to process
            String string = fileSplit.toString();               // printed as "<path>:<start>+<length>" in the sample output

            context.write(new Text("===================================================================================="), NullWritable.get());
            context.write(new Text("pathname--" + pathname), NullWritable.get());
            context.write(new Text("depth--" + depth), NullWritable.get());
            context.write(new Text("class1--" + class1), NullWritable.get());
            context.write(new Text("length--" + length), NullWritable.get());
            // NOTE: concatenating an array prints its identity string
            // (e.g. "[Ljava.lang.String;@4ff41ba0"), not its elements. Wrap the value in
            // java.util.Arrays.toString(...) if the actual contents are wanted.
            context.write(new Text("locationInfo--" + locationInfo), NullWritable.get());
            context.write(new Text("locations--" + locations), NullWritable.get());
            context.write(new Text("start--" + start), NullWritable.get());
            context.write(new Text("string--" + string), NullWritable.get());
        }
    }
}

对应inputpath2.txt文件的输出：

[java] view plaincopy

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00000
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@4ff41ba0
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@2341ce62
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@35549603
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@4444ba4f
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@7c23bb8c
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@dee2400
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@d7d8325
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@2b2cf90e
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66

[java] view plaincopy

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00000
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@4ff41ba0
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@2341ce62
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@35549603
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@4444ba4f
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@7c23bb8c
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@dee2400
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@d7d8325
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
====================================================================================
pathname--inputpath2.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--66
locationInfo--null
locations--[Ljava.lang.String;@2b2cf90e
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66

对应inputpath1.txt文件的输出：

[java] view plaincopy

hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00001
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@4ff41ba0
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@2341ce62
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@35549603
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@4444ba4f
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@7c23bb8c
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@dee2400
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
====================================================================================
pathname--inputpath1.txt
depth--5
class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
length--58
locationInfo--null
locations--[Ljava.lang.String;@d7d8325
start--0
string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
hadoop@hadoop:/home/hadoop/blb$

FileSplit简单使用相关推荐

利用python画分形图_「分形」python简单的分形图片 - seo实验室
分形康托集 # 康托集 import pygame pygame.init() screen = pygame.display.set_caption('康托集') screen = pygame. ...
简单的MapReduce实践
简单的MapReduce实践文章目录简单的MapReduce实践操作环境实现文件合并和去重操作新建项目新建Java程序打包程序运行程序实现文件的倒排索引第一步,Map 第二步,Co ...
eclipse创建神经网络_使用Eclipse Deeplearning4j构建简单的神经网络
eclipse创建神经网络神经网络导论深度学习包含深度神经网络和深度强化学习,它们是机器学习的子集,而机器学习本身就是人工智能的子集. 广义地说,深度神经网络执行机器感知,该机器感知从原始数据中提 ...
使用Eclipse Deeplearning4j构建简单的神经网络
神经网络导论深度学习既包含深度神经网络又包含深度强化学习,这是机器学习的子集,而机器学习本身就是人工智能的子集. 广义上讲,深度神经网络执行机器感知,该机器感知从原始数据中提取重要特征,并对每个观察 ...
java FileSplit类
来源与去向通过inputformat的getsplits方法产生传递给inputformat的createRecordReader方法. /** A section of an input fil ...
C/C++简单实现文件分块
C语言简单实现文件分块模块1:分割文件指定目标输入文件(文件名或文件路径)和分割尺寸,要求分割尺寸(单位:MB)为正整数,且范围在[MIN_SIZE, MAX_SIZE]. 分割后产生块文件,命名 ...
在docker上安装部署tomcat项目超简单，拿来主义
在docker中部署tomcat,非常简单,而且省去了手动安装jdk等步骤,只需要将war包复制在容器tomcat实例中的webapps下面即可.以下将详细讲解流程: 在windows中打好包以后用w ...
Linux下tomcat的安装与卸载以及配置（超简单）
无敌简单的几步 1.安装 //首先你需要下载好tomcat包 sudo tar -xvzf apache-tomcat-7.0.85.tar.gz(这里是包名) -C 你要放的位置 2.卸载 rm - ...
Docker安装Apache与运行简单的web服务——httpd helloworld
Docker运行简单的web服务--httpd helloworld目录[阅读时间:约5分钟] 一.Docker简介二.Docker的安装与配置[CentOS环境] 三.Docker运行简单的web ...
Docker的安装、镜像源更换与简单应用
Docker的安装.镜像源更换与简单应用[阅读时间:约20分钟] 一.概述二.系统环境&项目介绍 1.系统环境 2.项目的任务要求三.Docker的安装四.Docker的简单应用 1. ...

FileSplit简单使用

FileSplit简单使用相关推荐

最新文章

热门文章