Basic usage of Hadoop's FileSplit
FileSplit class hierarchy: org.apache.hadoop.mapreduce.lib.input.FileSplit extends InputSplit and implements Writable.
Fields and methods of the FileSplit class:
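An abridged sketch of the class's members, paraphrased from the Hadoop 2.x source (method bodies and the Writable serialization methods are elided, so this is a reference summary rather than compilable code):

    public class FileSplit extends InputSplit implements Writable {
        private Path file;          // the file containing this split's data
        private long start;         // first byte in the file to process
        private long length;        // number of bytes in the split
        private String[] hosts;     // hosts holding the block(s) of this split
        private SplitLocationInfo[] hostInfos; // in-memory/on-disk location details

        public Path getPath() { ... }
        public long getStart() { ... }
        public long getLength() { ... }
        public String[] getLocations() { ... }            // never null; may be empty
        public SplitLocationInfo[] getLocationInfo() { ... } // may be null
        public String toString() { ... }                  // "<file>:<start>+<length>"
    }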
Job input:
    hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt
    hadoop a
    spark a
    hive a
    hbase a
    tachyon a
    storm a
    redis a
    hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt
    hadoop b
    spark b
    kafka b
    tachyon b
    oozie b
    flume b
    sqoop b
    solr b
    hadoop@hadoop:/home/hadoop/blb$
Code. The job below is map-only (zero reduce tasks), so each mapper writes its output straight to its own part-m-* file. Because both input files are far smaller than one HDFS block, FileInputFormat produces exactly one FileSplit per file, hence two map tasks:
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.SplitLocationInfo;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class GetSplitMapReduce {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 2) {
                System.err.println("Usage: GetSplitMapReduce <inputpath> <outputpath>");
                System.exit(2); // exit instead of falling through with bad arguments
            }
            Job job = Job.getInstance(conf, GetSplitMapReduce.class.getSimpleName() + "1");
            job.setJarByClass(GetSplitMapReduce.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(NullWritable.class); // the mapper emits NullWritable values
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);
            job.setMapperClass(MyMapper1.class);
            job.setNumReduceTasks(0); // map-only: map output is written directly as job output
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

        public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable> {
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                FileSplit fileSplit = (FileSplit) context.getInputSplit();
                String pathname = fileSplit.getPath().getName();                // file name of the split's path
                int depth = fileSplit.getPath().depth();                        // number of components in the path
                Class<? extends FileSplit> class1 = fileSplit.getClass();       // runtime class of the split
                long length = fileSplit.getLength();                            // number of bytes in the split
                SplitLocationInfo[] locationInfo = fileSplit.getLocationInfo(); // per-host location info, may be null
                String[] locations = fileSplit.getLocations();                  // hosts holding this split's blocks
                long start = fileSplit.getStart();                              // position of the first byte to process
                String string = fileSplit.toString();                           // "<path>:<start>+<length>"
                context.write(new Text("===================================================================================="), NullWritable.get());
                context.write(new Text("pathname--" + pathname), NullWritable.get());
                context.write(new Text("depth--" + depth), NullWritable.get());
                context.write(new Text("class1--" + class1), NullWritable.get());
                context.write(new Text("length--" + length), NullWritable.get());
                context.write(new Text("locationInfo--" + locationInfo), NullWritable.get());
                context.write(new Text("locations--" + locations), NullWritable.get());
                context.write(new Text("start--" + start), NullWritable.get());
                context.write(new Text("string--" + string), NullWritable.get());
            }
        }
    }
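The splits a job would get can also be inspected without submitting anything to the cluster: FileInputFormat#getSplits(JobContext) is the method the framework calls at submission time. Below is a minimal standalone probe along those lines; the SplitProbe class is illustrative and not part of the original post, and it assumes the input path exists on the default filesystem:

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class SplitProbe {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration(), "split-probe");
            FileInputFormat.addInputPath(job, new Path(args[0])); // e.g. /user/hadoop/libin/input
            // getSplits is what the framework invokes at job submission time
            List<InputSplit> splits = new TextInputFormat().getSplits(job);
            for (InputSplit s : splits) {
                FileSplit fs = (FileSplit) s;
                System.out.println(fs.getPath() + " : start=" + fs.getStart() + " length=" + fs.getLength());
            }
        }
    }

Run against the input directory above, this should report one split per file, since both files are far smaller than an HDFS block.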
Output for inputpath2.txt. Since map() is called once per input line, the same split metadata is printed once for each of the file's 8 lines:
    hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00000
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@4ff41ba0
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@2341ce62
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@35549603
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@4444ba4f
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@7c23bb8c
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@dee2400
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@d7d8325
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
    ====================================================================================
    pathname--inputpath2.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--66
    locationInfo--null
    locations--[Ljava.lang.String;@2b2cf90e
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath2.txt:0+66
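A note on two of the fields above: locationInfo prints as null because no SplitLocationInfo was attached when the split was built, and locations--[Ljava.lang.String;@4ff41ba0 is just Java's default Object#toString for a String[] array, not the host list itself. To see the actual hostnames, the locations line in the mapper could format the array explicitly, for example:

    // Drop-in replacement for the "locations" line in map() above
    // (requires: import java.util.Arrays;)
    context.write(new Text("locations--" + Arrays.toString(locations)), NullWritable.get());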
Output for inputpath1.txt, with one record per input line (7 lines):
    hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/out2/part-m-00001
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@4ff41ba0
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@2341ce62
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@35549603
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@4444ba4f
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@7c23bb8c
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@dee2400
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    ====================================================================================
    pathname--inputpath1.txt
    depth--5
    class1--class org.apache.hadoop.mapreduce.lib.input.FileSplit
    length--58
    locationInfo--null
    locations--[Ljava.lang.String;@d7d8325
    start--0
    string--hdfs://hadoop:9000/user/hadoop/libin/input/inputpath1.txt:0+58
    hadoop@hadoop:/home/hadoop/blb$
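Because the split is fixed for the lifetime of a map task, printing it from map() simply repeats the same record for every input line, as the listings above show. A leaner variant (a sketch, not from the original post; it uses the same imports as GetSplitMapReduce) writes the metadata once per task from Mapper#setup, which the framework calls a single time before the first map() call:

    public static class SplitInfoMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Runs once per map task, before any map() call.
            FileSplit split = (FileSplit) context.getInputSplit();
            context.write(new Text("split--" + split), NullWritable.get()); // "<path>:<start>+<length>"
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) {
            // Intentionally empty: no per-line output is needed here.
        }
    }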