hadoop学习之路(2)

1.在本地安装hadoop。不安装本地hadoop虽然不影响访问远程环境,但客户端运行时会报错:Failed to locate the winutils binary in the hadoop binary path

2.启动hadoop环境(dfs、yarn),然后运行下面的测试代码。代码中连接的地址和端口(hdfs://hadoop001:8020)是NameNode的地址,必须与linux上配置的fs.defaultFS一致

package org.example;import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;public class HDFSIO {// 把本地d盘上的zhang.txt文件上传到HDFS根目录@Testpublic void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 获取输入流FileInputStream fis = new FileInputStream(new File("d:/zhang.txt"));// 3 获取输出流FSDataOutputStream fos = fs.create(new Path("/zhang.txt"));// 4 流的对拷IOUtils.copyBytes(fis, fos, conf);// 5 关闭资源IOUtils.closeStream(fos);IOUtils.closeStream(fis);fs.close();}// 从HDFS上下载zhang.txt文件到本地e盘上@Testpublic void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 获取输入流FSDataInputStream fis = fs.open(new Path("/san.txt"));// 3 获取输出流FileOutputStream fos = new FileOutputStream(new File("d:/san.txt"));// 4 流的对拷IOUtils.copyBytes(fis, fos, conf);// 5 关闭资源IOUtils.closeStream(fos);IOUtils.closeStream(fis);fs.close();}// 下载第一块@Testpublic void readFileSeek1() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 获取输入流FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));// 3 获取输出流FileOutputStream fos = new FileOutputStream(new File("d:/hadoop-2.7.2.tar.gz.part1"));// 4 流的对拷(只拷贝128m)byte[] buf = new byte[1024];for (int i = 0; i < 1024 * 128; i++) {fis.read(buf);fos.write(buf);}// 5 关闭资源IOUtils.closeStream(fos);IOUtils.closeStream(fis);fs.close();}// 下载第二块@SuppressWarnings("resource")@Testpublic void readFileSeek2() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 获取输入流FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));// 3 
设置指定读取的起点fis.seek(1024*1024*128);// 4 获取输出流FileOutputStream fos = new FileOutputStream(new File("d:/hadoop-2.7.2.tar.gz.part2"));// 5 流的对拷IOUtils.copyBytes(fis, fos, conf);// 6 关闭资源IOUtils.closeStream(fos);IOUtils.closeStream(fis);fs.close();}}

View Code

package org.example;import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;public class HDFSClient {public static void main(String[] args) throws IOException, Exception, URISyntaxException {Configuration conf = new Configuration();
//        conf.set("fs.defaultFS", "hdfs://hadoop001:8020");// 1 获取hdfs客户端对象
//        FileSystem fs = FileSystem.get(conf );FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf, "root");// 2 在hdfs上创建路径fs.mkdirs(new Path("/0529/dashen/zhang"));// 3 关闭资源fs.close();System.out.println("over");}// 1 文件上传@Testpublic void testCopyFromLocalFile() throws IOException, InterruptedException, URISyntaxException{// 1 获取fs对象Configuration conf = new Configuration();conf.set("dfs.replication", "2");FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 执行上传APIfs.copyFromLocalFile(new Path("d:/zhang.txt"), new Path("/zhang.txt"));// 3 关闭资源fs.close();}// 2 文件下载@Testpublic void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 执行下载操作
//        fs.copyToLocalFile(new Path("/zhang.txt"), new Path("d:/zhang1.txt"));fs.copyToLocalFile(false, new Path("/zhang.txt"), new Path("d:/zhangzhang.txt"), true);// 3 关闭资源fs.close();}// 3 文件删除@Testpublic void testDelete() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 文件删除fs.delete(new Path("/0529"), true);// 3 关闭资源fs.close();}// 4 文件更名@Testpublic void testRename() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 执行更名操作fs.rename(new Path("/zhang.txt"), new Path("/zhang1.txt"));// 3 关闭资源fs.close();}// 5 文件详情查看@Testpublic void testListFiles() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 查看文件详情RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);while(listFiles.hasNext()){LocatedFileStatus fileStatus = listFiles.next();// 查看文件名称、权限、长度、块信息System.out.println(fileStatus.getPath().getName());// 文件名称System.out.println(fileStatus.getPermission());// 文件权限System.out.println(fileStatus.getLen());// 文件长度BlockLocation[] blockLocations = fileStatus.getBlockLocations();for (BlockLocation blockLocation : blockLocations) {String[] hosts = blockLocation.getHosts();for (String host : hosts) {System.out.println(host);}}System.out.println("------test分割线--------");}// 3 关闭资源fs.close();}// 6 判断是文件还是文件夹@Testpublic void testListStatus() throws IOException, InterruptedException, URISyntaxException{// 1 获取对象Configuration conf = new Configuration();FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:8020"), conf , "root");// 2 判断操作FileStatus[] listStatus = fs.listStatus(new Path("/"));for (FileStatus fileStatus : 
listStatus) {if (fileStatus.isFile()) {// 文件System.out.println("f:"+fileStatus.getPath().getName());}else{// 文件夹System.out.println("d:"+fileStatus.getPath().getName());}}// 3 关闭资源fs.close();}}

View Code

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the hdfs01 HDFS client examples (Java 8, Hadoop 2.7.2, JUnit 4). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.example</groupId>
  <artifactId>hdfs01</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>hdfs01</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- Pinned instead of the deprecated RELEASE meta-version so builds are reproducible. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.13.2</version>
    </dependency>
    <!-- NOTE(review): log4j-core 2.8.2 predates the fix for CVE-2021-44228 (Log4Shell);
         consider upgrading to 2.17.1 or later after checking compatibility. -->
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>2.8.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.2</version>
    </dependency>
<!--    <dependency>-->
<!--      <groupId>jdk.tools</groupId>-->
<!--      <artifactId>jdk.tools</artifactId>-->
<!--      <version>1.8</version>-->
<!--      <scope>system</scope>-->
<!--      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>-->
<!--    </dependency>-->
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>

View Code

hadoop学习之路(2)相关推荐

3台云腾讯云开始hadoop学习之路笔记二
3台云腾讯云开始hadoop学习之路笔记二(接上) 大三党开始学习hadoop之路了,菜鸟学习hadoop,有啥错误请大佬指教.由于自己电脑配置不够,只能买3台腾讯云服务器来学习了.以下笔记都是记录我 ...
我的hadoop学习之路
Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS.HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上. Ha ...
Hadoop学习之路一 Single Node Setup
从研究生入学到现在,快一年的时间了.和很多人一样,我迷茫着,一直没想明白自己想要干什么.年轻的时候真的想和所有人不一样,直到最后选择了大数据方向,更多的也是感觉.青春的魅力在于可能性.没有过多的犹豫纠 ...
hadoop学习之路(3)
重新系统化学习hadoop 虽然官方对centos6已经停止维护,但还是硬着头皮沿用之前的centos6,并解决了一点小疑惑. 1.修改ip地址的文件 /etc/sysconfig/network-s ...
Hadoop学习之路（二）Hadoop发展背景
Hadoop产生的背景 1. HADOOP最早起源于Nutch.Nutch的设计目标是构建一个大型的全网搜索引擎,包括网页抓取.索引.查询等功能,但随着抓取网页数量的增加,遇到了严重的可扩展性问题-- ...
Hadoop学习之路（三）Hadoop-2.7.5在CentOS-6.7上的编译
下载Hadoop源码 1.登录官网 2.确定你要安装的软件的版本一个选取原则: 不新不旧的稳定版本几个标准: 1)一般来说,刚刚发布的大版本都是有很多问题 2)应该选择某个大版本中的最后一个小版本 ...
Hadoop学习之路（九）HDFS深入理解
HDFS的优点和缺点 HDFS的优点 1.可构建在廉价机器上通过多副本提高可靠性,提供了容错和恢复机制服务器节点的宕机是常态必须理性对象 2.高容错性数据自动保存多个副本,副本丢失后,自动 ...
Hadoop学习之路（十三）MapReduce的初识
MapReduce是什么首先让我们来重温一下 hadoop 的四大组件: HDFS:分布式存储系统 MapReduce:分布式计算系统 YARN:hadoop 的资源调度系统 Common:以上三大 ...
hadoop学习之路(5)
HadoopHA 1.zookeeper配置 zoo.cfg # The number of milliseconds of each tick tickTime=2000 # The number ...

hadoop学习之路(2)

hadoop学习之路(2)相关推荐

最新文章

热门文章