Basic HDFS File Operations with the Java API
This post is mostly code: it covers uploading, downloading, and deleting files, creating directories, and traversing directories in HDFS from Java. The code was written on macOS in IDEA, with Maven managing the dependencies; the full source and pom.xml are below. The Hadoop version is the 2.6 CDH build. The hostname hmaster in the code is my VM's address, so replace it with your own VM's IP or hostname.
HDFSUtil.java
package com.rain.hdfs;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Before;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;

public class HDFSUtil {

    FileSystem fs = null;

    // Runs before each test method and sets up the configuration.
    @Before
    public void init() throws Exception {
        // Reads the xxx-site.xml configuration files from the classpath, parses
        // them, and loads the values into the conf object; you can add
        // core-site.xml to the project for this to pick up.
        Configuration conf = new Configuration();
        // Values set in code override values read from the configuration files.
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");
        // Based on the configuration, obtain a client instance for the
        // concrete file system, acting as the "hadoop" user.
        fs = FileSystem.get(new URI("hdfs://hmaster:9000/"), conf, "hadoop");
    }

    /**
     * Upload a file, the lower-level way.
     */
    @Test
    public void upload_old() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");
        FileSystem fs = FileSystem.get(conf);
        Path dst = new Path("hdfs://hmaster:9000/data/sample.txt");
        FSDataOutputStream os = fs.create(dst);
        FileInputStream is = new FileInputStream("/Users/rain/Downloads/sample.txt");
        IOUtils.copy(is, os);
        // Close the streams so the data is flushed to HDFS.
        is.close();
        os.close();
    }

    /**
     * Upload a file, using the convenience API.
     */
    @Test
    public void upload() throws Exception {
        // The first argument is the local file to upload; the second is the
        // destination path in HDFS, including the file's new name.
        fs.copyFromLocalFile(new Path("/Users/rain/Downloads/sample.txt"),
                new Path("hdfs://hmaster:9000/data/test.txt"));
    }

    /**
     * Download a file.
     */
    @Test
    public void download() throws Exception {
        // The first argument is the file's path in HDFS; the second is the
        // local destination path and file name.
        fs.copyToLocalFile(new Path("hdfs://hmaster:9000/sample.txt"),
                new Path("/Users/rain/Downloads/sample.txt"));
    }

    /**
     * List file information.
     */
    @Test
    public void listFiles() throws FileNotFoundException, IllegalArgumentException, IOException {
        // listFiles lists files only, and supports recursive traversal.
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/data"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            Path filePath = file.getPath();
            String fileName = filePath.getName();
            System.out.println(fileName);
        }
        System.out.println("---------------------------------");
        // listStatus lists both files and directories, but has no built-in recursion.
        FileStatus[] listStatus = fs.listStatus(new Path("/data"));
        for (FileStatus status : listStatus) {
            String name = status.getPath().getName();
            System.out.println(name + (status.isDirectory() ? " is dir" : " is file"));
        }
    }

    /**
     * Create a directory.
     */
    @Test
    public void mkdir() throws IllegalArgumentException, Exception {
        fs.mkdirs(new Path("/data/test"));
    }

    /**
     * Delete a file or directory (true = recursive).
     */
    @Test
    public void rm() throws IllegalArgumentException, IOException {
        fs.delete(new Path("/data/test"), true);
    }

    public static void main(String[] args) throws Exception {
    }
}
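For completeness, here is a minimal sketch of one operation the class above does not cover: reading a file's contents straight out of HDFS with fs.open(). It follows the same connection pattern as init(); the hmaster address, the "hadoop" user, and the /data/test.txt path (the file written by the upload test) are assumptions to adjust for your own cluster.

package com.rain.hdfs;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HDFSReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hmaster:9000/");
        // Same connection style as init() above; "hadoop" is the HDFS user.
        FileSystem fs = FileSystem.get(new URI("hdfs://hmaster:9000/"), conf, "hadoop");
        // open() returns an FSDataInputStream over the file's contents.
        FSDataInputStream in = fs.open(new Path("/data/test.txt"));
        try {
            // Stream the file to stdout; any OutputStream works here.
            IOUtils.copy(in, System.out);
        } finally {
            in.close();
            fs.close();
        }
    }
}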
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.rain.hadoop</groupId>
    <artifactId>rain-hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>6</source>
                    <target>6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
    </properties>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- Dependencies -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.0-cdh5.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.7.0</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
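Two notes on this pom. The cloudera repository entry is required because the CDH-flavored artifacts (the -cdh5.7.0 versions) are published to Cloudera's repository, not Maven Central. Also, the hbase-server and hbase-client dependencies are not used by the HDFS code above, so you can drop them if you only need HDFS access. Since the tests run against a live cluster, hmaster must be reachable and resolvable (for example, via an /etc/hosts entry) from the machine running them.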