把本地文件夾下的所有文件上傳到hdfs上并合并成一個文件
來自: http://my.oschina.net/u/914897/blog/616682
需要自己寫代碼來實現:
package com.jason.hadoop.example;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Uploads every file in a local directory to HDFS, concatenating them into a
 * single HDFS file.
 *
 * <p>Usage: {@code PutMerge <localDir> <hdfsUri> <hdfsOutputFile>}, e.g.
 * {@code hadoop jar example.jar com.jason.hadoop.example.PutMerge
 * /home/jason/put_merge hdfs://localhost:9000 /example/put_merge/in/merge.txt}
 *
 * @author jason
 */
public class PutMerge {

    /**
     * Entry point.
     *
     * @param args args[0] = local source directory,
     *             args[1] = HDFS URI (e.g. hdfs://localhost:9000),
     *             args[2] = HDFS destination file path
     * @throws IOException if the file systems cannot be obtained
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource("classpath:/hadoop/core-site.xml");
        conf.addResource("classpath:/hadoop/hdfs-site.xml");
        conf.addResource("classpath:/hadoop/mapred-site.xml");

        FileSystem hdfs = FileSystem.get(URI.create(args[1]), conf);
        FileSystem local = FileSystem.getLocal(conf);

        Path inputDir = new Path(args[0]);  // fixed typo: was "inputDlir"
        Path hdfsFile = new Path(args[2]);

        // try-with-resources closes 'out' (and each 'in' below) even when a
        // read/write fails mid-copy; the original leaked both streams on
        // exception because close() was only reached on the success path.
        try (FSDataOutputStream out = hdfs.create(hdfsFile)) {
            for (FileStatus status : local.listStatus(inputDir)) {
                System.out.println(status.getPath().getName());
                try (FSDataInputStream in = local.open(status.getPath())) {
                    byte[] buffer = new byte[4096];  // 256 bytes was needlessly small
                    int bytesRead;
                    while ((bytesRead = in.read(buffer)) > 0) {
                        out.write(buffer, 0, bytesRead);
                    }
                }
            }
        } catch (IOException ioe) {
            // preserve original behavior: report and exit normally
            ioe.printStackTrace();
        }
    }
}
把工程打成jar包后,執行以下命令即可:
hadoop jar hadoopExample-1.0-SNAPSHOT.jar com.jason.hadoop.example.PutMerge /home/jason/hadoop-1.0.1/put_merge hdfs://localhost:9000 /example/put_merge/in/merge_222.txt
本文由用戶 uttq1257 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!