HBase Programming with MapReduce
A first exercise in writing MapReduce output straight into HBase, the large distributed database. The example computes page-view (PV) counts per URL. Because the input is in RCFile format, the job needs the hive-exec jar, and it also needs the HBase jar; if the cluster admin has already put both jars into hadoop/lib on every node, that step can be skipped. Without further ado, the code:
package test.hbase;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import com.sohu.tv.dm.common.RCFileInputFormat;

public class URLCountHbase {

    // Mapper: reads RCFile rows and emits (url, 1) pairs.
    public static class HBaseMap extends
            Mapper<LongWritable, BytesRefArrayWritable, Text, IntWritable> {

        private final IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, BytesRefArrayWritable value,
                Context context) throws IOException, InterruptedException {
            // Column 4 of the RCFile row holds the URL.
            byte[] url = value.get(4).getBytesCopy();
            context.write(new Text(url), one);
        }
    }

    // Reducer: sums the counts per URL and writes a Put to HBase.
    public static class HBaseReduce extends
            TableReducer<Text, IntWritable, NullWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable i : values) {
                sum += i.get();
            }
            // The row key is the URL; the PV count goes into column type:count.
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.add(Bytes.toBytes("type"), Bytes.toBytes("count"),
                    Bytes.toBytes(String.valueOf(sum)));
            context.write(NullWritable.get(), put);
        }
    }

    // (Re)create the target table with a single column family "type".
    public static void createHbaseTable(String tablename) throws IOException {
        HTableDescriptor htd = new HTableDescriptor(tablename);
        HColumnDescriptor col = new HColumnDescriptor("type");
        htd.addFamily(col);
        Configuration config = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(config);
        if (admin.tableExists(tablename)) {
            System.out.println("table exists, trying to recreate it");
            admin.disableTable(tablename);
            admin.deleteTable(tablename);
        }
        System.out.println("create new table: " + tablename);
        admin.createTable(htd);
    }

    public static void main(String[] args) throws Exception {
        String tablename = "urlcount";
        Configuration conf = new Configuration();

        // Ship the hive-exec and hbase jars with the job (skip this if the
        // cluster admin has already put them in hadoop/lib on every node).
        final FileSystem fs = FileSystem.getLocal(conf);
        final Set<String> localFiles = new HashSet<String>();
        localFiles.add("/opt/hadoop/hive-0.8.1/lib/hive-exec-0.8.1.jar");
        localFiles.add("/opt/hadoop/hbase/hbase-0.92.1.jar");
        final Set<String> files = new HashSet<String>();
        for (String s : localFiles) {
            files.add(convertPath(s, fs));
        }
        cacheJars(conf, files);

        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);
        createHbaseTable(tablename);

        Job job = new Job(conf, "URLCount table with " + args[0]);
        job.setJarByClass(URLCountHbase.class);
        job.setNumReduceTasks(3);
        job.setReducerClass(HBaseReduce.class);
        job.setMapperClass(HBaseMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        job.setInputFormatClass(RCFileInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Qualify a local path with the file system's scheme so the
    // JobClient can resolve it.
    private static String convertPath(String path, FileSystem fs) {
        final Path p = new Path(path);
        return p.makeQualified(fs).toString();
    }

    // Append the given jars to "tmpjars" so Hadoop ships them to the
    // cluster through the distributed cache.
    private static void cacheJars(Configuration job, Set<String> localUrls)
            throws IOException {
        if (localUrls.isEmpty()) {
            return;
        }
        final String tmpjars = job.get("tmpjars");
        final StringBuilder sb = new StringBuilder();
        if (null != tmpjars) {
            sb.append(tmpjars);
            sb.append(",");
        }
        sb.append(org.apache.hadoop.util.StringUtils.arrayToString(
                localUrls.toArray(new String[0])));
        job.set("tmpjars", sb.toString());
    }
}
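For reference, HBase also ships a helper, org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil, whose initTableReducerJob() sets the reducer, switches the output format to TableOutputFormat, records the target table, and adds the HBase dependency jars to tmpjars automatically, so most of the manual wiring above goes away (the hive-exec jar may still need to be shipped by hand, e.g. with the cacheJars helper). A minimal sketch of what main() could look like with it, using the same classes as the listing:

    public static void main(String[] args) throws Exception {
        // HBaseConfiguration.create() layers hbase-default.xml and
        // hbase-site.xml on top of the Hadoop configuration, so
        // TableOutputFormat can locate the ZooKeeper quorum.
        Configuration conf = HBaseConfiguration.create();
        createHbaseTable("urlcount");

        Job job = new Job(conf, "URLCount table with " + args[0]);
        job.setJarByClass(URLCountHbase.class);
        job.setMapperClass(HBaseMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setInputFormatClass(RCFileInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // Sets the reducer and TableOutputFormat, records the output table,
        // and ships the HBase jars with the job (addDependencyJars).
        TableMapReduceUtil.initTableReducerJob("urlcount", HBaseReduce.class, job);
        job.setNumReduceTasks(3);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }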
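After a successful run, the counts can be checked with scan 'urlcount' in the HBase shell, or read back programmatically. Below is a minimal sketch against the 0.92-era client API; the class name URLCountCheck is made up for this example, and the URL to look up is passed as args[0]:

package test.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class URLCountCheck {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        HTable table = new HTable(config, "urlcount");
        // The row key is the URL; the count sits in family "type",
        // qualifier "count", stored as a decimal string by the reducer.
        Get get = new Get(Bytes.toBytes(args[0]));
        Result result = table.get(get);
        byte[] raw = result.getValue(Bytes.toBytes("type"), Bytes.toBytes("count"));
        System.out.println(args[0] + " pv=" + (raw == null ? "0" : Bytes.toString(raw)));
        table.close();
    }
}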