Naive Bayes, MapReduce Version
1. Count how often each word occurs
1/ Compute the prior probability of each class
* Input format: class + document id + document words (split as A,b,c)
* Output format: class + number of documents + total number of words in those documents
2/ Compute the conditional probability of each word
* Input format: class + document id + document words (split as A,b,c)
* Output format: class + word + word count
3/ Assume a binary classification problem - compute the probability values
* 1: class + number of documents + total number of words in those documents
* 2: class + word + word count
* 3: class + word + log(word count / total words in the class), class + log(document count / sum(document counts))
* Input format: class + word + word count
* Output format: "word", "class + log probability" for both classes, plus the class priors
4/ Assume a binary classification problem - test
* 1: class + number of documents + total number of words in those documents
* 2: class + word + word count
* 3: class + word + log(word count / total words in the class), class + log(document count / sum(document counts))
* Input format: new document id + document words (split as A,b,c)
* Output format: new document id + class
(The decision rule these quantities feed is sketched right after this outline.)
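For reference, the log probabilities produced above feed the standard Naive Bayes decision rule; a minimal sketch of the scoring used in steps 3 and 4 (the standard formulation under the usual word-independence assumption, not spelled out in the original post):

$$\hat{c} = \arg\max_{c \in \{0,1\}} \Big[ \log P(c) + \sum_{w \in d} \log P(w \mid c) \Big], \qquad P(w \mid c) \approx \frac{\mathrm{count}(w,c)}{\sum_{w'} \mathrm{count}(w',c)}$$

Here count(w, c) is the per-class word count from step 2 and the denominator is the per-class total word count from step 1, i.e. exactly the ratio inside the log() above; log P(c) is the class prior from step 1.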
This version mainly lays out the Naive Bayes workflow on MapReduce; concrete optimizations and fixes will come later.
Python implementation:
http://blog.csdn.net/q383700092/article/details/51773364
R version (using library functions):
http://blog.csdn.net/q383700092/article/details/51774069
Simplified MapReduce implementation:
http://blog.csdn.net/q383700092/article/details/51778765
Spark version:
to be added
Bayes1
package com.ml.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* 1/ Compute the prior probability of each class
* Results are aggregated into dict1.txt
* Input format: class + document id + document words (split as A,b,c)
* Output format: class + number of documents + total number of words in those documents
*/
public class Bayes1 extends Configured implements Tool {
public static enum Counter {
PARSER_ERR
}
public static class MyMap extends Mapper<LongWritable, Text, Text, Text> {
private Text mykey = new Text();// class id
private Text myval = new Text();// document counter ("1") + number of words in the document
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] array = value.toString().split(",");
String[] doc=array[2].split("-");
mykey.set(array[0]);
myval.set("1"+","+doc.length);
context.write(mykey, myval);
};
}
public static class MyReduce extends Reducer<Text, Text, Text, Text> {
private Text val = new Text();
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
// total number of documents seen for this class
int sum = 0;
// total number of words seen for this class
int wordsum = 0;
// iterate over the values
for (Text value : values) {
// accumulate the per-document counters emitted by the mapper
String[] array = value.toString().split(",");
sum += Integer.parseInt(array[0]);
wordsum += Integer.parseInt(array[1]);
}
val.set(sum + "," + wordsum);
context.write(key, val);
};
}
@Override
public int run(String[] args) throws Exception {
// 1 conf
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", ",");// key value分隔符
// 2 create job
// Job job = new Job(conf, ModuleMapReduce.class.getSimpleName());
Job job = this.parseInputAndOutput(this, conf, args);
// 3 set job
// 3.1 set run jar class
// job.setJarByClass(ModuleReducer.class);
// 3.2 set intputformat
job.setInputFormatClass(TextInputFormat.class);
// 3.3 set input path
// FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.4 set mapper
job.setMapperClass(MyMap.class);
// 3.5 set map output key/value class
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// 3.6 set partitioner class
// job.setPartitionerClass(HashPartitioner.class);
// 3.7 set reduce number
// job.setNumReduceTasks(1);
// 3.8 set sort comparator class
// job.setSortComparatorClass(LongWritable.Comparator.class);
// 3.9 set group comparator class
// job.setGroupingComparatorClass(LongWritable.Comparator.class);
// 3.10 set combiner class
// job.setCombinerClass(null);
// 3.11 set reducer class
job.setReducerClass(MyReduce.class);
// 3.12 set output format
job.setOutputFormatClass(TextOutputFormat.class);
// 3.13 job output key/value class
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// 3.14 set job output path
// FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 4 submit job
boolean isSuccess = job.waitForCompletion(true);
// 5 exit
// System.exit(isSuccess ? 0 : 1);
return isSuccess ? 0 : 1;
}
public Job parseInputAndOutput(Tool tool, Configuration conf, String[] args)
throws Exception {
// validate
if (args.length != 2) {
System.err.printf("Usage:%s [genneric options]<input><output>\n",
tool.getClass().getSimpleName());
ToolRunner.printGenericCommandUsage(System.err);
return null;
}
// 2 create job
Job job = new Job(conf, tool.getClass().getSimpleName());
// 3.1 set run jar class
job.setJarByClass(tool.getClass());
// 3.3 set input path
FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.14 set job output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
return job;
}
public static void main(String[] args) throws Exception {
args = new String[] {
"hdfs://192.168.192.129:9000/ml/bayesTrain.txt",
// "hdfs://hadoop-00:9000/home910/liyuting/output/" };
"hdfs://192.168.192.129:9000/ml/bayes/" };
// run mapreduce
int status = ToolRunner.run(new Bayes1(), args);
// 5 exit
System.exit(status);
}
}
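To make the Bayes1 formats concrete, here is a hypothetical training file (sample lines only, not the original bayesTrain.txt) and the per-class summary the job would emit from it:

bayesTrain.txt (input, "class,docId,word1-word2-..."):
1,d01,cheap-offer-buy
1,d02,buy-now
0,d03,meeting-notes-agenda

part-r-00000 (output, "class,documentCount,totalWordCount"):
0,1,3
1,2,5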
Bayes2
package com.ml.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* 2/ Compute the conditional probability of each word
* Results are aggregated into dict2.txt
* Input format: class + document id + document words (split as A,b,c)
* Output format: class + word + word count
*/
public class Bayes2 extends Configured implements Tool {
public static enum Counter {
PARSER_ERR
}
public static class MyMap extends Mapper<LongWritable, Text, Text, Text> {
private Text mykey = new Text();// class + word
private Text myval = new Text();// occurrence count ("1")
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] array = value.toString().split(",");
String[] doc=array[2].split("-");
for (String str : doc) {
mykey.set(array[0]+ ","+ str);
myval.set("1");
context.write(mykey, myval);
}
};
}
public static class MyReduce extends Reducer<Text, Text, Text, Text> {
private Text val = new Text();
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
// total number of occurrences of this word within this class
int sum = 0;
// iterate over the values
for (Text value : values) {
// accumulate the "1" counts emitted by the mapper
sum += Integer.parseInt(value.toString());
}
val.set(sum + "");
context.write(key, val);
};
}
@Override
public int run(String[] args) throws Exception {
// 1 conf
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", ",");// key value分隔符
// 2 create job
// Job job = new Job(conf, ModuleMapReduce.class.getSimpleName());
Job job = this.parseInputAndOutput(this, conf, args);
// 3 set job
// 3.1 set run jar class
// job.setJarByClass(ModuleReducer.class);
// 3.2 set intputformat
job.setInputFormatClass(TextInputFormat.class);
// 3.3 set input path
// FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.4 set mapper
job.setMapperClass(MyMap.class);
// 3.5 set map output key/value class
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// 3.6 set partitioner class
// job.setPartitionerClass(HashPartitioner.class);
// 3.7 set reduce number
// job.setNumReduceTasks(1);
// 3.8 set sort comparator class
// job.setSortComparatorClass(LongWritable.Comparator.class);
// 3.9 set group comparator class
// job.setGroupingComparatorClass(LongWritable.Comparator.class);
// 3.10 set combiner class
// job.setCombinerClass(null);
// 3.11 set reducer class
job.setReducerClass(MyReduce.class);
// 3.12 set output format
job.setOutputFormatClass(TextOutputFormat.class);
// 3.13 job output key/value class
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// 3.14 set job output path
// FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 4 submit job
boolean isSuccess = job.waitForCompletion(true);
// 5 exit
// System.exit(isSuccess ? 0 : 1);
return isSuccess ? 0 : 1;
}
public Job parseInputAndOutput(Tool tool, Configuration conf, String[] args)
throws Exception {
// validate
if (args.length != 2) {
System.err.printf("Usage:%s [genneric options]<input><output>\n",
tool.getClass().getSimpleName());
ToolRunner.printGenericCommandUsage(System.err);
return null;
}
// 2 create job
Job job = new Job(conf, tool.getClass().getSimpleName());
// 3.1 set run jar class
job.setJarByClass(tool.getClass());
// 3.3 set input path
FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.14 set job output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
return job;
}
public static void main(String[] args) throws Exception {
args = new String[] {
"hdfs://192.168.192.129:9000/ml/bayesTrain.txt",
// "hdfs://hadoop-00:9000/home910/liyuting/output/" };
"hdfs://192.168.192.129:9000/ml/bayes/pword/" };
// run mapreduce
int status = ToolRunner.run(new Bayes2(), args);
// 5 exit
System.exit(status);
}
}
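Because the Bayes2 reducer only sums the "1" markers emitted for each (class, word) key, the combiner hook that is commented out in the driver could in principle be pointed at MyReduce as well to cut shuffle traffic; partial sums produced on the map side are parsed and summed again in the reducer, so the result is unchanged. A one-line sketch (an optional tweak, not part of the original code):

job.setCombinerClass(MyReduce.class);// pre-aggregate word counts on the map side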
Bayes3
package com.ml.mapreduce;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* 3/ Assume a binary classification problem - compute the probability values
* 1: class + number of documents + total number of words in those documents
* 2: class + word + word count
* 3: class + word + log(word count / total words in the class), class + log(document count / sum(document counts))
*
* Input format: class + word + word count
* Output format: "word", "class + log probability" for both classes, plus the class priors
*/
public class Bayes3 extends Configured implements Tool {
public static enum Counter {
PARSER_ERR
}
public static class MyMap extends Mapper<LongWritable, Text, Text, Text> {
private Text mykey = new Text();// the word
private Text myval = new Text();// class + log probability
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
BufferedReader br = null;
// fetch the files registered in this job's DistributedCache
Path[] distributePaths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
String lines = null;
String[] class1 = {"0","0"};// document count and total word count for class 1
String[] class0 = {"0","0"};// document count and total word count for class 0
for(Path p : distributePaths){
if(p.getParent().toString().endsWith("bayes")){
// read the cached class-summary file into memory
br = new BufferedReader(new FileReader(p.toString()));
while(null!=(lines=br.readLine())){
String[] pall= lines.split(",");
if (pall[0].equals("1")) {
class1[0]=pall[1];
class1[1]=pall[2];
}else {
class0[0]=pall[1];
class0[1]=pall[2];
}
}
}
}
String[] array = value.toString().split(",");
Double plog=0.0;
if (array[0].equals("1")) {
mykey.set(array[1]);// the word
plog=Math.log(Double.parseDouble(array[2])/Double.parseDouble(class1[1]));// log(word count / total words in class 1)
myval.set(array[0]+","+plog);// class + log probability
context.write(mykey, myval);
}else {
mykey.set(array[1]);// the word
plog=Math.log(Double.parseDouble(array[2])/Double.parseDouble(class0[1]));// log(word count / total words in class 0)
myval.set(array[0]+","+plog);// class + log probability
context.write(mykey, myval);
}
};
}
public static class MyReduce extends Reducer<Text, Text, Text, Text> {
private Text val = new Text();
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
String vals = "tab";// placeholder field so the word's class/log-probability pairs start at a fixed column in the output line
for (Text value : values) {
// concatenate the "class,logProb" pairs emitted for this word
vals = vals + "," + value.toString();
}
val.set(vals);
context.write(key, val);
};
}
@Override
public int run(String[] args) throws Exception {
// 1 conf
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", ",");// key value分隔符
DistributedCache.addCacheFile(new Path(args[2]).toUri(), conf);//為該job添加緩存文件
// 2 create job
// Job job = new Job(conf, ModuleMapReduce.class.getSimpleName());
Job job = this.parseInputAndOutput(this, conf, args);
// 3 set job
// 3.1 set run jar class
// job.setJarByClass(ModuleReducer.class);
// 3.2 set intputformat
job.setInputFormatClass(TextInputFormat.class);
// 3.3 set input path
// FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.4 set mapper
job.setMapperClass(MyMap.class);
// 3.5 set map output key/value class
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// 3.6 set partitioner class
// job.setPartitionerClass(HashPartitioner.class);
// 3.7 set reduce number
// job.setNumReduceTasks(0);
// 3.8 set sort comparator class
// job.setSortComparatorClass(LongWritable.Comparator.class);
// 3.9 set group comparator class
// job.setGroupingComparatorClass(LongWritable.Comparator.class);
// 3.10 set combiner class
// job.setCombinerClass(null);
// 3.11 set reducer class
job.setReducerClass(MyReduce.class);
// 3.12 set output format
job.setOutputFormatClass(TextOutputFormat.class);
// 3.13 job output key/value class
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// 3.14 set job output path
// FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 4 submit job
boolean isSuccess = job.waitForCompletion(true);
// 5 exit
// System.exit(isSuccess ? 0 : 1);
return isSuccess ? 0 : 1;
}
public Job parseInputAndOutput(Tool tool, Configuration conf, String[] args)
throws Exception {
// validate
// if (args.length != 2) {
// System.err.printf("Usage:%s [genneric options]<input><output>\n",
// tool.getClass().getSimpleName());
// ToolRunner.printGenericCommandUsage(System.err);
// return null;
// }
// 2 create job
Job job = new Job(conf, tool.getClass().getSimpleName());
// 3.1 set run jar class
job.setJarByClass(tool.getClass());
// 3.3 set input path
FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.14 set job output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
return job;
}
public static void main(String[] args) throws Exception {
args = new String[] {
"hdfs://192.168.192.129:9000/ml/bayes/pword/part-r-00000",
// "hdfs://hadoop-00:9000/home910/liyuting/output/" };
"hdfs://192.168.192.129:9000/ml/bayes/pall/",
"hdfs://192.168.192.129:9000/ml/bayes/part-r-00000"};
// run mapreduce
int status = ToolRunner.run(new Bayes3(), args);
// 5 exit
System.exit(status);
}
}
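One detail worth noting in Bayes3 (and likewise in Bayes4) is that the DistributedCache file is re-read inside map(), i.e. once per input record. A common refinement is to load it once per mapper in setup(). A minimal sketch under that assumption, keeping the same field layout as the original map() (illustrative only, not part of the original code):

// fields on MyMap, filled once per task instead of once per record
private final String[] class1 = {"0", "0"};// document count, total word count for class 1
private final String[] class0 = {"0", "0"};// document count, total word count for class 0

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Path[] cached = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    for (Path p : cached) {
        if (p.getParent().toString().endsWith("bayes")) {
            BufferedReader br = new BufferedReader(new FileReader(p.toString()));
            String line;
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(",");// "class,documentCount,totalWordCount"
                if (parts[0].equals("1")) { class1[0] = parts[1]; class1[1] = parts[2]; }
                else { class0[0] = parts[1]; class0[1] = parts[2]; }
            }
            br.close();
        }
    }
}

With this in place, map() only needs the Math.log(wordCount / totalWordsInClass) computation and the context.write() call.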
Bayes4
package com.ml.mapreduce;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* 4/ Assume a binary classification problem - test
* 1: class + number of documents + total number of words in those documents
* 2: class + word + word count
* 3: class + word + log(word count / total words in the class), class + log(document count / sum(document counts))
*
* Input format: new document id + document words (split as A,b,c)
* Output format: new document id + class
*/
public class Bayes4 extends Configured implements Tool {
public static enum Counter {
PARSER_ERR
}
public static class MyMap extends Mapper<LongWritable, Text, Text, Text> {
private Text mykey = new Text();// document id
private Text myval = new Text();// class/log-probability fields for one word
Map<String, String> zidianString = new HashMap<String, String>();// key: word, value: probability fields - assumes the dictionary fits in memory; otherwise read it in chunks
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
BufferedReader br = null;
// fetch the files registered in this job's DistributedCache
Path[] distributePaths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
String lines = null;
for(Path p : distributePaths){
if(p.getParent().toString().endsWith("pall")){
// read the cached word-probability dictionary into memory
br = new BufferedReader(new FileReader(p.toString()));
while(null!=(lines=br.readLine())){
String[] pall= lines.split(",");
if (pall.length>4) {// both classes contributed a log probability for this word
if (pall[2].equals("1")) {
zidianString.put(pall[0], pall[2]+","+pall[3]+","+pall[4]+","+pall[5]);
}else {
zidianString.put(pall[0], pall[4]+","+pall[5]+","+pall[2]+","+pall[3]);
}
}else {
if (pall[2].equals("1")) {
zidianString.put(pall[0], pall[2]+","+pall[3]+","+"0"+","+"0.0");
}else {
zidianString.put(pall[0], "1"+","+"0.0"+","+pall[2]+","+pall[3]);
}
}
}
}
}
String[] array = value.toString().split(",");
String[] doc=array[1].split("-");
for (String str : doc) {
if (zidianString.containsKey(str)) {
String[] kk=zidianString.get(str).toString().split(",");// "1", logP(word|class1), "0", logP(word|class0)
mykey.set(array[0]);// document id
myval.set(kk[0]+","+kk[1]+","+kk[2]+","+kk[3]);// class 1 label and log prob, class 0 label and log prob
context.write(mykey, myval);
}
}
};
}
public static class MyReduce extends Reducer<Text, Text, Text, Text> {
private Text val = new Text();
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
// accumulate each class's log probabilities for this document
Double sum=0.5;// prior probability of class 1 -- should be computed in advance; could be read in here; to be revised when there is time
Double sum2=0.5;// prior probability of class 0
// iterate over the values
for (Text value : values) {
// add up the log likelihoods
String[] array = value.toString().split(",");
sum += Double.parseDouble(array[1]);// log likelihood under class 1
sum2 += Double.parseDouble(array[3]);// log likelihood under class 0
}
if (sum>sum2) {
val.set("class 1");
}else {
val.set("class 0");
}
context.write(key, val);
};
}
@Override
public int run(String[] args) throws Exception {
// 1 conf
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", ",");// key value分隔符
DistributedCache.addCacheFile(new Path(args[2]).toUri(), conf);//為該job添加緩存文件
// 2 create job
// Job job = new Job(conf, ModuleMapReduce.class.getSimpleName());
Job job = this.parseInputAndOutput(this, conf, args);
// 3 set job
// 3.1 set run jar class
// job.setJarByClass(ModuleReducer.class);
// 3.2 set intputformat
job.setInputFormatClass(TextInputFormat.class);
// 3.3 set input path
// FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.4 set mapper
job.setMapperClass(MyMap.class);
// 3.5 set map output key/value class
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// 3.6 set partitioner class
// job.setPartitionerClass(HashPartitioner.class);
// 3.7 set reduce number
// job.setNumReduceTasks(0);
// 3.8 set sort comparator class
// job.setSortComparatorClass(LongWritable.Comparator.class);
// 3.9 set group comparator class
// job.setGroupingComparatorClass(LongWritable.Comparator.class);
// 3.10 set combiner class
// job.setCombinerClass(null);
// 3.11 set reducer class
job.setReducerClass(MyReduce.class);
// 3.12 set output format
job.setOutputFormatClass(TextOutputFormat.class);
// 3.13 job output key/value class
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// 3.14 set job output path
// FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 4 submit job
boolean isSuccess = job.waitForCompletion(true);
// 5 exit
// System.exit(isSuccess ? 0 : 1);
return isSuccess ? 0 : 1;
}
public Job parseInputAndOutput(Tool tool, Configuration conf, String[] args)
throws Exception {
// validate
// if (args.length != 2) {
// System.err.printf("Usage:%s [genneric options]<input><output>\n",
// tool.getClass().getSimpleName());
// ToolRunner.printGenericCommandUsage(System.err);
// return null;
// }
// 2 create job
Job job = new Job(conf, tool.getClass().getSimpleName());
// 3.1 set run jar class
job.setJarByClass(tool.getClass());
// 3.3 set input path
FileInputFormat.addInputPath(job, new Path(args[0]));
// 3.14 set job output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
return job;
}
public static void main(String[] args) throws Exception {
args = new String[] {
"hdfs://192.168.192.129:9000/ml/test.txt",
// "hdfs://hadoop-00:9000/home910/liyuting/output/" };
"hdfs://192.168.192.129:9000/ml/bayes/result/",
"hdfs://192.168.192.129:9000/ml/bayes/pall/part-r-00000"};
// run mapreduce
int status = ToolRunner.run(new Bayes4(), args);
// 5 exit
System.exit(status);
}
}
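Finally, a hypothetical run of Bayes4 (sample values only; the predicted class depends entirely on the trained model):

test.txt (input, "docId,word1-word2-..."):
d10,cheap-buy
d11,meeting-agenda

result/part-r-00000 (output, "docId,class"):
d10,class 1
d11,class 0

As the reducer comment notes, the hardcoded 0.5/0.5 priors should really be precomputed; they can be derived from the Bayes1 summary as documentCount(c) / sum of documentCounts, which is the log(document count / sum(document counts)) term in the outline at the top.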