Data set Example:
Input Data Example:
100.20.20.0 India 150.100.0.20 00.120.0.abc.txxyt.00
101.21.21.1 USA 151.100.0.21 11.101.axc.asdfs.212
102.20.20.0 India 150.100.0.20 00.120.0.abc.txxyt.00
103.21.21.1 USA 151.100.0.21 11.101.axc.asdfs.213
. . .
MapReduce code for WordCount
The job below counts every whitespace-separated token in these records, so duplicated values such as repeated IP addresses surface with counts greater than one.
Driver Code:
package com.mr.dupip.replaceval;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Extending Configured supplies the getConf()/setConf() methods that the
// Tool interface requires.
public class DupIPDriver extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int ec = ToolRunner.run(new DupIPDriver(), args);
        System.out.println(ec);
    }

    @Override
    public int run(String[] args) throws Exception {
        Path inpath = new Path(args[0]);   // Raw Syslog Data
        Path wcOpath = new Path(args[1]);  // List of Wordcounts

        // ToolRunner has already populated this Configuration.
        Configuration conf = getConf();
        Job wordCountjob = Job.getInstance(conf, "WordCount");
        wordCountjob.setJarByClass(DupIPDriver.class);
        wordCountjob.setMapperClass(WordMap.class);
        wordCountjob.setReducerClass(WordReducer.class);
        wordCountjob.setNumReduceTasks(1);

        // The map output key/value classes default to the job output
        // classes below, so they do not need to be set separately here.
        wordCountjob.setOutputKeyClass(Text.class);
        wordCountjob.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(wordCountjob, inpath);
        FileOutputFormat.setOutputPath(wordCountjob, wcOpath);

        // Return 0 on success so ToolRunner reports a proper exit code.
        return wordCountjob.waitForCompletion(true) ? 0 : 1;
    }
}
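With the three classes compiled and packaged into a jar (the name dupip.jar and the HDFS paths below are only placeholders for illustration), the job is launched through ToolRunner in the usual way:

hadoop jar dupip.jar com.mr.dupip.replaceval.DupIPDriver /data/syslog/input /data/syslog/wcoutput

Because the driver goes through ToolRunner, generic options such as -D property overrides can also be passed before the input and output paths.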
Mapper Code:
package com.mr.dupip.replaceval;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String s = value.toString();
        // Emit (word, 1) for every non-empty space-separated token.
        for (String word : s.split(" ")) {
            if (word.length() > 0) {
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }
}
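The tokenization in WordMap can be sanity-checked outside Hadoop with plain Java. The sketch below is not part of the job; it only mirrors the loop in map() and prints the (word, 1) pairs the mapper would emit for the first sample row:

public class WordMapSketch {
    public static void main(String[] args) {
        // First row of the sample data set above.
        String line = "100.20.20.0 India 150.100.0.20 00.120.0.abc.txxyt.00";
        // Same logic as WordMap.map: split on spaces, skip empty
        // tokens, and pair each token with a count of 1.
        for (String word : line.split(" ")) {
            if (word.length() > 0) {
                System.out.println(word + "\t1");
            }
        }
    }
}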
Reducer Code:
package com.mr.dupip.replaceval;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the 1s emitted by the mapper for this word.
        int count = 0;
        for (IntWritable a : values) {
            count += a.get();
        }
        context.write(key, new IntWritable(count));
    }
}
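For just the four sample rows shown at the top (ignoring the elided lines), the single reduce task would write one tab-separated word/count pair per line to part-r-00000, sorted lexicographically by key:

00.120.0.abc.txxyt.00	2
100.20.20.0	1
101.21.21.1	1
102.20.20.0	1
103.21.21.1	1
11.101.axc.asdfs.212	1
11.101.axc.asdfs.213	1
150.100.0.20	2
151.100.0.21	2
India	2
USA	2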