About

Edit photo

Wednesday, March 16, 2016

Program to find Length of each word in Hadoop


The following program finds the length of each word in the given input file.

For example, if your input file contains the following text:

"Hi SsaiK
this program help someone who new to Hadoop"

Output:

Hadoop 6
help 4
Hi 2
new 3
program 7
someone 7
SsaiK 5
this 4
to 2
who 3

package tut.ssaik.com.lenofword;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class lengthofword {
 public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
   String line = value.toString();
   String[] letters = line.split(" ");

   for (String letter : letters) {
    context.write(new Text(letter), new IntWritable(letter.length()));
   }
  }
 }

 public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  public void reduce(Text key, IntWritable values, Context context) throws IOException, InterruptedException {

   context.write(key, values);
  }
 }

 public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
   System.err.println("Usage: WordCount <in> <out>");
   System.exit(2);
  }
  Job job = new Job(conf, "word count by www.ssaik.com");
  job.setJarByClass(lengthofword.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  // filesystem fs = filesystem.get(conf);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
  // if(fs.exists(outputDir))
  // fs.delete(outputDir,true);
 }
}



Click here to find Number of vowels and consonants in the given file.
Please comment below for any other queries.

0 comments:

Post a Comment