hadoop WordCount GenericOptionsParser example

DBILITY 2016. 11. 6. 14:10
  1. pom.xml
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
      <groupId>com.dbility.hadoop</groupId>
      <artifactId>wordcount</artifactId>
      <version>1.0.0</version>
      <dependencies>
      	<dependency>
      		<groupId>org.apache.hadoop</groupId>
      		<artifactId>hadoop-core</artifactId>
      		<version>1.2.1</version>
      		<scope>provided</scope>
      	</dependency>
      	<dependency>
      		<groupId>org.slf4j</groupId>
      		<artifactId>slf4j-api</artifactId>
      		<version>1.7.20</version>
      		<scope>provided</scope>
      	</dependency>
      	<dependency>
      		<groupId>org.slf4j</groupId>
      		<artifactId>jcl-over-slf4j</artifactId>
      		<version>1.7.20</version>
      		<scope>provided</scope>
      	</dependency>
      	<dependency>
      		<groupId>ch.qos.logback</groupId>
      		<artifactId>logback-classic</artifactId>
      		<version>1.1.7</version>
      		<scope>provided</scope>
      	</dependency>
      	<dependency>
      		<groupId>ch.qos.logback</groupId>
      		<artifactId>logback-access</artifactId>
      		<version>1.1.7</version>
      		<scope>provided</scope>
      	</dependency>
      </dependencies>
    </project>
  2. logback.xml
    <?xml version="1.0" encoding="UTF-8"?>
    <configuration>
      <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
        <!-- encoders are assigned the type ch.qos.logback.classic.encoder.PatternLayoutEncoder
          by default -->
        <encoder>
          <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
      </appender>
    
      <logger name="org.apache.hadoop.mapred.JobClient" additivity="false">
        <level value="info" />
        <appender-ref ref="console" />
      </logger>
    
      <root level="warn">
        <appender-ref ref="console" />
      </root>
    </configuration>
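
  The <logger> entry for org.apache.hadoop.mapred.JobClient above turns job progress output on at INFO, while everything else stays at the WARN root level. A minimal sketch to see the effect (LogDemo is a made-up class, assuming slf4j and logback are on the classpath):

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class LogDemo {

    	private static final Logger logger = LoggerFactory.getLogger(LogDemo.class);

    	public static void main(String[] args) {
    		logger.info("not printed : root level is warn");
    		logger.warn("printed through the console appender");
    	}
    }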
  3. WindowsLocalFileSystem.java
    package com.dbility.hadoop.utils;
    
    import java.io.IOException;
    
    import org.apache.hadoop.fs.LocalFileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;
    
    public class WindowsLocalFileSystem extends LocalFileSystem {

    	public WindowsLocalFileSystem() {
    		super();
    	}

    	@Override
    	public boolean mkdirs(final Path p, final FsPermission permission) throws IOException {
    		final boolean result = super.mkdirs(p);
    		this.setPermission(p, permission);
    		return result;
    	}

    	@Override
    	public void setPermission(final Path p, final FsPermission permission) throws IOException {
    		// chmod is unavailable on Windows without cygwin, so a failed
    		// permission change is logged instead of failing the whole job
    		try {
    			super.setPermission(p, permission);
    		} catch (final IOException ioe) {
    			System.err.println(ioe.getMessage());
    		}
    	}
    }
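
  This subclass exists because LocalFileSystem's permission handling fails on Windows, which would otherwise abort the job. A minimal sketch to confirm the wiring (FsImplCheck is a made-up class; fs.file.impl is set the same way in the driver below):

    package com.dbility.hadoop.utils;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class FsImplCheck {

    	public static void main(String[] args) throws Exception {

    		Configuration conf = new Configuration();
    		conf.set("fs.default.name", "file:///");
    		conf.set("fs.file.impl", "com.dbility.hadoop.utils.WindowsLocalFileSystem");

    		// FileSystem.get() resolves the file:// scheme via fs.file.impl,
    		// so this prints com.dbility.hadoop.utils.WindowsLocalFileSystem
    		FileSystem fs = FileSystem.get(conf);
    		System.out.println(fs.getClass().getName());
    	}
    }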
  4. WordCountMapper.java
    package com.dbility.hadoop.wordcount;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    	private final Text outputKey = new Text();
    	private final IntWritable outputValue = new IntWritable(1);

    	@Override
    	protected void map(LongWritable key, Text value, Context context)
    			throws IOException, InterruptedException {

    		// split the comma-separated line and emit (token, 1) for every token
    		StringTokenizer stk = new StringTokenizer(value.toString(), ",");

    		while (stk.hasMoreTokens()) {
    			String word = stk.nextToken();
    			outputKey.set(word);
    			context.write(outputKey, outputValue);
    		}

    	}
    }
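
  The mapper tokenizes each comma-separated input line and writes (token, 1) per token; duplicates are emitted separately and only merged in the reducer. The tokenizing step in isolation, with a made-up sample record (TokenizeDemo is not part of the job):

    import java.util.StringTokenizer;

    public class TokenizeDemo {

    	public static void main(String[] args) {

    		StringTokenizer stk = new StringTokenizer("2008,1,3,WN", ",");

    		while (stk.hasMoreTokens()) {
    			System.out.println("(" + stk.nextToken() + ", 1)");
    		}
    		// -> (2008, 1) (1, 1) (3, 1) (WN, 1)
    	}
    }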
  5. WordCountReducer.java
    package com.dbility.hadoop.wordcount;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    
    	private final IntWritable outputValue = new IntWritable();

    	@Override
    	protected void reduce(Text key, Iterable<IntWritable> values, Context context)
    			throws IOException, InterruptedException {
    
    		int sum = 0;
    
    		for (IntWritable value : values) {
    			sum+=value.get();
    		}
    		outputValue.set(sum);
    		context.write(key, outputValue);
    	}
    }
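
  After the shuffle, the framework hands the reducer each key once together with all the 1s the mappers emitted for it, and the loop simply adds them up. The same aggregation in plain Java with made-up values (SumDemo is illustration only):

    import java.util.Arrays;
    import java.util.List;

    public class SumDemo {

    	public static void main(String[] args) {

    		// what reduce() sees for one key, e.g. ("WN", [1, 1, 1])
    		List<Integer> values = Arrays.asList(1, 1, 1);

    		int sum = 0;
    		for (int value : values) {
    			sum += value;
    		}

    		System.out.println("(WN, " + sum + ")"); // -> (WN, 3)
    	}
    }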
  6. WordCountDriver.java
    package com.dbility.hadoop.wordcount;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class WordCountDriver extends Configured implements Tool {
    
    	public int run(String[] args) throws Exception {
    
    		// strip generic options (-D, -conf, -files, ...) from args and apply
    		// them to this tool's configuration; only the job arguments remain
    		String[] remainArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    
    		if (remainArgs.length != 2) {
    			System.out.println("Usage : <input> <output>");
    			return -1;
    		}
    
    		Job job = new Job(getConf(), "wordcount");
    		job.setJarByClass(WordCountDriver.class);
    		job.setMapperClass(WordCountMapper.class);
    		job.setReducerClass(WordCountReducer.class);
    
    		job.setInputFormatClass(TextInputFormat.class);
    		job.setOutputFormatClass(TextOutputFormat.class);
    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(IntWritable.class);
    
    		FileInputFormat.addInputPath(job, new Path(remainArgs[0]));
    		FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));
    
    		// delete a pre-existing output directory so the job can be rerun
    		FileSystem hdfs = FileSystem.get(getConf());
    		Path path = new Path(remainArgs[1]);
    
    		if (hdfs.exists(path))
    			hdfs.delete(path, true);
    
    		return job.waitForCompletion(true) ? 0 : -2;
    	}
    
    	public static void main(String[] args) throws Exception {
    
    		Configuration conf = new Configuration();

    		// run map/reduce locally against the local filesystem, routing
    		// file:// through the permission-tolerant WindowsLocalFileSystem
    		conf.set("fs.default.name", "file:///");
    		conf.set("mapred.job.tracker", "local");
    		conf.set("fs.file.impl", "com.dbility.hadoop.utils.WindowsLocalFileSystem");
    		conf.set("io.serializations",
    				 "org.apache.hadoop.io.serializer.JavaSerialization,"
    				+"org.apache.hadoop.io.serializer.WritableSerialization");
    		conf.set("mapred.map.child.java.opts", "-Xms1024M -Xmx1024M");
    		conf.set("io.sort.mb", "512");

    		// hard-coded arguments for local testing; these replace the command line
    		args = new String[] {"D:\\hadoop_test\\2008.csv","D:\\hadoop_test\\output2008"};
    		Runtime.getRuntime().exit(ToolRunner.run(conf, new WordCountDriver(), args));
    	}
    }
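
  What GenericOptionsParser adds: generic options such as -D, -conf and -files are stripped from the command line and applied to the Configuration, and getRemainingArgs() returns only the job arguments, which is what run() checks against <input> <output>. A minimal sketch (ParserDemo and the option values are made up):

    import java.util.Arrays;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class ParserDemo {

    	public static void main(String[] args) throws Exception {

    		Configuration conf = new Configuration();
    		String[] argv = { "-D", "mapred.reduce.tasks=2", "input", "output" };

    		GenericOptionsParser parser = new GenericOptionsParser(conf, argv);

    		System.out.println(conf.get("mapred.reduce.tasks"));             // -> 2
    		System.out.println(Arrays.toString(parser.getRemainingArgs())); // -> [input, output]
    	}
    }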

Attachment: wordcount.zip (download)

 

 

