hadoop WordCount GenericOptionsParser application example
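GenericOptionsParser separates the standard Hadoop command-line options (-conf, -D, -fs, -jt, -files, -libjars, -archives) from the application's own arguments. ToolRunner already applies it when a driver implements Tool; the driver below also calls it directly in run() to retrieve the remaining, application-specific arguments.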
- pom.xml
```xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.dbility.hadoop</groupId>
    <artifactId>wordcount</artifactId>
    <version>1.0.0</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.2.1</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.20</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
            <version>1.7.20</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>1.1.7</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-access</artifactId>
            <version>1.1.7</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>
```
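Every dependency is declared with provided scope, so the packaged jar contains only the WordCount classes; hadoop-core 1.2.1 (the classic MR1 artifact) and the logging jars are expected on the runtime classpath.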
- logback.xml
```xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
        <!-- encoders are assigned the type ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>
    <logger name="org.apache.hadoop.mapred.JobClient" additivity="false">
        <level value="info" />
        <appender-ref ref="console" />
    </logger>
    <root level="warn">
        <appender-ref ref="console" />
    </root>
</configuration>
```
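The org.apache.hadoop.mapred.JobClient logger is raised to info so job progress and counters are printed to the console, while the root logger stays at warn to suppress the rest of the framework's output.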
- WindowsLocalFileSystem.java
```java
package com.dbility.hadoop.utils;

import java.io.IOException;

import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

/**
 * LocalFileSystem subclass that tolerates chmod failures on Windows,
 * where Hadoop 1.x cannot apply POSIX permissions.
 */
public class WindowsLocalFileSystem extends LocalFileSystem {

    public WindowsLocalFileSystem() {
        super();
    }

    @Override
    public boolean mkdirs(final Path p, final FsPermission permission) throws IOException {
        final boolean result = super.mkdirs(p);
        this.setPermission(p, permission);
        return result;
    }

    @Override
    public void setPermission(final Path p, final FsPermission permission) throws IOException {
        try {
            super.setPermission(p, permission);
        } catch (final IOException ioe) {
            // On Windows the underlying chmod fails; log and continue
            // instead of aborting the local run.
            System.err.println(ioe.getMessage());
        }
    }
}
```
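This shim exists because Hadoop 1.x running locally on Windows throws an IOException when LocalFileSystem tries to apply POSIX permissions to output directories; overriding setPermission to merely log the failure lets a local test run proceed. The driver registers it through the fs.file.impl property.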
- WordCountMapper.java
```java
package com.dbility.hadoop.wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final Text outputKey = new Text();
    private final IntWritable outputValue = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split each line on commas and emit (token, 1) per field.
        StringTokenizer stk = new StringTokenizer(value.toString(), ",");
        while (stk.hasMoreTokens()) {
            outputKey.set(stk.nextToken());
            context.write(outputKey, outputValue);
        }
    }
}
```
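A note on the delimiter: since the driver points at a CSV file (2008.csv, apparently the airline on-time data used elsewhere on this blog), the mapper tokenizes on commas, so each field value is counted rather than whitespace-separated words.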
- WordCountReducer.java
```java
package com.dbility.hadoop.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable outputValue = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts emitted for each word.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        outputValue.set(sum);
        context.write(key, outputValue);
    }
}
```
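One possible refinement, not in the original post: because the reduce function is an associative, commutative sum, the same class can also act as a combiner to pre-aggregate counts on the map side and shrink the shuffle. A minimal sketch, to be added alongside the other job.set* calls in the driver's run():

```java
// Hypothetical addition: reuse WordCountReducer as a combiner so each map
// task emits partial sums instead of one record per token.
job.setCombinerClass(WordCountReducer.class);
```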
- WordCountDriver.java
```java
package com.dbility.hadoop.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        // Strip the generic Hadoop options (-D, -fs, -jt, -files, ...) and
        // keep only the application arguments. Passing getConf() lets the
        // parsed options take effect on this job's configuration.
        String[] remainArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();

        if (remainArgs.length != 2) {
            System.out.println("Usage : <input> <output>");
            return -1;
        }

        Job job = new Job(getConf(), "wordcount");

        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(remainArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));

        // Remove a pre-existing output directory so the job can be rerun.
        FileSystem hdfs = FileSystem.get(getConf());
        Path path = new Path(remainArgs[1]);
        if (hdfs.exists(path))
            hdfs.delete(path, true);

        return job.waitForCompletion(true) ? 0 : -2;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Local standalone mode on Windows. Note: the original had the typo
        // "defalut.fs.name"; the Hadoop 1.x property is "fs.default.name".
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.file.impl", "com.dbility.hadoop.utils.WindowsLocalFileSystem");
        conf.set("io.serializations",
                "org.apache.hadoop.io.serializer.JavaSerialization,"
                + "org.apache.hadoop.io.serializer.WritableSerialization");
        conf.set("mapred.map.child.java.opts", "-Xms1024M -Xmx1024M");
        conf.set("io.sort.mb", "512");

        args = new String[] { "D:\\hadoop_test\\2008.csv", "D:\\hadoop_test\\output2008" };

        Runtime.getRuntime().exit(ToolRunner.run(conf, new WordCountDriver(), args));
    }
}
```
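main() above hard-codes the input/output arguments and local-mode settings for a Windows test. On a cluster, the args = new String[] {...} line would be removed, and thanks to GenericOptionsParser the generic options can be passed ahead of the paths; a hypothetical invocation (jar name from the pom, -D value only illustrative):

```bash
hadoop jar wordcount-1.0.0.jar com.dbility.hadoop.wordcount.WordCountDriver \
    -D mapred.reduce.tasks=2 /input/2008.csv /output2008
```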