
Using hadoop MRUnit

DBILITY 2016. 11. 17. 19:13
  • MRUnit was developed at Cloudera and released as an Apache open-source
    project; it aims to provide simple integration between JUnit and the
    MapReduce framework.

  •  API                                                  Description
     org.apache.hadoop.mrunit.mapreduce.MapDriver         API for verifying Mapper output
     org.apache.hadoop.mrunit.mapreduce.ReduceDriver      API for verifying Reducer output
     org.apache.hadoop.mrunit.mapreduce.MapReduceDriver   API for verifying the output of a MapReduce job
     org.apache.hadoop.mrunit.MapDriver                   API for verifying Mapper output (outside the mapreduce subpackage, i.e. the old mapred API)
     org.apache.hadoop.mrunit.ReduceDriver                API for verifying Reducer output (outside the mapreduce subpackage, i.e. the old mapred API)
     org.apache.hadoop.mrunit.PipelineMapReduceDriver     API for verifying the workflow of a pipeline of MapReduce jobs
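
    For reference, here is a minimal sketch of driving a new-API
    (org.apache.hadoop.mapreduce) Mapper with the mapreduce-subpackage
    MapDriver. The LineMapper class is hypothetical and not part of this
    post; the test only illustrates the driver's usage pattern.

    package com.dbility.hadoop.chain;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;
    import org.junit.Test;

    public class NewApiMapDriverTest {

    	// hypothetical new-API mapper: emits each input line with a count of 1
    	static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    		@Override
    		protected void map(LongWritable key, Text value, Context context)
    				throws IOException, InterruptedException {
    			context.write(value, new IntWritable(1));
    		}
    	}

    	@Test
    	public void lineMapperTest() throws Exception {
    		MapDriver<LongWritable, Text, Text, IntWritable> driver =
    				MapDriver.newMapDriver(new LineMapper());

    		driver.withInput(new LongWritable(1L), new Text("apache hadoop"));
    		driver.withOutput(new Text("apache hadoop"), new IntWritable(1));
    		driver.runTest();
    	}
    }
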
  • dependency
    <dependency>
        <groupId>org.apache.mrunit</groupId>
        <artifactId>mrunit</artifactId>
        <version>1.1.0</version>
        <scope>test</scope>
        <classifier>hadoop1</classifier>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>

    For Hadoop 2, specify hadoop2 as the classifier, as in the sketch below.
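
    A minimal sketch of the same dependency with the Hadoop 2 classifier
    (version and scope kept as above):

    <dependency>
        <groupId>org.apache.mrunit</groupId>
        <artifactId>mrunit</artifactId>
        <version>1.1.0</version>
        <scope>test</scope>
        <classifier>hadoop2</classifier>
    </dependency>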
  • WordCountTest.java
    package com.dbility.hadoop.chain;
    
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.MapDriver;
    import org.apache.hadoop.mrunit.PipelineMapReduceDriver;
    import org.apache.hadoop.mrunit.ReduceDriver;
    import org.junit.Before;
    import org.junit.Test;
    
    public class WordCountTest {
    
    	private MapDriver<LongWritable, Text, Text, IntWritable> tokenizerMapDriver;
    	private MapDriver<Text, IntWritable, Text, IntWritable> upperMapDriver;
    	private ReduceDriver<Text, IntWritable, Text, IntWritable> chainReduceDriver;
    	private PipelineMapReduceDriver<LongWritable, Text, Text, IntWritable> pMapReduceDriver;
    	private static final String inputString = "apache hadoop apache hbase apache tajo";
    
    	private TokenizerMapper tMapper;
    	private UpperCaseMapper uMapper;
    	private WordCountReducer wcReducer;
    
    	@Before
    	public void setUp() throws Exception {
    
    		tMapper = new TokenizerMapper();
    		uMapper = new UpperCaseMapper();
    		wcReducer = new WordCountReducer();
    
    		// drivers from the old-API (org.apache.hadoop.mrunit) package
    		tokenizerMapDriver = MapDriver.newMapDriver(tMapper);
    		upperMapDriver = MapDriver.newMapDriver(uMapper);
    		chainReduceDriver = ReduceDriver.newReduceDriver(wcReducer);

    		// pipeline of two jobs: tokenize + count, then upper-case + count
    		pMapReduceDriver = PipelineMapReduceDriver.newPipelineMapReduceDriver();
    		pMapReduceDriver.withMapReduce(tMapper, wcReducer);
    		pMapReduceDriver.withMapReduce(uMapper, wcReducer);
    	}
    
    	@Test
    	public void mapReduceTest() throws Exception {
    
    		// a single input record runs through both pipeline stages
    		pMapReduceDriver.addInput(new LongWritable(1L), new Text(inputString));
    
    		pMapReduceDriver.withOutput(new Text("APACHE"), new IntWritable(3));
    		pMapReduceDriver.withOutput(new Text("HADOOP"), new IntWritable(1));
    		pMapReduceDriver.withOutput(new Text("HBASE"), new IntWritable(1));
    		pMapReduceDriver.withOutput(new Text("TAJO"), new IntWritable(1));
    
    		pMapReduceDriver.runTest();
    	}
    
    	@Test
    	public void tokenizerMapperTest() throws Exception {
    
    		// the mapper emits one (word, 1) pair per token, in input order
    		tokenizerMapDriver.withInput(new LongWritable(1L), new Text(inputString));
    		tokenizerMapDriver.withOutput(new Text("apache"), new IntWritable(1));
    		tokenizerMapDriver.withOutput(new Text("hadoop"), new IntWritable(1));
    		tokenizerMapDriver.withOutput(new Text("apache"), new IntWritable(1));
    		tokenizerMapDriver.withOutput(new Text("hbase"), new IntWritable(1));
    		tokenizerMapDriver.withOutput(new Text("apache"), new IntWritable(1));
    		tokenizerMapDriver.withOutput(new Text("tajo"), new IntWritable(1));
    
    		tokenizerMapDriver.runTest();
    	}
    
    	@Test
    	public void upperMapperTest() throws Exception {
    
    		upperMapDriver.withInput(new Text("apache"), new IntWritable(1));
    		upperMapDriver.withInput(new Text("hadoop"), new IntWritable(1));
    		upperMapDriver.withInput(new Text("apache"), new IntWritable(1));
    		upperMapDriver.withInput(new Text("hbase"), new IntWritable(1));
    		upperMapDriver.withInput(new Text("apache"), new IntWritable(1));
    		upperMapDriver.withInput(new Text("tajo"), new IntWritable(1));
    
    		upperMapDriver.withOutput(new Text("APACHE"), new IntWritable(1));
    		upperMapDriver.withOutput(new Text("HADOOP"), new IntWritable(1));
    		upperMapDriver.withOutput(new Text("APACHE"), new IntWritable(1));
    		upperMapDriver.withOutput(new Text("HBASE"), new IntWritable(1));
    		upperMapDriver.withOutput(new Text("APACHE"), new IntWritable(1));
    		upperMapDriver.withOutput(new Text("TAJO"), new IntWritable(1));
    
    		upperMapDriver.runTest();
    
    	}
    
    	@Test
    	public void chainReducerTest() throws Exception {
    
    		// a reducer receives each key together with the list of its values
    		List<IntWritable> apacheList = new ArrayList<IntWritable>(Arrays.asList(new IntWritable(1), new IntWritable(1), new IntWritable(1)));
    		List<IntWritable> list = new ArrayList<IntWritable>(Arrays.asList(new IntWritable(1)));
    
    		chainReduceDriver.withInput(new Text("APACHE"), apacheList);
    		chainReduceDriver.withInput(new Text("HADOOP"), list);
    		chainReduceDriver.withInput(new Text("HBASE"), list);
    		chainReduceDriver.withInput(new Text("TAJO"), list);
    
    		chainReduceDriver.withOutput(new Text("APACHE"), new IntWritable(3));
    		chainReduceDriver.withOutput(new Text("HADOOP"), new IntWritable(1));
    		chainReduceDriver.withOutput(new Text("HBASE"), new IntWritable(1));
    		chainReduceDriver.withOutput(new Text("TAJO"), new IntWritable(1));
    
    		chainReduceDriver.runTest();
    	}
    }
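
    The TokenizerMapper, UpperCaseMapper, and WordCountReducer classes above
    are not listed in this post (they come from the earlier chain word-count
    example). Because the top-level org.apache.hadoop.mrunit drivers and
    PipelineMapReduceDriver target the old mapred API, the three classes
    would look roughly like the sketch below; the bodies here are assumed
    for illustration, not copied from the original source.

    package com.dbility.hadoop.chain;

    import java.io.IOException;
    import java.util.Iterator;
    import java.util.StringTokenizer;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;

    // splits each input line into words and emits (word, 1)
    class TokenizerMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {

    	private final static IntWritable one = new IntWritable(1);
    	private Text word = new Text();

    	public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    		StringTokenizer itr = new StringTokenizer(value.toString());
    		while (itr.hasMoreTokens()) {
    			word.set(itr.nextToken());
    			output.collect(word, one);
    		}
    	}
    }

    // upper-cases the word key and passes the count through unchanged
    class UpperCaseMapper extends MapReduceBase implements Mapper<Text, IntWritable, Text, IntWritable> {

    	public void map(Text key, IntWritable value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    		output.collect(new Text(key.toString().toUpperCase()), value);
    	}
    }

    // sums the counts collected for each word
    class WordCountReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

    	public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    		int sum = 0;
    		while (values.hasNext()) {
    			sum += values.next().get();
    		}
    		output.collect(key, new IntWritable(sum));
    	}
    }

    With these three classes on the test classpath, mvn test runs all four
    tests above in memory, without a running Hadoop cluster.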

Attachment: wordcount.zip

Reference: 시작하세요! 하둡 프로그래밍 (Beginning Hadoop Programming), revised 2nd edition, Wikibooks, by 정재화
