Java MiniMRYarnCluster,在本地运行MR
我正在尝试使用MiniMRYarnCluster在本地运行MR jobs。我使用的是旧的mapreduce(不是YARN)和mapreduce API v2。相关类可以在以下依赖中找到:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>2.0.0-cdh4.1.1</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
这里有一个例外:
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/yarn/service/CompositeService
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClassCond(ClassLoader.java:631)
at java.lang.ClassLoader.defineClass(ClassLoader.java:615)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:141)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:283)
at java.net.URLClassLoader.access$000(URLClassLoader.java:58)
at java.net.URLClassLoader$1.run(URLClassLoader.java:197)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.yarn.service.CompositeService
at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
... 12 more
Could not find the main class: org.apache.hadoop.mapreduce.v2.app.MRAppMaster. Program will exit.
我已经使用org.apache.hadoop.mapreduce.v2.TestMRJobs作为我自己测试的基础。有人遇到过这个问题吗
这是我的代码,它是在CI服务器或开发人员计算机上本地测试MR作业的抽象基类:
public abstract class AbstractClusterMapReduceTest {
private static final Log LOG = LogFactory.getLog(AbstractClusterMapReduceTest.class);
public static final String DEFAULT_LOG_CATALOG = "local-mr-logs";
private static final int DEFAULT_NAMENODE_PORT = 50123;
private static final int ONE_DATANODE = 1;
private static final int DEFAULT_REDUCE_NUM_TASKS = 1;
private static final String SLASH = "/";
private static final String DEFAULT_MR_INPUT_DATA_FILE = "mr-input-data-file";
private MiniMRYarnCluster mrCluster;
private MiniDFSCluster dfsCluster;
/** Shitty code from base Cloudera example*/
private static Path TEST_ROOT_DIR = new Path("target",
AbstractClusterMapReduceTest.class.getName() + "-tmpDir").makeQualified(getLocalFileSystem());
static Path APP_JAR = new Path(TEST_ROOT_DIR, "MRAppJar.jar");
private static FileSystem getLocalFileSystem(){
try {
return FileSystem.getLocal(new Configuration());
} catch (IOException e) {
throw new Error("Can't access local file system. MR cluster can't be started", e);
}
}
/**
* Always provide path to log catalog.
* Default is: ${project.build.directory}/{@link AbstractClusterMapReduceTest#DEFAULT_LOG_CATALOG}
* */
protected String getPathToLogCatalog(){
return getPathToOutputDirectory()+ SLASH + DEFAULT_LOG_CATALOG;
}
private String getPathToOutputDirectory(){
return System.getProperty("project.build.directory");
}
private void checkAppJar(){
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
throw new Error("MRAppJar " + MiniMRYarnCluster.APPJAR+ " not found. Not running test.");
}else{
LOG.info(MiniMRYarnCluster.APPJAR + " is at the right place. Can continue to setup Env...");
}
}
public void setupEnv() throws IOException{
checkAppJar();
System.setProperty("hadoop.log.dir", getPathToLogCatalog());
System.setProperty("javax.xml.parsers.SAXParserFactory",
"com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl");
dfsCluster = buildMiniDFSCluster();
//dfsCluster.getFileSystem().makeQualified(createPath(getHDFSPathToInputData()));
//dfsCluster.getFileSystem().makeQualified(createPath(getOutputPath()));
mrCluster = new MiniMRYarnCluster(this.getClass().getName(), 1);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", getFileSystem().getUri().toString()); // use HDFS
//conf.set(MRJobConfig.MR_AM_STAGING_DIR, getPathToOutputDirectory()+"/tmp-mapreduce");
conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
mrCluster.init(conf);
mrCluster.start();
//Cloudera tricks :)
// Copy MRAppJar and make it private. TODO: FIXME. This is a hack to
// workaround the absent public discache.
getLocalFileSystem().copyFromLocalFile(new Path(MiniMRYarnCluster.APPJAR), APP_JAR);
getLocalFileSystem().setPermission(APP_JAR, new FsPermission("700"));
}
public void tearDown() {
if (mrCluster != null) {
mrCluster.stop();
mrCluster = null;
}
if (dfsCluster != null) {
dfsCluster.shutdown();
dfsCluster = null;
}
}
public boolean createAndSubmitJob() throws IOException, ClassNotFoundException, InterruptedException{
LOG.info("createAndSubmitJob: enter");
checkAppJar();
LOG.info("MRAppJar has been found. Can start to create Job");
Configuration configuration = mrCluster.getConfig();
configuration.set(MRConfig.MASTER_ADDRESS, "local");
Job job = Job.getInstance(configuration);
job.setJobName(this.getClass().getSimpleName()+"-job");
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(getMRJobClass());
job.setJobName(getMRJobClass().getSimpleName());
job.setNumReduceTasks(getReduceNumTasks());
job.setOutputKeyClass(getOutputKeyClass());
job.setOutputValueClass(getOutputValueClass());
job.setMapperClass(getMapperClass());
job.setReducerClass(getReducerClass());
job.setInputFormatClass(getInputFormat());
job.setOutputFormatClass(getOutputFormat());
FileInputFormat.setInputPaths(job, getHDFSPathToInputData());
FileOutputFormat.setOutputPath(job, createPath(getOutputPath()));
job.setSpeculativeExecution(false);
job.setMaxMapAttempts(1); // speed up failures
LOG.info("Submitting job...");
job.submit();
LOG.info("Job has been submitted.");
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
LOG.info("trackingUrl:" +trackingUrl);
LOG.info("jobId:" +jobId);
return job.waitForCompletion(true);
}
protected FileSystem getFileSystem() throws IOException {
return dfsCluster.getFileSystem();
}
protected int getReduceNumTasks(){
return DEFAULT_REDUCE_NUM_TASKS;
}
/**
* @return InputStream instance to file you want to run with your MR job
* */
protected InputStream getInputStreamForInputData() {
return this.getClass().getClassLoader().getResourceAsStream(this.getClass().getSimpleName()+"/"+getInputDatasetName());
//return getPathToOutputDirectory()+ SLASH + DEFAULT_INPUT_CATALOG+"/mr-input-data";
}
protected String getHDFSPathToInputData() throws IOException{
InputStream inputStream = getInputStreamForInputData();
Path hdfsInputPath = new Path(DEFAULT_MR_INPUT_DATA_FILE);
FSDataOutputStream fsDataOutputStream = getFileSystem().create(hdfsInputPath);
copyStream(inputStream, fsDataOutputStream);
fsDataOutputStream.close();
inputStream.close();
return hdfsInputPath.toString();
}
private void copyStream(InputStream input, OutputStream output) throws IOException {
byte[] buffer = new byte[1024]; // Adjust if you want
int bytesRead;
while ((bytesRead = input.read(buffer)) != -1)
{
output.write(buffer, 0, bytesRead);
}
}
/**
* Dataset should be placed in resources/ConcreteClusterMapReduceTest
* @return a name of a file from catalog.
* */
protected abstract String getInputDatasetName();
/**
* @return path reducer output
* default is: @{link AbstractClusterMapReduceTest#DEFAULT_OUTPUT_CATALOG}
* */
protected String getOutputPath(){
return "mr-data-output";
}
/**
* Creates @{link Path} using absolute path to some FS resource
* @return new Path instance.
* */
protected Path createPath(String pathToFSResource){
return new Path(pathToFSResource);
}
/**
* Builds new instance of MiniDFSCluster
* Default: @{link DEFAULT_NAMENODE_PORT}, @{link DEFAULT_NAMENODE_PORT}
* @return MiniDFSCluster instance.
* */
protected MiniDFSCluster buildMiniDFSCluster() throws IOException {
return new MiniDFSCluster.Builder(new Configuration())
.nameNodePort(DEFAULT_NAMENODE_PORT)
.numDataNodes(ONE_DATANODE)
.build();
}
protected abstract Class<? extends Configured> getMRJobClass();
protected abstract Class<? extends Mapper> getMapperClass();
protected abstract Class<? extends Reducer> getReducerClass();
protected abstract Class<? extends InputFormat> getInputFormat();
protected abstract Class<? extends OutputFormat> getOutputFormat();
protected abstract Class<?> getOutputKeyClass();
protected abstract Class<?> getOutputValueClass();
}
公共抽象类AbstractClusterMapReduceTest{
私有静态最终日志日志=LogFactory.getLog(AbstractClusterMapReduceTest.class);
公共静态最终字符串DEFAULT\u LOG\u CATALOG=“local mr logs”;
私有静态final int DEFAULT_NAMENODE_PORT=50123;
私有静态final int ONE_DATANODE=1;
私有静态final int DEFAULT\u REDUCE\u NUM\u TASKS=1;
私有静态最终字符串斜杠=“/”;
私有静态最终字符串默认值\u MR\u INPUT\u DATA\u FILE=“MR INPUT DATA FILE”;
私有最小arncluster-mrCluster;
私有小型集群;
/**来自基本Cloudera示例的糟糕代码*/
私有静态路径测试\u ROOT\u DIR=新路径(“目标”,
AbstractClusterMapReduceTest.class.getName()+“-tmpDir”).makeQualified(getLocalFileSystem());
静态路径APP_JAR=新路径(TEST_ROOT_DIR,“MRAppJar.JAR”);
私有静态文件系统getLocalFileSystem(){
试一试{
返回FileSystem.getLocal(新配置());
}捕获(IOE异常){
抛出新错误(“无法访问本地文件系统。无法启动MR群集”,e);
}
}
/**
*始终提供日志目录的路径。
*默认值为:${project.build.directory}/{@link AbstractClusterMapReduceTest#Default_LOG_CATALOG}
* */
受保护的字符串getPathToLogCatalog(){
返回getPathToOutputDirectory()+斜杠+默认日志目录;
}
私有字符串getPathToOutputDirectory(){
返回System.getProperty(“project.build.directory”);
}
私有void checkAppJar(){
如果(!(新文件(MiniMRYarnCluster.APPJAR)).exists()){
抛出新错误(“MRAppJar”+MiniMRYarnCluster.APPJAR+“未找到。未运行测试”);
}否则{
LOG.info(MiniMRYarnCluster.APPJAR+“位于正确的位置。可以继续设置Env…”);
}
}
public void setupEnv()引发IOException{
checkAppJar();
setProperty(“hadoop.log.dir”,getPathToLogCatalog());
System.setProperty(“javax.xml.parsers.SAXParserFactory”,
“com.sun.org.apache.xerces.internal.jaxp.saxparserfactorympl”);
dfsCluster=buildMiniDFSCluster();
//dfsCluster.getFileSystem().makeQualified(createPath(getHDFSPathToInputData());
//dfsCluster.getFileSystem().makeQualified(createPath(getOutputPath());
mrCluster=new MiniMRYarnCluster(this.getClass().getName(),1);
Configuration conf=新配置();
conf.set(“fs.defaultFS”,getFileSystem().getUri().toString());//使用HDFS
//conf.set(MRJobConfig.MR_AM_STAGING_DIR,getPathToOutputDirectory()+“/tmp mapreduce”);
conf.set(MRJobConfig.MR_AM_STAGING_DIR,“/apps_STAGING_DIR”);
mrCluster.init(conf);
mrCluster.start();
//Cloudera技巧:)
//复制MRAppJar并将其私有化。TODO:修复我。这是对
//绕过缺席的公众讨论会。
getLocalFileSystem().copyFromLocalFile(新路径(MiniMRYarnCluster.APPJAR),APP\u JAR);
getLocalFileSystem().setPermission(APP_JAR,新的FsPermission(“700”);
}
公共无效拆卸(){
if(mrCluster!=null){
mrCluster.stop();
mrCluster=null;
}
如果(dfsCluster!=null){
dfsCluster.shutdown();
dfsCluster=null;
}
}
公共布尔createAndSubmitJob()引发IOException、ClassNotFoundException、InterruptedException{
LOG.info(“createAndSubmitJob:enter”);
checkAppJar();
LOG.info(“已找到MRAppJar。可以开始创建作业”);
Configuration=mrCluster.getConfig();
set(MRConfig.MASTER_地址,“本地”);
Job Job=Job.getInstance(配置);
job.setJobName(this.getClass().getSimpleName()+“-job”);
job.addFileToClassPath(APP_JAR);//AppMaster JAR本身。
setJarByClass(getMRJobClass());
job.setJobName(getMRJobClass().getSimpleName());
setNumReduceTasks(getReduceEnumTasks());
setOutputKeyClass(getOutputKeyClass());
setOutputValueClass(getOutputValueClass());
setMapperClass(getMapperClass());
setReducerClass(getReducerClass());
setInputFormatClass(getInputFormat());
setOutputFormatClass(getOutputFormat());
setInputPath(作业,getHDFSPathToInputData());
setOutputPath(作业,createPath(getOutputPath());
job.setSpecificationExecution(false);
job.setMaxMapAttempts(1);//加速失败
LOG.info(“提交作业…”);
job.submit();
LOG.info(“作业已提交”);
String trackingUrl=job.getTrackingURL();
字符串jobId=job.getJobID().toString();
LOG.info(“trackingUrl:+trackingUrl”);
LOG.info(“jobId:+jobId”);
返回作业。waitForCompletion(true);
}
受保护的文件系统getFileSystem()引发IOException{
返回dfsCluster.getFileSystem();
}
受保护的int getReduceEnumTasks(){
返回默认任务数量;
}
/**
*@return InputStream实例到要与MR作业一起运行的文件
* */
受保护的InputStream getInputStreamForInputData(){
返回此.getClass().getClassLoader().getResourceAsStream(此.getClass().getSimpleName()+“/”+getInputDatasetName());
//返回getPathToOutputDirectory()+斜杠+默认值\u输入\u目录+”/mr
/**
 * Abstract base class for running MR jobs on an in-process MiniMRYarnCluster
 * (YARN) backed by a MiniDFSCluster — an integration-test harness for a CI
 * server or a developer machine. Subclasses supply the job/mapper/reducer
 * classes and an input dataset resource name.
 */
public abstract class AbstractClusterMapReduceTest {
private static final Log LOG = LogFactory.getLog(AbstractClusterMapReduceTest.class);
// Log catalog name under ${project.build.directory}.
public static final String DEFAULT_LOG_CATALOG = "local-mr-logs";
private static final int DEFAULT_NAMENODE_PORT = 50123;
private static final int ONE_DATANODE = 1;
private static final int DEFAULT_REDUCE_NUM_TASKS = 1;
private static final String SLASH = "/";
// Name the input dataset gets when staged into the mini HDFS.
private static final String DEFAULT_MR_INPUT_DATA_FILE = "mr-input-data-file";
private MiniMRYarnCluster mrCluster;
private MiniDFSCluster dfsCluster;
/** Staging layout borrowed from the base Cloudera example (TestMRJobs). */
private static Path TEST_ROOT_DIR = new Path("target",
AbstractClusterMapReduceTest.class.getName() + "-tmpDir").makeQualified(getLocalFileSystem());
static Path APP_JAR = new Path(TEST_ROOT_DIR, "MRAppJar.jar");
// Local FS is required for staging the MRAppMaster jar; fail hard otherwise.
private static FileSystem getLocalFileSystem(){
try {
return FileSystem.getLocal(new Configuration());
} catch (IOException e) {
throw new Error("Can't access local file system. MR cluster can't be started", e);
}
}
/**
 * Always provide path to log catalog.
 * Default is: ${project.build.directory}/{@link AbstractClusterMapReduceTest#DEFAULT_LOG_CATALOG}
 * */
protected String getPathToLogCatalog(){
return getPathToOutputDirectory()+ SLASH + DEFAULT_LOG_CATALOG;
}
// Resolved from a system property the build is expected to set
// (e.g. via surefire systemPropertyVariables); may be null if unset.
private String getPathToOutputDirectory(){
return System.getProperty("project.build.directory");
}
// Fails fast when the MRAppMaster jar has not been built/staged yet.
private void checkAppJar(){
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
throw new Error("MRAppJar " + MiniMRYarnCluster.APPJAR+ " not found. Not running test.");
}else{
LOG.info(MiniMRYarnCluster.APPJAR + " is at the right place. Can continue to setup Env...");
}
}
// Boots mini DFS + mini YARN and stages the MRAppMaster jar via the local FS.
public void setupEnv() throws IOException{
checkAppJar();
System.setProperty("hadoop.log.dir", getPathToLogCatalog());
// Pin the JDK-bundled SAX parser to avoid a conflicting parser on the test classpath.
System.setProperty("javax.xml.parsers.SAXParserFactory",
"com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl");
dfsCluster = buildMiniDFSCluster();
//dfsCluster.getFileSystem().makeQualified(createPath(getHDFSPathToInputData()));
//dfsCluster.getFileSystem().makeQualified(createPath(getOutputPath()));
mrCluster = new MiniMRYarnCluster(this.getClass().getName(), 1);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", getFileSystem().getUri().toString()); // use HDFS
//conf.set(MRJobConfig.MR_AM_STAGING_DIR, getPathToOutputDirectory()+"/tmp-mapreduce");
conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
mrCluster.init(conf);
mrCluster.start();
//Cloudera tricks :)
// Copy MRAppJar and make it private. TODO: FIXME. This is a hack to
// workaround the absent public discache.
getLocalFileSystem().copyFromLocalFile(new Path(MiniMRYarnCluster.APPJAR), APP_JAR);
getLocalFileSystem().setPermission(APP_JAR, new FsPermission("700"));
}
// Stops both clusters; idempotent and safe when setup never ran.
public void tearDown() {
if (mrCluster != null) {
mrCluster.stop();
mrCluster = null;
}
if (dfsCluster != null) {
dfsCluster.shutdown();
dfsCluster = null;
}
}
/**
 * Builds the job from subclass-provided classes, submits it to the mini
 * cluster and blocks until completion.
 * @return true when the job completed successfully.
 * */
public boolean createAndSubmitJob() throws IOException, ClassNotFoundException, InterruptedException{
LOG.info("createAndSubmitJob: enter");
checkAppJar();
LOG.info("MRAppJar has been found. Can start to create Job");
Configuration configuration = mrCluster.getConfig();
configuration.set(MRConfig.MASTER_ADDRESS, "local");
Job job = Job.getInstance(configuration);
job.setJobName(this.getClass().getSimpleName()+"-job");
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(getMRJobClass());
// NOTE(review): this second setJobName() silently overrides the one above.
job.setJobName(getMRJobClass().getSimpleName());
job.setNumReduceTasks(getReduceNumTasks());
job.setOutputKeyClass(getOutputKeyClass());
job.setOutputValueClass(getOutputValueClass());
job.setMapperClass(getMapperClass());
job.setReducerClass(getReducerClass());
job.setInputFormatClass(getInputFormat());
job.setOutputFormatClass(getOutputFormat());
FileInputFormat.setInputPaths(job, getHDFSPathToInputData());
FileOutputFormat.setOutputPath(job, createPath(getOutputPath()));
job.setSpeculativeExecution(false);
job.setMaxMapAttempts(1); // speed up failures
LOG.info("Submitting job...");
job.submit();
LOG.info("Job has been submitted.");
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
LOG.info("trackingUrl:" +trackingUrl);
LOG.info("jobId:" +jobId);
return job.waitForCompletion(true);
}
protected FileSystem getFileSystem() throws IOException {
return dfsCluster.getFileSystem();
}
// Number of reduce tasks; override to change from the default of 1.
protected int getReduceNumTasks(){
return DEFAULT_REDUCE_NUM_TASKS;
}
/**
 * @return InputStream instance to file you want to run with your MR job
 *         (classpath resource SimpleClassName/getInputDatasetName());
 *         may be null when the resource is missing
 * */
protected InputStream getInputStreamForInputData() {
return this.getClass().getClassLoader().getResourceAsStream(this.getClass().getSimpleName()+"/"+getInputDatasetName());
//return getPathToOutputDirectory()+ SLASH + DEFAULT_INPUT_CATALOG+"/mr-input-data";
}
// Stages the input dataset into the mini HDFS and returns its HDFS path.
// NOTE(review): if the resource is missing, copyStream() below fails with an NPE.
protected String getHDFSPathToInputData() throws IOException{
InputStream inputStream = getInputStreamForInputData();
Path hdfsInputPath = new Path(DEFAULT_MR_INPUT_DATA_FILE);
FSDataOutputStream fsDataOutputStream = getFileSystem().create(hdfsInputPath);
copyStream(inputStream, fsDataOutputStream);
fsDataOutputStream.close();
inputStream.close();
return hdfsInputPath.toString();
}
// Copies all bytes from input to output; neither stream is closed here.
private void copyStream(InputStream input, OutputStream output) throws IOException {
byte[] buffer = new byte[1024]; // Adjust if you want
int bytesRead;
while ((bytesRead = input.read(buffer)) != -1)
{
output.write(buffer, 0, bytesRead);
}
}
/**
 * Dataset should be placed in resources/ConcreteClusterMapReduceTest
 * @return a name of a file from catalog.
 * */
protected abstract String getInputDatasetName();
/**
 * @return path reducer output
 * default is: "mr-data-output"
 * */
protected String getOutputPath(){
return "mr-data-output";
}
/**
 * Creates {@link Path} using absolute path to some FS resource
 * @return new Path instance.
 * */
protected Path createPath(String pathToFSResource){
return new Path(pathToFSResource);
}
/**
 * Builds new instance of MiniDFSCluster with one datanode on
 * {@link #DEFAULT_NAMENODE_PORT}.
 * @return MiniDFSCluster instance.
 * */
protected MiniDFSCluster buildMiniDFSCluster() throws IOException {
return new MiniDFSCluster.Builder(new Configuration())
.nameNodePort(DEFAULT_NAMENODE_PORT)
.numDataNodes(ONE_DATANODE)
.build();
}
protected abstract Class<? extends Configured> getMRJobClass();
protected abstract Class<? extends Mapper> getMapperClass();
protected abstract Class<? extends Reducer> getReducerClass();
protected abstract Class<? extends InputFormat> getInputFormat();
protected abstract Class<? extends OutputFormat> getOutputFormat();
protected abstract Class<?> getOutputKeyClass();
protected abstract Class<?> getOutputValueClass();
}
/**
 * Concrete integration test: runs the POI MapReduce job on the in-process
 * YARN + HDFS mini clusters provided by {@link AbstractClusterMapReduceTest}.
 * TestNG lifecycle: the cluster is started once per class and torn down after.
 */
public class POIClusterMapreduceTest extends AbstractClusterMapReduceTest{
    private static final String INTEGRATION = "integration";
    /** The first (and, with one reducer, only) reducer output file name. */
    private static final String FIRST_REDUCER_OUTPUT_FILE = "part-r-00000";

    @BeforeClass(groups = INTEGRATION)
    public void setup() throws IOException {
        super.setupEnv();
    }
    //@Test(groups = INTEGRATION)
    public void runJob() throws InterruptedException, IOException, ClassNotFoundException {
        boolean result = createAndSubmitJob();
        MatcherAssert.assertThat(result, Matchers.is(true));
        // BUG FIX: getOutputPath() is a directory; open the reducer's part file
        // inside it instead of the directory itself. TextOutputFormat emits
        // plain text, so read it with a Reader rather than readUTF() (which
        // expects DataOutput.writeUTF framing).
        Path resultFile = createPath(getOutputPath() + "/" + FIRST_REDUCER_OUTPUT_FILE);
        java.io.BufferedReader reader = new java.io.BufferedReader(
                new java.io.InputStreamReader(getFileSystem().open(resultFile), "UTF-8"));
        StringBuilder outputResult = new StringBuilder();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                outputResult.append(line).append('\n');
            }
        } finally {
            reader.close();
        }
        MatcherAssert.assertThat(outputResult.length(), Matchers.greaterThan(0));
    }
    @AfterClass(groups = INTEGRATION)
    public void tearDown(){
        super.tearDown();
    }
    @Override
    protected Class<Main> getMRJobClass() {
        return Main.class;
    }
    @Override
    protected Class<POIMapper> getMapperClass() {
        return POIMapper.class;
    }
    @Override
    protected Class<Reducer> getReducerClass() {
        // Identity reducer: the base Hadoop Reducer passes values through.
        return Reducer.class;
    }
    @Override
    protected Class<TextInputFormat> getInputFormat() {
        return TextInputFormat.class;
    }
    @Override
    protected Class<TextOutputFormat> getOutputFormat() {
        return TextOutputFormat.class;
    }
    @Override
    protected Class<LongWritable> getOutputKeyClass() {
        return LongWritable.class;
    }
    @Override
    protected Class<XVLRDataWritable> getOutputValueClass() {
        return XVLRDataWritable.class;
    }
    @Override
    protected String getInputDatasetName() {
        // Resolved as classpath resource POIClusterMapreduceTest/mr-input-data.
        return "mr-input-data";
    }
}
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.common.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.common.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<classifier>tests</classifier>
<version>${hadoop.common.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-test</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>2.0.0-mr1-cdh4.1.1</hadoop.version>
<hadoop.common.version>2.0.0-cdh4.1.1</hadoop.common.version>
</properties>
/**
 * Abstract base class for MR integration tests against the classic MR1
 * {@code MiniMRCluster}, reading input from and writing output to the local
 * file system. Subclasses supply the input dataset name and, optionally, an
 * "etalon" (expected) output file for result comparison.
 */
public abstract class AbstractClusterMRTest {
    private static final Log LOG = LogFactory.getLog(AbstractClusterMRTest.class);
    /** Catalog (under ${project.build.directory}) where Hadoop writes its logs. */
    public static final String DEFAULT_LOG_CATALOG = "local-mr-logs";
    public static final String SLASH = "/";
    /** Catalog name for reducer output under the build directory. */
    public static final String MR_DATA_OUTPUT = "mr-data-output";
    /** Name of the first reducer's output file inside the output catalog. */
    public static final String DEFAULT_OUTPUT_FILE_NAME = "part-r-00000";
    private static final int DEFAULT_REDUCE_NUM_TASKS = 1;
    private Configuration configuration;
    private FileSystem localFileSystem;
    private MiniMRCluster mrCluster;
    private JobConf mrClusterConf;

    /**
     * Always provide path to log catalog, creating it when absent.
     */
    protected String getPathToLogCatalog() {
        File logCatalog = new File(getPathToOutputDirectory() + SLASH + DEFAULT_LOG_CATALOG);
        // BUG FIX: mkdirs() (not mkdir()) also creates a missing parent build
        // directory, and a failure is now reported instead of swallowed.
        if (!logCatalog.exists() && !logCatalog.mkdirs()) {
            throw new Error("Can't create log catalog at: " + logCatalog.getAbsolutePath());
        }
        LOG.info("Path to log catalog is: " + logCatalog.getAbsolutePath());
        return logCatalog.getAbsolutePath();
    }

    private String getPathToOutputDirectory() {
        // Expected to be set by the build (e.g. surefire systemPropertyVariables).
        return System.getProperty("project.build.directory");
    }

    /** Boots a single-tracker MiniMRCluster on top of the local file system. */
    public void setup() throws IOException {
        System.setProperty("hadoop.log.dir", getPathToLogCatalog());
        configuration = new Configuration(true);
        localFileSystem = FileSystem.get(configuration);
        mrCluster = new MiniMRCluster(1, localFileSystem.getUri().toString(), 1, null, null, new JobConf(configuration));
        mrClusterConf = mrCluster.createJobConf();
    }

    /** Shuts the mini cluster down; safe to call when setup never ran. */
    public void tearDown() {
        if (mrCluster != null) {
            mrCluster.shutdown();
            mrCluster = null;
        }
    }

    /**
     * Use this method to get JobBuilder configured for testing purposes.
     * @return JobBuilder instance ready for further configuration.
     */
    public JobBuilder createTestableJobInstance() throws IOException {
        return new JobBuilder(mrClusterConf, this.getClass().getSimpleName() + "-mrjob")
                .withNumReduceTasks(DEFAULT_REDUCE_NUM_TASKS);
    }

    /**
     * Pass configured JobBuilder and wait for completion.
     * @param jobBuilder is a JobBuilder ready to submit
     * @return job completion result.
     */
    public boolean buildSubmitAndWaitForCompletion(JobBuilder jobBuilder)
            throws InterruptedException, IOException, ClassNotFoundException {
        String pathToInputFile = getPathToInputData();
        checkThatFileExists(pathToInputFile);
        Job job = jobBuilder.build();
        FileInputFormat.setInputPaths(job, pathToInputFile);
        FileOutputFormat.setOutputPath(job, createPath(getOutputPath()));
        LOG.info("Submitting job...");
        job.submit();
        LOG.info("Job has been submitted.");
        String trackingUrl = job.getTrackingURL();
        String jobId = job.getJobID().toString();
        LOG.info("trackingUrl:" + trackingUrl);
        LOG.info("jobId:" + jobId);
        return job.waitForCompletion(true);
    }

    /**
     * By declaration input data should be stored in test/resources folder:
     * ConcreteTestClassName/in/getInputDatasetName()
     * Don't forget to override {@link AbstractClusterMRTest#getInputDatasetName()}
     * @return path to input data
     */
    protected String getPathToInputData() {
        String pathFile = this.getClass().getSimpleName() + SLASH + "in" + SLASH + getInputDatasetName();
        LOG.info("Path for getting URL to file:" + pathFile);
        URL urlToFile = this.getClass().getClassLoader().getResource(pathFile);
        if (urlToFile == null) {
            // BUG FIX: FileUtils.toFile(null) returns null, so the original
            // code failed later with a bare NPE; report the missing resource.
            throw new Error("Input dataset not found on classpath: " + pathFile);
        }
        File file = FileUtils.toFile(urlToFile);
        return file.getAbsolutePath();
    }

    /**
     * Dataset should be placed in resources/ConcreteClusterMapReduceTest
     * @return a name of a file from catalog.
     */
    protected abstract String getInputDatasetName();

    /**
     * @return path reducer output; default is
     *         ${project.build.directory}/{@link #MR_DATA_OUTPUT}
     */
    protected String getOutputPath() {
        return getPathToOutputDirectory() + SLASH + MR_DATA_OUTPUT;
    }

    /**
     * @return text lines from reducer output file.
     */
    protected List<String> getLinesFromOutputFile() throws IOException {
        String pathToResult = getOutputPath() + SLASH + DEFAULT_OUTPUT_FILE_NAME;
        File resultFile = new File(pathToResult);
        return FileUtils.readLines(resultFile);
    }

    /** @return classpath file name of the expected ("etalon") output. */
    public abstract String getEtalonOutputFileName();

    /**
     * Loads the expected output lines from the classpath resource
     * SimpleClassName/out/getEtalonOutputFileName().
     */
    protected List<String> getLinesFromEtalonOutputFile() throws IOException {
        String pathFile = this.getClass().getSimpleName() + SLASH + "out" + SLASH + getEtalonOutputFileName();
        LOG.debug("path to etalon file: " + pathFile);
        URL urlToFile = this.getClass().getClassLoader().getResource(pathFile);
        if (urlToFile == null) {
            // Same NPE-avoidance as getPathToInputData().
            throw new Error("Etalon output file not found on classpath: " + pathFile);
        }
        File file = FileUtils.toFile(urlToFile);
        return FileUtils.readLines(file);
    }

    /**
     * Creates {@link Path} using absolute path to some FS resource.
     * @return new Path instance.
     */
    protected Path createPath(String pathToFSResource) {
        return new Path(pathToFSResource);
    }

    /** Fails fast with a descriptive message when the input file is absent. */
    public void checkThatFileExists(String absolutePathToFile) {
        if (!new File(absolutePathToFile).exists()) {
            throw new Error("Path to input file is incorrect. Can't run MR job. Incorrect path is:" + absolutePathToFile);
        }
    }
}