Database: Error connecting to a MySQL database from a MapReduce job (Hadoop)

I am trying to connect to a MySQL database from a MapReduce job with the code below, and I am getting the error posted after it. I have placed checkpoint print statements in the code; they show that everything runs fine up to the point where the job is actually submitted, and the job fails after that.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;


public class TweetWordCount {


    public static class TweetWordCountMapper extends MapReduceBase implements
            Mapper<LongWritable, GetTweets, Text, IntWritable> {
        private final static IntWritable intTwordsCount = new IntWritable(1);
        private Text strTwoken = new Text();

        public void map(LongWritable key, GetTweets value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            System.out.println("checkpoint4");
            GetTweets tweets = new GetTweets();
            tweets.strTweet = value.strTweet;
            //TwitterTokenizer twokenizer = new TwitterTokenizer();
            //List<String> twokens = twokenizer.twokenize(value.strTweet.toString());

            output.collect(new Text(value.strTweet.toString()), intTwordsCount);
            System.out.println("checkpoint5");

        }

    }


    public static class TweetWordCountReducer extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            System.out.println("checkpoint6");
            int intTwokenCount = 0;
            while (values.hasNext()) {
                intTwokenCount += values.next().get();
            }
            output.collect(key, new IntWritable(intTwokenCount));
            System.out.println("checkpoint6");
        }
    }


    public static void main(String[] args) throws Exception {

        System.out.println("checkpoint1");
          JobConf twokenJobConf = new JobConf(new Configuration(),TweetWordCount.class);
          //JobConf twokenJobConf = new JobConf(TweetWordCount.class);
          twokenJobConf.setJobName("twoken_count");

          twokenJobConf.setInputFormat(DBInputFormat.class); //Set input format here
          twokenJobConf.setOutputFormat(TextOutputFormat.class);// Sets the output format

          Object out = new Path("twokens");

          twokenJobConf.setMapperClass(TweetWordCountMapper.class);
          twokenJobConf.setCombinerClass(TweetWordCountReducer.class);
          twokenJobConf.setReducerClass(TweetWordCountReducer.class);

          twokenJobConf.setOutputKeyClass(Text.class);
          twokenJobConf.setOutputValueClass(IntWritable.class);

          DBConfiguration.configureDB(twokenJobConf, "com.mysql.jdbc.Driver",
                  "jdbc:mysql://localhost/test", "root", "root"); //Specifies the DB configuration

          String[] fields = {"Tweet"}; //Specifies the Fields to be fetched from DB
          DBInputFormat.setInput(twokenJobConf, GetTweets.class, "NewGamil",
                  null /* conditions */, "Tweet", fields); // Specifies the DB table and fields

          //SequenceFileOutputFormat.setOutputPath(twokenJobConf, out);
          FileOutputFormat.setOutputPath(twokenJobConf, out);
          System.out.println("checkpoint2");
          JobClient.runJob(twokenJobConf);
          System.out.println("checkpoint3");

    }


    // Record type for one row of the input table: DBInputFormat fills it via
    // readFields(ResultSet), and Hadoop serializes it via the Writable methods.
    public static class GetTweets implements Writable, DBWritable {
        String strTweet;

        public GetTweets() {

        }

        public void readFields(DataInput in) throws IOException {
            System.out.println("checkpoint 2a");
            this.strTweet = Text.readString(in);
        }

        public void readFields(ResultSet resultSet) throws SQLException {
            System.out.println("checkpoint 3a");
            // this.id = resultSet.getLong(1);
            this.strTweet = resultSet.getString(1);
        }

        public void write(DataOutput out) throws IOException {
            // not used: this job only reads from the database
        }

        public void write(PreparedStatement stmt) throws SQLException {
            // not used: this job only reads from the database
        }

    }


}


rv@ramanujan:~$ hadoop jar Twit.jar 
Warning: $HADOOP_HOME is deprecated.

checkpoint1
checkpoint2
13/03/22 17:16:12 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/03/22 17:16:12 INFO mapred.JobClient: Cleaning up the staging area hdfs://localhost:54310/home/rv/hadoopfiles/mapred/staging/rv/.staging/job_201303221600_0008
Exception in thread "main" java.lang.RuntimeException: Error in configuring object
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93)
    at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)
    at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)
    at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:981)
    at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:973)
    at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:889)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:842)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:842)
    at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:816)
    at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1253)
    at TweetWordCount.main(TweetWordCount.java:107)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:616)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:616)
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)
    ... 20 more
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:271)
    ... 25 more
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    at java.net.URLClassLoader$1.run(URLClassLoader.java:217)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:294)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:266)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:188)
    at org.apache.hadoop.mapred.lib.db.DBConfiguration.getConnection(DBConfiguration.java:123)
    at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:266)
    ... 25 more
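
The root cause is at the bottom of the stack trace: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver. The MySQL JDBC driver jar (mysql-connector-java) is not on the classpath. Note that the exception is raised in the main thread while JobClient is computing the input splits, so the driver has to be visible to the client JVM as well as to the tasks: bundle the jar under lib/ inside Twit.jar, add it to HADOOP_CLASSPATH, or place it in $HADOOP_HOME/lib. To also ship it to the map tasks, one common approach is DistributedCache, which the code already imports. A minimal sketch, assuming the driver jar has already been uploaded to HDFS (the /lib path and the jar version are placeholders):

    // One-time upload of the driver, e.g.:
    //   hadoop fs -put mysql-connector-java-5.1.25.jar /lib/
    // Then, in main(), before submitting the job, add it to the task classpath:
    DistributedCache.addFileToClassPath(
            new Path("/lib/mysql-connector-java-5.1.25.jar"), // placeholder HDFS path
            twokenJobConf);
    JobClient.runJob(twokenJobConf);

Alternatively, once the job class implements Tool and parses its arguments with GenericOptionsParser (which the WARN line in the log is also suggesting), the jar can be passed on the command line via hadoop jar Twit.jar -libjars mysql-connector-java-5.1.25.jar.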