Java 如何在下面的代码中生成对象?
(标签: java, hadoop) 我试图理解一段 Java 代码(Java 基础知识)。下面给出 WordCountMapper 类:
package com.company;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
for (String word : line.split(" ")) {
if (word.length() > 0) {
context.write(new Text(word), new IntWritable(1));
}
}
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Stable;
@InterfaceAudience.Public
@InterfaceStability.Stable
// Hadoop library source (org.apache.hadoop.mapreduce.Mapper) pasted here for
// reference. NOTE(review): the class's closing brace is missing from this paste.
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
// No-arg constructor: the framework creates instances of the configured Mapper
// subclass itself, which is why WordCountMapper is never "new"-ed in user code.
public Mapper() {
}
// Hook called once before any map() call; default is a no-op for subclasses to override.
protected void setup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
throws IOException, InterruptedException {
}
// Default map(): identity pass-through. Subclasses (e.g. WordCountMapper)
// override this to transform each input record.
protected void map(KEYIN key, VALUEIN value, Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
throws IOException, InterruptedException {
context.write(key, value);
}
// Hook called once after the last map() call; default is a no-op.
protected void cleanup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
throws IOException, InterruptedException {
}
// Template method driven by the framework: pulls each key/value pair from the
// context and feeds it to map(). This loop is where the Text value the question
// asks about is obtained (context.getCurrentValue()) and passed in.
public void run(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
setup(context);
while (context.nextKeyValue()) {
map(context.getCurrentKey(), context.getCurrentValue(), context);
}
cleanup(context);
}
// Context: the mapper's handle for reading input records and emitting output pairs.
public abstract class Context implements MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
public Context() {
}
}
package.com公司;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Mapper;
导入java.io.IOException;
公共类WordCountMapper扩展了映射器{
@凌驾
公共void映射(LongWritable键、文本值、上下文上下文)引发IOException、InterruptedException{
字符串行=value.toString();
for(字符串字:line.split(“”){
if(word.length()>0){
编写(新文本(word),新可写(1));
}
}
映射器类
package com.company;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
for (String word : line.split(" ")) {
if (word.length() > 0) {
context.write(new Text(word), new IntWritable(1));
}
}
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Stable;
@InterfaceAudience.Public
@InterfaceStability.Stable
// Hadoop library source (org.apache.hadoop.mapreduce.Mapper) pasted here for
// reference (duplicate of the excerpt above; the page repeats its content).
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
// No-arg constructor: the framework creates instances of the configured Mapper
// subclass itself, which is why WordCountMapper is never "new"-ed in user code.
public Mapper() {
}
// Hook called once before any map() call; default is a no-op for subclasses to override.
protected void setup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
throws IOException, InterruptedException {
}
// Default map(): identity pass-through. Subclasses (e.g. WordCountMapper)
// override this to transform each input record.
protected void map(KEYIN key, VALUEIN value, Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
throws IOException, InterruptedException {
context.write(key, value);
}
// Hook called once after the last map() call; default is a no-op.
protected void cleanup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context)
throws IOException, InterruptedException {
}
// Template method driven by the framework: pulls each key/value pair from the
// context and feeds it to map(). This loop is where the Text value the question
// asks about is obtained (context.getCurrentValue()) and passed in.
public void run(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException {
setup(context);
while (context.nextKeyValue()) {
map(context.getCurrentKey(), context.getCurrentValue(), context);
}
cleanup(context);
}
// Context: the mapper's handle for reading input records and emitting output pairs.
public abstract class Context implements MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
public Context() {
}
}
package org.apache.hadoop.mapreduce;
导入java.io.IOException;
导入org.apache.hadoop.classification.InterfaceAudience.Public;
导入org.apache.hadoop.classification.InterfaceStability.Stable;
@公共接口
@界面稳定
公共类映射器{
公共映射器(){
}
受保护的无效设置(Mapper.Context上下文)
抛出IOException、InterruptedException{
}
受保护的void映射(KEYIN键、VALUEIN值、Mapper.Context)
抛出IOException、InterruptedException{
编写(键、值);
}
受保护的空洞清理(Mapper.Context上下文)
抛出IOException、InterruptedException{
}
公共void运行(Mapper.Context上下文)引发IOException、InterruptedException{
设置(上下文);
while(context.nextKeyValue()){
映射(context.getCurrentKey(),context.getCurrentValue(),context);
}
清理(上下文);
}
公共抽象类上下文实现了MapContext{
公共上下文(){
}
}
}
主方法类
package com.company;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static void main(String[] args) throws Exception {
if(args.length !=2){
System.err.println("Invalid Command");
System.err.println("Usage: WordCount <input path> <output path>");
System.exit(0);
}
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
job.setJarByClass(WordCount.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
}
package.com公司;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
公共类字数{
公共静态void main(字符串[]args)引发异常{
如果(参数长度!=2){
System.err.println(“无效命令”);
System.err.println(“用法:WordCount”);
系统出口(0);
}
Configuration conf=新配置();
Job Job=新作业(conf,“wordcount”);
job.setJarByClass(WordCount.class);
addInputPath(作业,新路径(args[0]);
setOutputPath(作业,新路径(args[1]);
setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
}
我的疑问是在WordCount类中文本值是如何产生的?我的意思是它是一个对象,但在生成它的地方,在主方法类中并没有符号来实例化文本类的实例
它的意思是-,我在创建如下格式的类之前从未见过
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
{
公共类映射器
{
有什么建议吗? 回答:您粘贴的代码旨在配合 Hadoop 使用。基本上你有三个类:
- WordCount映射器,它似乎可以拆分字符串并将其写入Hadoop流上下文
- Mapper类,它是Hadoop流媒体库的一部分
- 将作业提交到Hadoop集群的WordCount驱动程序
WordCountReducer
类,但似乎没有
任何方式:文本将作为文件复制到Hadoop集群中,并在运行作业之前必须位于HDFS(Hadoop文件系统)上,从而“存在”
这行代码指的是一条HDFS路径:
FileInputFormat.addInputPath(job, new Path(args[0]));
关于守则的问题:
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
以下是通讯:
KEYIN = LongWritable
VALUEIN = Text
KEYOUT = Text
VALUEOUT = IntWritable
您粘贴的代码旨在配合 Hadoop 使用。基本上你有三个类:
- WordCount映射器,它似乎可以拆分字符串并将其写入Hadoop流上下文
- Mapper类,它是Hadoop流媒体库的一部分
- 将作业提交到Hadoop集群的WordCount驱动程序
WordCountReducer
类,但似乎没有
任何方式:文本将作为文件复制到Hadoop集群中,并在运行作业之前必须位于HDFS(Hadoop文件系统)上,从而“存在”
这行代码指的是一条HDFS路径:
FileInputFormat.addInputPath(job, new Path(args[0]));
关于守则的问题:
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
以下是通讯:
KEYIN = LongWritable
VALUEIN = Text
KEYOUT = Text
VALUEOUT = IntWritable
Hadoop API 会自行创建必要的类实例。您可以选择设置
InputFormat
,并且需要与类在setMapperClass
中使用的输入格式相同(输入,值输入字段)。类似地,还设置了输出格式,并且存在减速机的输入和输出
默认格式是TextInputFormat
,它读取LongWritable,Text
键值对。InputSplit
类负责从文件系统读取字节,并创建传递给映射器的可写
类
值得一提的是,在你像这样开始工作之前,什么都不会产生
System.exit(job.waitForCompletion(true) ? 0 : 1);
Hadoop API创建了必要的类 您可以选择设置
InputFormat
,并且需要与类在setMapperClass
中使用的输入格式相同(输入,值输入字段)。类似地,还设置了输出格式,并且存在减速机的输入和输出
默认格式是TextInputFormat
,它读取LongWritable,Text
键值对