未设置Hadoop Pig输出目录
标签:hadoop、bigdata、apache-pig。我正在编写自己的Pig Store类。我不想把数据存储到文件中,而是计划将其发送到某个第三方数据存储(API调用部分除外)。注意:我在Cloudera的VirtualBox镜像上运行它。我已经编写了Java类(如下所列)并创建了mystore.jar,并在下面的id.pig脚本中使用它:
store B INTO 'mylocation' USING MyStore('mynewlocation')
使用pig运行此脚本时,我看到以下错误:
错误6000:
以下项的输出位置验证失败:'file://home/cloudera/test/id.out'。更多信息如下:
未设置输出目录
org.apache.pig.impl.plan.VisitorException: ERROR 6000:
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator$InputOutputFileValidator.visit(InputOutputFileValidator.java:95)
请帮忙
--------------------MyStore.java----------------------
public class MyStore extends StoreFunc {
protected RecordWriter writer = null;
private String location = null;
public MyStore () {
location= null;
}
public MyStore (String location) {
this.location= location;
}
@Override
public OutputFormat getOutputFormat() throws IOException {
return new MyStoreOutputFormat(location);
}
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
this.writer = writer;
}
@Override
public void putNext(Tuple tuple) throws IOException {
//write tuple to location
try {
writer.write(null, tuple.toString());
} catch (InterruptedException e) {
e.printStackTrace();
}
}
@Override
public void setStoreLocation(String location, Job job) throws IOException {
if(location!= null)
this.location= location;
}
}
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pig.data.Tuple;
/**
 * Output format that writes each tuple's string form to a part file in the
 * task's work directory.
 *
 * <p>The constructor argument is appended to the default part-file name as
 * its extension.
 */
public class MyStoreOutputFormat extends
        TextOutputFormat<WritableComparable, Tuple> {
    /** Extension appended to the part-file name; may be {@code null}. */
    private String location = null;

    /**
     * @param location extension for the part file; {@code null} means none
     */
    public MyStoreOutputFormat(String location) {
        this.location = location;
    }

    /**
     * Creates the part file for this task attempt and wraps it in a
     * {@link MyStoreRecordWriter}.
     *
     * @param job task attempt context supplying the configuration
     * @return writer bound to the newly created file
     * @throws IOException if the file cannot be created (it is created with
     *                     overwrite disabled)
     */
    @Override
    public RecordWriter<WritableComparable, Tuple> getRecordWriter(
            TaskAttemptContext job) throws IOException, InterruptedException {
        Configuration conf = job.getConfiguration();
        // A null extension would otherwise be appended as the text "null".
        String extension = (location == null) ? "" : location;
        Path file = getDefaultWorkFile(job, extension);
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MyStoreRecordWriter(fileOut);
    }

    /** Record writer that dumps tuples to a stream, one per line. */
    protected static class MyStoreRecordWriter extends
            RecordWriter<WritableComparable, Tuple> {
        DataOutputStream out = null;

        /**
         * @param out stream that receives the records; may be {@code null},
         *            in which case writes are skipped
         */
        public MyStoreRecordWriter(DataOutputStream out) {
            this.out = out;
        }

        /**
         * Closes the underlying stream so buffered records reach the file.
         *
         * @param taskContext unused
         */
        @Override
        public void close(TaskAttemptContext taskContext) throws IOException,
                InterruptedException {
            if (out != null) {
                out.close();
            }
        }

        /**
         * Writes the tuple's string form as one UTF-8 line.
         *
         * @param key   ignored
         * @param value tuple to write
         */
        @Override
        public void write(WritableComparable key, Tuple value)
                throws IOException, InterruptedException {
            // Placeholder sink until the third-party API call is wired in.
            if (out != null) {
                // writeChars would emit two bytes per char with no separator,
                // running records together; write newline-terminated UTF-8.
                out.write(value.toString().getBytes(
                        java.nio.charset.StandardCharsets.UTF_8));
                out.write('\n');
            }
        }
    }
}
--------------------MyStoreOutputFormat.java----------------------
public class MyStore extends StoreFunc {
protected RecordWriter writer = null;
private String location = null;
public MyStore () {
location= null;
}
public MyStore (String location) {
this.location= location;
}
@Override
public OutputFormat getOutputFormat() throws IOException {
return new MyStoreOutputFormat(location);
}
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
this.writer = writer;
}
@Override
public void putNext(Tuple tuple) throws IOException {
//write tuple to location
try {
writer.write(null, tuple.toString());
} catch (InterruptedException e) {
e.printStackTrace();
}
}
@Override
public void setStoreLocation(String location, Job job) throws IOException {
if(location!= null)
this.location= location;
}
}
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pig.data.Tuple;
/**
 * Output format that writes each tuple's string form to a part file in the
 * task's work directory.
 *
 * <p>The constructor argument is appended to the default part-file name as
 * its extension.
 */
public class MyStoreOutputFormat extends
        TextOutputFormat<WritableComparable, Tuple> {
    /** Extension appended to the part-file name; may be {@code null}. */
    private String location = null;

    /**
     * @param location extension for the part file; {@code null} means none
     */
    public MyStoreOutputFormat(String location) {
        this.location = location;
    }

    /**
     * Creates the part file for this task attempt and wraps it in a
     * {@link MyStoreRecordWriter}.
     *
     * @param job task attempt context supplying the configuration
     * @return writer bound to the newly created file
     * @throws IOException if the file cannot be created (it is created with
     *                     overwrite disabled)
     */
    @Override
    public RecordWriter<WritableComparable, Tuple> getRecordWriter(
            TaskAttemptContext job) throws IOException, InterruptedException {
        Configuration conf = job.getConfiguration();
        // A null extension would otherwise be appended as the text "null".
        String extension = (location == null) ? "" : location;
        Path file = getDefaultWorkFile(job, extension);
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MyStoreRecordWriter(fileOut);
    }

    /** Record writer that dumps tuples to a stream, one per line. */
    protected static class MyStoreRecordWriter extends
            RecordWriter<WritableComparable, Tuple> {
        DataOutputStream out = null;

        /**
         * @param out stream that receives the records; may be {@code null},
         *            in which case writes are skipped
         */
        public MyStoreRecordWriter(DataOutputStream out) {
            this.out = out;
        }

        /**
         * Closes the underlying stream so buffered records reach the file.
         *
         * @param taskContext unused
         */
        @Override
        public void close(TaskAttemptContext taskContext) throws IOException,
                InterruptedException {
            if (out != null) {
                out.close();
            }
        }

        /**
         * Writes the tuple's string form as one UTF-8 line.
         *
         * @param key   ignored
         * @param value tuple to write
         */
        @Override
        public void write(WritableComparable key, Tuple value)
                throws IOException, InterruptedException {
            // Placeholder sink until the third-party API call is wired in.
            if (out != null) {
                // writeChars would emit two bytes per char with no separator,
                // running records together; write newline-terminated UTF-8.
                out.write(value.toString().getBytes(
                        java.nio.charset.StandardCharsets.UTF_8));
                out.write('\n');
            }
        }
    }
}
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pig.data.Tuple;
public class MyStoreOutputFormat extends
TextOutputFormat<WritableComparable, Tuple> {
private String location = null;
public MyStoreOutputFormat(String location) {
this.location = location;
}
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
TaskAttemptContext job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
String extension = location;
Path file = getDefaultWorkFile(job, extension);
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new MyStoreRecordWriter(fileOut);
}
protected static class MyStoreRecordWriter extends
RecordWriter<WritableComparable, Tuple> {
DataOutputStream out = null;
public MyStoreRecordWriter(DataOutputStream out) {
this.out = out;
}
@Override
public void close(TaskAttemptContext taskContext) throws IOException,
InterruptedException {
// close the location
}
@Override
public void write(WritableComparable key, Tuple value)
throws IOException, InterruptedException {
// write the data to location
if (out != null) {
out.writeChars(value.toString()); // will be calling API later. let me first dump to the location!
}
}
}
}
我在这里遗漏了什么吗?首先,我认为应该使用作业配置来存储位置值,而不是实例变量 在规划作业时,会调用您在setStoreLocation方法中对局部变量“location”的赋值,但getOutputFormat调用可能要到执行阶段才能执行,此时可能不再设置location变量(可能已创建类的新实例) 如果查看
PigStorage.setStoreLocation
的源代码,您应该注意到它们将位置存储在作业配置中(第2行):
然后可以在自定义输出格式的getRecordWriter方法中从作业配置里取出该值:
/**
 * Creates the part file for this task attempt and wraps it in a
 * {@code MyStoreRecordWriter}.
 *
 * <p>The part-file extension is read from the job configuration key
 * {@code "mylocation"} rather than from an instance field.
 *
 * @param job task attempt context supplying the configuration
 * @return writer bound to the newly created file
 * @throws IOException if the file cannot be created (it is created with
 *                     overwrite disabled)
 */
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
        TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    // Default to an empty suffix: an unset key would return null, which
    // would otherwise be appended to the file name as the text "null".
    String extension = conf.get("mylocation", "");
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new MyStoreRecordWriter(fileOut);
}
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
TaskAttemptContext job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
String extension = conf.get("mylocation");
Path file = getDefaultWorkFile(job, extension);
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new MyStoreRecordWriter(fileOut);
}
最后(这很可能是您看到该错误的真正原因),您的输出格式扩展了TextOutputFormat,并且在创建record writer时使用了
getDefaultWorkFile
方法。该方法需要知道文件在HDFS中的输出位置,而您没有在setStoreLocation方法中调用 FileOutputFormat.setOutputPath(job, new Path(location));(请参阅上文提到的PigStorage.setStoreLocation方法)。因此出现该错误,是因为它不知道应在何处创建默认工作文件。 评论:请帮忙,我急需解决。谢谢。 评论:谢谢你,Chris。我确实漏掉了 FileOutputFormat.setOutputPath(job, new Path(location)); 调用,已根据你的建议修改了代码。