Apache NiFi:读取和修改文件内容

Apache nifi NiFi读取和修改文件内容,apache-nifi,Apache Nifi,我是Nifi的新手。我正在研究NiFiProject,它读取文件的内容并进行一些ETL。结果需要放入不同的文件中 我得到关系不满意错误: MySpanishProcessorID-b673bb80-0169-1 ooo-2f8a-c22081380d29 Myspanishprocessodidzb673bb80-0169-1000-2f8a-c22081380d29未能 标准流程文件记录LUUIDZE8EE1374-ef25-43d5-b35e导致的流程会话- ac76dba0955c,索赔标

我是 NiFi 的新手。我正在做一个 NiFi 项目:它读取文件的内容并进行一些 ETL 处理,处理结果需要写入另一个文件中。

我遇到了“未指定传输关系”(transfer relationship not specified)的错误:

MySpanishProcessor[id=b673bb80-0169-1000-2f8a-c22081380d29] MySpanishProcessor[id=b673bb80-0169-1000-2f8a-c22081380d29] failed to process session due to StandardFlowFileRecord[uuid=e8ee1374-ef25-43d5-b35e-ac76dba0955c,claim=StandardContentClaim [resourceClaim=StandardResourceClaim[id=1554235475648-1, container=default, section=1], offset=0, …] transfer relationship not specified; Processor Administratively Yielded for 1 sec: org.apache.nifi.processor.exception.FlowFileHandlingException: StandardFlowFileRecord[uuid=e8ee1374-ef25-43d5-b35e-ac76dba0955c,claim=StandardContentClaim [resourceClaim=StandardResourceClaim[id=1554235475648-1, container=default, section=1], offset=0, …] transfer relationship not specified

我编写的代码是:

 /**
  * Custom NiFi processor that declares one optional property and two relationships
  * ("matched" and "unmatched").
  *
  * <p>As written, {@link #onTrigger} reads the incoming FlowFile's lines into a list, creates
  * a second FlowFile containing the text "No Data", and transfers only that second FlowFile
  * to {@link #REL_MATCH}.
  *
  * <p>NOTE(review): the FlowFile obtained from {@code session.get()} is never transferred or
  * removed anywhere in this class; see the notes inside {@link #onTrigger}.
  */
 @Tags({"spanish"})
@CapabilityDescription("Spanish processor")
@SeeAlso({})
@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
@WritesAttributes({@WritesAttribute(attribute="", description="")})
public class MySpanishProcessor extends AbstractProcessor {
    /** Optional example property; when set, its value must be non-empty. */
    public static final PropertyDescriptor MY_PROPERTY = new PropertyDescriptor
            .Builder().name("MY_PROPERTY")
            .displayName("My property")
            .description("Example Property")
            .required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    /** Relationship named "matched"; the only relationship {@link #onTrigger} transfers to. */
    public static final Relationship REL_MATCH = new Relationship.Builder()
            .name("matched")
            .description("FlowFiles are routed to this relationship when the Regular Expression is successfully evaluated and the FlowFile is modified as a result")
            .build();
    /** Relationship named "unmatched"; registered in {@link #init} but not used by {@link #onTrigger}. */
    public static final Relationship REL_NO_MATCH = new Relationship.Builder()
            .name("unmatched")
            .description("FlowFiles are routed to this relationship when no provided Regular Expression matches the content of the FlowFile")
            .build();

    /** Unmodifiable list of supported properties, populated in {@link #init}. */
    private List<PropertyDescriptor> descriptors;

    /** Unmodifiable set of relationships, populated in {@link #init}. */
    private Set<Relationship> relationships;

    /**
     * Builds the unmodifiable property list and relationship set returned by the getters below.
     *
     * @param context initialization context (not used here)
     */
    @Override
    protected void init(final ProcessorInitializationContext context) {
        final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>();
        descriptors.add(MY_PROPERTY);
        this.descriptors = Collections.unmodifiableList(descriptors);

        final Set<Relationship> relationships = new HashSet<Relationship>();
        relationships.add(REL_MATCH);
        relationships.add(REL_NO_MATCH);
        this.relationships = Collections.unmodifiableSet(relationships);
    }

    /** @return the relationship set built in {@link #init} */
    @Override
    public Set<Relationship> getRelationships() {
        return this.relationships;
    }

    /** @return the property descriptor list built in {@link #init} */
    @Override
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    /**
     * Scheduling hook; intentionally empty in this version.
     *
     * @param context the process context (not used here)
     */
    @OnScheduled
    public void onScheduled(final ProcessContext context) {

    }
    // Table-typed instance fields; none of them is read or written anywhere in this class as shown.
    Table officeTable = null;
    Table legalEntitytable = null;
    Table citiesTable = null;
    Table joinOfOfficeLegalCityTable = null;
    /**
     * Takes one FlowFile from the session, reads its lines into a local list, then creates a
     * new FlowFile containing "No Data" and transfers that new FlowFile to {@link #REL_MATCH}.
     *
     * @param context the process context (not used here)
     * @param session session used to get, read, create, write and transfer FlowFiles
     * @throws ProcessException declared by the overridden method; nothing in this body throws it directly
     */
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        getLogger().debug("In the Trigger");
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }
        // Read the incoming FlowFile's content line by line through a callback.
        // The collected lines are not used after the read completes.
        ArrayList<String> lineList= new ArrayList<>();
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {
                BufferedReader bufferedReader= new BufferedReader(new InputStreamReader(inputStream));
                String line;
                while ((line=bufferedReader.readLine())!=null)
                {lineList.add(line);}
            }
        });

        // NOTE(review): the value returned by session.write(...) below is discarded, so
        // flowFile1 still refers to the FlowFile as it was before the write.
        FlowFile flowFile1=session.create();
        session.write(flowFile1, new OutputStreamCallback() {
            @Override
            public void process(OutputStream outputStream) throws IOException {
                outputStream.write("No Data".getBytes());
            }
        });
//        session.getProvenanceReporter().modifyAttributes(flowFile1);
        // NOTE(review): only flowFile1 is transferred; `flowFile` from session.get() is neither
        // transferred nor removed before this method returns.
        session.transfer(flowFile1, REL_MATCH);//needs to be called to transfer
    }
}
@Tags({“西班牙语”})
@能力描述(“西班牙语处理器”)
@另见({})
@ReadsAttributes({@ReadsAttribute(attribute=,description=)})
@WriteAttribute({@WriteAttribute(attribute=“”,description=“”)})
公共类MySpanishProcessor扩展了AbstractProcessor{
公共静态最终PropertyDescriptor MY_PROPERTY=新PropertyDescriptor
.Builder().name(“我的财产”)
.displayName(“我的财产”)
.说明(“示例属性”)
。必填项(错误)
.addValidator(标准验证器。非空验证器)
.build();
public static final REL_MATCH=new Relationship.Builder()
.名称(“匹配”)
.description(“当正则表达式成功求值并因此修改流文件时,流文件将路由到此关系”)
.build();
public static final REL_NO_MATCH=新关系.Builder()
.名称(“不匹配”)
.description(“当没有提供与流文件内容匹配的正则表达式时,流文件被路由到此关系”)
.build();
私有列表描述符;
私人关系;
@凌驾
受保护的void init(最终ProcessorInitializationContext上下文){
最终列表描述符=新的ArrayList();
描述符。添加(我的_属性);
this.descriptors=Collections.unmodifiableList(描述符);
最终集关系=新HashSet();
关系。添加(关系匹配);
添加(关系不匹配);
this.relationships=Collections.unmodifiableSet(关系);
}
@凌驾
公共集getRelationships(){
返回此项。关系;
}
@凌驾
公共最终列表getSupportedPropertyDescriptors(){
返回描述符;
}
@如期
已调度的公共void(最终ProcessContext上下文){
}
Tableoffitable=null;
表legalEntitytable=null;
表citiesTable=null;
表JoinOfOfficeGalicityTable=null;
@凌驾
public void OnTigger(最终ProcessContext上下文,最终ProcessSession会话)引发ProcessException{
getLogger().debug(“在触发器中”);
FlowFile FlowFile=session.get();
if(flowFile==null){
返回;
}
//让我们使用回调函数读取文件
ArrayList lineList=新建ArrayList();
读取(流文件,新的InputStreamCallback(){
@凌驾
公共无效进程(InputStream InputStream)引发IOException{
BufferedReader BufferedReader=新的BufferedReader(新的InputStreamReader(inputStream));
弦线;
而((line=bufferedReader.readLine())!=null)
{lineList.add(line);}
}
});
FlowFile flowFile1=session.create();
写入(flowFile1,新的OutputStreamCallback(){
@凌驾
公共无效进程(OutputStream OutputStream)引发IOException{
write(“无数据”.getBytes());
}
});
//session.getProvanceReporter().modifyAttributes(flowFile1);
session.transfer(flowFile1,REL_MATCH);//需要调用才能传输
}
}

每个 FlowFile 都必须有明确的去向:凡是通过 session.create 创建或通过 session.get 获取的 FlowFile,最终都必须被传输(transfer)或删除(remove)。

任何 session.write 或 session.putAttribute 调用都会返回一个新的 FlowFile 引用,之后必须使用这个新引用。因此应写成:

FlowFile flowFile1=session.create();
flowFile1 = session.write(flowFile1, new OutputStreamCallback() {

然后必须传输flowFile1。

每个 FlowFile 都必须有明确的去向:凡是通过 session.create 创建或通过 session.get 获取的 FlowFile,最终都必须被传输(transfer)或删除(remove)。

任何 session.write 或 session.putAttribute 调用都会返回一个新的 FlowFile 引用,之后必须使用这个新引用。因此应写成:

FlowFile flowFile1=session.create();
flowFile1 = session.write(flowFile1, new OutputStreamCallback() {

然后必须传输flowFile1。

经过反复试错,以下代码可以正常工作:

 /**
  * Feeds the incoming FlowFile line by line to a {@code SpanishCodeFilePreprocessor}, replaces
  * the FlowFile's content with the resulting office table written as CSV, and routes the
  * FlowFile to {@code REL_MATCH}.
  *
  * <p>If reading, pre-processing, writing or the transfer fails, the failure is logged together
  * with its cause, the content is replaced by the text {@code "Office Table size: 0"} and the
  * FlowFile is routed to {@code REL_NO_MATCH}. A failure inside that fallback path is not
  * caught here and propagates to the caller.
  *
  * @param context the process context supplied by the framework (not used here)
  * @param session session used to obtain, read, rewrite and transfer the FlowFile
  * @throws ProcessException declared by the overridden method
  */
 @Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    getLogger().debug("In the Trigger");
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final SpanishCodeFilePreprocessor spanishCodeFilePreprocessor = new SpanishCodeFilePreprocessor();
    try {
        // Reading lives in the same try as writing so that a read failure goes straight to the
        // fallback path instead of continuing with a partially filled preprocessor.
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream inputStream) throws IOException {
                // NOTE(review): decodes with the platform default charset, as before; confirm
                // the encoding of the incoming files and pass it explicitly if it is known.
                try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) {
                    String line;
                    while ((line = bufferedReader.readLine()) != null) {
                        spanishCodeFilePreprocessor.identifyRecordTypeAndProcessIt(line);
                    }
                }
            }
        });

        // session.write returns the updated FlowFile reference; keep it for every later call.
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(OutputStream outputStream) throws IOException {
                // NOTE(review): officeTable is not declared in this method, so it is presumably
                // an instance field of the processor; the assignment is kept as before.
                officeTable = spanishCodeFilePreprocessor.getOfficeTable();
                officeTable.write().csv(outputStream);
            }
        });
        session.getProvenanceReporter().modifyAttributes(flowFile);
        session.transfer(flowFile, REL_MATCH);
    } catch (Exception e) {
        // Pass the exception itself so the log keeps the stack trace.
        getLogger().error("Exception in spanishProcessor", e);
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(OutputStream outputStream) throws IOException {
                outputStream.write("Office Table size: 0".getBytes());
            }
        });
        session.getProvenanceReporter().modifyAttributes(flowFile);
        session.transfer(flowFile, REL_NO_MATCH);
    }
}
@覆盖
public void OnTigger(最终ProcessContext上下文,最终ProcessSession会话)引发ProcessException{
getLogger().debug(“在触发器中”);
FlowFile FlowFile=session.get();
if(flowFile==null){
返回;
}
//让我们使用回调函数读取文件
ArrayList lineList=新建ArrayList();
最终SpanishCodeFilePreprocessor SpanishCodeFilePreprocessor=新的SpanishCodeFilePreprocessor();
试一试{
读取(流文件,新的InputStreamCallback(){
@凌驾
公共无效进程(InputStream InputStream)引发IOException{
BufferedReader BufferedReader=新的BufferedReader(新的InputStreamReader(inputStream));
弦线;
而((line=bufferedReader.readLine())!=null){
spanishCodeFilePreprocessor.IdentificationRecordTypeA