elasticsearch,ant,ivy,nutch,Apache,elasticsearch,Ant,Ivy,Nutch" /> elasticsearch,ant,ivy,nutch,Apache,elasticsearch,Ant,Ivy,Nutch" />

Apache Nutch 2.3+;Elasticsearch 2.1.1。无法加载Elasticsearch依赖项

Apache Nutch 2.3+;Elasticsearch 2.1.1。无法加载Elasticsearch依赖项,apache,elasticsearch,ant,ivy,nutch,Apache,elasticsearch,Ant,Ivy,Nutch,我正在尝试集成Nutch2.3,以便将数据推送到最新的Elasticsearch 2.1.1。 我开始更新以下文件中的版本和依赖项: 索引器elastic/plugin.xml <plugin id="indexer-elastic" name="ElasticIndexWriter" version="1.0.0" provider-name="nutch.apache.org"> <runtime> <library name="ind

我正在尝试集成Nutch2.3,以便将数据推送到最新的Elasticsearch 2.1.1。 我开始更新以下文件中的版本和依赖项:

indexer-elastic/plugin.xml

    <plugin id="indexer-elastic"
        name="ElasticIndexWriter"
        version="1.0.0"
        provider-name="nutch.apache.org">

  <!-- Jars that make up this plug-in's runtime classpath. -->
  <runtime>
    <!-- The plug-in's own jar; the wildcard export applies to this jar only. -->
    <library name="indexer-elastic.jar">
      <export name="*" />
    </library>

    <!-- Elasticsearch 2.1.1 jar. -->
    <library name="elasticsearch-2.1.1.jar" />

    <!-- Third-party jars listed alongside Elasticsearch (presumably its
         transitive dependencies; verify against the Elasticsearch 2.1.1 POM). -->
    <library name="hppc-0.7.1.jar" />
    <library name="jackson-core-2.6.2.jar" />
    <library name="jackson-dataformat-cbor-2.6.2.jar" />
    <library name="jackson-dataformat-smile-2.6.2.jar" />
    <library name="jackson-dataformat-yaml-2.6.2.jar" />
    <library name="guava-18.0.jar" />
    <library name="compress-lzf-1.0.2.jar" />
    <library name="t-digest-3.0.jar" />
    <library name="jsr166e-1.1.0.jar" />
    <library name="commons-cli-1.3.1.jar" />
    <library name="netty-3.10.5.Final.jar" />
    <library name="joda-time-2.8.2.jar" />

    <!-- Lucene 5.3.1 modules. -->
    <library name="lucene-analyzers-common-5.3.1.jar" />
    <library name="lucene-backward-codecs-5.3.1.jar" />
    <library name="lucene-core-5.3.1.jar" />
    <library name="lucene-highlighter-5.3.1.jar" />
    <library name="lucene-join-5.3.1.jar" />
    <library name="lucene-memory-5.3.1.jar" />
    <library name="lucene-queries-5.3.1.jar" />
    <library name="lucene-queryparser-5.3.1.jar" />
    <library name="lucene-spatial-5.3.1.jar" />
    <library name="lucene-suggest-5.3.1.jar" />

    <!-- Remaining helper jars. -->
    <library name="HdrHistogram-2.1.6.jar" />
    <library name="joda-convert-1.2.jar" />
  </runtime>

  <!-- This plug-in imports the nutch-extensionpoints plug-in. -->
  <requires>
    <import plugin="nutch-extensionpoints" />
  </requires>

  <!-- Registers ElasticIndexWriter as an implementation of the IndexWriter extension point. -->
  <extension id="org.apache.nutch.indexer.elastic"
             name="Elasticsearch Index Writer"
             point="org.apache.nutch.indexer.IndexWriter">
    <implementation id="ElasticIndexWriter"
                    class="org.apache.nutch.indexwriter.elastic.ElasticIndexWriter" />
  </extension>

</plugin>
<ivy-module version="1.0">
  <!-- Module identity; the module name is taken from the Ant project name. -->
  <info organisation="org.apache.nutch" module="${ant.project.name}">
    <license name="Apache 2.0" />
    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org" />
    <description>Apache Nutch</description>
  </info>

  <!-- Configurations come from an included file, referenced relative to this one. -->
  <configurations>
    <include file="../../../ivy/ivy-configurations.xml" />
  </configurations>

  <publications>
    <!-- Get the artifact from our module name. -->
    <artifact conf="master" />
  </publications>

  <dependencies>
    <!-- Elasticsearch 2.1.1, mapped to the dependency's "default" configuration. -->
    <dependency org="org.elasticsearch" name="elasticsearch"
      rev="2.1.1" conf="*->default" />

    <!-- Guava 18.0, pinned explicitly. It uses the same configuration mapping
         as the Elasticsearch dependency so that Ivy does not fall back to its
         default mapping and pull every published configuration of the module. -->
    <dependency org="com.google.guava" name="guava"
      rev="18.0" conf="*->default" />
  </dependencies>
</ivy-module>

indexer-elastic/ivy.xml

    <plugin id="indexer-elastic"
        name="ElasticIndexWriter"
        version="1.0.0"
        provider-name="nutch.apache.org">

  <!-- Jars that make up this plug-in's runtime classpath. -->
  <runtime>
    <!-- The plug-in's own jar; the wildcard export applies to this jar only. -->
    <library name="indexer-elastic.jar">
      <export name="*" />
    </library>

    <!-- Elasticsearch 2.1.1 jar. -->
    <library name="elasticsearch-2.1.1.jar" />

    <!-- Third-party jars listed alongside Elasticsearch (presumably its
         transitive dependencies; verify against the Elasticsearch 2.1.1 POM). -->
    <library name="hppc-0.7.1.jar" />
    <library name="jackson-core-2.6.2.jar" />
    <library name="jackson-dataformat-cbor-2.6.2.jar" />
    <library name="jackson-dataformat-smile-2.6.2.jar" />
    <library name="jackson-dataformat-yaml-2.6.2.jar" />
    <library name="guava-18.0.jar" />
    <library name="compress-lzf-1.0.2.jar" />
    <library name="t-digest-3.0.jar" />
    <library name="jsr166e-1.1.0.jar" />
    <library name="commons-cli-1.3.1.jar" />
    <library name="netty-3.10.5.Final.jar" />
    <library name="joda-time-2.8.2.jar" />

    <!-- Lucene 5.3.1 modules. -->
    <library name="lucene-analyzers-common-5.3.1.jar" />
    <library name="lucene-backward-codecs-5.3.1.jar" />
    <library name="lucene-core-5.3.1.jar" />
    <library name="lucene-highlighter-5.3.1.jar" />
    <library name="lucene-join-5.3.1.jar" />
    <library name="lucene-memory-5.3.1.jar" />
    <library name="lucene-queries-5.3.1.jar" />
    <library name="lucene-queryparser-5.3.1.jar" />
    <library name="lucene-spatial-5.3.1.jar" />
    <library name="lucene-suggest-5.3.1.jar" />

    <!-- Remaining helper jars. -->
    <library name="HdrHistogram-2.1.6.jar" />
    <library name="joda-convert-1.2.jar" />
  </runtime>

  <!-- This plug-in imports the nutch-extensionpoints plug-in. -->
  <requires>
    <import plugin="nutch-extensionpoints" />
  </requires>

  <!-- Registers ElasticIndexWriter as an implementation of the IndexWriter extension point. -->
  <extension id="org.apache.nutch.indexer.elastic"
             name="Elasticsearch Index Writer"
             point="org.apache.nutch.indexer.IndexWriter">
    <implementation id="ElasticIndexWriter"
                    class="org.apache.nutch.indexwriter.elastic.ElasticIndexWriter" />
  </extension>

</plugin>
<ivy-module version="1.0">
  <!-- Module identity; the module name is taken from the Ant project name. -->
  <info organisation="org.apache.nutch" module="${ant.project.name}">
    <license name="Apache 2.0" />
    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org" />
    <description>Apache Nutch</description>
  </info>

  <!-- Configurations come from an included file, referenced relative to this one. -->
  <configurations>
    <include file="../../../ivy/ivy-configurations.xml" />
  </configurations>

  <publications>
    <!-- Get the artifact from our module name. -->
    <artifact conf="master" />
  </publications>

  <dependencies>
    <!-- Elasticsearch 2.1.1, mapped to the dependency's "default" configuration. -->
    <dependency org="org.elasticsearch" name="elasticsearch"
      rev="2.1.1" conf="*->default" />

    <!-- Guava 18.0, pinned explicitly. It uses the same configuration mapping
         as the Elasticsearch dependency so that Ivy does not fall back to its
         default mapping and pull every published configuration of the module. -->
    <dependency org="com.google.guava" name="guava"
      rev="18.0" conf="*->default" />
  </dependencies>
</ivy-module>

Apache Nutch
我还修改了org.apache.nutch.indexwriter.elastic.ElasticIndexWriter,以与elasticsearch 2.1.1客户机的新接口相兼容

那么问题出在哪里?

似乎indexer-elastic/plugin.xml中列出的依赖项在运行时不会自动加载,因此Elasticsearch客户端找不到它们并抛出异常。于是我尝试了另一种方法:根据抛出的异常,把依赖项逐个添加到$NUTCH_ROOT/ivy/ivy.xml中(该文件列出了Apache Nutch的主要依赖项)。这不是正确的做法,但多少奏效了。

  • 如何处理插件依赖关系
  • 在插件中使用较新版本的库应采取什么策略?例如,Nutch使用Guava v11.0.2,但Elasticsearch 2.1.1需要Guava v18.0。尽管我在indexer-elastic/ivy.xml中显式指定了该版本,但运行时加载的似乎仍是旧版本。

  • 插件依赖项应该在插件的ivy.xml和plugin.xml文件中声明。我还没有测试你贴出的文件,但看不出它们有什么问题。正如你所指出的,在主ivy.xml文件中声明这些依赖项并不好。

    看,相同的逻辑适用于所有插件


    至于解决主依赖项和插件之间的冲突,不幸的是,你必须自己处理,例如在主ivy.xml中强制使用你需要的版本,因为Nutch并不把插件当作主代码的依赖项(Maven意义上的依赖项)来处理。

    你是否已经让Nutch 2.3成功运行并索引到Elasticsearch 2.x?——嗯,是的,我最终成功了。我可以在几天内把它推送到GitHub,到时会通知你。——对于正在寻找Elasticsearch 2.x实现的其他人来说,这个PR可能也值得参考: