Elasticsearch: unable to fetch more than 10 documents with a Java API query


I read the file paths from an index named documents, and with Java code I read those files and index their content into another index named document_attachment.

In this first step I cannot fetch more than 10 records at a time from the documents index; only 10 come back, even though the documents index holds more than 100,000 records.

How can I fetch all 100,000 records in one go?

I tried searchSourceBuilder.size(10000), but then it only indexes up to 10K records and stops there, and this method does not let me pass a size larger than 10000.

Please find the Java code I am using below:

public class DocumentIndex {

private final static String INDEX = "documents";  
private final static String ATTACHMENT = "document_attachment"; 
private final static String TYPE = "doc";
private static final Logger logger = Logger.getLogger(Thread.currentThread().getStackTrace()[0].getClassName());

public static void main(String args[]) throws IOException {


    RestHighLevelClient restHighLevelClient = null;
    Document doc=new Document();

    logger.info("Started Indexing the Document.....");

    try {
        restHighLevelClient = new RestHighLevelClient(RestClient.builder(new HttpHost("localhost", 9200, "http"),
                new HttpHost("localhost", 9201, "http")));
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }


    //Fetching Id, FilePath & FileName from Document Index. 
    SearchRequest searchRequest = new SearchRequest(INDEX); 
    searchRequest.types(TYPE);
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    QueryBuilder qb = QueryBuilders.matchAllQuery();
    searchSourceBuilder.query(qb);
    //searchSourceBuilder.size(10000); 
    searchRequest.source(searchSourceBuilder);
    SearchResponse searchResponse = null;
    try {
         searchResponse = restHighLevelClient.search(searchRequest);
    } catch (IOException e) {
        e.printStackTrace();
    }

    SearchHit[] searchHits = searchResponse.getHits().getHits();
    long totalHits=searchResponse.getHits().totalHits;
    logger.info("Total Hits --->"+totalHits);


    File all_files_path = new File("d:\\All_Files_Path.txt");
    File available_files = new File("d:\\Available_Files.txt");
    File missing_files = new File("d:\\Missing_Files.txt");
    all_files_path.deleteOnExit();
    available_files.deleteOnExit();
    missing_files.deleteOnExit();
    all_files_path.createNewFile();
    available_files.createNewFile();
    missing_files.createNewFile();

    int totalFilePath=1;
    int totalAvailableFile=1;
    int missingFilecount=1;

    Map<String, Object> jsonMap ;
    for (SearchHit hit : searchHits) {

        String encodedfile = null;
        File file=null;

        Map<String, Object> sourceAsMap = hit.getSourceAsMap();


        if(sourceAsMap != null) {  
            doc.setId((int) sourceAsMap.get("id"));
            doc.setApp_language(String.valueOf(sourceAsMap.get("app_language")));
        }

        String filepath=doc.getPath().concat(doc.getFilename());



        try(PrintWriter out = new PrintWriter(new FileOutputStream(all_files_path, true))  ){
            out.println("FilePath Count ---"+totalFilePath+":::::::ID---> "+doc.getId()+"File Path --->"+filepath);
        }

        file = new File(filepath);
        if(file.exists() && !file.isDirectory()) {
            try {
                  try(PrintWriter out = new PrintWriter(new FileOutputStream(available_files, true))  ){
                        out.println("Available File Count --->"+totalAvailableFile+":::::::ID---> "+doc.getId()+"File Path --->"+filepath);
                        totalAvailableFile++;
                    }
                // Read the whole file in one call and Base64-encode its content
                // (a single InputStream.read(byte[]) is not guaranteed to fill the buffer).
                byte[] bytes = java.nio.file.Files.readAllBytes(file.toPath());
                encodedfile = Base64.getEncoder().encodeToString(bytes);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        else
        {
            // Record the missing file in the same format as the other log files.
            try(PrintWriter out = new PrintWriter(new FileOutputStream(missing_files, true))  ){
                out.println("Missing File Count --->"+missingFilecount+":::::::ID---> "+doc.getId()+"File Path --->"+filepath);
            }
            missingFilecount++;
        }

        jsonMap = new HashMap<>();
        jsonMap.put("id", doc.getId());
        jsonMap.put("app_language", doc.getApp_language());
        jsonMap.put("fileContent", encodedfile);

        String id=Long.toString(doc.getId());

        IndexRequest request = new IndexRequest(ATTACHMENT, "doc", id )
                .source(jsonMap)
                .setPipeline(ATTACHMENT);

        try {
            IndexResponse response = restHighLevelClient.index(request);
        } catch(ElasticsearchException e) {
            if (e.status() == RestStatus.CONFLICT) {
                // a document with this id already exists in the attachment index
            }
            // Append to the exception log instead of re-creating (and truncating) it on every iteration.
            try (PrintStream printStream = new PrintStream(new FileOutputStream("d:\\exception.txt", true))) {
                e.printStackTrace(printStream);
            }
        }

        totalFilePath++;


    }

    logger.info("Indexing done.....");
}
}

If you have enough memory, increase the index setting index.max_result_window from 10,000 to the number you need.

Be aware, though, that this does not scale indefinitely: a search request takes heap memory and time proportional to from + size. The setting exists to bound that memory, and raising it too far will run the node out of memory.

The easiest way to set it is through the REST API:

PUT /my-index/_settings
{
    "index" : {
        "max_result_window" : 150000
    }
}
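
If you prefer to do everything from Java, the same setting can be applied through the client before running the search. The snippet below is only a minimal sketch, assuming a 6.4+ high-level client where indices().putSettings is available; the index name documents and the 150000 limit are carried over from the example above, so adjust both to your setup.

// Minimal sketch (not the original code): raise index.max_result_window on the
// "documents" index from Java, then request a larger page in a single search.
// Assumes elasticsearch-rest-high-level-client 6.4+ (indices().putSettings).
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;

public class RaiseResultWindow {

    public static void main(String[] args) throws Exception {
        RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200, "http")));

        // Same effect as the PUT /my-index/_settings call shown above.
        UpdateSettingsRequest settingsRequest = new UpdateSettingsRequest("documents");
        settingsRequest.settings(Settings.builder()
                .put("index.max_result_window", 150000));
        client.indices().putSettings(settingsRequest);

        // The search can now ask for up to the new limit in one page.
        SearchSourceBuilder source = new SearchSourceBuilder()
                .query(QueryBuilders.matchAllQuery())
                .size(150000);
        SearchResponse response = client.search(new SearchRequest("documents").source(source));
        System.out.println("Hits returned: " + response.getHits().getHits().length);

        client.close();
    }
}

In the DocumentIndex code above this amounts to raising the setting once and then uncommenting searchSourceBuilder.size(...) with the new limit.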

Thanks. For me this is a one-time activity; just once I have to fetch the 100K records and index them under another name, so can I follow this approach? Where can I set the i