Elasticsearch无法使用Java API查询获取超过10个文档
我从一个名为Elasticsearch无法使用Java API查询获取超过10个文档,java,elasticsearch,elastic-stack,Java,elasticsearch,Elastic Stack,我从一个名为documents的索引中读取该文件路径,并使用java代码在另一个名为documents\u attachment的索引中读取该文件和索引这些文件内容 因此,在第一个过程中,我一次无法获取超过10个记录,只能从中获取10个记录 文档索引。我的doucment索引中有超过100000条记录 如何一次获取所有100000记录 我试过使用searchSourceBuilder.size(10000)然后它的索引直到10K记录不超过这个值,并且这个方法不允许我给出超过10000的大小 请找
documents
的索引中读取文件路径,然后用 Java 代码读取这些文件,并把文件内容索引到另一个名为 document_attachment
的索引中。
但在第一步中,我一次最多只能从 documents 索引中取到 10 条记录,
而我的 documents 索引中有超过 100000 条记录。
如何一次获取全部 100000 条记录?
我试过使用 searchSourceBuilder.size(10000),这样最多也只能索引 10000 条记录,
而且该方法不允许把 size 设置为大于 10000 的值。
请找到我下面使用的java代码
public class DocumentIndex {
private final static String INDEX = "documents";
private final static String ATTACHMENT = "document_attachment";
private final static String TYPE = "doc";
private static final Logger logger = Logger.getLogger(Thread.currentThread().getStackTrace()[0].getClassName());
public static void main(String args[]) throws IOException {
RestHighLevelClient restHighLevelClient = null;
Document doc=new Document();
logger.info("Started Indexing the Document.....");
try {
restHighLevelClient = new RestHighLevelClient(RestClient.builder(new HttpHost("localhost", 9200, "http"),
new HttpHost("localhost", 9201, "http")));
} catch (Exception e) {
System.out.println(e.getMessage());
}
//Fetching Id, FilePath & FileName from Document Index.
SearchRequest searchRequest = new SearchRequest(INDEX);
searchRequest.types(TYPE);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
QueryBuilder qb = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(qb);
//searchSourceBuilder.size(10000);
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = null;
try {
searchResponse = restHighLevelClient.search(searchRequest);
} catch (IOException e) {
e.getLocalizedMessage();
}
SearchHit[] searchHits = searchResponse.getHits().getHits();
long totalHits=searchResponse.getHits().totalHits;
logger.info("Total Hits --->"+totalHits);
File all_files_path = new File("d:\\All_Files_Path.txt");
File available_files = new File("d:\\Available_Files.txt");
File missing_files = new File("d:\\Missing_Files.txt");
all_files_path.deleteOnExit();
available_files.deleteOnExit();
missing_files.deleteOnExit();
all_files_path.createNewFile();
available_files.createNewFile();
missing_files.createNewFile();
int totalFilePath=1;
int totalAvailableFile=1;
int missingFilecount=1;
Map<String, Object> jsonMap ;
for (SearchHit hit : searchHits) {
String encodedfile = null;
File file=null;
Map<String, Object> sourceAsMap = hit.getSourceAsMap();
if(sourceAsMap != null) {
doc.setId((int) sourceAsMap.get("id"));
doc.setApp_language(String.valueOf(sourceAsMap.get("app_language")));
}
String filepath=doc.getPath().concat(doc.getFilename());
try(PrintWriter out = new PrintWriter(new FileOutputStream(all_files_path, true)) ){
out.println("FilePath Count ---"+totalFilePath+":::::::ID---> "+doc.getId()+"File Path --->"+filepath);
}
file = new File(filepath);
if(file.exists() && !file.isDirectory()) {
try {
try(PrintWriter out = new PrintWriter(new FileOutputStream(available_files, true)) ){
out.println("Available File Count --->"+totalAvailableFile+":::::::ID---> "+doc.getId()+"File Path --->"+filepath);
totalAvailableFile++;
}
FileInputStream fileInputStreamReader = new FileInputStream(file);
byte[] bytes = new byte[(int) file.length()];
fileInputStreamReader.read(bytes);
encodedfile = new String(Base64.getEncoder().encodeToString(bytes));
fileInputStreamReader.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
else
{
PrintWriter out = new PrintWriter(new FileOutputStream(missing_files, true));
out.close();
missingFilecount++;
}
jsonMap = new HashMap<>();
jsonMap.put("id", doc.getId());
jsonMap.put("app_language", doc.getApp_language());
jsonMap.put("fileContent", encodedfile);
String id=Long.toString(doc.getId());
IndexRequest request = new IndexRequest(ATTACHMENT, "doc", id )
.source(jsonMap)
.setPipeline(ATTACHMENT);
PrintStream printStream = new PrintStream(new File("d:\\exception.txt"));
try {
IndexResponse response = restHighLevelClient.index(request);
} catch(ElasticsearchException e) {
if (e.status() == RestStatus.CONFLICT) {
}
e.printStackTrace(printStream);
}
totalFilePath++;
}
logger.info("Indexing done.....");
}
公共类文档索引{
私有最终静态字符串INDEX=“documents”;
私有最终静态字符串ATTACHMENT=“document\u ATTACHMENT”;
私有最终静态字符串TYPE=“doc”;
私有静态最终记录器Logger=Logger.getLogger(Thread.currentThread().getStackTrace()[0].getClassName());
公共静态void main(字符串args[])引发IOException{
RestHighLevelClient RestHighLevelClient=null;
单据单据=新单据();
info(“已开始为文档编制索引…”);
试一试{
restHighLevelClient=新的restHighLevelClient(RestClient.builder(新的HttpHost(“localhost”),9200,“http”),
新的HttpHost(“localhost”,9201,“http”);
}捕获(例外e){
System.out.println(e.getMessage());
}
//正在从文档索引中获取Id、文件路径和文件名。
SearchRequest SearchRequest=新的SearchRequest(索引);
searchRequest.types(TYPE);
SearchSourceBuilder SearchSourceBuilder=新的SearchSourceBuilder();
QueryBuilder qb=QueryBuilders.matchAllQuery();
searchSourceBuilder.query(qb);
//searchSourceBuilder.size(10000);
searchRequest.source(searchSourceBuilder);
SearchResponse SearchResponse=null;
试一试{
searchResponse=restHighLevelClient.search(searchRequest);
}捕获(IOE异常){
e、 getLocalizedMessage();
}
SearchHit[]searchHits=searchResponse.getHits().getHits();
long totalHits=searchResponse.getHits().totalHits;
logger.info(“总点击次数--->”+总点击次数);
File all_files_path=新文件(“d:\\all_files_path.txt”);
文件可用\u文件=新文件(“d:\\available\u files.txt”);
File missing_files=新文件(“d:\\missing_files.txt”);
所有_文件_path.deleteOnExit();
可用的_文件。deleteOnExit();
缺少_文件。deleteOnExit();
所有_文件_path.createNewFile();
可用的_文件。createNewFile();
缺少_文件。createNewFile();
int totalFilePath=1;
int totalAvailableFile=1;
int missingFilecount=1;
地图jsonMap;
for(SearchHit:searchHits){
字符串encodedfile=null;
File=null;
Map sourceAsMap=hit.getSourceAsMap();
如果(sourceAsMap!=null){
文档setId((int)sourceAsMap.get(“id”);
doc.setApp_语言(String.valueOf(sourceAsMap.get(“app_语言”));
}
字符串filepath=doc.getPath().concat(doc.getFilename());
try(PrintWriter out=new PrintWriter(new FileOutputStream(all_files_path,true))){
out.println(“文件路径计数--“+totalFilePath+”:::::ID-->“+doc.getId()+”文件路径-->“+FilePath”);
}
文件=新文件(文件路径);
if(file.exists()&&!file.isDirectory()){
试一试{
try(PrintWriter out=new PrintWriter(new FileOutputStream(可用的\u文件,true))){
out.println(“可用文件计数-->”+totalAvailableFile+”::;
totalAvailableFile++;
}
FileInputStream fileInputStreamReader=新的FileInputStream(文件);
byte[]bytes=新字节[(int)file.length()];
fileInputStreamReader.read(字节);
encodedfile=新字符串(Base64.getEncoder().encodeToString(字节));
fileInputStreamReader.close();
}catch(filenotfounde异常){
e、 printStackTrace();
}
}
其他的
{
PrintWriter out=新的PrintWriter(新的FileOutputStream(缺少_文件,true));
out.close();
缺少FileCount++;
}
jsonMap=newhashmap();
put(“id”,doc.getId());
put(“app_language”,doc.getApp_language());
put(“fileContent”,encodedfile);
字符串id=Long.toString(doc.getId());
IndexRequest request=新IndexRequest(附件,“文件”,id)
.source(jsonMap)
.管道(附件);
PrintStream PrintStream=新的PrintStream(新文件(“d:\\exception.txt”);
试一试{
IndexResponse-response=restHighLevelClient.index(请求);
}捕捉(弹性){
if(e.status()==RestStatus.CONFLICT){
}
e、 printStackTrace(printStream);
}
totalFilePath++;
}
logger.info(“索引完成…”);
}
}如果您有足够的内存,请将索引设置 index.max_result_window
从 10000 增大到所需的数值
看
但是请注意,这种做法无法无限扩展。搜索请求占用的堆内存和时间与 from + size 成正比。此设置正是用来限制内存占用的,如果设置得太高,就会耗尽内存
最简单的设置方法是通过REST API:
PUT /my-index/_settings
{
"index" : {
"max_result_window" : 150000
}
}
谢谢,对我来说这是一次性的工作。我只需要获取一次这 100K 条
记录,并用另一个名称重新索引。所以我可以采用这种方法吗?我可以在哪里设置i