加快SQL查询和数据提取过程-Java
因此,我目前在 eclipse 中开发的一个应用程序遇到了一些问题。我目前有两个文件,通过程序将所有数据从文件上传到 SQL 数据库。我通过多线程处理这个问题,同时将两个文件填充到数据库中,而且速度相对较快;最终记录总数约为 1200 万条。填充后,在代码中运行一个 SQL 查询,我希望我的记录按一定顺序排列。但是,在 Java 中运行此 SQL 最终需要花费很长时间(简单执行大约 35 分钟或更长),然后才开始转储数据。当然,这是由于数据量大。但是,有没有一种方法可以保留顺序并将数据分块转储,或者以某种方式改进 SQL,或者以任何方式加速此应用程序?(标签:java, sql-server, jdbc)
package fedMerger;
//PREREQUISITES: ENSURE THE FOLLOWING ARE NOT DISABLED IN SERVICES MANAGEMENT WHEN RUNNING THIS UTILITY:
//SQL SERVER BROWER
//SQL SERVER
//SQL SERVER VSS WRITER
//BENCHMARK TEST v1 - 11million merged in 77 minutes - no multi threading
//BENCHMARK TEST v2 - 11million merged in minutes - using multi threading
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Scanner;
import java.util.concurrent.TimeUnit;
/**
 * Merges two timestamp-prefixed text files via a SQL Server staging table:
 * both files are bulk-loaded in parallel, then a single ordered SELECT
 * streams the merged result to an output file.
 *
 * Fixes over the previous revision:
 *  - each loader thread now uses its OWN connection (java.sql.Connection is
 *    not thread-safe, and the two threads previously shared one);
 *  - the final SELECT/dump now runs only after BOTH loader threads have
 *    finished (previously it ran inside t2 without waiting for t1);
 *  - lines shorter than 26 chars no longer crash substring(0, 26);
 *  - the last row is no longer written twice to the output file;
 *  - no per-row flush/println in the dump loop;
 *  - selectMethod=cursor added to the URL so the Microsoft JDBC driver does
 *    not buffer the entire 12M-row result set before executeQuery() returns.
 */
public class FedMerger2 extends Thread {
    // Input/output locations.
    private static String directory = "C:\\Users\\xxx\\Desktop\\Files\\";
    private static String AgentfileName = "file1.txt";
    private static String otherFileName = "file.txt";

    // Kept public for backward compatibility; holds the main thread's
    // connection (used for DDL and the final ordered dump).
    public static Connection connection;

    private static String TimeStampTableName = "TimeStampTable";
    private static String timeStampColumn = "TIMESTAMP";
    private static String remainingDataColumn = "REMAINING";
    private static String user = "sa";
    private static String pass = "xxx";
    public static long timer;

    // Each input line splits at this column: [0, 26) = timestamp key, rest = payload.
    private static final int PREFIX_LEN = 26;
    // Rows sent to the server per JDBC batch.
    private static final int BATCH_SIZE = 2500;

    public static String Timestampquery = "INSERT INTO " + TimeStampTableName + "(" + timeStampColumn + ","
            + remainingDataColumn + ") VALUES (?,?)";

    // selectMethod=cursor stops the driver from materialising the whole result
    // set in memory before executeQuery() returns to the caller.
    public static String dbURL = "jdbc:sqlserver://localhost\\SQLExpress;database=TIMESTAMP_ORGANISER;"
            + "integratedSecurity=true;selectMethod=cursor";

    public static void main(String[] args) throws Exception {
        timer = System.currentTimeMillis();

        // Main-thread connection: DDL now, ordered dump later.
        connection = DriverManager.getConnection(dbURL, user, pass);
        String createTimeStampTable = "CREATE TABLE " + TimeStampTableName + "(" + timeStampColumn
                + " varchar(max)," + remainingDataColumn + " varchar(max))";
        connection.createStatement().executeUpdate(createTimeStampTable);
        System.out.println("Tables & Columns created - Populating data...");
        // NOTE(review): the final ORDER BY would benefit from an index on the
        // timestamp column; varchar(max) cannot be an index key, so consider a
        // narrower column or computed prefix — TODO confirm with schema owner.

        // Load both files in parallel, each on its own connection.
        Thread t1 = new Thread() {
            @Override
            public void run() {
                loadFile(AgentfileName, "Agent");
            }
        };
        Thread t2 = new Thread() {
            @Override
            public void run() {
                loadFile(otherFileName, "timestamp");
            }
        };
        t1.start();
        t2.start();
        // Wait for BOTH loaders before querying, so no rows are missed.
        t1.join();
        t2.join();

        dumpMergedOutput(connection);

        connection.createStatement().executeUpdate("DROP TABLE " + TimeStampTableName);
        connection.close();
        System.out.print("Complete - Time taken: "
                + (TimeUnit.MILLISECONDS.toMinutes(System.currentTimeMillis())
                        - TimeUnit.MILLISECONDS.toMinutes(timer)) + " minutes");
    }

    /**
     * Bulk-loads one file into the staging table on a dedicated connection.
     * Each line is split into a 26-char timestamp key and the remaining
     * payload; rows are inserted in batches of {@code BATCH_SIZE}. Lines
     * shorter than the key width are logged and skipped (the old guard of
     * {@code length() > 5} let 6-25 char lines through to a crashing
     * {@code substring(0, 26)}).
     *
     * @param fileName file (relative to {@code directory}) to load
     * @param label    tag used in progress messages
     */
    private static void loadFile(String fileName, String label) {
        try (Connection conn = DriverManager.getConnection(dbURL, user, pass);
                BufferedReader reader = new BufferedReader(new FileReader(directory + fileName));
                PreparedStatement ps = conn.prepareStatement(Timestampquery)) {
            int pending = 0;   // rows accumulated in the current batch
            int records = 0;   // total lines read, for progress output
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                records++;
                if (line.length() >= PREFIX_LEN) {
                    ps.setString(1, line.substring(0, PREFIX_LEN));
                    ps.setString(2, line.substring(PREFIX_LEN));
                    ps.addBatch();
                    pending++;
                } else {// ignore blanks or lines too short to split
                    System.out.println("blank found - skipped");
                }
                if (pending == BATCH_SIZE) {
                    System.out.println("executing " + label + " - " + records + " records...");
                    ps.executeBatch();
                    ps.clearBatch();
                    pending = 0;
                }
            }
            // Flush the final partial batch.
            System.out.println("executing " + label + " - " + records + " records...");
            ps.executeBatch();
        } catch (SQLException | IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Runs the ordered merge query and streams every row to the output file.
     * Rows sharing a timestamp are ordered agentStateEvent, then
     * TerminalConnectionCreated, then everything else.
     *
     * @param conn open connection to the staging database
     * @throws SQLException if the query fails
     * @throws IOException  if the output file cannot be written
     */
    private static void dumpMergedOutput(Connection conn) throws SQLException, IOException {
        System.out.print("Uploaded to database - Working SQL query");
        String retrieveData = "select " + timeStampColumn + "+" + remainingDataColumn + " as Data from "
                + TimeStampTableName + " order by timestamp, case WHEN remaining LIKE '%agentStateEvent%' THEN -3 "
                + "WHEN remaining LIKE '%TerminalConnectionCreated%' THEN -2 " + "ELSE -1 END";
        try (PreparedStatement stmt = conn.prepareStatement(retrieveData);
                ResultSet result = stmt.executeQuery();
                BufferedWriter writer = new BufferedWriter(
                        new FileWriter(directory + "newfile" + "_MergedFinal.txt"))) {
            System.out.println("Data organised, ready to output...");
            while (result.next()) {
                // No per-row flush or println: BufferedWriter batches the I/O,
                // and the old trailing re-write of the last row is gone.
                writer.write(result.getString("Data"));
                writer.write("\r\n");
            }
        }
        System.out.println("Done - View at " + directory + "newfile_MergedFinal.txt");
    }
}
首先,您需要对查询的所有列的where子句建立适当的索引 其次,您可以使用一些本机实用程序更快地转储数据。榜样 bcp实用程序
-- Build a bcp command line that exports the query result directly to a file.
-- bcp bypasses the JDBC driver entirely and is far faster for bulk extraction.
-- Flags: -c character mode, -t, comma field terminator, -T trusted (Windows)
-- authentication, -S target server (here, this server's own name).
declare @sql varchar(8000)
select @sql = 'bcp "select * from EmailVarification..tblTransaction" queryout c:\bcp\Tom.xls -c -t, -T -S' + @@servername
-- Run the command on the database server; xp_cmdshell must be enabled.
exec master..xp_cmdshell @sql
Microsoft JDBC 驱动程序在
executeQuery()
返回调用方之前,会将完整结果集缓冲在内存中。向您的 JDBC URL 添加参数 `selectMethod=cursor` 以避免这种情况,并观察是否会改善性能。除上述内容外,我还猜测您的数据库没有在您排序的列上建立索引。——谢谢您的回复,将尝试 selectMethod 并回来报告;不幸的是,该表还没有建立索引。——@a_horse_with_no_name 不幸的是,没有什么不同 :( ——您需要所有数据吗?通常,对数据库的查询应该只返回一小部分数据,否则不如在本地处理全部数据。查询 1200 万条记录几乎肯定不是您想要做的事情——可以添加 LIMIT 或 TOP 子句;或者如果您打算以某种方式聚合数据,请使用 GROUP BY 和 HAVING 子句。您有没有尝试给表的 timestamp 列(或者更好的是,您完整的 ORDER BY 子句涉及的列)建立索引?
到您的JDBC URL,以避免出现这种情况,并查看这是否会改善情况。除上述内容外,我还猜测您的数据库没有在您订购的列上建立索引,谢谢您的回复。将尝试选择方法并返回报告。不幸的是,它还没有被编入索引unfortunately@a_horse_with_no_name不幸的是,没有什么不同:(您需要所有数据吗?通常,对数据库的查询应该返回一小部分数据,或者您也可以在本地处理所有数据。查询1200万条记录几乎肯定不是您想要做的事情——可能会添加一个LIMIT或TOP子句,或者如果您打算以某种方式聚合数据,请使用GROUP BY和HAVING子句。您有没有这样做尝试给表一个时间戳索引(或者更好的是,您的完整ORDERBY子句)?