Java 如何在使用多线程从mysql表读取时限制记录数
我的mysql表中有150万条记录。我试图在一个批处理过程中读取所有记录,即计划在一个批处理中读取1000条记录,并在控制台中打印这些记录Java 如何在使用多线程从mysql表读取时限制记录数,java,mysql,multithreading,Java,Mysql,Multithreading,我的mysql表中有150万条记录。我试图在一个批处理过程中读取所有记录,即计划在一个批处理中读取1000条记录,并在控制台中打印这些记录 为此,我计划使用java实现多线程概念。如何实现这一点?在MySQL中,您可以一次获取所有记录,或者以流式方式逐个获取记录(请参阅)。或者,您可以使用limit关键字进行分块(请参阅) 无论您使用流式处理结果还是分块,都可以在读取数据时使用多线程处理(或打印)数据。这通常使用生产者-消费者模式完成,在这种情况下,生产者从数据库检索数据,将其放入队列,消费者从
为此,我计划使用java实现多线程概念。如何实现这一点?在MySQL中,您可以一次获取所有记录,或者以流式方式逐个获取记录(请参阅)。或者,您可以使用
limit
关键字进行分块(请参阅)
无论您使用流式处理结果还是分块,都可以在读取数据时使用多线程处理(或打印)数据。这通常使用生产者-消费者模式完成,在这种情况下,生产者从数据库检索数据,将其放入队列,消费者从队列中获取数据并进行处理(例如,打印到控制台)
不过这里还有一些管理上的开销:生产者和消费者都可能卡死或因错误而中止,双方都需要能够察觉到这种情况,这样才不会永远挂起(否则可能会导致您的应用程序卡死)。这就是需要设置“合理”超时的原因(“合理”与否完全取决于您的具体场景)。
我试着把它写成一个最小的可运行示例,但代码仍然不少(见下文)。其中有两行被注释掉的代码,可用于测试超时情况。另外还有一个 refreshTestData
变量,可用于重用已插入的记录(插入记录可能需要很长时间)。
为了保持整洁,省略了许多关键字,如
private/public
(即需要在非演示代码中添加这些关键字)
import java.sql.*;
导入java.util.*;
导入java.util.concurrent.*;
导入org.slf4j.Logger;
导入org.slf4j.LoggerFactory;
公共类获取行{
私有静态最终记录器log=LoggerFactory.getLogger(FetchRows.class);
公共静态void main(字符串[]args){
试一试{
新建FetchRows().print();
}捕获(例外e){
e、 printStackTrace();
}
}
void print()引发异常{
Class.forName(“com.mysql.jdbc.Driver”).newInstance();
Properties dbProps=新属性();
setProperty(“用户”、“测试”);
setProperty(“密码”、“测试”);
try(Connection conn=DriverManager.getConnection(“jdbc:mysql://localhost:3306/test“,dbProps)){
try(语句st=conn.createStatement()){
准备测试数据(st);
}
// https://stackoverflow.com/a/2448019/3080094
try(语句st=conn.createStatement(仅限java.sql.ResultSet.TYPE_FORWARD_),
java.sql.ResultSet.CONCUR(只读)){
st.setFetchSize(整型最小值);
获取和打印测试数据(st);
}
}
}
布尔refreshTestData=true;
int maxRecords=5_555;
void prepareTestData(语句st)引发SQLException{
int recordCount=0;
如果(刷新测试数据){
st.execute(“删除表,如果存在记录”);
st.execute(“创建表fetchrecords(id mediumint not null auto_increment主键,created timestamp default current_timestamp)”;
对于(int i=0;i0){
排队(打印队列,l);
}
}catch(TimeoutException | interruptedeexception e){
log.error(“无法完成将记录打印到控制台:{}”,e.getMessage());
printTask.stop();
}最后{
log.info(“读取记录完成”);
如果(!printTask.isStopping()){
试一试{
排队(printQueue,Collections.emptyList());
}捕获(例外e){
日志错误(“无法发出最后一条记录打印的信号。”,e);
printTask.stop();
}
}
如果(!printTask.await)(printFinishTimeoutS,TimeUnit.SECON
import java.sql.*;
import java.util.*;
import java.util.concurrent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Demo of a producer-consumer pipeline that reads rows from a MySQL table in batches
 * and hands each batch to a separate thread for printing.
 *
 * <p>The calling thread is the producer: it reads the {@code fetchrecords} table, groups the
 * rows into lists of {@link #batchSize} beans and puts them on a bounded queue. A
 * {@link PrintToConsole} task running on its own thread is the consumer. Both sides use
 * timeouts on the queue so that neither waits forever when the other one stalls or fails.
 * An empty list on the queue is the producer's "no more data" signal.
 */
public class FetchRows {

    private static final Logger log = LoggerFactory.getLogger(FetchRows.class);

    /** Number of insert statements collected before a batch is sent to the database. */
    private static final int INSERT_BATCH_SIZE = 500;

    /**
     * Runs the demo and logs any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new FetchRows().print();
        } catch (Exception e) {
            log.error("Fetching and printing records failed.", e);
        }
    }

    /**
     * Connects to the local {@code test} database, prepares the test table and then
     * fetches and prints its contents.
     *
     * @throws Exception when the driver cannot be loaded, the database cannot be reached or
     *         the calling thread is interrupted while waiting for the print task
     */
    void print() throws Exception {
        // Loading (and thereby initializing) the driver class is sufficient; the deprecated
        // Class.newInstance() call only created an instance that was thrown away.
        Class.forName("com.mysql.jdbc.Driver");
        Properties dbProps = new Properties();
        dbProps.setProperty("user", "test");
        dbProps.setProperty("password", "test");
        try (Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", dbProps)) {
            try (Statement st = conn.createStatement()) {
                prepareTestData(st);
            }
            // Forward-only + read-only + fetch size Integer.MIN_VALUE is the combination
            // described in https://stackoverflow.com/a/2448019/3080094 for reading the
            // result row by row instead of loading it completely.
            try (Statement st = conn.createStatement(java.sql.ResultSet.TYPE_FORWARD_ONLY,
                    java.sql.ResultSet.CONCUR_READ_ONLY)) {
                st.setFetchSize(Integer.MIN_VALUE);
                fetchAndPrintTestData(st);
            }
        }
    }

    /** When {@code true} the test table is dropped and refilled; otherwise existing rows are reused. */
    boolean refreshTestData = true;

    /** Number of rows inserted when {@link #refreshTestData} is {@code true}. */
    int maxRecords = 5_555;

    /**
     * Creates and fills the {@code fetchrecords} table, or only counts its rows when
     * {@link #refreshTestData} is {@code false}.
     *
     * @param st statement used for all DDL, inserts and the count query
     * @throws SQLException when any of the statements fails
     */
    void prepareTestData(Statement st) throws SQLException {
        int recordCount = 0;
        if (refreshTestData) {
            st.execute("drop table if exists fetchrecords");
            st.execute("create table fetchrecords (id mediumint not null auto_increment primary key, created timestamp default current_timestamp)");
            for (int i = 0; i < maxRecords; i++) {
                st.addBatch("insert into fetchrecords () values ()");
                // Flush after every full batch. Testing (i + 1) avoids the premature
                // one-row flush at i == 0 and makes the logged count accurate.
                int inserted = i + 1;
                if (inserted % INSERT_BATCH_SIZE == 0) {
                    st.executeBatch();
                    log.debug("{} records available.", inserted);
                }
            }
            // Send whatever is left over from the last, partially filled batch.
            st.executeBatch();
            recordCount = maxRecords;
        } else {
            try (ResultSet rs = st.executeQuery("select count(*) from fetchrecords")) {
                if (rs.next()) {
                    recordCount = rs.getInt(1);
                }
            }
        }
        log.info("{} records available for testing.", recordCount);
    }

    /** Number of records handed to the print task in one list. */
    int batchSize = 1_000;

    /** Capacity of the print queue, i.e. the number of batches waiting to be printed. */
    int maxBatchesInMem = 3;

    /** Seconds to wait for the print task to finish after reading has ended. */
    int printFinishTimeoutS = 5;

    /**
     * Reads all rows of {@code fetchrecords} and passes them in batches to a
     * {@link PrintToConsole} task running on a separate thread.
     *
     * <p>A timeout or an interruption while enqueueing is logged and stops the print task;
     * it is not rethrown. In every case the method waits up to {@link #printFinishTimeoutS}
     * seconds for the print task to end. If the thread was interrupted while enqueueing,
     * its interrupt status is restored before the method returns.
     *
     * @param st statement used to run the select query
     * @throws SQLException when querying or reading the result set fails
     * @throws InterruptedException when interrupted while waiting for the print task to finish
     */
    void fetchAndPrintTestData(Statement st) throws SQLException, InterruptedException {
        final BlockingQueue<List<FetchRecordBean>> printQueue = new LinkedBlockingQueue<>(maxBatchesInMem);
        final PrintToConsole printTask = new PrintToConsole(printQueue);
        new Thread(printTask, "print-to-console").start();
        // Remembered so the interrupt status can be restored after the final wait below.
        boolean interrupted = false;
        try (ResultSet rs = st.executeQuery("select * from fetchrecords")) {
            List<FetchRecordBean> batch = new ArrayList<>(Math.max(batchSize, 1));
            while (rs.next()) {
                batch.add(toBean(rs));
                if (batch.size() >= batchSize) {
                    /*
                     * The printTask can stop itself when this producer is too slow to put records on the print-queue.
                     * Therefore, also check printTask.isStopping() to break the while-loop.
                     */
                    if (printTask.isStopping()) {
                        throw new TimeoutException("Print task has stopped.");
                    }
                    enqueue(printQueue, batch);
                    batch = new ArrayList<>(Math.max(batchSize, 1));
                }
            }
            if (!batch.isEmpty()) {
                enqueue(printQueue, batch);
            }
        } catch (TimeoutException | InterruptedException e) {
            if (e instanceof InterruptedException) {
                interrupted = true;
            }
            log.error("Unable to finish printing records to console: {}", e.getMessage());
            printTask.stop();
        } finally {
            log.info("Reading records finished.");
            if (!printTask.isStopping()) {
                try {
                    // An empty list tells the print task that no more data will follow.
                    enqueue(printQueue, Collections.<FetchRecordBean> emptyList());
                } catch (Exception e) {
                    if (e instanceof InterruptedException) {
                        interrupted = true;
                    }
                    log.error("Unable to signal last record to print.", e);
                    printTask.stop();
                }
            }
            try {
                if (!printTask.await(printFinishTimeoutS, TimeUnit.SECONDS)) {
                    log.error("Print to console task did not finish.");
                }
            } finally {
                if (interrupted) {
                    // Restore the status that was cleared when InterruptedException was thrown.
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /** Copies the current row of the result set into a new bean; a NULL {@code created} value stays {@code null}. */
    private FetchRecordBean toBean(ResultSet rs) throws SQLException {
        FetchRecordBean bean = new FetchRecordBean();
        bean.setId(rs.getInt("id"));
        Timestamp created = rs.getTimestamp("created");
        bean.setCreated(created == null ? null : new java.util.Date(created.getTime()));
        return bean;
    }

    /** Seconds the producer waits for free space on the print queue. */
    int enqueueTimeoutS = 5;
    // To test a slow printer, see also Thread.sleep statement in PrintToConsole.print.
    // int enqueueTimeoutS = 1;

    /**
     * Puts one batch on the print queue, waiting at most {@link #enqueueTimeoutS} seconds for space.
     *
     * @param printQueue queue read by the print task
     * @param l batch to enqueue; an empty list signals the end of the data
     * @throws InterruptedException when interrupted while waiting for space on the queue
     * @throws TimeoutException when no space became available within the timeout
     */
    void enqueue(BlockingQueue<List<FetchRecordBean>> printQueue, List<FetchRecordBean> l) throws InterruptedException, TimeoutException {
        log.debug("Adding {} records to print-queue.", l.size());
        if (!printQueue.offer(l, enqueueTimeoutS, TimeUnit.SECONDS)) {
            throw new TimeoutException("Unable to put print data on queue within " + enqueueTimeoutS + " seconds.");
        }
    }

    /** Seconds the print task waits for a new batch before giving up. */
    int dequeueTimeoutS = 5;

    /**
     * Consumer task that takes batches from the queue and prints them until it receives an
     * empty list, is stopped, fails, or does not receive data within {@link #dequeueTimeoutS} seconds.
     */
    class PrintToConsole implements Runnable {

        private final BlockingQueue<List<FetchRecordBean>> q;
        /** Released once {@link #run()} has ended, for whatever reason. */
        private final CountDownLatch finishedLock = new CountDownLatch(1);
        /** Set by {@link #stop()} and also by {@link #run()} itself when it ends. */
        private volatile boolean stop;

        public PrintToConsole(BlockingQueue<List<FetchRecordBean>> q) {
            this.q = q;
        }

        @Override
        public void run() {
            try {
                while (!stop) {
                    List<FetchRecordBean> l = q.poll(dequeueTimeoutS, TimeUnit.SECONDS);
                    if (l == null) {
                        log.error("Unable to get print data from queue within {} seconds.", dequeueTimeoutS);
                        break;
                    }
                    if (l.isEmpty()) {
                        // Empty list is the producer's end-of-data signal.
                        break;
                    }
                    print(l);
                }
                if (stop) {
                    log.error("Printing to console was stopped.");
                }
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever owns this thread.
                Thread.currentThread().interrupt();
                log.error("Unable to print records to console.", e);
            } catch (Exception e) {
                log.error("Unable to print records to console.", e);
            } finally {
                if (!stop) {
                    // Mark the task as ended so the producer's isStopping() check sees it.
                    stop = true;
                    log.info("Printing to console finished.");
                }
                finishedLock.countDown();
            }
        }

        /** Handles one batch; in this demo only the batch size is logged. */
        void print(List<FetchRecordBean> l) {
            log.info("Got list with {} records from print-queue.", l.size());
            // To test a slow printer, see also enqueueTimeoutS.
            // try { Thread.sleep(1500L); } catch (Exception ignored) {}
        }

        /** Asks the task to end; it is checked before the next batch is polled. */
        public void stop() {
            stop = true;
        }

        /** Returns {@code true} once the task was asked to stop or has ended by itself. */
        public boolean isStopping() {
            return stop;
        }

        /** Blocks until {@link #run()} has ended. */
        public void await() throws InterruptedException {
            finishedLock.await();
        }

        /**
         * Waits for {@link #run()} to end.
         *
         * @return {@code false} when the timeout elapsed before the task ended
         */
        public boolean await(long timeout, TimeUnit tunit) throws InterruptedException {
            return finishedLock.await(timeout, tunit);
        }
    }

    /** Holds one row of the {@code fetchrecords} table. */
    class FetchRecordBean {

        private int id;
        private java.util.Date created;

        public int getId() {
            return id;
        }

        public void setId(int id) {
            this.id = id;
        }

        public java.util.Date getCreated() {
            return created;
        }

        public void setCreated(java.util.Date created) {
            this.created = created;
        }
    }
}