使用Hibernate/Spring数据批量插入Postgres数据库:60K行需要2分钟,这是不可接受的
我需要使用 Hibernate/Spring Data 将 6 万行数据插入 Java/Spring 应用程序的 Postgres 数据库中。每条记录需要插入两部分数据:(1) 用户(USERS_T),(2) 关联的研究参与者(STUDY_PARTICIPANTS_T)——两者对全部 6 万条记录都要插入。下面的代码可以正常工作,但性能很差:6 万行需要 2 分钟,这是不可接受的。请注意,我先填充 Hibernate 实体,然后按每 1000 条为一批调用 saveAll:
// --- Question snippet: body of a per-subjectId loop (assumes enclosing loop index `i` over subjectIds — TODO confirm) ---
// Build the parent USERS_T entity for this subject.
UsersT user = new UsersT();
user.setUsername(study.getAbbreviation().toUpperCase()+subjectId);
// NOTE(review): magic lookup id 150 — presumably the participant role; confirm against LOOKUP_T data
user.setRoleTypeId(new LookupT(150));
user.setCreatedDate(new Date());
//... (remaining UsersT fields elided in the question)
// Build the child STUDY_PARTICIPANTS_T entity and attach it to the user so it is saved with the parent.
List<StudyParticipantsT> participants = new ArrayList<StudyParticipantsT>();
StudyParticipantsT sp = new StudyParticipantsT();
sp.setStudyT(study);
sp.setUsersT(user);
sp.setSubjectId(subjectId);
sp.setLocked("N"); // "N" presumably means not locked (string flag) — confirm
participants.add(sp);
user.setStudyParticipantsTs(participants);
// Add to Batch-Insert List; if list size ready for batch-insert, or if at the end of all subjectIds, do Batch-Insert saveAll() and clear the list
batchInsertUsers.add(user);
if (batchInsertUsers.size() == 1000 || i == subjectIds.size() - 1) {
// Log this Batch-Insert
if(log.isDebugEnabled()){
log.debug("createParticipantsAccounts() Batch-Insert: Saving " + batchInsertUsers.size() + " records");
}
// saveAll() goes through Hibernate entity-by-entity — this is the slow path the question is about
userDAO.saveAll(batchInsertUsers);
// Reset list
batchInsertUsers.clear();
}
UsersT user = new UsersT();
user.setUsername(study.getAbbreviation().toUpperCase() + subjectId);
user.setRoleTypeId(new LookupT(150));
user.setCreatedDate(new Date());
//...
List<StudyParticipantsT> participants = new ArrayList<StudyParticipantsT>();
StudyParticipantsT sp = new StudyParticipantsT();
sp.setStudyT(study);
sp.setUsersT(user);
sp.setSubjectId(subjectId);
sp.setLocked("N");
participants.add(sp);
user.setStudyParticipantsTs(participants);
// 添加到批量插入列表;如果列表大小已达到批量插入条件,或已处理完所有 subjectId,则执行 saveAll() 批量插入并清空列表
batchInsertUsers.add(user);
if (batchInsertUsers.size() == 1000 || i == subjectIds.size() - 1) {
    // 记录此批量插入
    if (log.isDebugEnabled()) {
        log.debug("createParticipantsAccounts() Batch-Insert: Saving " + batchInsertUsers.size() + " records");
    }
    userDAO.saveAll(batchInsertUsers);
    // 重置列表
    batchInsertUsers.clear();
}
我找到了一个线程,其中有人遇到了相同的问题,他们找到的唯一解决方案是为每个1000块编写一个自定义的本机SQLINSERT(…),(…),(…)
字符串,然后手动运行,完全删除ORM/Hibernate层:
但是我的插入涉及一些连接的表。我可以自己花时间将所有这些实体语句重写为自定义SQL,但这并不简单
还有其他解决办法吗?我正在使用:
- Spring 5.0.2
- Hibernate 5.2.12

(采纳的回答)我们最终通过使用 Spring JDBC 的 jdbcTemplate.batchUpdate(不经过 Hibernate),并提前为所有外键预留一个序列(Sequence)范围来提高性能。
我们没有像上面提到的另一位回答者那样,降低到手工拼接 N 条重复 INSERT 语句的程度;我们仍然使用框架方法(JdbcTemplate),只是不再经过 Hibernate/ORM。这种方法速度很快——虽然不如直接拼接 N 条 INSERT 那么快,但已经可以接受了。
实际的SpringJDBC批插入是通过jdbcTemplate.batchUpdate(sqlInsert,new BatchPreparedStatementSetter(){..}
,我们实际上自己分割了批次--BatchPreparedStatementSetter
不会自动为我们分割任何内容,它只会以预定大小提交特定批次
/**
 * Performs a Native-SQL Batch-Insert of Participant accounts (using JdbcTemplate) to improve performance.
 * Each Participant account requires 2 INSERTs: (1) USERS_T, (2) STUDY_PARTICIPANTS_T (with an FK reference to USERS_T.ID).
 * Since there is no easy way to track the Sequence IDs between the two Batch-Inserts, we reserve the ID range for both tables, and
 * then manually calculate our own IDs for USERS_T and STUDY_PARTICIPANTS_T ourselves.
 * Initially, domain objects are filled out; then they are added to the Batch List that we submit and clear ourselves.
 * (Originally the Batch-Insert was implemented with Hibernate/HQL, but due to slow performance it was nativized with jdbcTemplate.)
 *
 * @param studyId    id of the study the participants belong to
 * @param subjectIds one subject id per account to create; an empty list is a no-op (the loop never runs)
 * @throws Exception propagated from the DAOs / JDBC layer; triggers full rollback
 *
 * NOTE: The entire method is @Transactional and all data will be rolled back in case of any exceptions in this method (rollbackFor=Exception.class).
 * The updated Sequence values (set during reservation) will not be rolled back in this case, but Sequence gaps are normal.
 */
@Override
@Transactional(readOnly = false, rollbackFor = Exception.class)
public void createParticipantsAccounts(long studyId, List<String> subjectIds) throws Exception {
    int maxInsertParticipantsBatchSize = 1000; // flush threshold for the manually-managed batch list
    /*
    We need to insert into 2 tables, USERS_T and STUDY_PARTICIPANTS_T.
    The table STUDY_PARTICIPANTS_T has an FK dependency on USERS_T.ID.
    Since there is no easy way to track the Sequence IDs between the two Batch-Inserts, we reserve the ID range for both tables,
    and then manually calculate our own IDs for USERS_T and STUDY_PARTICIPANTS_T ourselves.
    The Sequences are immediately updated to the calculated final values to reserve the range.
    */
    // 1. Obtain current Sequence values
    Integer currUsersTSeqVal = userDAO.getCurrentUsersTSeqVal();
    Integer currStudyParticipantsTSeqVal = studyParticipantsDAO.getCurrentStudyParticipantsTSeqVal();
    // 2. Immediately update the Sequences to the calculated final value (this reserves the ID range immediately)
    // In Postgres, updating the Sequences is: SELECT setval('users_t_id_seq', :val)
    // NOTE(review): this get-then-set is not atomic — two concurrent callers could reserve overlapping
    // ranges; confirm single-writer usage or move the reservation into one atomic setval/nextval statement.
    userDAO.setCurrentUsersTSeqVal(currUsersTSeqVal + subjectIds.size());
    studyParticipantsDAO.setCurrentStudyParticipantsTSeqVal(currStudyParticipantsTSeqVal + subjectIds.size());
    // List for Batch-Inserts, maintained and submitted by ourselves in accordance with our batch size
    List<UsersT> batchInsertUsers = new ArrayList<UsersT>();
    for (int i = 0; i < subjectIds.size(); i++) {
        String subjectId = subjectIds.get(i);
        // Prepare domain object (UsersT with associated StudyParticipantsT) to be used in the Native-SQL jdbcTemplate batchUpdate
        UsersT user = new UsersT();
        user.setId(currUsersTSeqVal + 1 + i); // Set ID to calculated value
        // NOTE(review): `study` is not declared in this snippet — presumably a field or looked up from studyId; confirm
        user.setUsername(study.getAbbreviation().toUpperCase() + subjectId);
        user.setActiveFlag(true);
        // etc., fill out object, then subobject:
        List<StudyParticipantsT> participants = new ArrayList<StudyParticipantsT>();
        StudyParticipantsT sp = new StudyParticipantsT();
        sp.setId(currStudyParticipantsTSeqVal + 1 + i); // Set ID to calculated value
        // ...etc.
        // BUGFIX: the child row must actually be added to the list; as originally written `participants`
        // stayed empty, and nativeBatchInsertStudyParticipants() would fail on getStudyParticipantsTs().get(0).
        participants.add(sp);
        user.setStudyParticipantsTs(participants);
        // Add to Batch-Insert List of Users
        batchInsertUsers.add(user);
        // If list size ready for Batch-Insert, or if at the end of all subjectIds, perform Batch Insert (both tables) and clear list
        if (batchInsertUsers.size() == maxInsertParticipantsBatchSize || i == subjectIds.size() - 1) {
            // Part 1: Insert batch into USERS_T
            nativeBatchInsertUsers(jdbcTemplate, batchInsertUsers);
            // Part 2: Insert batch into STUDY_PARTICIPANTS_T (FK ids already pre-computed above)
            nativeBatchInsertStudyParticipants(jdbcTemplate, batchInsertUsers);
            // Reset list
            batchInsertUsers.clear();
        }
    }
}
/**
 * 以下方法使用 JdbcTemplate 执行参与者账户的原生 SQL 批量插入,以提高性能。
 * 每个参与者账户需要 2 条 INSERT:(1) USERS_T,(2) STUDY_PARTICIPANTS_T(带有指向 USERS_T.ID 的外键)。
 * 由于没有简单的方法在两次批量插入之间跟踪序列 ID,我们为两个表预留 ID 范围,
 * 然后自己手动计算 USERS_T 和 STUDY_PARTICIPANTS_T 的 ID。
 * 首先填充领域对象,然后将它们加入由我们自行提交和清空的批处理列表。
 * (批量插入最初用 Hibernate/HQL 实现,但由于性能低下,改为 jdbcTemplate 原生实现。)
 *
 * 注意:整个方法是 @Transactional 的,如果方法中出现任何异常,所有数据都将回滚(rollbackFor=Exception.class)。
 * 在这种情况下,预留时更新的序列值不会回滚,但序列出现空洞是正常的。
 */
@Override
@Transactional(readOnly = false, rollbackFor = Exception.class)
public void createParticipantsAccounts(long studyId, List<String> subjectIds) throws Exception {
    int maxInsertParticipantsBatchSize = 1000; // 批大小为 1000
    /*
     我们需要向 USERS_T 和 STUDY_PARTICIPANTS_T 两个表插入数据。
     STUDY_PARTICIPANTS_T 对 USERS_T.ID 有外键依赖。
     由于没有简单的方法在两次批量插入之间跟踪序列 ID,我们为两个表预留 ID 范围,
     然后自己手动计算 USERS_T 和 STUDY_PARTICIPANTS_T 的 ID。
     序列会立即更新为计算出的最终值,以预留该范围。
    */
    // 1. 获取当前序列值
    Integer currUsersTSeqVal = userDAO.getCurrentUsersTSeqVal();
    Integer currStudyParticipantsTSeqVal = studyParticipantsDAO.getCurrentStudyParticipantsTSeqVal();
    // 2. 立即将序列更新为计算出的最终值(这会立即预留该 ID 范围)
    // 在 Postgres 中,更新序列的语句是:SELECT setval('users_t_id_seq', :val)
    userDAO.setCurrentUsersTSeqVal(currUsersTSeqVal + subjectIds.size());
    studyParticipantsDAO.setCurrentStudyParticipantsTSeqVal(currStudyParticipantsTSeqVal + subjectIds.size());
    // 批量插入列表,由我们按批大小自行维护和提交
    List<UsersT> batchInsertUsers = new ArrayList<UsersT>();
    for (int i = 0; i < subjectIds.size(); i++) {
    // …(此译文片段在原网页抓取时被截断,其余部分见上方英文原文)
/**
* Native-SQL Batch-Insert into USERS_T for Participant Upload.
* NOTE: This method is part of its Parent's @Transactional. (Note also that we need "final" on the List param for Inner-Class access to this variable.)
*
* @param jdbcTemplate
* @param batchInsertUsers
*/
private void nativeBatchInsertUsers(JdbcTemplate jdbcTemplate, final List<UsersT> batchInsertUsers) {
    // 20 columns / 20 placeholders; the resulting SQL text is identical to the original statement.
    final String sql =
            "INSERT INTO PUBLIC.USERS_T (id, password, user_name, created_by, created_date, last_changed_by, last_changed_date, "
            + "first_name, last_name, organization, phone, lockout_date, lockout_counter, last_login_date, "
            + "password_last_changed_date, temporary_password, active_flag, uuid, "
            + "role_type_id, ws_account_researcher_id) "
            + "VALUES (?, ?, ?, ?, ?, ?, ?, "
            + "?, ?, ?, ?, ?, ?, ?, "
            + "?, ?, ?, ?, "
            + "?, ?"
            + ") ";
    // One PreparedStatement, re-bound per row; the setter walks the caller-maintained batch list.
    jdbcTemplate.batchUpdate(sql, new BatchPreparedStatementSetter() {
        @Override
        public void setValues(PreparedStatement ps, int i) throws SQLException {
            final UsersT u = batchInsertUsers.get(i);
            ps.setInt(1, u.getId()); // ID (pre-computed by the caller from the reserved sequence range)
            // etc., set PS for each i-th object
        }
        @Override
        public int getBatchSize() {
            // Caller has already sized the list to one batch, so the whole list is submitted.
            return batchInsertUsers.size();
        }
    });
}
/**
* Native-SQL Batch-Insert into STUDY_PARTICIPANTS_T for Participant Upload.
* NOTE: This method is part of its Parent's @Transactional. (Note also that we need "final" on the List param for Inner-Class access to this variable.)
*
* @param jdbcTemplate
* @param batchInsertUsers
*/
private void nativeBatchInsertStudyParticipants(JdbcTemplate jdbcTemplate, final List<UsersT> batchInsertUsers) {
    // 9 columns / 9 placeholders; the resulting SQL text is identical to the original statement.
    final String sql =
            "INSERT INTO PUBLIC.STUDY_PARTICIPANTS_T (id, study_id, subject_id, user_id, locked, "
            + "created_by, created_date, last_changed_by, last_changed_date) "
            + "VALUES (?, ?, ?, ?, ?, "
            + "?, ?, ?, ? "
            + ") ";
    jdbcTemplate.batchUpdate(sql, new BatchPreparedStatementSetter() {
        @Override
        public void setValues(PreparedStatement ps, int i) throws SQLException {
            // Each UsersT in the batch carries exactly one StudyParticipantsT, built by the caller.
            final StudyParticipantsT sp = batchInsertUsers.get(i).getStudyParticipantsTs().get(0);
            ps.setInt(1, sp.getId()); // ID (pre-computed by the caller from the reserved sequence range)
            // etc.
        }
        @Override
        public int getBatchSize() {
            // Caller has already sized the list to one batch, so the whole list is submitted.
            return batchInsertUsers.size();
        }
    });
}