Unable to delete duplicate values from a MySQL table


I have a table ship_details that has no constraints. The data comes from a data feed, and the table's original designer assumed the incoming data contained no duplicates. Now I have to remove the duplicate entries. The table currently has 994,184 rows. The table definition is:

CREATE TABLE `ship_details` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `order_number` varchar(150) DEFAULT NULL,
  `delivery_id` varchar(150) DEFAULT NULL,
  `transaction_type` varchar(150) DEFAULT NULL,
  `pick_date` varchar(150) DEFAULT NULL,
  `pn_note_number` varchar(150) DEFAULT NULL,
  `item_id` varchar(150) DEFAULT NULL,
  `item_code` varchar(150) DEFAULT NULL,
  `picked_quantity` varchar(150) DEFAULT NULL,
  `lot_number` varchar(150) DEFAULT NULL,
  `lot_expiry` varchar(150) DEFAULT NULL,
  `name` varchar(150) DEFAULT NULL,
  `delivered_date` varchar(150) DEFAULT NULL,
  `extra_attrib1` varchar(150) DEFAULT NULL,
  `extra_attrib2` varchar(150) DEFAULT NULL,
  `extra_attrib3` varchar(150) DEFAULT NULL,
  `extra_attrib4` varchar(150) DEFAULT NULL,
  `extra_attrib5` varchar(150) DEFAULT NULL,
  `extra_attrib6` varchar(150) DEFAULT NULL,
  `extra_attrib7` varchar(150) DEFAULT NULL,
  `extra_attrib8` varchar(150) DEFAULT NULL,
  `extra_attrib9` varchar(150) DEFAULT NULL,
  `extra_attrib10` varchar(150) DEFAULT NULL,
  `last_updated` varchar(100) DEFAULT NULL,
  `outbound_id` varchar(100) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=994222 DEFAULT CHARSET=latin1;
I tried to delete the duplicate entries with the following script:

delete s1 
from ship_details s1 
inner join ship_details s2 
where  s1.id < s2.id 
and s2.order_number = s1.order_number
and s2.delivery_id = s1.delivery_id 
and s2.item_code = s1.item_code 
and s2.lot_number = s1.lot_number 
and s2.picked_quantity = s1.picked_quantity;
But this command did not give me unique results; the result was 154,948 rows. Please see this:

INSERT INTO clean_ship_details (order_number,delivery_id,transaction_type,pick_date,pn_note_number,item_id,item_code,picked_quantity,lot_number,lot_expiry,name,delivered_date,extra_attrib10,last_updated,outbound_id) VALUES
     ('181020373','10068965','Shipped','2018-11-11T15:50:48.000+04:00','PN176348','516169','VCH128','73','C34142','2021-02-28T00:00:00.000+04:00','DVT-6410','2019-06-18T15:48:12.000+04:00','','2019-06-18T15:54:40.000+04:00','51616973_73_'),
     ('181020373','10068965','Shipped','2018-11-11T15:50:48.000+04:00','PN176348','516169','VCH128','73','C34142','2021-02-28T00:00:00.000+04:00','DVT-6410','2019-06-18T15:48:12.000+04:00','','2019-06-18T15:54:40.000+04:00','58719373_73_'),
     ('181020373','10068965','Shipped','2018-11-11T15:50:48.000+04:00','PN176348','516170','VCH120','12','K33471/A','2020-10-31T00:00:00.000+04:00','DVT-6410','2019-06-18T15:48:12.000+04:00','','2019-06-18T15:54:40.000+04:00','51617012_12_'),
     ('181020373','10068965','Shipped','2019-06-19T12:22:39.000+04:00','PN239867','587193','VCH128','2','E34284','2021-04-30T00:00:00.000+04:00','DVT-6410','2019-06-18T15:48:12.000+04:00','','2019-06-18T15:54:40.000+04:00','5161692_2_'),
     ('181020373','10068965','Shipped','2019-06-19T12:22:39.000+04:00','PN239867','587193','VCH128','2','E34284','2021-04-30T00:00:00.000+04:00','DVT-6410','2019-06-18T15:48:12.000+04:00','','2019-06-18T15:54:40.000+04:00','5871932_2_'),
     ('191002479','10091039','Shipped','2019-02-12T07:50:55.000+04:00','PN186154','544495','VTP048','170','205809','2020-07-31T00:00:00.000+04:00','DVT-6479','2019-07-11T07:30:38.000+04:00','','2019-07-11T09:31:22.000+04:00','544495170_170_'),
     ('191002479','10091039','Shipped','2019-02-12T07:50:55.000+04:00','PN186154','544495','VTP048','170','205809','2020-07-31T00:00:00.000+04:00','DVT-6479','2019-07-11T07:30:38.000+04:00','','2019-07-11T09:31:22.000+04:00','594447170_170_'),
     ('191002479','10091039','Shipped','2019-07-18T07:45:49.000+04:00','PN249274','594447','VTP048','11','208744','2021-01-31T00:00:00.000+04:00','DVT-6479','2019-07-11T07:30:38.000+04:00','','2019-07-11T09:31:22.000+04:00','54449511_11_'),
     ('191002479','10091039','Shipped','2019-07-18T07:45:49.000+04:00','PN249274','594447','VTP048','11','208744','2021-01-31T00:00:00.000+04:00','DVT-6479','2019-07-11T07:30:38.000+04:00','','2019-07-11T09:31:22.000+04:00','59444711_11_'),
     ('191006312','10188037','Shipped','2019-03-31T12:17:39.000+04:00','PN201490','560373','VTP048','26','207783','2020-12-31T00:00:00.000+04:00','DVT-6694','2019-10-08T07:08:45.000+04:00','','2019-10-08T07:11:44.000+04:00','56037326_26_');
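
For diagnosis, a grouping query along these lines (a sketch, run against the original ship_details) shows which rows still collide on the five columns the DELETE compared. Note that GROUP BY treats NULLs as equal, whereas the = comparisons in the DELETE never match two NULLs; since every column in this table is nullable, rows with a NULL in any compared column survive that DELETE:

-- Sketch: list the five-column groups that still contain duplicates.
SELECT order_number, delivery_id, item_code, lot_number,
       picked_quantity, COUNT(*) AS copies
FROM ship_details
GROUP BY order_number, delivery_id, item_code, lot_number, picked_quantity
HAVING COUNT(*) > 1
ORDER BY copies DESC;

Note also that the first two sample rows above differ in outbound_id, so any approach that compares every column, such as SELECT DISTINCT over the full column list, will keep both.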
I am unable to insert this into a new table.

Update: I tried to insert with a script, but it did not work; the lock wait timeout was exceeded even for a single record:

INSERT IGNORE INTO clean_ship_details
    (order_number, delivery_id, transaction_type, pick_date, pn_note_number,
     item_id, item_code, picked_quantity, lot_number, lot_expiry, name,
     delivered_date, last_updated, outbound_id)
SELECT order_number, delivery_id, transaction_type, pick_date, pn_note_number,
       item_id, item_code, picked_quantity, lot_number, lot_expiry, name,
       delivered_date, last_updated, outbound_id
FROM ship_details
ORDER BY order_number, delivery_id, item_id
LIMIT 10;
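
Two things commonly contribute to that timeout (neither is confirmed from this thread): the per-session innodb_lock_wait_timeout, which defaults to 50 seconds, and the ORDER BY, which forces the server to sort all ~994k rows before the first row is written. A minimal sketch, assuming no other long-running transaction is holding the locks:

-- Sketch: inspect and raise the session lock wait timeout
-- before retrying the bulk insert.
SHOW VARIABLES LIKE 'innodb_lock_wait_timeout';
SET SESSION innodb_lock_wait_timeout = 300;

Unless it matters which copy of a duplicate gets inserted first, the ORDER BY can simply be dropped.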

Your second approach does eliminate the duplicates if, as you say, order_number, delivery_id, item_code and picked_quantity form the unique constraint. You also do not need DISTINCT, because the unique key will detect the duplicates, and you can use INSERT IGNORE to skip the errors.
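
A sketch of that second approach, assuming the four-column unique key claimed above (the key name uk_ship is made up for illustration, and clean_ship_details is assumed not to exist yet):

-- Sketch: build the clean table with the same layout, add the
-- unique key, then let INSERT IGNORE drop the duplicates silently.
CREATE TABLE clean_ship_details LIKE ship_details;

ALTER TABLE clean_ship_details
  ADD UNIQUE KEY uk_ship (order_number, delivery_id, item_code, picked_quantity);

INSERT IGNORE INTO clean_ship_details
    (order_number, delivery_id, transaction_type, pick_date, pn_note_number,
     item_id, item_code, picked_quantity, lot_number, lot_expiry, name,
     delivered_date, last_updated, outbound_id)
SELECT order_number, delivery_id, transaction_type, pick_date, pn_note_number,
       item_id, item_code, picked_quantity, lot_number, lot_expiry, name,
       delivered_date, last_updated, outbound_id
FROM ship_details;

One caveat: MySQL allows any number of NULLs under a unique key, so rows that are NULL in one of the key columns will not be treated as duplicates of each other.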

Using the sample data above, this works.


It is usually faster to create a new table containing only the data you want to keep, then drop the old table and rename (and re-index) the new one.

It works on all the selected columns. It is hard to see what is creating the duplicates: the first query compares far fewer columns in its WHERE clause than the second query selects in its column list. Also, your first query has no ON in its join; that may be what causes the timeout.

@Strawberry I understand that, but you can see the first two rows are identical in every column. Am I missing something?

@SubwenduMahanta What is the point of your last comment?

I have already exported the current ship_details data to a SQL script and run it by hand, inserting in chunks of 10 rows at a time, but that is far too much work. If I try to insert with a script, I get the lock wait timeout exceeded error (the same INSERT IGNORE shown above).
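
On the lock wait timeout from that last comment: a common workaround (a sketch, not something proposed in the thread; the 10,000-row step is an arbitrary choice) is to copy in primary-key ranges, so each statement touches a bounded slice and finishes quickly:

-- Sketch: copy one id range per statement so locks are held briefly;
-- repeat with the next range until the table's maximum id is passed.
INSERT IGNORE INTO clean_ship_details
    (order_number, delivery_id, transaction_type, pick_date, pn_note_number,
     item_id, item_code, picked_quantity, lot_number, lot_expiry, name,
     delivered_date, last_updated, outbound_id)
SELECT order_number, delivery_id, transaction_type, pick_date, pn_note_number,
       item_id, item_code, picked_quantity, lot_number, lot_expiry, name,
       delivered_date, last_updated, outbound_id
FROM ship_details
WHERE id >= 1 AND id < 10001;  -- next pass: 10001 to 20001, and so on

A small driver script, or a stored procedure with a loop, can advance the range automatically.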