Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/mysql/63.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
MYSQL-将数据拆分为多行_Mysql_Split - Fatal编程技术网

MYSQL-将数据拆分为多行

MYSQL-将数据拆分为多行,mysql,split,Mysql,Split,我使用从IMDB收集信息并将其传输到MYSQL数据库的应用程序导入了一些数据 这些字段似乎没有被规范化,并且在一个字段中包含了许多值 例如: Table Movie MovieID Movie_Title Written_By 1 Movie1 Person1, Person2 2 Movie2 Person3 3

我使用从IMDB收集信息并将其传输到MYSQL数据库的应用程序导入了一些数据

这些字段似乎没有被规范化,并且在一个字段中包含了许多值

例如:

Table Movie
MovieID          Movie_Title           Written_By
1                Movie1                Person1, Person2   
2                Movie2                Person3  
3                Movie3                Person4, Person2, Person6  
有没有办法将这些值分开,并将它们插入到另一个类似这样的表中,并且没有任何重复项

Table Writers
WriterID         Written_By                MovieId      
1                Person1                   1
2                Person2                   1
3                Person3                   2
我在谷歌上搜索了一下,发现我应该用PHP来处理这些数据。 但我对PHP一无所知


是否仅使用MYSQL就可以转换这些数据

MySQL对于这种字符串操作不是特别好。您很可能会发现,通过一种常规编程语言(perl、php、ruby、python等)往返数据要容易得多,这种语言具有更强大的文本搜索功能

在做任何不可逆转的事情之前,您很可能希望查看结果,尤其是在名称中可能嵌入逗号的情况下

Alice,Eve,Bob
很容易按逗号拆分,但是下面这种情况该怎么办?

Alice,Eve,Esquire.,Bob

不幸的是,MySQL中没有字符串拆分函数。这是一个将字符串拆分为多列的解决方案(与您的方案不完全相同)。

您可以使用带游标的存储过程来解决这个问题。这种做法不算优雅,不过用逗号分隔的编剧列表本身也同样不优雅。

在类似的问题中有下面的代码,但是你最好彻底检查一下

希望有帮助:)

示例表

-- Staging table holding the un-normalised import: one row per movie with all
-- of its writers packed into a single comma separated string.
drop table if exists movies_unf;
create table movies_unf
(
movieID int unsigned not null primary key,
movie_title varchar(255) not null,
written_by varchar(1024) not null
)engine=innodb;

-- Sample rows. The column list is spelled out so the insert does not depend
-- on the physical column order of movies_unf.
insert into movies_unf (movieID, movie_title, written_by) values 
(1,'movie1','person1, person2'),
(2,'movie2','person3'),
(3,'movie3','person4, person2, person6'),
(4,'movie4','person4, person4, person1, person2, person1,person8,'), -- dodgy writers: repeats and a trailing comma
(5,'movie1','person1, person2'); -- dodgy movie: same title as movieID 1

-- Normalised target: one row per distinct movie title.
drop table if exists movies;
create table movies
(
movie_id int unsigned not null auto_increment primary key,
title varchar(255) unique not null
)engine=innodb;

-- Normalised target: one row per distinct writer name.
drop table if exists writers;
create table writers
(
writer_id int unsigned not null auto_increment primary key,
name varchar(255) unique not null
)engine=innodb;

-- Junction table linking movies to writers. The composite primary key keeps
-- each (movie, writer) pair unique.
drop table if exists movie_writers;
create table movie_writers
(
movie_id int unsigned not null,
writer_id int unsigned not null,
primary key (movie_id, writer_id)
)engine=innodb;
存储过程

-- normalise_movies_unf
--
-- Reads every row of movies_unf, splits its comma separated written_by value
-- and fills the movies, writers and movie_writers tables. Duplicate titles,
-- duplicate names and duplicate (movie, writer) pairs are skipped through
-- insert ignore. All inserts run in one transaction, after the commit the
-- staging table movies_unf is truncated.
--
-- Takes no parameters and returns no result set.
--
-- Note: insert ignore on a duplicate can still consume an auto_increment
-- value, so movie_id and writer_id may contain gaps. They remain unique.
drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()
begin

declare v_movieID int unsigned default 0; -- fetched because the cursor selects it, not used afterwards
declare v_movie_title varchar(255);
declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;
declare v_writer_id int unsigned default 0;
declare v_name varchar(255);

declare v_csv_len int unsigned default 0;   -- length of v_writers in characters
declare v_csv_idx int unsigned default 0;   -- 1-based start of the current token
declare v_comma_pos int unsigned default 0; -- position of the comma that ends the current token

declare v_done tinyint default 0;
declare v_cursor cursor for 
    select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;
movie_loop: loop
  fetch v_cursor into v_movieID, v_movie_title, v_writers;

  -- Leave straight after the fetch that hits end of data, so that the body
  -- never runs with stale (or, for an empty table, unset) values.
  if v_done then
    leave movie_loop;
  end if;

  set v_movie_title = trim(v_movie_title);

  -- insert the movie
  insert ignore into movies (title) values (v_movie_title);
  -- A scalar subquery is used instead of select ... into so that a lookup
  -- can never fire the cursor's not found handler.
  set v_movie_id = (select movie_id from movies where title = v_movie_title);

  -- split out the writers and insert them
  -- char_length is used because locate and substring count characters,
  -- while length counts bytes and would misalign multibyte names.
  set v_csv_len = char_length(v_writers);
  set v_csv_idx = 1;

  while v_csv_idx <= v_csv_len do
    set v_comma_pos = locate(',', v_writers, v_csv_idx);
    if v_comma_pos = 0 then
      -- no further comma: the token runs to the end of the string
      set v_comma_pos = v_csv_len + 1;
    end if;

    -- Trim only the token edges so that spaces inside a name are preserved.
    set v_name = trim(substring(v_writers, v_csv_idx, v_comma_pos - v_csv_idx));

    -- Advance past the comma using the raw position, not the trimmed length.
    set v_csv_idx = v_comma_pos + 1;

    -- Empty tokens (double or trailing commas) are skipped, parsing continues.
    if char_length(v_name) > 0 then
      insert ignore into writers (name) values (v_name);
      set v_writer_id = (select writer_id from writers where name = v_name);
      insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);
    end if;

  end while;

end loop movie_loop;
close v_cursor;

commit;

-- The staging rows have been migrated, empty the staging table.
truncate table movies_unf;

end#

delimiter ;
编辑

修改存储过程,使其不会跳过键值

-- normalise_movies_unf (variant that avoids gaps in the generated keys)
--
-- Reads every row of movies_unf, splits its comma separated written_by value
-- and fills the movies, writers and movie_writers tables. Before inserting a
-- movie or a writer it checks whether the row already exists, so that no
-- auto_increment value is consumed by an ignored duplicate insert. All
-- inserts run in one transaction, after the commit the staging table
-- movies_unf is truncated.
--
-- Takes no parameters and returns no result set.
drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()
begin

declare v_movieID int unsigned default 0; -- fetched because the cursor selects it, not used afterwards
declare v_movie_title varchar(255);
declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;
declare v_writer_id int unsigned default 0;
declare v_name varchar(255);

declare v_csv_len int unsigned default 0;   -- length of v_writers in characters
declare v_csv_idx int unsigned default 0;   -- 1-based start of the current token
declare v_comma_pos int unsigned default 0; -- position of the comma that ends the current token

declare v_done tinyint default 0;
declare v_cursor cursor for 
    select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;
movie_loop: loop
  fetch v_cursor into v_movieID, v_movie_title, v_writers;

  -- Leave straight after the fetch that hits end of data, so that the body
  -- never runs with stale (or, for an empty table, unset) values.
  if v_done then
    leave movie_loop;
  end if;

  set v_movie_title = trim(v_movie_title);

  -- insert the movie unless a movie with this title is already present
  if not exists (select 1 from movies where title = v_movie_title) then
    insert ignore into movies (title) values (v_movie_title);
  end if;
  -- A scalar subquery is used instead of select ... into so that a lookup
  -- can never fire the cursor's not found handler.
  set v_movie_id = (select movie_id from movies where title = v_movie_title);

  -- split out the writers and insert them
  -- char_length is used because locate and substring count characters,
  -- while length counts bytes and would misalign multibyte names.
  set v_csv_len = char_length(v_writers);
  set v_csv_idx = 1;

  while v_csv_idx <= v_csv_len do
    set v_comma_pos = locate(',', v_writers, v_csv_idx);
    if v_comma_pos = 0 then
      -- no further comma: the token runs to the end of the string
      set v_comma_pos = v_csv_len + 1;
    end if;

    -- Trim only the token edges so that spaces inside a name are preserved.
    set v_name = trim(substring(v_writers, v_csv_idx, v_comma_pos - v_csv_idx));

    -- Advance past the comma using the raw position, not the trimmed length.
    set v_csv_idx = v_comma_pos + 1;

    -- Empty tokens (double or trailing commas) are skipped, parsing continues.
    if char_length(v_name) > 0 then
      if not exists (select 1 from writers where name = v_name) then
        insert ignore into writers (name) values (v_name);
      end if;
      set v_writer_id = (select writer_id from writers where name = v_name);
      insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);
    end if;

  end while;

end loop movie_loop;
close v_cursor;

commit;

-- The staging rows have been migrated, empty the staging table.
truncate table movies_unf;

end#

delimiter ;

您会持续收到这样的数据吗?还是说这只是一次性的操作?另外,从数据源提取的初始数据(在导入MySQL之前)是什么格式?很可能您需要换一种方式来加载MySQL表。也请阅读本文。——感谢您的代码!有一个小问题:writers表中的writer_id不是连续递增的,而是3、4、6、12这样。——这并不重要,它仍然是唯一键。之所以会跳过一些值,是因为我使用了insert ignore,而不是在插入之前先检查该编剧是否已存在。如果您认为这是个问题,改起来很容易。
-- normalise_movies_unf (variant that avoids gaps in the generated keys)
--
-- Reads every row of movies_unf, splits its comma separated written_by value
-- and fills the movies, writers and movie_writers tables. Before inserting a
-- movie or a writer it checks whether the row already exists, so that no
-- auto_increment value is consumed by an ignored duplicate insert. All
-- inserts run in one transaction, after the commit the staging table
-- movies_unf is truncated.
--
-- Takes no parameters and returns no result set.
drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()
begin

declare v_movieID int unsigned default 0; -- fetched because the cursor selects it, not used afterwards
declare v_movie_title varchar(255);
declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;
declare v_writer_id int unsigned default 0;
declare v_name varchar(255);

declare v_csv_len int unsigned default 0;   -- length of v_writers in characters
declare v_csv_idx int unsigned default 0;   -- 1-based start of the current token
declare v_comma_pos int unsigned default 0; -- position of the comma that ends the current token

declare v_done tinyint default 0;
declare v_cursor cursor for 
    select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;
movie_loop: loop
  fetch v_cursor into v_movieID, v_movie_title, v_writers;

  -- Leave straight after the fetch that hits end of data, so that the body
  -- never runs with stale (or, for an empty table, unset) values.
  if v_done then
    leave movie_loop;
  end if;

  set v_movie_title = trim(v_movie_title);

  -- insert the movie unless a movie with this title is already present
  if not exists (select 1 from movies where title = v_movie_title) then
    insert ignore into movies (title) values (v_movie_title);
  end if;
  -- A scalar subquery is used instead of select ... into so that a lookup
  -- can never fire the cursor's not found handler.
  set v_movie_id = (select movie_id from movies where title = v_movie_title);

  -- split out the writers and insert them
  -- char_length is used because locate and substring count characters,
  -- while length counts bytes and would misalign multibyte names.
  set v_csv_len = char_length(v_writers);
  set v_csv_idx = 1;

  while v_csv_idx <= v_csv_len do
    set v_comma_pos = locate(',', v_writers, v_csv_idx);
    if v_comma_pos = 0 then
      -- no further comma: the token runs to the end of the string
      set v_comma_pos = v_csv_len + 1;
    end if;

    -- Trim only the token edges so that spaces inside a name are preserved.
    set v_name = trim(substring(v_writers, v_csv_idx, v_comma_pos - v_csv_idx));

    -- Advance past the comma using the raw position, not the trimmed length.
    set v_csv_idx = v_comma_pos + 1;

    -- Empty tokens (double or trailing commas) are skipped, parsing continues.
    if char_length(v_name) > 0 then
      if not exists (select 1 from writers where name = v_name) then
        insert ignore into writers (name) values (v_name);
      end if;
      set v_writer_id = (select writer_id from writers where name = v_name);
      insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);
    end if;

  end while;

end loop movie_loop;
close v_cursor;

commit;

-- The staging rows have been migrated, empty the staging table.
truncate table movies_unf;

end#

delimiter ;