Php 导入500 000个类别,其中包含左侧和右侧的嵌套集模型。
我有50万个类别,下面是mysql表结构 创建嵌套的表格类别( 类别id INT自动增量主键, 名称VARCHAR(20)不为空, lft INT不为空, rgt INT不为空 ); 下面是它是如何左右移动的 我从另一个表中获取行数据,该表的结构如下 如果不存在,则创建表格Php 导入500 000个类别,其中包含左侧和右侧的嵌套集模型。,php,mysql,performance,Php,Mysql,Performance,我有50万个类别,下面是mysql表结构 创建嵌套的表格类别( 类别id INT自动增量主键, 名称VARCHAR(20)不为空, lft INT不为空, rgt INT不为空 ); 下面是它是如何左右移动的 我从另一个表中获取行数据,该表的结构如下 如果不存在,则创建表格tbl\u类别( idint(111)不为空, category\u idint(111)不为空, category\u路径varchar(255)不为空, category\u namevarchar(255)不为空, st
tbl\u类别
(
id
int(111)不为空,
category\u id
int(111)不为空,
category\u路径
varchar(255)不为空,
category\u name
varchar(255)不为空,
status
int(1)不为空,
tdate
datetime不为空
)ENGINE=MyISAM自动增量=498013默认字符集=latin1
有这样的数据吗
id category_id category_path category_name status tdate
1 381773 Top/Arts Arts 0 2014-10-07 11:11:05
2 423945 Top/Arts/Animation Animation 0 2014-10-07 11:11:21
3 425085 Top/Arts/Animation/Anime Anime 0 2014-10-07 11:11:34
4 425147 Top/Arts/Animation/Anime/Characters Characters 0 2014-10-07 11:11:35
我有下面的php脚本来插入数据
echo $sql = "Select * from tbl_categories where status='1' and`category_path` LIKE 'Top/Arts%' order by id limit 0,5";
$resCategory = select($sql);
for($k=0;$k<count($resCategory);$k++){
$rec_id =$resCategory[$k]["id"];
$category_id =$resCategory[$k]["category_id"];
$category_path =$resCategory[$k]["category_path"];
$category_name =$resCategory[$k]["category_name"];
$friendly_url = getAlias($category_name);
//exit;
$cate_path_arr = explode("/",$category_path);
$parent_full_friendly_url = "";
$pad="";
for($i=1;$i<count($cate_path_arr)-1;$i++){
$parent_full_friendly_url .= $pad.getAlias($cate_path_arr[$i]);
$pad="/";
}
$full_friendly_url = $parent_full_friendly_url.$pad.$friendly_url;
if($cate_path_arr[count($cate_path_arr)-2] == "Top"){
$parent_category_id = 0;
}else{
$parentCatName = $cate_path_arr[count($cate_path_arr)-2] ;
$sql = "Select id,root_id from nested_category where title = \"".$parentCatName."\" and full_friendly_url=\"".$parent_full_friendly_url."\"";
$parentCat = select($sql);
$parent_category_id = $parentCat[0]["id"];
$root_id = $parentCat[0]["root_id"];
}
$sql = "INSERT INTO nested_category SET ";
$sql .= "root_id = \"".$root_id."\", ";
$sql .= "title = \"".$category_name."\", ";
$sql .= "category_id = \"".$parent_category_id."\", ";
$sql .= "page_title = \"".$category_name."\", ";
$sql .= "friendly_url = \"".$friendly_url."\", ";
$sql .= "full_friendly_url = \"".$full_friendly_url."\", ";
$sql .= "featured = \"y\", ";
$sql .= "enabled = \"y\" ";
$db->query($sql);
$lastInsId = mysql_insert_id();
$root_id = findRootCategoryId($lastInsId);
if($parent_category_id > 0){
$sql = "UPDATE nested_category SET count_sub = count_sub+1 where id=\"".$root_id."\"";
$db->query($sql);
}else{
$sql = "UPDATE nested_category SET root_id = \"".$lastInsId."\" where id=\"".$lastInsId."\"";
$db->query($sql);
}
rebuildCategoryTree($root_id, 1,$lastInsId);
$sql = "UPDATE tbl_categories SET status = '0',tdate=\"".date("Y-m-d h:i:s")."\" where id=\"".$rec_id."\"";
$db->query($sql);
}
function getAlias($catTitle){
$alias = preg_replace("/(\s){2,}/",'$1',$catTitle);
$alias = str_replace('-', '_', $alias);
$alias = preg_replace(array('/\s+/','/[^A-Za-z0-9\-]/'), array('-','-'), $alias);
$page_alias = trim(strtolower($alias));
return $page_alias;
}
function findRootCategoryId($category_id) {
global $db,$dbSelect;
$category_id = str_replace("'","",$category_id);
while($category_id != 0) {
$sql = "SELECT category_id, id FROM nested_category WHERE id = $category_id";
$row = select($sql);
$category_id = $row[0]["category_id"];
$root_category_id = $row[0]["id"];
}
return $root_category_id;
}
function rebuildCategoryTree($category_id, $node_left,$thisId) {
global $db,$dbSelect;
if (($category_id > 0) or ($thisId > 0)) {
$category_id = ($category_id>0)?$category_id:$thisId;
$node_left = ($node_left>0)?$node_left:1;
$root_category_id = findRootCategoryId($category_id);
$sql = 'UPDATE nested_category SET root_id = '.$root_category_id.' WHERE id='.$category_id;
$db->query($sql);
$node_right = $node_left+1;
$sql = 'SELECT id FROM nested_category WHERE category_id= '.$category_id;
$resParentCat = select($sql);
for ($i=0;$i<count($resParentCat);$i++) {
$node_right = rebuildCategoryTree($resParentCat[$i]['id'], $node_right,$thisId);
}
$sql = 'UPDATE nested_category SET `left` = '.$node_left.', `right` = '.$node_right.', root_id = '.$root_category_id.' WHERE id = '.$category_id;
$db->query($sql);
return $node_right+1;
}
}
?>
echo$sql=“从tbl_类别中选择*,其中status='1'和'category_path'类似于'Top/Arts%'按id限制排序0,5”;
$resCategory=select($sql);
对于($k=0;$k 0){
$sql=“UPDATE nested\u category SET count\u sub=count\u sub+1,其中id=\”.$root\u id.\”;
$db->query($sql);
}否则{
$sql=“UPDATE nested\u category SET root\u id=\”.$lastInside.\“其中id=\”.$lastInside.\”;
$db->query($sql);
}
重建分类树($root\u id,1,$lastInside);
$sql=“更新tbl\U类别集状态='0',tdate=\”.date(“Y-m-d h:i:s”)。“\”其中id=\”。$rec\U id。“\”;
$db->query($sql);
}
函数getAlias($catTitle){
$alias=preg_replace(“/(\s){2,}/”、“$1”、$catTitle);
$alias=str_replace('-','',$alias);
$alias=preg_replace(数组('/\s+/','/[^A-Za-z0-9\-]/')、数组('-','-')、$alias);
$page_alias=trim(strtolower($alias));
返回$page\u别名;
}
函数findRootCategoryId($category\u id){
全球$db,$dbSelect;
$category\u id=str\u replace(“,”,$category\u id);
而($category_id!=0){
$sql=“从嵌套的\u类别中选择类别\u id,其中id=$category\u id”;
$row=select($sql);
$category_id=$row[0][“category_id”];
$root_category_id=$row[0][“id”];
}
返回$root\u category\u id;
}
函数重建类别树($category\u id,$node\u left,$thisId){
全球$db,$dbSelect;
如果($category_id>0)或($thisId>0)){
$category\u id=($category\u id>0)?$category\u id:$thisId;
$node_left=($node_left>0)?$node_left:1;
$root\u category\u id=findRootCategoryId($category\u id);
$sql='UPDATE nested_category SET root_id='.$root_category_id.'WHERE id='.$category_id;
$db->query($sql);
$node_right=$node_left+1;
$sql='从嵌套的\u类别中选择id,其中类别\u id='。$category\u id;
$resParentCat=select($sql);
对于($i=0;$iquery($sql);
返回$node_right+1;
}
}
?>
我的问题是,是否有任何方法可以加快数据插入速度,因为通过计算数据的左右两侧来插入数据需要花费太多时间
我知道这是我第一次发布这个问题,我需要你们从我这边给你们所有的细节。请让我知道你们的宝贵意见
提前感谢。不要让任何人拉你的链。对于相对稳定的数据,嵌套集远远优于邻接列表。下面是我用来将800k+行AL转换为NS的脚本。它通常在6-8分钟内运行。它最初是用t-SQL编写的,我做了一些更改以使其工作(或多或少)使用您所展示的模式。它需要在MySql中运行。在任何情况下,都不需要函数或存储过程。您需要的所有内容都可以直接从输入表中提取 我对您的设计做了两个更改。首先,您的示例数据看起来有一个非常好的标识值,因此我使用了该值而不是标识字段。其次,我添加了SetID列以维护每个集合。因此,完成的数据将如下所示:
id SetID Name lft rgt
381773 Arts Arts 0 11
423945 Arts Animation 1 6
425085 Arts Anime 2 5
425147 Arts Character 3 4
132456 Arts 2D 7 10
123457 Arts Painting 8 9
381773 Crafts Crafts 0 7
423945 Crafts Clay 1 6
425085 Crafts Structures 2 3
425147 Crafts Pottery 4 5
整个表不应该定义一棵树,而应该定义一个树林(我称它们为“集合”,与“嵌套集合”术语保持一致)边界编号在每个集合中都是唯一的。每个集合的每个子集合都有用于分组的集合标识符。边界编号在每个集合的开头重新开始,因此每个集合根的左边界值将为零
代码如下。记住,这更多的是一个模板,而不是最终代码。提出问题
CREATE TABLE IF NOT EXISTS nested_category(
ID INT not null PRIMARY KEY,
SetID varchar( 20 ) not null,
Name VARCHAR( 20 ) NOT NULL,
lft INT NOT NULL,
rgt INT NOT NULL
);
-- This was originally written in T-SQL so some syntax modification will
-- be in order. Sorry.
declare
@CatIDNew int,
@CatIDPrev int = 0,
@rgt int,
@SetID varchar( 20 ),
@NameNew varchar( 20 ),
@LevelNew int,
@LevelPrev int = 0;
declare
@Defs table(
Lev int,
ID int,
Name varchar( 20 ),
lft int
);
declare SS cursor fast_forward for
select Category_ID, Category_Name, Length( Path ) - Length( Replace( Path, '/', '' )) as Category_Level
from tbl_categories
--where Category_Path like 'Top/Arts%' ) -- Uncomment to test a smaller sample
order by Category_Path;
open ss;
fetch next from ss into @CatIDNew, @NameNew, @LevelNew;
-- Go ahead and store the first record of the table
insert into @Defs values( @LevelNew, @CatIDNew, @NameNew, 0 );
select @LevelPrev = @LevelNew,
@CatIDPrev = @CatIDNew,
@SetID = @NameNew,
@rgt = 1;
-- Get the second row
fetch next from ss into @CatIDNew, @NameNew, @LevelNew;
-- Now we enter the row with the first entry in the table variable. We're using this table as a stack.
-- As we read entries from the cursor, we push them "onto" the stack until we read an entry the same
-- or smaller level than the previous entry.
while @@FETCH_STATUS = 0 begin
if @LevelNew = 1 begin
-- Handle the special case when transitioning between sets
insert into nested_category( ID, SetID, Name, lft, rgt )
select ID, @SetID, Name, lft, @rgt +( @LevelPrev - Lev )
from @Defs
where Lev <= @LevelPrev
order by Lev desc;
-- Clear the stack
delete @Defs;
insert @Defs values( @LevelNew, @CatIDNew, @NameNew, 0 );
select @LevelPrev = @LevelNew,
@CatIDPrev = @CatIDNew,
@SetID = @NameNew,
@rgt = 1;
end
else if @LevelNew > @LevelPrev begin
-- This new standard is the next level down from the last one. Save the last one.
Merge into @Defs d
using (select @LevelNew) as Search( Lev )
on d.Lev = Search.Lev
when matched then
update set ID = @CatIDNew,
Name = @NameNew,
lft = @rgt
when not matched then
insert values( @LevelNew, @CatIDNew, @NameNew, @rgt );
select @LevelPrev = @LevelNew,
@rgt = @rgt + 1;
end
else begin
-- This is on the same or higher level as the last one. We can write out the last one to the new level.
insert into nested_category( ID, SetID, Name, lft, rgt )
select ID, @SetID, Name, lft, @rgt +( @LevelPrev - Lev )
from @Defs
where Lev between @LevelNew and @LevelPrev;
-- Then insert/replace the new one (in case the next one is lower)
select @LevelPrev = @LevelNew,
@CatIDPrev = @CatIDNew,
@rgt = @rgt + @@ROWCOUNT;
Merge into @Defs d
using (select @LevelNew) as Search( Lev )
on d.Lev = Search.Lev
when matched then
update set ID = @NameNew,
Name = @NameNew,
lft = @rgt
when not matched then
insert values( @LevelNew, @CatIDNew, @NameNew, @rgt );
set @rgt = @rgt + 1;
end;
-- Read in the next row
fetch next from ss into @CatIDNew, @NameNew, @LevelNew;
end;
-- There are no more entries. Purge any remaining in the stack.
insert into nested_category( ID, SetID, Name, lft, rgt )
select ID, @SetID, Name, lft, @rgt +( @LevelPrev - Lev )
from @Defs
where Lev <= @LevelPrev;
close ss;
deallocate ss;
-- Saving these definitions until after the table is loaded saves a lot of time.
alter table nested_category add
constraint PK_nested_category primary key clustered( ID ),
constraint IX_nested_category_lft unique( SetID, lft ),
constraint CK_nested_categoryBounds check( lft < rgt );
alter table nested_category add
constraint FK_nested_categoryOrgID foreign key( SetID )references StandardOrgs( SetID ),
constraint FK_nested_categoryGUID foreign key( ID )references StandardGuidXref ( EDID ),
constraint FK_nested_categorySubject foreign key( SubjectID )references StandardSubjects ( ID ),
constraint FK_nested_categoryLevel foreign key( SetID, LevelID )references StandardLevels ( SetID, LevelID );
create index IX_nested_categoryBounds on nested_category ( SetID, lft, rgt ); -- oft-used covering index
创建表(如果不存在嵌套的\u类别)(
ID INT非空主键,
SetID varchar(20)不为空,
名称VARCHAR(20)不为空,
lft INT不为空,
rgt INT不为空
);
--这最初是用T-SQL编写的,因此需要进行一些语法修改
--一切正常,对不起。
声明
@CatIDNew int,
@CatIDPrev int=0,
@rgt int,
@SetID varchar(20),
@新瓦查尔(20),
@LevelNew int,
@LevelPrev int=0;
声明
@Defs表(
列文,
ID int,
名称varchar(20),
lft int
);
将SS游标快进声明为
选择类别ID、类别名称、长度(路径)-长度(替换(路径“/”、”)作为类别级别
来自tbl_类别
--其中Category_Path(如“Top/Arts%”)——取消注释以测试较小的样本
按类别和路径排序;
开放式ss;
从ss获取下一个到@CatIDNew、@NameNew、@LevelNew;
--继续并存储表的第一条记录
插入@Defs值(@LevelNew、@CatIDNew、@NameNew、0);
选择@LevelPrev=@LevelNew,
@CatIDPrev=@CatIDNew,
@SetID=@NameNew,
@rgt=1;
--坐第二排
从ss int获取下一个