Sql server 为每个父项选择前1条记录时,子查询太慢
我正在尝试使用子查询方法查找所选日期内的最后一条记录。问题是查询太慢。我想知道是否有人对如何重写此查询以提高性能有任何想法。我的服务器因此而死亡。 为了便于测试,我创建了一个表变量来生成用于测试的假数据。要测试此脚本,请运行usp_ExtractData'400000' 我关心的是B部分 对于400000*3=1200000条记录,我的结果是18秒。在真实的数据库上,我每晚对它进行索引和重新索引Sql server 为每个父项选择前1条记录时,子查询太慢,sql-server,performance,sql-server-2008-r2,database-performance,Sql Server,Performance,Sql Server 2008 R2,Database Performance,我正在尝试使用子查询方法查找所选日期内的最后一条记录。问题是查询太慢。我想知道是否有人对如何重写此查询以提高性能有任何想法。我的服务器因此而死亡。 为了便于测试,我创建了一个表变量来生成用于测试的假数据。要测试此脚本,请运行usp_ExtractData'400000' 我关心的是B部分 对于400000*3=1200000条记录,我的结果是18秒。在真实的数据库上,我每晚对它进行索引和重新索引 --Store proceedure with table variable data ALTER
--Store proceedure with table variable data
ALTER PROCEDURE [dbo].[usp_ExtractData](
@TotalRecord int--Create random records for each product
)
AS
BEGIN
--MS SQL 2008
SET NOCOUNT ON;
--SECTION 1--Create test data--- GO TO SECTION 2
--Create Variable table to Products fake data
DECLARE @Product TABLE
(
ProductID int primary key not null
,SKU varchar(100) not null
)
--Insert couple records into @Product table
INSERT INTO @Product(ProductID, SKU) VALUES (100,'CUP100')
INSERT INTO @Product(ProductID, SKU) VALUES (101,'CUP101')
INSERT INTO @Product(ProductID, SKU) VALUES (102,'MUG101')
--Create Variable table to hold Products History data
DECLARE @History TABLE
(
ID int identity not null
,ProductID int not null
,VisitedDatetime datetime not null
)
--Generate random record for testing
WHILE @TotalRecord>0
BEGIN
INSERT INTO @History( ProductID, VisitedDatetime) VALUES (100,DATEADD(minute,rand()*100,GETDATE()))
INSERT INTO @History( ProductID, VisitedDatetime) VALUES (101,DATEADD(minute,rand()*100,GETDATE()))
INSERT INTO @History( ProductID, VisitedDatetime) VALUES (102,DATEADD(minute,rand()*100,GETDATE()))
set @TotalRecord=@TotalRecord-1
END
--SECTION 1--Finised creating test data
---SECTION B
--SELECTION B1- SEE DATA
SELECT * FROM @History ORDER BY ProductID, VisitedDatetime DESC
--Run query to find the last visit per each ProductID
--THIS IS TOO SLOW
DECLARE @TestPerformanceDatetime datetime--Test performance
SET @TestPerformanceDatetime= GETDATE()
SELECT *, (select top(1) VisitedDatetime FROM @History as t2 WHERE t2.ProductID=ProductID and VisitedDatetime BETWEEN GETDATE() AND GETDATE()+10 ORDER BY VisitedDatetime DESC) as LastVistiDate
FROM @Product
--Display the performance
SELECT DATEDIFF(SECOND, @TestPerformanceDatetime,getdate()) AS TotalSeconds
---SECTION B - End
END
我用这个查询的原始版本得到了0秒,所以我将随机测试记录的数量从400000增加到4000000
CREATE PROCEDURE [dbo].[usp_ExtractData_test](
@TotalRecord int--Create random records for each product
)
AS
BEGIN
--MS SQL 2008
SET NOCOUNT ON;
--SECTION 1--Create test data--- GO TO SECTION 2
--Create Variable table to Products fake data
DECLARE @Product TABLE
(
ProductID int primary key not null
,SKU varchar(100) not null
)
--Insert couple records into @Product table
INSERT INTO @Product(ProductID, SKU) VALUES (100,'CUP100')
INSERT INTO @Product(ProductID, SKU) VALUES (101,'CUP101')
INSERT INTO @Product(ProductID, SKU) VALUES (102,'MUG101')
--Create Variable table to hold Products History data
DECLARE @History TABLE
(
ID int identity not null
,ProductID int not null
,VisitedDatetime datetime not null
)
--Generate random record for testing
WHILE @TotalRecord>0
BEGIN
INSERT INTO @History( ProductID, VisitedDatetime) VALUES (100,DATEADD(minute,rand()*100,GETDATE()))
INSERT INTO @History( ProductID, VisitedDatetime) VALUES (101,DATEADD(minute,rand()*100,GETDATE()))
INSERT INTO @History( ProductID, VisitedDatetime) VALUES (102,DATEADD(minute,rand()*100,GETDATE()))
set @TotalRecord=@TotalRecord-1
END
--SECTION 1--Finised creating test data
---SECTION B
--Run query to find the last visit per each ProductID
--THIS IS TOO SLOW
DECLARE @TestPerformanceDatetime datetime--Test performance
SET @TestPerformanceDatetime= GETDATE()
SELECT P.*, LastVisitDate.VisitedDatetime
FROM @Product P
LEFT
JOIN (select top(1) T2.VisitedDatetime FROM @History as t2
ORDER BY T2.VisitedDatetime DESC) as LastVisitDate
ON LastVisitDate.VisitedDatetime BETWEEN GETDATE() AND GETDATE()+10
--Display the performance
SELECT DATEDIFF(SECOND, @TestPerformanceDatetime,getdate()) AS TotalSeconds
---SECTION B - End
END
在我的笔记本上,我看到大约102346毫秒用于生成历史记录,5120毫秒用于第一次搜索,643毫秒用于第二次搜索。奥托,它在沸腾Rosetta@Home同时全力以赴
declare @HistoryRecordsPerProduct int = 400000
set nocount on
-- drop table #Product
-- drop table #History
-- Create the test tables.
create table #Product
( ProductId Int primary key not null, SKU VarChar(100) not null )
insert into #Product ( ProductId, SKU ) values
( 100, 'CUP100' ), ( 101, 'CUP101' ), ( 102, 'MUG102' )
create table #History
( Id Int identity not null, ProductId Int not null, VisitedDatetime DateTime not null )
-- EDIT: Note the following index on both columns.
create index History_Product_VisitedDateTime on #History ( ProductId, VisitedDateTime desc )
-- Populate the history table.
declare @Start as DateTime = GetDate()
while @HistoryRecordsPerProduct > 0
begin
insert into #History ( ProductId, VisitedDatetime ) values ( 100, DateAdd( minute, rand() * 100, GetDate() ) )
insert into #History ( ProductId, VisitedDatetime ) values ( 101, DateAdd( minute, rand() * 100, GetDate() ) )
insert into #History ( ProductId, VisitedDatetime ) values ( 102, DateAdd( minute, rand() * 100, GetDate() ) )
set @HistoryRecordsPerProduct = @HistoryRecordsPerProduct - 1
end
select DateDiff( ms, @Start, GetDate() ) as 'Elapsed History Generation (ms)'
-- Query the data.
set @Start = GetDate()
declare @End as DateTime = @Start + 10 -- Days.
select @Start as [Start], @End as [End]
select ProductId, SKU,
( select Max( VisitedDateTime ) from #History where ProductId = #Product.ProductId and
@Start <= VisitedDatetime and VisitedDatetime <= @End ) as VDT
from #Product
select DateDiff( ms, @Start, GetDate() ) as 'Elapsed Search (ms)'
-- And again with the data cached.
set @Start = GetDate()
set @End = @Start + 10 -- Days.
select @Start as [Start], @End as [End]
select ProductId, SKU,
( select Max( VisitedDateTime ) from #History where ProductId = #Product.ProductId and
@Start <= VisitedDatetime and VisitedDatetime <= @End ) as VDT
from #Product
select DateDiff( ms, @Start, GetDate() ) as 'Elapsed Search (ms)'
使用交叉应用和最大值
是的,在查询中使用GetDate的Microsoft SQL Server 2008 R2正在追逐一个移动目标,影响性能,并且可能会产生奇怪的结果,例如,随着日期的更改。在变量中捕获当前日期/时间,然后根据需要使用该值几乎总是一个更好的主意。这在存储过程中的多个语句中更为重要。多次使用GetDate最常见的原因是在捕获长时间运行的操作的开始时间和结束时间时。生产中的历史记录表上是否有索引,例如,具有ProductId和VisitedDateTime的复合键?这将有助于您从历史记录中选择MaxVisitedDateTime,其中ProductId=Outer.ProductId。是的,历史记录中有2个indexProductID和VisitedDateTime,产品表中有1个indexProductID。GETDATE只是为了测试。我想我们无法沟通。我建议对历史使用一个包含两列的索引:ProductId和VisitedDateTime desc。可能会有其他索引,但该索引会加快子查询的速度。VisitedDateTime是错误的。它应该使用每个产品ID的最后一个日期。修复后,group by位于子查询之外。现在我得到了一致的结果,您可以看到,在我的系统上,查询4mil记录需要111秒,但我的方法只需要9秒。这大约是性能的12倍。最大值确实提高了速度。我很愿意向你竖起大拇指,但我没有足够的声誉。最大值就是答案。非常感谢。
select *
from @Product p
cross apply (
select MAX(VisitedDatetime) LastVisitedDatetime
from @History
where VisitedDatetime BETWEEN GETDATE() AND GETDATE()+10
and ProductID = p.ProductID
) h