慢速TSQL查询_Tsql_Join_Sql Server 2008 R2

慢速TSQL查询

tsql join sql-server-2008-r2

慢速TSQL查询,tsql,join,sql-server-2008-r2,Tsql,Join,Sql Server 2008 R2,关于如何提高查询性能有什么想法吗 [富时指数] 主键是sID，wordPos. 在wordID、sID、wordPos上有一个索引。它们都是int 最后，使用一个不同的名称。大多数sID只有几个匹配项。某些sID可能有超过10000个匹配项并终止查询查询前27749行在11秒内返回的位置。没有一个sID具有超过500个匹配项。每个匹配项的总和为65615 仅第27750排比赛就需要2分钟，有15000场比赛这并不奇怪，因为末尾的连接在[sID]上因为在最终的使用中，有没有一种方法

关于如何提高查询性能有什么想法吗

[FTSindex] 的主键是 (sID, wordPos)。
在wordID、sID、wordPos上有一个索引。
它们都是int

最后使用了 DISTINCT。
大多数sID只有几个匹配项。
某些sID可能有超过10000个匹配项，从而拖垮整个查询

查询的前27749行在11秒内返回。
其中没有任何一个sID的匹配项超过500个。
各个sID的匹配项合计为65615个。

仅第27750行就需要2分钟，该行有15000个匹配项

这并不奇怪，因为末尾的连接在[sID]上

既然最终使用了 DISTINCT，有没有办法让它只查找第一个满足下列条件的匹配？

-- Join predicate excerpt: the right-hand row must share the left-hand row's sID
-- and sit 1 to 10 word positions after it.
on [wXright].[sID] = [wXleft].[sID]
    and [wXright].[wordPos] >  [wXleft].[wordPos]
    and [wXright].[wordPos] <= [wXleft].[wordPos] + 10

我这样做只是为了尝试，它完全改变了查询计划。
我不知道问题查询需要多长时间。我在20:00放弃了。
我甚至不打算将此作为答案发布，因为我不认为它对其他任何人都有价值。
希望得到更好的答案。
如果我在接下来的两天内没有得到一个答案，我将删除这个问题

这并不能解决问题

  -- Distinct sIDs containing a word matching 'fox' located 1 to 10 positions
  -- after a word matching 'brown' within the same sID.
  select distinct [brown].[sID]
    from [ftsIndex] as [brown] with (nolock)
   inner join [ftsIndex] as [fox] with (nolock)
           on [fox].[sID] = [brown].[sID]
          -- proximity window: strictly after, at most 10 positions later
          and [fox].[wordPos] >  [brown].[wordPos]
          and [fox].[wordPos] <= [brown].[wordPos] + 10
   -- Word filters live in WHERE; for an inner join this is equivalent to ON.
   where [brown].[wordID] in (select [id] from [FTSwordDef] with (nolock) where [word] like 'brown')
     and [fox].[wordID]   in (select [id] from [FTSwordDef] with (nolock) where [word] like 'fox')

完整的表定义

-- Word-position table: one row per (sID, wordPos), carrying the wordID found at
-- that position and a charPos value. Clustered on (sID, wordPos).
CREATE TABLE [dbo].[FTSindex]
(
    [sID]     [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    [wordID]  [int] NOT NULL,
    [charPos] [int] NOT NULL,
    CONSTRAINT [PK_FTSindex] PRIMARY KEY CLUSTERED ([sID] ASC, [wordPos] ASC)
        WITH (
            PAD_INDEX = ON,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            FILLFACTOR = 100
        ) ON [PRIMARY]
) ON [PRIMARY]

GO

-- Every wordID must exist in FTSwordDef; existing rows are validated (WITH CHECK).
ALTER TABLE [dbo].[FTSindex] WITH CHECK
    ADD CONSTRAINT [FK_FTSindex_FTSwordDef]
    FOREIGN KEY ([wordID]) REFERENCES [dbo].[FTSwordDef] ([ID])
GO

-- Make sure the foreign key is enabled.
ALTER TABLE [dbo].[FTSindex]
    CHECK CONSTRAINT [FK_FTSindex_FTSwordDef]
GO

更新：

-- Count the sIDs in which a wordID 1 row is immediately followed (looking only
-- at the wordID 1 and wordID 2 rows, ordered by sid, wordpos) by a wordID 2 row
-- of the same sID at most 10 positions later.
; with hits as (
    -- Union all resolves costly sort
    select sid, wordpos, wordid
      from FTSindex
     where wordID = 1
    union all
    select sid, wordpos, wordID
      from FTSindex
     where wordID = 2
),
numbered as (
    -- Global sequence over both words so neighbouring hits get consecutive numbers
    select sid, wordid, wordpos,
           row_number() over (order by sid, wordpos) as seq
      from hits
)
select count(distinct lhs.sid)
  from numbered as lhs
 inner join numbered as rhs
    on rhs.sID = lhs.sID
   and rhs.seq = lhs.seq + 1
 where lhs.wordID = 1
   and rhs.wordID = 2
   and rhs.wordpos - 10 <= lhs.wordPos

-- List each neighbouring (wordID 1, wordID 2) pair as one row: sid, the shared
-- row number, and both positions, keeping pairs at most 10 positions apart.
-- Groups missing either word yield a NULL difference and are filtered out.
; with hits as (
    select sid, wordpos, wordid
      from FTSindex
     where wordID = 1
    union all
    select sid, wordpos, wordID
      from FTSindex
     where wordID = 2
),
numbered as (
    select sid, wordid, wordpos,
           row_number() over (order by sid, wordpos) as seq
      from hits
),
paired as (
    -- Collapse rns belonging to wordid 2 to ones belonging to wordid 1
    -- so they appear in the same row
    select sid, wordpos, wordid,
           case when wordid = 1 then seq else seq - 1 end as rn
      from numbered
)
select sid, rn, [1], [2]
  from paired
 pivot (max(wordpos) for wordid in ([1], [2])) as pv
 where [2] - [1] <= 10

您仍然可以使用

union all

，这有助于优化器在流程的最后一部分延迟筛选“L”和“R”边时保留索引中的顺序。不幸的是，您需要事先检索所有wordid，并在

equals

条件下使用它们。在我的机器上，它将执行时间减少到2/3：

  -- Count the sIDs where a wordID 1 or wordID 4 row is directly followed, within
  -- the same sID and looking only at the wordID 1/4/2 rows, by a wordID 2 row
  -- at most 10 positions later.
  ; with hits as (
      select sID, wordPos, wordID
        from FTSindex
       where wordID = 1
      union all
      select sID, wordPos, wordID
        from FTSindex
       where wordID = 4
      union all
      select sID, wordPos, wordID
        from FTSindex
       where wordID = 2
  ),
  numbered as (
      -- Sequence restarts for every sID
      select sID, wordPos, wordID,
             row_number() over (partition by [sID] order by wordPos) as seq
        from hits
  )
  -- original author's annotation: 26919 00:02 (presumably result and elapsed time)
  select count(distinct lhs.sID)
    from numbered as lhs
   inner join numbered as rhs
      on rhs.sID = lhs.sID
     and rhs.seq = lhs.seq + 1
     and rhs.wordPos - 10 <= lhs.wordPos
     -- Now is the time to repartition the stream
     and lhs.wordID in (1, 4)
     and rhs.wordID = 2

哦，现在真的需要两秒钟吗

更新-2:

-- Count the sIDs in which a wordID 1 row is immediately followed (looking only
-- at the wordID 1 and wordID 2 rows, ordered by sid, wordpos) by a wordID 2 row
-- of the same sID at most 10 positions later.
; with hits as (
    -- Union all resolves costly sort
    select sid, wordpos, wordid
      from FTSindex
     where wordID = 1
    union all
    select sid, wordpos, wordID
      from FTSindex
     where wordID = 2
),
numbered as (
    -- Global sequence over both words so neighbouring hits get consecutive numbers
    select sid, wordid, wordpos,
           row_number() over (order by sid, wordpos) as seq
      from hits
)
select count(distinct lhs.sid)
  from numbered as lhs
 inner join numbered as rhs
    on rhs.sID = lhs.sID
   and rhs.seq = lhs.seq + 1
 where lhs.wordID = 1
   and rhs.wordID = 2
   and rhs.wordpos - 10 <= lhs.wordPos

-- List each neighbouring (wordID 1, wordID 2) pair as one row: sid, the shared
-- row number, and both positions, keeping pairs at most 10 positions apart.
-- Groups missing either word yield a NULL difference and are filtered out.
; with hits as (
    select sid, wordpos, wordid
      from FTSindex
     where wordID = 1
    union all
    select sid, wordpos, wordID
      from FTSindex
     where wordID = 2
),
numbered as (
    select sid, wordid, wordpos,
           row_number() over (order by sid, wordpos) as seq
      from hits
),
paired as (
    -- Collapse rns belonging to wordid 2 to ones belonging to wordid 1
    -- so they appear in the same row
    select sid, wordpos, wordid,
           case when wordid = 1 then seq else seq - 1 end as rn
      from numbered
)
select sid, rn, [1], [2]
  from paired
 pivot (max(wordpos) for wordid in ([1], [2])) as pv
 where [2] - [1] <= 10

1和2代表所选单词的ID。当10个词以内有多次命中时，结果与原查询不同：原始查询会报告所有命中，而此查询只显示距离最近的那一个

这样做的目的是只提取搜索到的单词，并比较两个相邻单词之间的距离，其中WordID1排在第一位，WordID2排在第二位

更新-1：

我删除了这篇文章，因为它没有我想象的那么好。但是，它比优化查询更适合OP的需求，因为它允许同时搜索多个单词（在where子句中指定的另一个单词附近找到的单词列表）

第一次尝试：

-- Count the sIDs in which a wordID 1 row is immediately followed (looking only
-- at the wordID 1 and wordID 2 rows, ordered by sid, wordpos) by a wordID 2 row
-- of the same sID at most 10 positions later.
; with hits as (
    -- Union all resolves costly sort
    select sid, wordpos, wordid
      from FTSindex
     where wordID = 1
    union all
    select sid, wordpos, wordID
      from FTSindex
     where wordID = 2
),
numbered as (
    -- Global sequence over both words so neighbouring hits get consecutive numbers
    select sid, wordid, wordpos,
           row_number() over (order by sid, wordpos) as seq
      from hits
)
select count(distinct lhs.sid)
  from numbered as lhs
 inner join numbered as rhs
    on rhs.sID = lhs.sID
   and rhs.seq = lhs.seq + 1
 where lhs.wordID = 1
   and rhs.wordID = 2
   and rhs.wordpos - 10 <= lhs.wordPos

-- List each neighbouring (wordID 1, wordID 2) pair as one row: sid, the shared
-- row number, and both positions, keeping pairs at most 10 positions apart.
-- Groups missing either word yield a NULL difference and are filtered out.
; with hits as (
    select sid, wordpos, wordid
      from FTSindex
     where wordID = 1
    union all
    select sid, wordpos, wordID
      from FTSindex
     where wordID = 2
),
numbered as (
    select sid, wordid, wordpos,
           row_number() over (order by sid, wordpos) as seq
      from hits
),
paired as (
    -- Collapse rns belonging to wordid 2 to ones belonging to wordid 1
    -- so they appear in the same row
    select sid, wordpos, wordid,
           case when wordid = 1 then seq else seq - 1 end as rn
      from numbered
)
select sid, rn, [1], [2]
  from paired
 pivot (max(wordpos) for wordid in ([1], [2])) as pv
 where [2] - [1] <= 10

可能有一种方法与

top 1

结合使用

-- For every row whose word matches 'Brown', return its sID and wordPos when at
-- least one row with a word matching 'Fox' exists in the same sID 1 to 10
-- positions later. TOP 1 limits the applied set to a single row, so each
-- left-hand row appears at most once.
select lhs.[sID], lhs.[wordPos]
  from [ftsIndex] as lhs with (nolock)
 cross apply
 (
    select top 1 rhs.sID
      from [ftsIndex] as rhs
     where rhs.sID = lhs.sID
       and rhs.wordPos >  lhs.wordPos
       and rhs.wordPos <= lhs.wordPos + 10
       and rhs.wordID in
           (select [id]
              from [FTSwordDef] with (nolock)
             where [word] like 'Fox')
 ) as firstFox
 where lhs.[wordID] in
       (select [id]
          from [FTSwordDef] with (nolock)
         where [word] like 'Brown')

select [wXleft].[sID], [wXleft].[wordPos]
  from [ftsIndex] wXleft with (nolock)
 cross apply
 (
    select top 1 r.sID
      from [ftsIndex] r
     where r.sID = wXleft.sID
       and r.wordPos > wxLeft.wordPos
       and r.wordPos ……（此处代码在原文中被截断）

嗯，我希望我有更多的信息或测试方法，但如果没有，我可能会尝试这样做：
 -- Stage only the index rows for the searched words in a temp table, then run
 -- the proximity self-join against that smaller set.
 IF OBJECT_ID(N'tempdb..#tempMatch', N'U') IS NOT NULL   DROP TABLE #tempMatch
 -- The primary key is intentionally unnamed: constraint names are not made
 -- unique per session, so a named key on a temp table makes concurrent runs of
 -- this script collide.
 CREATE TABLE #tempMatch(
    [sID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    [wordID] [int] NOT NULL,
    PRIMARY KEY CLUSTERED
(
    [sID] ASC,
    [wordPos] ASC
))

-- Copy every ftsIndex row whose wordID belongs to one of the searched words.
;WITH cteWords As 
(
            SELECT 'Brown' as [word]
  UNION ALL SELECT 'Fox'
)
INSERT INTO #tempMatch ([sID],[wordPos],[wordID])
SELECT sID, wordPos, wordID
FROM    ftsIndex
WHERE   EXISTS
        (Select * From FTSWordDef s1
         inner join cteWords s2 ON s1.word = s2.word
         Where ftsIndex.wordID = s1.id)
;

-- Count the sIDs where 'Fox' occurs 1 to 10 positions after 'Brown'.
-- IN (rather than =) keeps the query valid if a word maps to several ids;
-- a scalar subquery would raise "Subquery returned more than 1 value".
select count(distinct(s1.[sID]))
    from #tempMatch s1
    join #tempMatch s2
        on  s2.[sID] = s1.[sID]
        and s2.[wordPos] >  s1.[wordPos]
        and s2.[wordPos] <= s1.[wordPos] + 10
    where s1.wordID in (select id from FTSWordDef w where w.word = 'Brown')
      and s2.wordID in (select id from FTSWordDef w where w.word = 'Fox')

请让我知道这些是否有帮助。
我不知道您的所有数据，但您是否考虑过可能会插入临时表，然后在临时表上创建聚集索引？先插入，然后创建索引。这通常比单独创建索引更快。这可能会对您有所帮助，因此我认为可能不会将其添加为cOMENT.@djangojazz插入只需5秒。如果我添加排序，以便按主键顺序插入记录，则仍需5秒。我们需要表/键/索引定义和查询计划（实际）。此外，与仅使用SQL Server全文搜索相比，这种设计/方法有什么原因吗？SQL Server全文搜索不适用“狐狸”或“郊狼”10以内的“快速棕色”。将添加表def。如何发布查询计划？您必须将其放到某个在线共享位置并链接到它。（我最不喜欢StackOverflow）在2/3的时间内返回与内部循环联接相同的答案。在接受此答案之前，将等待几天以获得奇迹般的答案。谢谢。为什么您要取消其他选项？它更快。我一直在尝试调整它，以尝试从中获得更多。奇怪的是，CTE中产生的排序是主要成本。@BUM bec因为我的计时错误，它花费的时间和我最初的尝试一样多。同时，我已经解决了排序部分，但Sql Server需要对每个引用执行一次CTE，并且有两个引用。我将在一分钟后发布新版本。在我的测试中，它稍微快了一点，我一直在试图找出如何降低成本的方法排序。即使索引按该排序，这两种排序也占了成本的70%，所以在我看来，这应该是一种便宜的排序。@BARM请看一看新版本。必须在第一个约束中添加wordID，并且两者都会在join cteWords上抛出一个错误。@BARM错误是什么？我无法测试编译，因为我们没有表定义。@bum为什么必须将wordID添加到第一个约束？根据您的帖子，（sID，wordPos）
应该足够了，因为它们是我在插入..选择..
中绘制的唯一表的主键。（事实上，现在我看到了它，我意识到DISTINCT
是多余的，不应该在那里）我所知道的是我得到了一个PK冲突。这也让我困惑，因为sID和wordPos是表上的PK。错误是Msg 208，级别16，状态1，第40行无效对象名“cteWords”。@bum无效对象名“cteWords”也没有意义。这里有些东西不对劲。
 -- Stage only the index rows for the searched words in a temp table, then run
 -- the proximity self-join against that smaller set.
 IF OBJECT_ID(N'tempdb..#tempMatch', N'U') IS NOT NULL   DROP TABLE #tempMatch
 -- The primary key is intentionally unnamed: constraint names are not made
 -- unique per session, so a named key on a temp table makes concurrent runs of
 -- this script collide.
 CREATE TABLE #tempMatch(
    [sID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    [wordID] [int] NOT NULL,
    PRIMARY KEY CLUSTERED
(
    [sID] ASC,
    [wordPos] ASC
))

-- Copy every ftsIndex row whose wordID belongs to one of the searched words.
;WITH cteWords As 
(
            SELECT 'Brown' as [word]
  UNION ALL SELECT 'Fox'
)
INSERT INTO #tempMatch ([sID],[wordPos],[wordID])
SELECT sID, wordPos, wordID
FROM    ftsIndex
WHERE   EXISTS
        (Select * From FTSWordDef s1
         inner join cteWords s2 ON s1.word = s2.word
         Where ftsIndex.wordID = s1.id)
;

-- Count the sIDs where 'Fox' occurs 1 to 10 positions after 'Brown'.
-- IN (rather than =) keeps the query valid if a word maps to several ids;
-- a scalar subquery would raise "Subquery returned more than 1 value".
select count(distinct(s1.[sID]))
    from #tempMatch s1
    join #tempMatch s2
        on  s2.[sID] = s1.[sID]
        and s2.[wordPos] >  s1.[wordPos]
        and s2.[wordPos] <= s1.[wordPos] + 10
    where s1.wordID in (select id from FTSWordDef w where w.word = 'Brown')
      and s2.wordID in (select id from FTSWordDef w where w.word = 'Fox')

 -- Variant of the staging table clustered on (wordID, sID, wordPos).
 IF OBJECT_ID(N'tempdb..#tempMatch', N'U') IS NOT NULL   DROP TABLE #tempMatch
 -- The primary key is intentionally unnamed: constraint names are not made
 -- unique per session, so a named key on a temp table makes concurrent runs of
 -- this script collide.
 CREATE TABLE #tempMatch(
    [sID] [int] NOT NULL,
    [wordID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    PRIMARY KEY CLUSTERED
(
    [wordID] ASC,
    [sID] ASC,
    [wordPos] ASC
))