复杂的SQL编写
我有这张表:复杂的SQL编写,sql,sql-server-ce,sql-server-ce-4,Sql,Sql Server Ce,Sql Server Ce 4,我有这张表: table session( ID number, SessionID VarChar, Date, Filter ) 此表包含搜索信息,如下所示: ID SessionID Date filter 4 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 meagPixel=5 6 peqq421gaspts3nuulq5mwcq 24/05/2012
table session(
ID number,
SessionID VarChar,
Date,
Filter
)
此表包含搜索信息,如下所示:
ID SessionID Date filter
4 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 meagPixel=5
6 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 brand=Canon
7 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 brand=Canon&meagPixel=12.1
8 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 brand=Canon
10 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 brand=Nikon
12 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 meagPixel=12.1
13 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 meagPixel=12.1&opticalZoom=True
14 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 meagPixel=12.1&opticalZoom=True&brand=Panasonic
16 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 price=500.00
18 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 price=499.00
19 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 price=499.00&brand=Olympus
21 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 zoomRange=2000
22 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 zoomRange=2000&brand=Leica
23 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 zoomRange=2000&brand=Leica&price=1995.00
24 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True
25 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2
26 peqq421gaspts3nuulq5mwcq 24/05/2012 13:50 zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2&weight=345
27 peqq421gaspts3nuulq5mwcq 24/05/2012 13:58 zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2
41 poiq41111spts00000q5aaaa 27/05/2012 13:48 meagPixel=5
我想得到唯一的搜索。唯一搜索包括:
- 用户(会话)的最长搜索(筛选器)
- 如果第一个筛选器发生更改,则需要将其视为新搜索(筛选器)
-- Asker's attempt: one row per SessionID carrying the greatest Filter value.
-- MAX() on a character column compares by sort order, not by string length.
SELECT
    MAX(s.Filter)
FROM Session AS s
GROUP BY
    s.SessionID
顺便说一句,我给出的示例表数据的结果应该返回:
ID SessionID Date filter
4 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 meagPixel=5
7 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 brand=Canon&meagPixel=12.1
10 peqq421gaspts3nuulq5mwcq 24/05/2012 13:48 brand=Nikon
14 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 meagPixel=12.1&opticalZoom=True&brand=Panasonic
16 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 price=500.00
19 peqq421gaspts3nuulq5mwcq 24/05/2012 13:49 price=499.00&brand=Olympus
26 peqq421gaspts3nuulq5mwcq 24/05/2012 13:50 zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2&weight=345
41 poiq41111spts00000q5aaaa 27/05/2012 13:48 meagPixel=5
感谢您的帮助和指导。要获得最长的搜索筛选器,您需要执行以下操作:
-- Longest filter per session: measure each filter, rank the rows of every
-- session by that length (longest first), and keep the top-ranked row.
;WITH measured AS (
    -- Every session row plus the character length of its filter.
    SELECT src.*,
           LEN(src.filter) AS len
    FROM session AS src
),
ranked AS (
    -- Position of each row inside its session, longest filter first.
    SELECT m.*,
           ROW_NUMBER() OVER (PARTITION BY m.sessionid ORDER BY m.len DESC) AS rownum
    FROM measured AS m
)
SELECT r.*
FROM ranked AS r
WHERE r.rownum = 1
我是用windows函数来实现的。您可以通过使用聚合和联接来完成相同的任务
但是,您的意思是会话(SessionID)本身并不是真正的标识符,会话与筛选器的组合才是。下面的查询基本上可以得到您想要的结果:
-- One row per (sessionid, filter) combination.
-- Inner query: every session row plus the character length of its filter.
-- Middle query: numbers the rows inside each (sessionid, filter) partition.
-- Outer query: keeps the first row of every partition.
select s.*
from (select s.*,
             -- Fixed: the original read "row_number() overo over (", which does not parse.
             row_number() over (partition by sessionid, filter
                                order by len desc) as rownum
      from (select s.*, len(filter) as len
            from session s
           ) s
     ) s
where rownum = 1
(唯一的更改是在 partition by 子句中加入了 filter 列。)
你可能有重复的。如果您想要所有重复项,则可以使用稍微不同的查询。要获得最长的搜索筛选器,您需要执行以下操作:
-- Longest filter per session: measure each filter, rank the rows of every
-- session by that length (longest first), and keep the top-ranked row.
;WITH measured AS (
    -- Every session row plus the character length of its filter.
    SELECT src.*,
           LEN(src.filter) AS len
    FROM session AS src
),
ranked AS (
    -- Position of each row inside its session, longest filter first.
    SELECT m.*,
           ROW_NUMBER() OVER (PARTITION BY m.sessionid ORDER BY m.len DESC) AS rownum
    FROM measured AS m
)
SELECT r.*
FROM ranked AS r
WHERE r.rownum = 1
我是用windows函数来实现的。您可以通过使用聚合和联接来完成相同的任务
但是,您的意思是会话(SessionID)本身并不是真正的标识符,会话与筛选器的组合才是。下面的查询基本上可以得到您想要的结果:
-- One row per (sessionid, filter) combination.
-- Inner query: every session row plus the character length of its filter.
-- Middle query: numbers the rows inside each (sessionid, filter) partition.
-- Outer query: keeps the first row of every partition.
select s.*
from (select s.*,
             -- Fixed: the original read "row_number() overo over (", which does not parse.
             row_number() over (partition by sessionid, filter
                                order by len desc) as rownum
      from (select s.*, len(filter) as len
            from session s
           ) s
     ) s
where rownum = 1
(唯一的更改是在 partition by 子句中加入了 filter 列。)
你可能有重复的。如果您想要所有的副本,则可以使用稍微不同的查询。首先,示例数据中似乎有错误,我认为第25、26和27行都应该出现在最终数据中。27当然应该,因为它是会话ID和日期组合的唯一条目 假设以上是正确的,那么我认为我已经正确地建立了你的逻辑 步骤1是为每个筛选器定义第一个搜索项,以及它在会话中出现的顺序:
;WITH CTE AS
(   SELECT  *,
            -- FirstTerm: the text before the first '&', or the whole filter
            -- when it holds a single term.
            SUBSTRING(Filter, 1, CASE WHEN CHARINDEX('&', Filter) = 0 THEN LEN(Filter) ELSE CHARINDEX('&', Filter) - 1 END) [FirstTerm],
            -- SessionOrder: position of the row within its (SessionID, Date)
            -- group, in ID order. The later CTEs join on this column; it was
            -- missing here, leaving a dangling comma before FROM.
            -- NOTE(review): partitioning is inferred from the SessionID/Date
            -- join conditions used downstream - confirm.
            ROW_NUMBER() OVER(PARTITION BY SessionID, Date ORDER BY ID) [SessionOrder]
    FROM    Session
)
下一步是确定每个搜索是新搜索,还是前一个搜索的继续。这是通过获取会话中的上一次搜索(这也是在上一个CTE中定义SessionOrder的原因),并比较两者的第一个搜索词是否相同来实现的。
, CTE2 AS
(   -- Flags the rows that start a new search: the first row of each
    -- (SessionID, Date) group, or a row whose immediately preceding row in
    -- that group has a different first term.
    SELECT  cur.*,
            CASE
                WHEN prev.SessionOrder IS NOT NULL THEN 1
                WHEN cur.SessionOrder = 1 THEN 1
                ELSE 0
            END AS NewSearch
    FROM    CTE AS cur
            LEFT JOIN CTE AS prev
                ON  prev.SessionID = cur.SessionID
                AND prev.Date = cur.Date
                AND prev.SessionOrder + 1 = cur.SessionOrder
                AND prev.FirstTerm <> cur.FirstTerm
)
接下来,每个新的搜索都需要在会话中有自己的排名,以便分组。然后定义规则(SessionID、Date和First Search term的唯一组合),然后可以根据筛选器的长度在唯一组合内对每个项目进行排序:
, CTE3 AS
(   -- SearchNumber is the running count of NewSearch flags up to and including
    -- the current row of its (SessionID, Date) group; rows that share it belong
    -- to one search. SearchOrder ranks the rows of a search, longest filter first.
    SELECT  *,
            ROW_NUMBER() OVER(
                PARTITION BY SessionID, Date, ISNULL(SearchNumber, 0)
                ORDER BY LEN(Filter) DESC
            ) AS SearchOrder
    FROM    CTE2 AS s
            OUTER APPLY
            (   SELECT  SUM(p.NewSearch) AS SearchNumber
                FROM    CTE2 AS p
                WHERE   p.SessionID = s.SessionID
                AND     p.Date = s.Date
                AND     p.SessionOrder <= s.SessionOrder
            ) AS running
)
最后,您需要做的就是将结果限制为SessionID、Date和first filter term的每个组合的最长搜索项:
-- Keep only the top-ranked (longest-filter) row of every search.
SELECT
    ID,
    SessionID,
    Date,
    Filter
FROM CTE3
WHERE SearchOrder = 1
ORDER BY
    ID
通常我会把这些都放在SQLFiddle上,而不是在这里发布一个完整的工作示例,但它今天似乎不起作用。下面是我用来测试数据的完整SQL:
-- Test fixture: temporary copy of the sample session data.
-- The date literals below are written day/month/year. Force that reading so the
-- conversion to DATETIME does not depend on the connection's language setting
-- (under the mdy default, '24/05/2012' is rejected as out of range).
SET DATEFORMAT dmy;

CREATE TABLE #Session (ID INT, SessionID VARCHAR(50), Date DATETIME, Filter VARCHAR(200));

-- Explicit column list keeps the insert valid if the table definition changes.
INSERT INTO #Session (ID, SessionID, Date, Filter) VALUES
(2, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon'),
(4, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'meagPixel=5'),
(6, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon'),
(7, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon&meagPixel=12.1'),
(8, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon'),
(10, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Nikon'),
(12, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'meagPixel=12.1'),
(13, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'meagPixel=12.1&opticalZoom=True'),
(14, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'meagPixel=12.1&opticalZoom=True&brand=Panasonic'),
(16, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'price=500.00'),
(18, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'price=499.00'),
(19, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'price=499.00&brand=Olympus'),
(21, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000'),
(22, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica'),
(23, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica&price=1995.00'),
(24, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True'),
(25, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2'),
(26, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:50', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2&weight=345'),
(27, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:58', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2'),
(41, 'poiq41111spts00000q5aaaa', '27/05/2012 13:48', 'meagPixel=5')
-- Longest filter of every search, where a search is a run of consecutive rows
-- (within one SessionID and Date) that share the same first filter term.
;WITH CTE AS
(   SELECT  *,
            -- FirstTerm: the text before the first '&', or the whole filter
            -- when it holds a single term.
            SUBSTRING(Filter, 1, CASE WHEN CHARINDEX('&', Filter) = 0 THEN LEN(Filter) ELSE CHARINDEX('&', Filter) - 1 END) [FirstTerm],
            -- SessionOrder: position of the row within its (SessionID, Date)
            -- group, in ID order. CTE2 and CTE3 depend on this column; it was
            -- missing here, leaving a dangling comma before FROM.
            -- NOTE(review): partitioning is inferred from the SessionID/Date
            -- join conditions used below - confirm.
            ROW_NUMBER() OVER(PARTITION BY SessionID, Date ORDER BY ID) [SessionOrder]
    FROM    #Session
), CTE2 AS
(   -- NewSearch = 1 for the first row of a group, or when the immediately
    -- preceding row of the group has a different first term.
    SELECT  T1.*,
            CASE WHEN T1.SessionOrder = 1 OR T2.SessionOrder IS NOT NULL THEN 1 ELSE 0 END [NewSearch]
    FROM    CTE T1
            LEFT JOIN CTE T2
                ON T1.SessionID = T2.SessionID
                AND T1.Date = T2.Date
                AND T1.FirstTerm != T2.FirstTerm
                AND T1.SessionOrder = T2.SessionOrder + 1
), CTE3 AS
(   -- SearchNumber: running total of NewSearch flags, so all rows of one
    -- search share a number. SearchOrder ranks them, longest filter first.
    SELECT  *,
            ROW_NUMBER() OVER(PARTITION BY SessionID, Date, ISNULL(SearchNumber, 0) ORDER BY LEN(Filter) DESC) [SearchOrder]
    FROM    CTE2 T1
            OUTER APPLY
            (   SELECT  SUM(NewSearch) [SearchNumber]
                FROM    CTE2 T2
                WHERE   T1.SessionOrder >= T2.SessionOrder
                AND     T1.SessionID = T2.SessionID
                AND     T1.Date = T2.Date
            ) c
)
SELECT  ID, SessionID, Date, Filter
FROM    CTE3
WHERE   SearchOrder = 1
ORDER BY ID;

-- Remove the temporary fixture table.
DROP TABLE #Session
附录 好的,根据您不希望按日期列分组的结果集,您只需按第一个搜索词和sessionID分组的长度顺序排列行 此查询生成与示例数据相同的结果。我已经在2008年R1中测试过,但没有理由认为它不能在SQLServerCE中工作
-- Longest filter for every (SessionID, first filter term) pair, ignoring Date.
;WITH LongestPerFirstTerm AS
(   SELECT  *,
            ROW_NUMBER() OVER(
                PARTITION BY
                    SessionID,
                    -- First term: text before the first '&', or the whole
                    -- filter when there is no '&'.
                    SUBSTRING(Filter, 1,
                              CASE CHARINDEX('&', Filter)
                                  WHEN 0 THEN LEN(Filter)
                                  ELSE CHARINDEX('&', Filter) - 1
                              END)
                ORDER BY LEN(Filter) DESC
            ) AS RowNumber
    FROM    Session
)
SELECT  *
FROM    LongestPerFirstTerm
WHERE   RowNumber = 1
ORDER BY ID
关于最终解决方案首先,您的样本数据中似乎有错误,我认为第25、26和27行都应该出现在最终数据中。27当然应该,因为它是会话ID和日期组合的唯一条目 假设以上是正确的,那么我认为我已经正确地建立了你的逻辑 步骤1是为每个筛选器定义第一个搜索项,以及它在会话中出现的顺序:
;WITH CTE AS
(   SELECT  *,
            -- FirstTerm: the text before the first '&', or the whole filter
            -- when it holds a single term.
            SUBSTRING(Filter, 1, CASE WHEN CHARINDEX('&', Filter) = 0 THEN LEN(Filter) ELSE CHARINDEX('&', Filter) - 1 END) [FirstTerm],
            -- SessionOrder: position of the row within its (SessionID, Date)
            -- group, in ID order. The later CTEs join on this column; it was
            -- missing here, leaving a dangling comma before FROM.
            -- NOTE(review): partitioning is inferred from the SessionID/Date
            -- join conditions used downstream - confirm.
            ROW_NUMBER() OVER(PARTITION BY SessionID, Date ORDER BY ID) [SessionOrder]
    FROM    Session
)
下一步是确定每个搜索是新搜索,还是前一个搜索的继续。这是通过获取会话中的上一次搜索(这也是在上一个CTE中定义SessionOrder的原因),并比较两者的第一个搜索词是否相同来实现的。
, CTE2 AS
(   -- Flags the rows that start a new search: the first row of each
    -- (SessionID, Date) group, or a row whose immediately preceding row in
    -- that group has a different first term.
    SELECT  cur.*,
            CASE
                WHEN prev.SessionOrder IS NOT NULL THEN 1
                WHEN cur.SessionOrder = 1 THEN 1
                ELSE 0
            END AS NewSearch
    FROM    CTE AS cur
            LEFT JOIN CTE AS prev
                ON  prev.SessionID = cur.SessionID
                AND prev.Date = cur.Date
                AND prev.SessionOrder + 1 = cur.SessionOrder
                AND prev.FirstTerm <> cur.FirstTerm
)
接下来,每个新的搜索都需要在会话中有自己的排名,以便分组。然后定义规则(SessionID、Date和First Search term的唯一组合),然后可以根据筛选器的长度在唯一组合内对每个项目进行排序:
, CTE3 AS
(   -- SearchNumber is the running count of NewSearch flags up to and including
    -- the current row of its (SessionID, Date) group; rows that share it belong
    -- to one search. SearchOrder ranks the rows of a search, longest filter first.
    SELECT  *,
            ROW_NUMBER() OVER(
                PARTITION BY SessionID, Date, ISNULL(SearchNumber, 0)
                ORDER BY LEN(Filter) DESC
            ) AS SearchOrder
    FROM    CTE2 AS s
            OUTER APPLY
            (   SELECT  SUM(p.NewSearch) AS SearchNumber
                FROM    CTE2 AS p
                WHERE   p.SessionID = s.SessionID
                AND     p.Date = s.Date
                AND     p.SessionOrder <= s.SessionOrder
            ) AS running
)
最后,您需要做的就是将结果限制为SessionID、Date和first filter term的每个组合的最长搜索项:
-- Keep only the top-ranked (longest-filter) row of every search.
SELECT
    ID,
    SessionID,
    Date,
    Filter
FROM CTE3
WHERE SearchOrder = 1
ORDER BY
    ID
通常我会把这些都放在SQLFiddle上,而不是在这里发布一个完整的工作示例,但它今天似乎不起作用。下面是我用来测试数据的完整SQL:
-- Test fixture: temporary copy of the sample session data.
-- The date literals below are written day/month/year. Force that reading so the
-- conversion to DATETIME does not depend on the connection's language setting
-- (under the mdy default, '24/05/2012' is rejected as out of range).
SET DATEFORMAT dmy;

CREATE TABLE #Session (ID INT, SessionID VARCHAR(50), Date DATETIME, Filter VARCHAR(200));

-- Explicit column list keeps the insert valid if the table definition changes.
INSERT INTO #Session (ID, SessionID, Date, Filter) VALUES
(2, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon'),
(4, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'meagPixel=5'),
(6, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon'),
(7, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon&meagPixel=12.1'),
(8, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Canon'),
(10, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'brand=Nikon'),
(12, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'meagPixel=12.1'),
(13, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:48', 'meagPixel=12.1&opticalZoom=True'),
(14, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'meagPixel=12.1&opticalZoom=True&brand=Panasonic'),
(16, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'price=500.00'),
(18, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'price=499.00'),
(19, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'price=499.00&brand=Olympus'),
(21, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000'),
(22, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica'),
(23, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica&price=1995.00'),
(24, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True'),
(25, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:49', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2'),
(26, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:50', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2&weight=345'),
(27, 'peqq421gaspts3nuulq5mwcq', '24/05/2012 13:58', 'zoomRange=2000&brand=Leica&price=1995.00&opticalZoom=True&meagPixel=16.2'),
(41, 'poiq41111spts00000q5aaaa', '27/05/2012 13:48', 'meagPixel=5')
-- Longest filter of every search, where a search is a run of consecutive rows
-- (within one SessionID and Date) that share the same first filter term.
;WITH CTE AS
(   SELECT  *,
            -- FirstTerm: the text before the first '&', or the whole filter
            -- when it holds a single term.
            SUBSTRING(Filter, 1, CASE WHEN CHARINDEX('&', Filter) = 0 THEN LEN(Filter) ELSE CHARINDEX('&', Filter) - 1 END) [FirstTerm],
            -- SessionOrder: position of the row within its (SessionID, Date)
            -- group, in ID order. CTE2 and CTE3 depend on this column; it was
            -- missing here, leaving a dangling comma before FROM.
            -- NOTE(review): partitioning is inferred from the SessionID/Date
            -- join conditions used below - confirm.
            ROW_NUMBER() OVER(PARTITION BY SessionID, Date ORDER BY ID) [SessionOrder]
    FROM    #Session
), CTE2 AS
(   -- NewSearch = 1 for the first row of a group, or when the immediately
    -- preceding row of the group has a different first term.
    SELECT  T1.*,
            CASE WHEN T1.SessionOrder = 1 OR T2.SessionOrder IS NOT NULL THEN 1 ELSE 0 END [NewSearch]
    FROM    CTE T1
            LEFT JOIN CTE T2
                ON T1.SessionID = T2.SessionID
                AND T1.Date = T2.Date
                AND T1.FirstTerm != T2.FirstTerm
                AND T1.SessionOrder = T2.SessionOrder + 1
), CTE3 AS
(   -- SearchNumber: running total of NewSearch flags, so all rows of one
    -- search share a number. SearchOrder ranks them, longest filter first.
    SELECT  *,
            ROW_NUMBER() OVER(PARTITION BY SessionID, Date, ISNULL(SearchNumber, 0) ORDER BY LEN(Filter) DESC) [SearchOrder]
    FROM    CTE2 T1
            OUTER APPLY
            (   SELECT  SUM(NewSearch) [SearchNumber]
                FROM    CTE2 T2
                WHERE   T1.SessionOrder >= T2.SessionOrder
                AND     T1.SessionID = T2.SessionID
                AND     T1.Date = T2.Date
            ) c
)
SELECT  ID, SessionID, Date, Filter
FROM    CTE3
WHERE   SearchOrder = 1
ORDER BY ID;

-- Remove the temporary fixture table.
DROP TABLE #Session
附录 好的,根据您不希望按日期列分组的结果集,您只需按第一个搜索词和sessionID分组的长度顺序排列行 此查询生成与示例数据相同的结果。我已经在2008年R1中测试过,但没有理由认为它不能在SQLServerCE中工作
-- Longest filter for every (SessionID, first filter term) pair, ignoring Date.
;WITH LongestPerFirstTerm AS
(   SELECT  *,
            ROW_NUMBER() OVER(
                PARTITION BY
                    SessionID,
                    -- First term: text before the first '&', or the whole
                    -- filter when there is no '&'.
                    SUBSTRING(Filter, 1,
                              CASE CHARINDEX('&', Filter)
                                  WHEN 0 THEN LEN(Filter)
                                  ELSE CHARINDEX('&', Filter) - 1
                              END)
                ORDER BY LEN(Filter) DESC
            ) AS RowNumber
    FROM    Session
)
SELECT  *
FROM    LongestPerFirstTerm
WHERE   RowNumber = 1
ORDER BY ID
用于模式和插入查询的最终解决方案的。 我尝试过稍微不同的方法。我不确定这是否在所有情况下都有效。它在mysql和mssql中工作
-- Keep only the searches that no other search in the same session extends,
-- i.e. rows whose filter is not a leading part of a longer filter.
select *
from tsession t1
where not exists (
    select *
    from tsession t2
    where t2.sessionid = t1.sessionid
      -- Anchor the prefix on the '&' term separator. A bare '%' suffix would
      -- treat 'meagPixel=5' as a prefix of 'meagPixel=50' and drop it.
      -- The pattern needs at least one extra character, so a row never matches
      -- itself and the former t1.filter <> t2.filter check is redundant.
      -- NOTE(review): LIKE wildcards ('%', '_') inside a filter value would be
      -- interpreted as patterns - confirm filters never contain them.
      and t2.filter like concat(t1.filter, '&%'))
order by id;
select *
from tsession t1
where not exists (
select *
from tsession t2
where t2.filter like concat(t1.filter,'%')
and t1.filter<>t2.filter
and t1.sessionid=t2.sessionid)
order by id;
这将给出问题所需的准确结果。@GarethD-Tx用于模式和插入查询。 我尝试过稍微不同的方法。我不确定这是否在所有情况下都有效。它在mysql和mssql中工作
-- Keep only the searches that no other search in the same session extends,
-- i.e. rows whose filter is not a leading part of a longer filter.
select *
from tsession t1
where not exists (
    select *
    from tsession t2
    where t2.sessionid = t1.sessionid
      -- Anchor the prefix on the '&' term separator. A bare '%' suffix would
      -- treat 'meagPixel=5' as a prefix of 'meagPixel=50' and drop it.
      -- The pattern needs at least one extra character, so a row never matches
      -- itself and the former t1.filter <> t2.filter check is redundant.
      -- NOTE(review): LIKE wildcards ('%', '_') inside a filter value would be
      -- interpreted as patterns - confirm filters never contain them.
      and t2.filter like concat(t1.filter, '&%'))
order by id;
选择*
来自tsession t1
不存在的地方(
选择*
从t2会话