SQL查询-每天确定新访客
我有一个简化的模式,如下所示:SQL查询-每天确定新访客,sql,tsql,Sql,Tsql,我有一个简化的模式,如下所示: CREATE TABLE MyTable ( DateTimeOffset HitDate NOT NULL, IpAddress varchar(15) ) 示例行可能如下所示: “2013年7月10日8:05:29-07:00”“111.222.333.444” 我正在尝试解决一个查询,该查询将为我提供每天的唯一IP地址数,例如2013年7月10日。实际上,这一部分相当简单,我已经为此创建了一个查询。但是,对于这个查询,我想要的是在当前日期之前从
-- Hit log: one row per page hit, keeping the visitor's local time and UTC offset.
CREATE TABLE MyTable (
    HitDate DateTimeOffset NOT NULL, -- column name first, then its data type
    IpAddress varchar(15)
)
示例行可能如下所示:
“2013年7月10日8:05:29-07:00”“111.222.333.444”
我正在尝试解决一个查询,该查询将为我提供每天的唯一IP地址数,例如2013年7月10日。实际上,这一部分相当简单,我已经为此创建了一个查询。但是,对于这个查询,我想要的是在当前日期之前从未出现过的唯一IP地址的数量。我不关心该日期之后的情况,只关心该日期之前的情况。
例如,假设我有以下数据,这就是我所有的数据:
'7/10/2013 8:05:29 -07:00' '111.222.333.444'
'7/10/2013 12:05:29 -07:00' '111.222.333.222'
'7/9/2013 9:05:29 -07:00' '111.222.333.444'
'7/9/2013 10:05:29 -07:00' '111.222.333.555'
'7/8/2013 11:05:29 -07:00' '111.222.333.222'
'7/8/2013 4:05:29 -07:00' '111.222.333.555'
查询应输出以下内容:
'7/8/2013' 2 (neither IpAddress existed before this date so both are new)
'7/9/2013' 1 (only one of the IpAddresses is new - the one ending in '444')
'7/10/2013' 0 (both IpAddresses had existed before this date)
目标数据库是SQL Server 2012。我向第一个正确创建SQL语句的人提供100分奖励。[编辑-改为使用DateTimeOffset] 从创建基本MyTable开始,我想到了以下几点:
-- Test data creation to match schema & examples
CREATE TABLE MyTable (
    HitDate DateTimeOffset NOT NULL,
    IpAddress varchar(15)
);

-- ISO 8601 literals are used so the rows load identically whatever the
-- session's language / DATEFORMAT is (an m/d/y string can be read as d/m/y).
-- The column list keeps the insert valid if columns are added or reordered.
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('2013-07-10T08:05:29-07:00', '111.222.333.444'),
    ('2013-07-10T12:05:29-07:00', '111.222.333.222'),
    ('2013-07-09T09:05:29-07:00', '111.222.333.444'),
    ('2013-07-09T10:05:29-07:00', '111.222.333.555'),
    ('2013-07-08T11:05:29-07:00', '111.222.333.222'),
    ('2013-07-08T04:05:29-07:00', '111.222.333.555');
-- actual solution starts here
-- Drop any leftover copy so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#MyTable') IS NOT NULL
    DROP TABLE #MyTable;

CREATE TABLE #MyTable (
    HitDate date,
    IpAddress varchar(15)
);

-- populate data into required format for main query:
-- the time of day and offset are dropped so hits are bucketed by calendar date
INSERT INTO #MyTable (HitDate, IpAddress)
SELECT CAST(HitDate AS date), IpAddress
FROM MyTable;

-- main query: one row per day that has hits, with the number of IP addresses
-- that never appeared on an earlier day (0 when there are none)
SELECT
    CONVERT(varchar(10), mainDates.HitDate, 101) AS HitDate,
    COALESCE(dateCounts.counter, 0) AS NewIPAddresses
FROM (
    -- every day that has at least one hit
    SELECT DISTINCT HitDate
    FROM #MyTable
) AS mainDates
LEFT JOIN (
    -- anti-join: keep only hits whose IP has no row on an earlier day.
    -- COUNT(DISTINCT ...) so an IP that hits several times on its first day
    -- is counted once; rows with a NULL IpAddress are not counted.
    SELECT
        main.HitDate AS dateValue,
        COUNT(DISTINCT main.IpAddress) AS counter
    FROM #MyTable AS main
    LEFT JOIN #MyTable AS sub
        ON main.IpAddress = sub.IpAddress
        AND main.HitDate > sub.HitDate
    WHERE sub.IpAddress IS NULL
    GROUP BY main.HitDate
) AS dateCounts
    ON dateCounts.dateValue = mainDates.HitDate
ORDER BY mainDates.HitDate;
结果如下:
HitDate NewIPAddresses
-------------- --------------
07/08/2013 2
07/09/2013 1
07/10/2013 0
尝试以下查询:
-- New visitor IPs per calendar day; days without any new IP are reported as 0.
SELECT
    days.HITDATE,
    COALESCE(firsts.CNT, 0) AS [COUNT]
FROM (
    -- every day that has at least one hit
    SELECT DISTINCT CONVERT(DATE, HITDATE) AS HITDATE
    FROM MyTable1
) AS days
LEFT JOIN (
    -- hits whose IP was not seen on any earlier day.
    -- NOT EXISTS replaces NOT IN: a single NULL IpAddress in the NOT IN list
    -- makes the predicate UNKNOWN and silently filters out every row.
    -- COUNT(DISTINCT ...) counts an IP once even if it hits repeatedly that day.
    SELECT
        CONVERT(DATE, cur.HITDATE) AS HITDATE,
        COUNT(DISTINCT cur.IpAddress) AS CNT
    FROM MyTable1 AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM MyTable1 AS prev
        WHERE prev.IpAddress = cur.IpAddress
          AND CONVERT(DATE, prev.HITDATE) < CONVERT(DATE, cur.HITDATE)
    )
    GROUP BY CONVERT(DATE, cur.HITDATE)
) AS firsts
    ON days.HITDATE = firsts.HITDATE
ORDER BY days.HITDATE;
这是我的解决方案:
-- Sample table for this answer: one row per hit.
CREATE TABLE Test(
    DT datetime NOT NULL,
    IP varchar(15)
);

-- 'yyyymmdd' is used because, for the datetime type, a 'yyyy-mm-dd' string
-- is interpreted according to the session's language / DATEFORMAT setting.
INSERT INTO Test (DT, IP)
VALUES
    ('20130710', 'aaa'),
    ('20130710', 'bbb'),
    ('20130710', 'ccc'),
    ('20130709', 'aaa'),
    ('20130709', 'bbb'),
    ('20130708', 'aaa'),
    ('20130708', 'bbb');
-- New IPs per day, including days on which no new IP appeared (count 0).
-- Each IP is assigned to the calendar date of its first hit; both sides of the
-- comparison are dates, so a time-of-day part in DT cannot make a row look
-- like its own "earlier" visit.
SELECT
    days.HitDay AS [Date],
    COUNT(firsts.IP) AS NewVisitors
FROM (
    -- every day that has at least one hit
    SELECT DISTINCT CAST(DT AS DATE) AS HitDay
    FROM Test
) AS days
LEFT JOIN (
    -- first day each IP was seen
    SELECT IP, MIN(CAST(DT AS DATE)) AS FirstDay
    FROM Test
    WHERE IP IS NOT NULL
    GROUP BY IP
) AS firsts
    ON firsts.FirstDay = days.HitDay
GROUP BY days.HitDay
ORDER BY days.HitDay;
这看起来也不错,但不那么复杂
-- Lists the IP addresses whose first-ever hit falls on @TargetDate.
DECLARE @TargetDate DATE = '2013-07-09';

SELECT IPAddress
FROM [MyTable]
WHERE CONVERT(DATE, HITDATE) <= @TargetDate -- hits after the target day are irrelevant
GROUP BY IPAddress
-- Earliest hit is on the target day. The former COUNT(*) = 1 test wrongly
-- dropped IPs that hit more than once on their first day.
HAVING MIN(CONVERT(DATE, HITDATE)) = @TargetDate;
结果:
HitDate IpAddress
2013-07-08 2
2013-07-09 1
2013-07-10 0
在我看来,最简单的方法是找到IP地址出现的最早日期,然后将其用于聚合:
-- First-time visitors per day: find the first calendar date of each IP, then
-- count those first dates per day. The outer list of days comes from all hits,
-- so a day on which every visitor was already known is reported with 0
-- instead of being left out.
SELECT
    days.HitDay AS HitDate,
    COUNT(firsts.FirstDay) AS FirstTimeVisitors
FROM (
    -- every day that has at least one hit
    SELECT DISTINCT CAST(HitDate AS date) AS HitDay
    FROM MyTable
) AS days
LEFT JOIN (
    -- first calendar date on which each IP address appears
    SELECT IpAddress, MIN(CAST(HitDate AS date)) AS FirstDay
    FROM MyTable
    GROUP BY IpAddress
) AS firsts
    ON firsts.FirstDay = days.HitDay
GROUP BY days.HitDay
ORDER BY days.HitDay;
另一种可供选择的表格可用于计算第一次访客、第二次访客等,该表格使用密集等级:
我在T-SQL中使用了一个游标来实现所需的结果 代码如下:
-- Sample rows. ISO 8601 literals load identically whatever the session's
-- language / DATEFORMAT is (an m/d/y string can be read as d/m/y).
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('2013-07-10T08:05:29-07:00', '111.222.333.444'),
    ('2013-07-10T12:05:29-07:00', '111.222.333.222'),
    ('2013-07-09T09:05:29-07:00', '111.222.333.444'),
    ('2013-07-09T10:05:29-07:00', '111.222.333.555'),
    ('2013-07-08T11:05:29-07:00', '111.222.333.222'),
    ('2013-07-08T04:05:29-07:00', '111.222.333.555');
-- Walks the distinct hit dates in ascending order and, for each one, returns a
-- result set (date, number of IPs first seen on that date).
DECLARE @HitDate DATE;

DECLARE cCursor CURSOR FAST_FORWARD LOCAL
FOR
    SELECT DISTINCT CAST(HitDate AS DATE)
    FROM MyTable
    ORDER BY CAST(HitDate AS DATE) ASC;

OPEN cCursor;
FETCH NEXT FROM cCursor INTO @HitDate;

WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- Both comparisons use the hit's calendar date. Comparing the raw
    -- datetimeoffset column with @HitDate would promote @HitDate to midnight
    -- at offset +00:00 and misclassify hits close to a day boundary.
    -- NOT EXISTS avoids the NOT IN trap when an earlier row has a NULL IpAddress,
    -- and COUNT(DISTINCT ...) counts an IP once even if it hits repeatedly.
    SELECT @HitDate, COUNT(DISTINCT cur.IpAddress)
    FROM MyTable AS cur
    WHERE CAST(cur.HitDate AS DATE) = @HitDate
      AND NOT EXISTS (
          SELECT 1
          FROM MyTable AS prev
          WHERE prev.IpAddress = cur.IpAddress
            AND CAST(prev.HitDate AS DATE) < @HitDate
      );

    FETCH NEXT FROM cCursor INTO @HitDate;
END

CLOSE cCursor;
DEALLOCATE cCursor;
结果如下:
2013-07-08 2
2013-07-09 1
2013-07-10 0
试试这个:
-- New IPs per day: a hit counts only when it is its IP's very first hit.
-- The LEFT JOIN keeps days with no first hits, which then count as 0.
SELECT
    CONVERT(DATE, mt.HitDate) AS hitDate,
    -- DISTINCT: identical duplicate rows for a first hit must not count twice
    COUNT(DISTINCT firstDate.IpAddress) AS newIPAddresses
FROM MyTable AS mt
LEFT JOIN (
    -- earliest hit of every IP address
    SELECT IpAddress, MIN(HitDate) AS FirstHitDate
    FROM MyTable
    GROUP BY IpAddress
) AS firstDate
    ON mt.HitDate = firstDate.FirstHitDate
    AND mt.IpAddress = firstDate.IpAddress
GROUP BY CONVERT(DATE, mt.HitDate)
ORDER BY hitDate;
输出:
试试这个
-- Per calendar day, adds 1 for every hit that has no earlier hit from the
-- same IP address (the self-join finds the earlier hits, if any).
SELECT
    CONVERT(DATE, hits.[HitDate]) AS [HitDate],
    SUM(CASE WHEN earlier.[IpAddress] IS NOT NULL THEN 0 ELSE 1 END) AS IpAddress
FROM YourTableName AS hits
LEFT JOIN YourTableName AS earlier
    ON earlier.IpAddress = hits.IpAddress
    AND earlier.[HitDate] < hits.[HitDate]
GROUP BY CONVERT(DATE, hits.[HitDate])
输出
这是一个时区偏移量。我们使用时区偏移量信息存储所有数据HitDate不是现有的type@t-clausen.dk-当然是。@RandyMinder我找不到任何文档,当我声明HitDate类型的变量时,我得到一个错误:declare@a HitDate这不起作用。我无法在查询中硬编码日期。我在帖子中展示的数据只是样本数据,这是不对的。表中的数据类型错误。它应该是DateTimeOffset,而不是Date。结果不正确。@RandyMinder您是否连接到原始提问者?如果是这样的话,可能会用到一些额外的测试数据,因为上面的内容对我来说很好。缺少一个日期“2013-07-09”,这是这个问题的难点尼斯-这会产生正确的结果。如果可以的话,我会给你和海伦100分。@Hosea146我认为你无法控制。我相信当你设立奖励时,得票最多的人会得到奖励。@Hosea146找到了改进我的scriptNice的方法-这会产生正确的结果。SQLFiddle也不错。以前从未见过这种情况。@Hosea146如果有相同的rowsVery好主意确定最早或第一个日期和IP地址出现,然后从那里开始,这将不会产生正确的结果。我实际上实现了这个解决方案。
-- Sample rows. ISO 8601 literals load identically whatever the session's
-- language / DATEFORMAT is (an m/d/y string can be read as d/m/y).
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('2013-07-10T08:05:29-07:00', '111.222.333.444'),
    ('2013-07-10T12:05:29-07:00', '111.222.333.222'),
    ('2013-07-09T09:05:29-07:00', '111.222.333.444'),
    ('2013-07-09T10:05:29-07:00', '111.222.333.555'),
    ('2013-07-08T11:05:29-07:00', '111.222.333.222'),
    ('2013-07-08T04:05:29-07:00', '111.222.333.555');
-- Walks the distinct hit dates in ascending order and, for each one, returns a
-- result set (date, number of IPs first seen on that date).
DECLARE @HitDate DATE;

DECLARE cCursor CURSOR FAST_FORWARD LOCAL
FOR
    SELECT DISTINCT CAST(HitDate AS DATE)
    FROM MyTable
    ORDER BY CAST(HitDate AS DATE) ASC;

OPEN cCursor;
FETCH NEXT FROM cCursor INTO @HitDate;

WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- Both comparisons use the hit's calendar date. Comparing the raw
    -- datetimeoffset column with @HitDate would promote @HitDate to midnight
    -- at offset +00:00 and misclassify hits close to a day boundary.
    -- NOT EXISTS avoids the NOT IN trap when an earlier row has a NULL IpAddress,
    -- and COUNT(DISTINCT ...) counts an IP once even if it hits repeatedly.
    SELECT @HitDate, COUNT(DISTINCT cur.IpAddress)
    FROM MyTable AS cur
    WHERE CAST(cur.HitDate AS DATE) = @HitDate
      AND NOT EXISTS (
          SELECT 1
          FROM MyTable AS prev
          WHERE prev.IpAddress = cur.IpAddress
            AND CAST(prev.HitDate AS DATE) < @HitDate
      );

    FETCH NEXT FROM cCursor INTO @HitDate;
END

CLOSE cCursor;
DEALLOCATE cCursor;
-- New IPs per day: a hit counts only when it is its IP's very first hit.
-- The LEFT JOIN keeps days with no first hits, which then count as 0.
SELECT
    CONVERT(DATE, mt.HitDate) AS hitDate,
    -- DISTINCT: identical duplicate rows for a first hit must not count twice
    COUNT(DISTINCT firstDate.IpAddress) AS newIPAddresses
FROM MyTable AS mt
LEFT JOIN (
    -- earliest hit of every IP address
    SELECT IpAddress, MIN(HitDate) AS FirstHitDate
    FROM MyTable
    GROUP BY IpAddress
) AS firstDate
    ON mt.HitDate = firstDate.FirstHitDate
    AND mt.IpAddress = firstDate.IpAddress
GROUP BY CONVERT(DATE, mt.HitDate)
ORDER BY hitDate;
hitDate newIPAddress
2013-07-08 2
2013-07-09 1
2013-07-10 0
-- Per calendar day, adds 1 for every hit that has no earlier hit from the
-- same IP address (the self-join finds the earlier hits, if any).
SELECT
    CONVERT(DATE, hits.[HitDate]) AS [HitDate],
    SUM(CASE WHEN earlier.[IpAddress] IS NOT NULL THEN 0 ELSE 1 END) AS IpAddress
FROM YourTableName AS hits
LEFT JOIN YourTableName AS earlier
    ON earlier.IpAddress = hits.IpAddress
    AND earlier.[HitDate] < hits.[HitDate]
GROUP BY CONVERT(DATE, hits.[HitDate])
HitDate IpAddress
2013-07-08 2
2013-07-09 1
2013-07-10 0