SQL查询-每天确定新访客

SQL查询-每天确定新访客,sql,tsql,Sql,Tsql,我有一个简化的模式,如下所示: CREATE TABLE MyTable ( DateTimeOffset HitDate NOT NULL, IpAddress varchar(15) ) 示例行可能如下所示: “2013年7月10日8:05:29-07:00”“111.222.333.444” 我正在尝试解决一个查询,该查询将为我提供每天的唯一IP地址数,例如2013年7月10日。实际上,这一部分相当简单,我已经为此创建了一个查询。但是,对于这个查询,我想要的是在当前日期之前从

我有一个简化的模式,如下所示:

-- Simplified hit log: one row per page hit.
-- HitDate keeps the time-zone offset of the hit; IpAddress is the visitor's address.
-- (Column name comes first, then the data type.)
CREATE TABLE MyTable (
   HitDate DateTimeOffset NOT NULL,
   IpAddress varchar(15)
)
示例行可能如下所示:

“2013年7月10日8:05:29-07:00”“111.222.333.444”

我正在尝试编写一个查询,该查询将为我提供每天的唯一IP地址数,例如2013年7月10日。实际上,这一部分相当简单,我已经为此创建了一个查询。但是,对于这个查询,我想要的是在当前日期之前从未出现过的唯一IP地址的数量。我不关心该日期之后的情况,只关心该日期之前的情况。

例如,假设我有以下数据,这就是我所有的数据:

'7/10/2013 8:05:29 -07:00' '111.222.333.444'
'7/10/2013 12:05:29 -07:00' '111.222.333.222'
'7/9/2013 9:05:29 -07:00' '111.222.333.444'
'7/9/2013 10:05:29 -07:00' '111.222.333.555'
'7/8/2013 11:05:29 -07:00' '111.222.333.222'
'7/8/2013 4:05:29 -07:00' '111.222.333.555'
查询应输出以下内容:

'7/8/2013' 2 (neither IpAddress existed before this date so both are new)
'7/9/2013' 1 (only one of the IpAddresses is new - the one ending in '444')
'7/10/2013' 0 (both IpAddresses had existed before this date)

目标数据库是SQL Server 2012。我向第一个正确创建SQL语句的人提供100分奖励。

[编辑-改为使用DateTimeOffset]

从创建基本MyTable开始,我想到了以下几点:

-- Test fixture: build MyTable and load the six sample hits from the question.

CREATE TABLE MyTable (
   HitDate DateTimeOffset NOT NULL,
   IpAddress varchar(15)
);

-- All sample rows go in with a single multi-row INSERT.
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('7/10/2013 8:05:29 -07:00',  '111.222.333.444'),
    ('7/10/2013 12:05:29 -07:00', '111.222.333.222'),
    ('7/9/2013 9:05:29 -07:00',   '111.222.333.444'),
    ('7/9/2013 10:05:29 -07:00',  '111.222.333.555'),
    ('7/8/2013 11:05:29 -07:00',  '111.222.333.222'),
    ('7/8/2013 4:05:29 -07:00',   '111.222.333.555');

-- actual solution starts here

-- Working copy of the hit log with HitDate reduced to a calendar date.
CREATE TABLE #MyTable (
   HitDate date,
   IpAddress varchar(15)
);

-- populate data into required format for main query
-- The explicit CAST makes the DateTimeOffset -> date truncation visible.
INSERT INTO #MyTable (HitDate, IpAddress)
SELECT CAST(HitDate AS date), IpAddress
FROM MyTable;

-- main query
-- One output row per day that has any hit; NewIPAddresses is the number of
-- distinct addresses whose first appearance is that day (0 when there are none).
SELECT
    CONVERT(varchar(10), mainDates.HitDate, 101) AS HitDate,
    ISNULL(dateCounts.counter, 0) AS NewIPAddresses
FROM (SELECT DISTINCT HitDate FROM #MyTable) AS mainDates
LEFT OUTER JOIN
    (
        -- Anti-join: keep only hits that have no hit from the same address on an
        -- earlier day. COUNT(DISTINCT ...) counts an address once even if it hit
        -- several times on its first day (COUNT(*) would count every such hit).
        -- Rows with a NULL IpAddress are not counted.
        SELECT main.HitDate AS dateValue, COUNT(DISTINCT main.IpAddress) AS counter
        FROM #MyTable AS main
        LEFT OUTER JOIN #MyTable AS sub
            ON main.IpAddress = sub.IpAddress
           AND main.HitDate > sub.HitDate
        WHERE sub.IpAddress IS NULL
        GROUP BY main.HitDate
    ) AS dateCounts
    ON dateCounts.dateValue = mainDates.HitDate
-- Sort on the real date, not on the mm/dd/yyyy text, so the order is chronological.
ORDER BY mainDates.HitDate;
结果如下:

    HitDate        NewIPAddresses 
    -------------- --------------
    07/08/2013     2
    07/09/2013     1
    07/10/2013     0
尝试以下查询:

-- Per calendar day, the number of distinct IP addresses seen for the first time.
-- Days without any new address are kept and reported as 0.
SELECT days.HITDATE , ISNULL(firsts.CNT , 0) AS COUNT
FROM
(
    -- every calendar day that has at least one hit
    SELECT DISTINCT CONVERT(DATE , HITDATE) AS HITDATE
    FROM mytable1
) AS days
LEFT OUTER JOIN
(
    -- hits that have no earlier hit from the same address
    SELECT CONVERT(DATE , cur.HITDATE) AS HITDATE , COUNT(DISTINCT cur.IpAddress) AS CNT
    FROM mytable1 AS cur
    -- NOT EXISTS instead of NOT IN: IpAddress is nullable, and a single NULL in
    -- the NOT IN list would make the predicate UNKNOWN for every row.
    WHERE NOT EXISTS
    (
        SELECT 1
        FROM mytable1 AS prev
        WHERE prev.IpAddress = cur.IpAddress
          AND prev.HITDATE < cur.HITDATE
    )
    GROUP BY CONVERT(DATE , cur.HITDATE)
) AS firsts ON days.HITDATE = firsts.HITDATE
ORDER BY days.HITDATE;
这是我的解决方案:

-- Self-contained demo: hit log with a plain datetime column.
CREATE TABLE Test(
DT datetime NOT NULL,
IP varchar(15)
);

-- Sample data: 'aaa' and 'bbb' appear on all three days, 'ccc' only on the last one.
INSERT INTO Test (DT, IP)
SELECT '2013-07-10', 'aaa' UNION ALL
SELECT '2013-07-10', 'bbb' UNION ALL
SELECT '2013-07-10', 'ccc' UNION ALL
SELECT '2013-07-09', 'aaa' UNION ALL
SELECT '2013-07-09', 'bbb' UNION ALL
SELECT '2013-07-08', 'aaa' UNION ALL
SELECT '2013-07-08', 'bbb';


-- Per calendar day, the number of IPs whose first hit falls on that day.
-- * Driving from the list of days keeps days with no new IP (reported as 0).
-- * The first-hit day is derived from MIN(DT), so a row is never compared with
--   its own timestamp (CAST(T2.DT AS DATE) < T1.DT is true for the row itself
--   whenever DT carries a time of day).
-- * No NOT IN, so a NULL IP cannot blank out the result; NULL IPs are ignored.
SELECT days.[Date], COUNT(firsts.IP) AS NewIPs
FROM (SELECT DISTINCT CAST(DT AS DATE) AS [Date] FROM Test) AS days
LEFT JOIN (
    SELECT IP, CAST(MIN(DT) AS DATE) AS FirstDate
    FROM Test
    WHERE IP IS NOT NULL
    GROUP BY IP
) AS firsts
    ON firsts.FirstDate = days.[Date]
GROUP BY days.[Date]
ORDER BY days.[Date];

这看起来也不错,但不那么复杂

-- Lists the IP addresses whose first hit falls on @TargetDate.
-- The day is held in one variable instead of being repeated as a literal.
DECLARE @TargetDate DATE = '2013-07-09';

SELECT IPAddress
FROM [MyTable]
WHERE CONVERT(DATE , HITDATE) <= @TargetDate
GROUP BY IPAddress
-- The earliest hit must be on the target day. Testing MIN() rather than
-- COUNT(*) = 1 keeps addresses that hit more than once on their first day.
HAVING MIN(CONVERT(DATE , HITDATE)) = @TargetDate;
结果:

HitDate    IpAddress
2013-07-08 2
2013-07-09 1
2013-07-10 0

在我看来,最简单的方法是找到IP地址出现的最早日期,然后将其用于聚合:

-- Per calendar day, the number of IP addresses whose earliest hit is on that day.
-- Every day with at least one hit is listed; a day with no first-time visitor
-- shows 0 instead of being omitted.
select days.HitDay, count(firsts.IpAddress) as FirstTimeVisitors
from (select distinct cast(HitDate as Date) as HitDay
      from MyTable
     ) days
left join
     -- earliest hit per address, reduced to its calendar day
     (select IpAddress, cast(min(HitDate) as Date) as FirstHitDay
      from MyTable t
      group by IpAddress
     ) firsts
     on firsts.FirstHitDay = days.HitDay
group by days.HitDay
-- count(firsts.IpAddress) skips the unmatched side of the left join and a NULL address
order by days.HitDay;
另一种可选的写法可以用来统计首次访客、第二次访客等,它使用 DENSE_RANK():


我在T-SQL中使用了一个游标来实现所需的结果

代码如下:

-- Load the six sample hits from the question with a single multi-row INSERT.
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('7/10/2013 8:05:29 -07:00',  '111.222.333.444'),
    ('7/10/2013 12:05:29 -07:00', '111.222.333.222'),
    ('7/9/2013 9:05:29 -07:00',   '111.222.333.444'),
    ('7/9/2013 10:05:29 -07:00',  '111.222.333.555'),
    ('7/8/2013 11:05:29 -07:00',  '111.222.333.222'),
    ('7/8/2013 4:05:29 -07:00',   '111.222.333.555');


-- Walks the distinct calendar days of MyTable in ascending order and, for each
-- day, returns one result set: (day, number of distinct IPs first seen that day).
DECLARE @HitDate DATE

DECLARE cCursor CURSOR FAST_FORWARD LOCAL
FOR
SELECT 
    DISTINCT CAST(HitDate AS DATE)
FROM 
    MyTable
ORDER BY
    CAST(HitDate AS DATE) ASC

OPEN cCursor

FETCH NEXT FROM cCursor INTO @HitDate
WHILE (@@FETCH_STATUS = 0)
BEGIN

    -- "Earlier" is decided on the same CAST(... AS DATE) value that defines the
    -- current day. Comparing the raw DateTimeOffset with the DATE variable would
    -- compare a UTC instant with midnight +00:00 and disagree with the day filter
    -- for hits late in the local evening.
    -- NOT EXISTS avoids the NOT IN / NULL trap on the nullable IpAddress column;
    -- COUNT(DISTINCT ...) counts an address once even if it hit several times.
    SELECT @HitDate, COUNT(DISTINCT today.IpAddress)
    FROM MyTable AS today
    WHERE CAST(today.HitDate AS DATE) = @HitDate
      AND NOT EXISTS (
            SELECT 1
            FROM MyTable AS earlier
            WHERE earlier.IpAddress = today.IpAddress
              AND CAST(earlier.HitDate AS DATE) < @HitDate
          )

    FETCH NEXT FROM cCursor INTO @HitDate
END

CLOSE cCursor
DEALLOCATE cCursor
结果如下:

2013-07-08 2 2013-07-09 1 2013-07-10 0

试试这个:

    -- Per calendar day, the number of distinct IPs whose very first hit is on that day.
    -- The LEFT JOIN matches a hit only when it is the address's earliest hit, so
    -- days without any first hit still appear with a count of 0.
    -- COUNT(DISTINCT ...) keeps identical duplicate rows from being counted twice.
    SELECT CONVERT(DATE, mt.HitDate) AS hitDate
           , COUNT(DISTINCT firstDate.IpAddress) AS newIPAddresses
      FROM MyTable mt
 LEFT JOIN (SELECT IpAddress, MIN(HitDate) AS FirstHitDate
              FROM MyTable
          GROUP BY IpAddress) firstDate
          ON mt.HitDate = firstDate.FirstHitDate
          AND mt.IpAddress = firstDate.IpAddress
  GROUP BY CONVERT(DATE, mt.HitDate)
  ORDER BY hitDate
输出:

试试这个

-- Per calendar day, the number of distinct IPs that have no earlier hit.
-- T2 is the set of earlier hits from the same address; a T1 row with no T2
-- match is a first appearance. Counting DISTINCT T1.IpAddress (instead of
-- summing 1 per row) keeps identical duplicate rows from inflating the total;
-- days with no first appearance yield 0.
SELECT 
  CONVERT(DATE,T1.[HitDate]) AS [HitDate]
  ,COUNT(DISTINCT CASE WHEN T2.[IpAddress] IS NULL THEN T1.[IpAddress] END) AS IpAddress
FROM 
    YourTableName T1 
LEFT JOIN
    YourTableName T2 ON T1.IpAddress = T2.IpAddress
AND 
    T1.[HitDate] > T2.[HitDate]
GROUP BY CONVERT(DATE,T1.[HitDate])
ORDER BY CONVERT(DATE,T1.[HitDate])
输出


这是一个时区偏移量。我们使用时区偏移量信息存储所有数据HitDate不是现有的type@t-clausen.dk-当然是。@RandyMinder我找不到任何文档,当我声明HitDate类型的变量时,我得到一个错误:declare@a HitDate这不起作用。我无法在查询中硬编码日期。我在帖子中展示的数据只是样本数据,这是不对的。表中的数据类型错误。它应该是DateTimeOffset,而不是Date。结果不正确。@RandyMinder您是否连接到原始提问者?如果是这样的话,可能会用到一些额外的测试数据,因为上面的内容对我来说很好。缺少一个日期“2013-07-09”,这是这个问题的难点尼斯-这会产生正确的结果。如果可以的话,我会给你和海伦100分。@Hosea146我认为你无法控制。我相信当你设立奖励时,得票最多的人会得到奖励。@Hosea146找到了改进我的scriptNice的方法-这会产生正确的结果。SQLFiddle也不错。以前从未见过这种情况。@Hosea146如果有相同的rowsVery好主意确定最早或第一个日期和IP地址出现,然后从那里开始,这将不会产生正确的结果。我实际上实现了这个解决方案。
-- Load the six sample hits from the question with a single multi-row INSERT.
INSERT INTO MyTable (HitDate, IpAddress)
VALUES
    ('7/10/2013 8:05:29 -07:00',  '111.222.333.444'),
    ('7/10/2013 12:05:29 -07:00', '111.222.333.222'),
    ('7/9/2013 9:05:29 -07:00',   '111.222.333.444'),
    ('7/9/2013 10:05:29 -07:00',  '111.222.333.555'),
    ('7/8/2013 11:05:29 -07:00',  '111.222.333.222'),
    ('7/8/2013 4:05:29 -07:00',   '111.222.333.555');


-- Walks the distinct calendar days of MyTable in ascending order and, for each
-- day, returns one result set: (day, number of distinct IPs first seen that day).
DECLARE @HitDate DATE

DECLARE cCursor CURSOR FAST_FORWARD LOCAL
FOR
SELECT 
    DISTINCT CAST(HitDate AS DATE)
FROM 
    MyTable
ORDER BY
    CAST(HitDate AS DATE) ASC

OPEN cCursor

FETCH NEXT FROM cCursor INTO @HitDate
WHILE (@@FETCH_STATUS = 0)
BEGIN

    -- "Earlier" is decided on the same CAST(... AS DATE) value that defines the
    -- current day. Comparing the raw DateTimeOffset with the DATE variable would
    -- compare a UTC instant with midnight +00:00 and disagree with the day filter
    -- for hits late in the local evening.
    -- NOT EXISTS avoids the NOT IN / NULL trap on the nullable IpAddress column;
    -- COUNT(DISTINCT ...) counts an address once even if it hit several times.
    SELECT @HitDate, COUNT(DISTINCT today.IpAddress)
    FROM MyTable AS today
    WHERE CAST(today.HitDate AS DATE) = @HitDate
      AND NOT EXISTS (
            SELECT 1
            FROM MyTable AS earlier
            WHERE earlier.IpAddress = today.IpAddress
              AND CAST(earlier.HitDate AS DATE) < @HitDate
          )

    FETCH NEXT FROM cCursor INTO @HitDate
END

CLOSE cCursor
DEALLOCATE cCursor
    -- Per calendar day, the number of distinct IPs whose very first hit is on that day.
    -- The LEFT JOIN matches a hit only when it is the address's earliest hit, so
    -- days without any first hit still appear with a count of 0.
    -- COUNT(DISTINCT ...) keeps identical duplicate rows from being counted twice.
    SELECT CONVERT(DATE, mt.HitDate) AS hitDate
           , COUNT(DISTINCT firstDate.IpAddress) AS newIPAddresses
      FROM MyTable mt
 LEFT JOIN (SELECT IpAddress, MIN(HitDate) AS FirstHitDate
              FROM MyTable
          GROUP BY IpAddress) firstDate
          ON mt.HitDate = firstDate.FirstHitDate
          AND mt.IpAddress = firstDate.IpAddress
  GROUP BY CONVERT(DATE, mt.HitDate)
  ORDER BY hitDate
hitDate     newIPAddress
2013-07-08  2
2013-07-09  1
2013-07-10  0
-- Per calendar day, the number of distinct IPs that have no earlier hit.
-- T2 is the set of earlier hits from the same address; a T1 row with no T2
-- match is a first appearance. Counting DISTINCT T1.IpAddress (instead of
-- summing 1 per row) keeps identical duplicate rows from inflating the total;
-- days with no first appearance yield 0.
SELECT 
  CONVERT(DATE,T1.[HitDate]) AS [HitDate]
  ,COUNT(DISTINCT CASE WHEN T2.[IpAddress] IS NULL THEN T1.[IpAddress] END) AS IpAddress
FROM 
    YourTableName T1 
LEFT JOIN
    YourTableName T2 ON T1.IpAddress = T2.IpAddress
AND 
    T1.[HitDate] > T2.[HitDate]
GROUP BY CONVERT(DATE,T1.[HitDate])
ORDER BY CONVERT(DATE,T1.[HitDate])
HitDate    IpAddress
2013-07-08  2
2013-07-09  1
2013-07-10  0