Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/sql/79.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/7/sql-server/25.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
SQL:使用LEAD()和PARTITION BY访问当前行之后的下一行_Sql_Sql Server_Count_Left Join_Aggregate - Fatal编程技术网

SQL:使用LEAD()和PARTITION BY访问当前行之后的下一行

SQL:使用LEAD()和PARTITION BY访问当前行之后的下一行,sql,sql-server,count,left-join,aggregate,Sql,Sql Server,Count,Left Join,Aggregate,我有一个移动应用程序浏览历史数据集,如下所示 DeviceDateTime:用户在移动应用程序中查看页面的日期和时间 UserID:每个UserID代表一个登录移动应用程序的访问者 PageName:移动应用程序中有不同的页面。所有访客 将首先在主页上着陆,然后导航到不同的位置 页数 PageSequence:访问页面的顺序。例如,Seq_1 Home> Seq_2 My Account=首先登录主页,然后导航到“My Account” “帐户”页 DeviceDateTime 用户ID 页

我有一个移动应用程序浏览历史数据集,如下所示

  • DeviceDateTime:用户在移动应用程序中查看页面的日期和时间
  • UserID:每个UserID代表一个登录移动应用程序的访问者
  • PageName:移动应用程序中有不同的页面。所有访客 将首先在主页上着陆,然后导航到不同的位置 页数
  • PageSequence:访问页面的顺序。例如,Seq_1 Home> Seq_2 My Account=首先登录主页,然后导航到“My Account” “帐户”页
DeviceDateTime UserID PageName PageSequence 2021-01-19 16:40:00.000 UserA Home Seq_1 2021-01-19 16:40:00.000 UserA My Account Seq_2 2021-01-19 16:40:07.000 UserA My Activity Seq_3 2021-01-19 16:40:07.000 UserA Restaurant Listing Seq_4 2021-01-19 16:40:18.000 UserA Restaurant Details Page Seq_5 2021-01-19 16:40:31.000 UserA Restaurant Details Page Seq_6 2021-01-19 16:40:31.000 UserA Restaurant Booking Confirmation Seq_7 2021-01-19 16:40:40.000 UserA Home Seq_8 2021-01-19 16:40:45.000 UserA Write To Us Seq_9 2021-01-19 16:40:46.000 UserA Home Seq_10 2021-01-28 21:11:53.000 UserB Home Seq_1 2021-01-28 21:12:01.000 UserB Restaurant Listing Seq_2 2021-01-28 21:13:37.000 UserB Restaurant Listing Seq_3 2021-02-16 09:43:27.000 UserA Home Seq_1 2021-02-16 09:43:43.000 UserA Write To Us Seq_2 2021-02-16 09:44:50.000 UserA My Account Seq_3 2021-02-16 09:45:03.000 UserA My Activity Seq_4
不要按
devicedatetime
进行分区,而是按它排序

-- For each page view, fetch the page the same user viewed next:
-- rows are grouped per user (PARTITION BY) and walked in time order (ORDER BY).
SELECT
    UserID,
    DeviceDateTime,
    PageName,
    LEAD(PageName) OVER (
        PARTITION BY UserID
        ORDER BY DeviceDateTime
    ) AS next_pagename
FROM seq_fixed
您不应该按时间进行分区,因为分区是“使所有行都被视为属于同一集合的因素”,即相同的用户标识是按时间(或顺序)排序的“一组行”。如果您将这些行划分为“每用户每秒”的集合,您就不会得到“每用户访问十页”的“每用户”旅程,而是“每用户访问一页十次”

此外,不要使用分组方式挤压“导航到同一页面”,请使用
... FROM with_next WHERE next_pagename <> pagename

您计算
pageseq
,但从不使用它——要么在 LEAD 的 ORDER BY 中按它排序(如果同一用户的不同页面可能出现精确到秒也完全相同的日期时间),要么把它删掉

最后,如果“用户计数”应该是“从某一页面导航到另一页面的不同用户的数量”,则需要是
COUNT(DISTINCT UserID) AS No_of_User
,而不是
COUNT(*)
——COUNT(*) 是“该页面导航发生的次数,同一用户的多次导航会重复计入”。把这两种统计数据都包含进来可能会很方便,以便了解用户数量

编辑:

以下是您的查询的修改版本,其中注释掉了我建议删除的所有位:

-- Demo fixture: mobile-app browsing history, one row per page view.
DROP TABLE IF EXISTS #App;
CREATE TABLE #App (
    -- DATETIME2(3) rather than SMALLDATETIME: SMALLDATETIME has one-minute
    -- accuracy, so the second-level timestamps below would be rounded to the
    -- nearest minute and the time ordering of page views would be lost.
    DeviceDateTime DATETIME2(3),
    UserID VARCHAR(100),
    PageName VARCHAR(100),
    -- 'Seq_<n>': position of the page view; the data restarts at Seq_1
    -- for each new visit of the same user.
    PageSequence VARCHAR(100));
-- Explicit column list so the insert does not depend on column order.
INSERT INTO #App (DeviceDateTime, UserID, PageName, PageSequence) VALUES
    ('2021-01-19 16:40:00.000','UserA', 'Home', 'Seq_1'),
    ('2021-01-19 16:40:00.000','UserA', 'My Account', 'Seq_2'),
    ('2021-01-19 16:40:07.000','UserA', 'My Activity', 'Seq_3'),
    ('2021-01-19 16:40:07.000','UserA', 'Restaurant Listing', 'Seq_4'),
    ('2021-01-19 16:40:18.000','UserA', 'Restaurant Details Page', 'Seq_5'),
    ('2021-01-19 16:40:31.000','UserA', 'Restaurant Details Page', 'Seq_6'),
    ('2021-01-19 16:40:31.000','UserA', 'Restaurant Booking Confirmation', 'Seq_7'),
    ('2021-01-19 16:40:40.000','UserA', 'Home', 'Seq_8'),
    ('2021-01-19 16:40:45.000','UserA', 'Write To Us', 'Seq_9'), -- was 'Write To Use' (typo); it would be counted as a separate page
    ('2021-01-19 16:40:46.000','UserA', 'Home', 'Seq_10'),
    ('2021-01-28 21:11:53.000','UserB', 'Home', 'Seq_1'),
    ('2021-01-28 21:12:01.000','UserB', 'Restaurant Listing', 'Seq_2'),
    ('2021-01-28 21:13:37.000','UserB', 'Restaurant Listing', 'Seq_3'),
    ('2021-02-16 09:43:27.000','UserA', 'Home', 'Seq_1'),
    ('2021-02-16 09:43:43.000','UserA', 'Write To Us', 'Seq_2'),
    ('2021-02-16 09:44:50.000','UserA', 'My Account', 'Seq_3'),
    ('2021-02-16 09:45:03.000','UserA', 'My Activity', 'Seq_4');

DROP TABLE IF EXISTS #SD;

-- Count how often each page-to-page transition (PageName -> next_pagename)
-- occurs, and store the result in #SD.
WITH seq_fixed AS
(
  -- Numeric suffix of PageSequence ('Seq_10' -> 10), used below as a
  -- tie-breaker when two page views of a user share the same timestamp.
  SELECT
    UserID,
    DeviceDateTime,
    PageName,
    CAST(RIGHT(PageSequence, CHARINDEX('_', REVERSE(PageSequence)) - 1) AS int) AS pagesequencefinal
  FROM #App
)
, with_next AS
(
  -- Partition per user only; time is the ordering, not a partition key.
  -- Ordering by DeviceDateTime alone is nondeterministic for rows with equal
  -- timestamps (the sample data has several), so the sequence number breaks ties.
  SELECT
    UserID,
    DeviceDateTime,
    PageName,
    LEAD(PageName) OVER (
      PARTITION BY UserID
      ORDER BY DeviceDateTime ASC, pagesequencefinal ASC
    ) AS next_pagename
  FROM seq_fixed
)
-- No_of_User counts transitions, including repeats by the same user; use
-- COUNT(DISTINCT UserID) instead if distinct users are what is wanted.
SELECT PageName, next_pagename, COUNT(*) AS No_of_User
INTO #SD
FROM with_next
WHERE next_pagename IS NOT NULL  -- the last page view of a user has no successor
GROUP BY PageName, next_pagename;

-- Sort when reading: an ORDER BY on SELECT ... INTO does not determine the
-- order in which rows are later returned from #SD.
SELECT PageName, next_pagename, No_of_User
FROM #SD
ORDER BY PageName, next_pagename;

在以前的请求中,您只想确认每个用户的页面第一次出现, 所以Home->PageX->Home->PageY将被解释为Home->PageX->PageY。要做到这一点,您必须按用户和页面分组才能找到第一次出现的情况

此新请求中不是这种情况,因此不要聚合:

-- Count every page-to-page transition per user, without collapsing repeated pages.
WITH page_visits AS
(
    -- Turn the textual sequence ('Seq_10') into its numeric suffix (10).
    SELECT
        userid,
        pagename,
        CAST(RIGHT(pagesequence, CHARINDEX('_', REVERSE(pagesequence)) - 1) AS int) AS pagesequencefinal
    FROM app
),
page_transitions AS
(
    -- Pair each page with the page that follows it for the same user.
    -- NOTE(review): ordering relies on pagesequencefinal alone; if sequence
    -- numbers restart per visit, rows from different visits tie - confirm.
    SELECT
        userid,
        pagename,
        LEAD(pagename) OVER (
            PARTITION BY userid
            ORDER BY pagesequencefinal
        ) AS next_pagename
    FROM page_visits
)
SELECT
    pagename,
    next_pagename,
    COUNT(*)
FROM page_transitions
WHERE next_pagename IS NOT NULL
GROUP BY
    pagename,
    next_pagename
ORDER BY
    pagename,
    next_pagename;

唯一的问题是:当用户昨天在PageX上结束,今天从Home开始时,这将计为PageX->Home。如果你想防止这种情况发生,你需要对这种情况进行一些检测,例如,当某个条目的前一条记录(predecessor)距它至少 1 小时或类似间隔时,就不把它算作一次导航。为此,您可以使用时间戳列和
LAG

,正如Thorsten Kettner所指出的:当用户昨天在PageX上结束,今天从Home开始时,这将被视为PageX->Home。我采纳了他的建议,并尝试使用timestamp列和LAG函数来防止这种情况发生。然而,它看起来非常长。有没有办法缩短它

-- Demo fixture: mobile-app browsing history, one row per page view.
DROP TABLE IF EXISTS #App;
CREATE TABLE #App (
    -- DATETIME2(3) rather than SMALLDATETIME: SMALLDATETIME has one-minute
    -- accuracy, so the second-level timestamps below would be rounded to the
    -- nearest minute and the time ordering of page views would be lost.
    DeviceDateTime DATETIME2(3),
    UserID VARCHAR(100),
    PageName VARCHAR(100),
    -- 'Seq_<n>': position of the page view; the data restarts at Seq_1
    -- for each new visit of the same user.
    PageSequence VARCHAR(100));
-- Explicit column list so the insert does not depend on column order.
INSERT INTO #App (DeviceDateTime, UserID, PageName, PageSequence) VALUES
    ('2021-01-19 16:40:00.000','UserA', 'Home', 'Seq_1'),
    ('2021-01-19 16:40:00.000','UserA', 'My Account', 'Seq_2'),
    ('2021-01-19 16:40:07.000','UserA', 'My Activity', 'Seq_3'),
    ('2021-01-19 16:40:07.000','UserA', 'Restaurant Listing', 'Seq_4'),
    ('2021-01-19 16:40:18.000','UserA', 'Restaurant Details Page', 'Seq_5'),
    ('2021-01-19 16:40:31.000','UserA', 'Restaurant Details Page', 'Seq_6'),
    ('2021-01-19 16:40:31.000','UserA', 'Restaurant Booking Confirmation', 'Seq_7'),
    ('2021-01-19 16:40:40.000','UserA', 'Home', 'Seq_8'),
    ('2021-01-19 16:40:45.000','UserA', 'Write To Us', 'Seq_9'),
    ('2021-01-19 16:40:46.000','UserA', 'Home', 'Seq_10'),
    ('2021-01-28 21:11:53.000','UserB', 'Home', 'Seq_1'),
    ('2021-01-28 21:12:01.000','UserB', 'Restaurant Listing', 'Seq_2'),
    ('2021-01-28 21:13:37.000','UserB', 'Restaurant Listing', 'Seq_3'),
    ('2021-02-16 09:43:27.000','UserA', 'Home', 'Seq_1'),
    ('2021-02-16 09:43:43.000','UserA', 'Write To Us', 'Seq_2'),
    ('2021-02-16 09:44:50.000','UserA', 'My Account', 'Seq_3'),
    ('2021-02-16 09:45:03.000','UserA', 'My Activity', 'Seq_4');

    -- Count page-to-page transitions, but never across sessions: a session is
    -- all page views of one user on one calendar day, so "ended on PageX
    -- yesterday, started at Home today" is not counted as PageX -> Home.
    --
    -- The earlier version derived a session number from a LAG()-based
    -- "day changed" flag plus a running SUM(). That flag marks the first row of
    -- each (user, day) group, so partitioning by (UserID, date) directly yields
    -- the same sessions with far less code, and it does not depend on which of
    -- several rows with equal timestamps LAG() happens to visit first.
    -- CTE names deliberately avoid the '#' prefix used for temp tables.
    WITH page_views AS (
        SELECT
            UserID,
            PageName,
            -- Calendar day of the page view: the session key within a user.
            CAST(DeviceDateTime AS date) AS session_date,
            -- Numeric suffix of PageSequence ('Seq_10' -> 10).
            CAST(RIGHT(PageSequence, CHARINDEX('_', REVERSE(PageSequence)) - 1) AS int) AS pagesequencefinal
        FROM #App
    ), page_transitions AS (
        SELECT
            PageName,
            -- Next page inside the same session, in visit order.
            LEAD(PageName) OVER (
                PARTITION BY UserID, session_date
                ORDER BY pagesequencefinal
            ) AS next_pagename
        FROM page_views
    )
    -- No_of_User counts transitions (repeats by the same user included).
    SELECT PageName, next_pagename, COUNT(*) AS No_of_User
    FROM page_transitions
    WHERE next_pagename IS NOT NULL  -- the last page of a session has no successor
    GROUP BY PageName, next_pagename
    ORDER BY PageName, next_pagename;
删除表格(如果存在)#App
创建表格#应用程序(
DeviceDateTime SMALLDATETIME,
UserID VARCHAR(100),
PageName VARCHAR(100),
PageSequence VARCHAR(100))
插入到#应用程序值中
('2021-01-19 16:40:00.000','UserA','Home','Seq_1'),
('2021-01-19 16:40:00.000','UserA','My Account','Seq_2'),
('2021-01-19 16:40:07.000','UserA','My Activity','Seq_3'),
('2021-01-19 16:40:07.000','UserA','Restaurant Listing','Seq_4'),
('2021-01-19 16:40:18.000','UserA','Restaurant Details Page','Seq_5'),
('2021-01-19 16:40:31.000','UserA','Restaurant Details Page','Seq_6'),
('2021-01-19 16:40:31.000'、'UserA'、'Restaurant Booking Confirmation'、'Seq_7'),
('2021-01-19 16:40:40.000','UserA','Home','Seq_8'),
('2021-01-19 16:40:45.000','UserA','Write To Us','Seq_9'),
('2021-01-19 16:40:46.000','UserA','Home','Seq_10'),
('2021-01-2821:11:53.000','UserB','Home','Seq_1'),
('2021-01-28 21:12:01.000','UserB','Restaurant Listing','Seq_2'),
('2021-01-28 21:13:37.000','UserB','Restaurant Listing','Seq_3'),
('2021-02-16 09:43:27.000','UserA','Home','Seq_1'),
('2021-02-16 09:43:43.000','UserA','Write To Us','Seq_2'),
('2021-02-16 09:44:50.000','UserA','My Account','Seq_3'),
('2021-02-16 09:45:03.000','UserA','My Activity','Seq_4');
将#App2作为(
选择
DeviceDateTime,
用户ID,
页码,
将(右(PageSequence,charindex(“”),反向(PageSequence))-1)转换为int)转换为pagesequencefinal,
将(DeviceDateTime)提前到(按用户ID顺序按DeviceDateTime划分)下一天,
强制转换(延迟(DeviceDateTime,1)超过(按用户ID顺序按DeviceDateTime划分)作为日期)作为前一天,
当DATEDIFF(day,cast(lag(DeviceDateTime,1)超过(按用户ID顺序按DeviceDateTime划分)作为日期)时,DeviceDateTime“不起作用”为什么它不起作用?它出错了吗?是否给出了意外的结果、不希望出现的行为等等?您在这里问的是什么?您在这里向我们倾诉了大量代码、数据和文本,但我看不到清晰的问题陈述(f除外)