使用dplyr mutate()规范化数据会带来不一致

使用dplyr mutate()规范化数据会带来不一致,r,dplyr,mutate,R,Dplyr,Mutate,我试图用下面的代码从这篇博客文章中复制框架,但结果似乎不一致 library(tidyverse) library(magrittr) ids <- c("1617", "1516", "1415", "1314", "1213", "1112", "1011", "0910", "0809", "0708", "0607", "0506") data <- ids %>% map(function(i) {read_csv(paste0("http://www.fo

我试图用下面的代码复现这篇博客文章中的框架,但得到的结果似乎不一致:

library(tidyverse)
library(magrittr)

# Season codes (e.g. "1617") that are spliced into the download URL below.
ids <- c(
  "1617", "1516", "1415", "1314", "1213", "1112",
  "1011", "0910", "0809", "0708", "0607", "0506"
)

# Read one F1.csv per season code, keep the columns from Date through AST,
# tag every row with its season code, then stack all seasons into one table.
data <- bind_rows(
  map(ids, function(season_code) {
    csv_url <- paste0(
      "http://www.football-data.co.uk/mmz4281/", season_code, "/F1.csv"
    )
    season_tbl <- select(read_csv(csv_url), Date:AST)
    mutate(season_tbl, season = season_code)
  })
)

# Keep only rows with no missing value in the first three columns
# (presumably Date, HomeTeam and AwayTeam -- confirm against the CSV layout).
data <- data[complete.cases(data[, 1:3]), ]

# Home-team view of every match row: HomeTeam becomes `team`, and the result
# columns are renamed FTHG -> BP, FTAG -> BC, HS -> TP, AS -> TC,
# HST -> TCP, AST -> TCC.
tmp1 <- data %>%
  select(season, team = HomeTeam, FTHG:FTR, HS:AST) %>%
  rename(
    BP = FTHG, BC = FTAG,
    TP = HS, TC = AS,
    TCP = HST, TCC = AST
  ) %>%
  mutate(
    # 3 points when FTR is "H", 0 when it is "A", 1 for any other value.
    Pts = ifelse(FTR == "H", 3, ifelse(FTR == "A", 0, 1)),
    Terrain = "Domicile"
  )

# Away-team view of every match row: AwayTeam becomes `team`, and the result
# columns are renamed FTAG -> BP, FTHG -> BC, AS -> TP, HS -> TC,
# AST -> TCP, HST -> TCC (the mirror image of the home-team mapping).
tmp2 <- data %>%
  select(season, team = AwayTeam, FTHG:FTR, HS:AST) %>%
  rename(
    BP = FTAG, BC = FTHG,
    TP = AS, TC = HS,
    TCP = AST, TCC = HST
  ) %>%
  mutate(
    # 3 points when FTR is "A", 0 when it is "H", 1 for any other value.
    Pts = ifelse(FTR == "A", 3, ifelse(FTR == "H", 0, 1)),
    Terrain = "Extérieur"
  )

# Stack the home-view rows on top of the away-view rows, so every match
# contributes one row for each of its two teams.
tmp3 <- tmp1 %>% bind_rows(tmp2)

# Per-season, per-team totals: row count (j), summed Pts, sums of BP/BC,
# TP/TC and TCP/TCC, and the for-minus-against differences of each pair.
# NOTE: summarise() removes only the last grouping variable (team), so the
# result is still a grouped_df grouped by season; any later mutate() on it
# is evaluated season by season rather than over the whole table.
l1_0517 <- tmp3 %>%
  group_by(season, team) %>%
  summarise(
    j = n(),
    pts = sum(Pts),
    diff_but = sum(BP) - sum(BC),
    diff_t_ca = sum(TCP, na.rm = TRUE) - sum(TCC, na.rm = TRUE),
    diff_t = sum(TP, na.rm = TRUE) - sum(TC, na.rm = TRUE),
    but_p = sum(BP),
    but_c = sum(BC),
    tir_ca_p = sum(TCP, na.rm = TRUE),
    tir_ca_c = sum(TCC, na.rm = TRUE),
    tir_p = sum(TP, na.rm = TRUE),
    tir_c = sum(TC, na.rm = TRUE)
  )

# Order by season, then by points and goal difference, both descending.
l1_0517 <- arrange(l1_0517, season, desc(pts), desc(diff_but))
例如,当 `but_p` 的值为 83 时,我在第5行和第7行得到的 `norm_attack` 分别是 0.5612738 和 0.5128357。

这正常吗?我原以为 `mean(l1_0517$but_p)` 是固定的,因此对 `l1_0517$but_p` 做对数标准化时,相同的取值应该得到相同的结果。

更新

我尝试了一个更简单的例子,但我无法重现这个问题:

# Minimal example: 200 uniform draws between 15 and 100, truncated to integers.
df <- tibble(a = as.integer(runif(n = 200, min = 15, max = 100)))

# Log of each value relative to the mean of the whole (ungrouped) column.
df <- mutate(df, norm_a = log(a / mean(a)))
df <- df %>%
  mutate(norm_a = a %>% divide_by(mean(a)) %>%
           log())

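我在查看了 `l1_0517` 的类型后找到了解决方案。

它是一个 `grouped_df`(`summarise()` 之后仍然按 `season` 分组),因此 `mutate()` 中的 `mean(but_p)` 是按赛季分别计算的,相同的 `but_p` 在不同赛季会得到不同的结果。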

正确的代码是:

# Per-season, per-team totals: row count (j), summed Pts, sums of BP/BC,
# TP/TC and TCP/TCC, and the for-minus-against differences of each pair.
l1_0517 <- tmp3 %>%
  group_by(season, team) %>%
  summarise(
    j = n(),
    pts = sum(Pts),
    diff_but = sum(BP) - sum(BC),
    diff_t_ca = sum(TCP, na.rm = TRUE) - sum(TCC, na.rm = TRUE),
    diff_t = sum(TP, na.rm = TRUE) - sum(TC, na.rm = TRUE),
    but_p = sum(BP),
    but_c = sum(BC),
    tir_ca_p = sum(TCP, na.rm = TRUE),
    tir_ca_c = sum(TCC, na.rm = TRUE),
    tir_p = sum(TP, na.rm = TRUE),
    tir_c = sum(TC, na.rm = TRUE)
  ) %>%
  # summarise() leaves the table grouped by season; drop that grouping so
  # later mutate() calls (e.g. mean(but_p)) are computed over all rows.
  ungroup()

# Order by season, then by points and goal difference, both descending.
l1_0517 <- arrange(l1_0517, season, desc(pts), desc(diff_but))
l1_0517 <- tmp3 %>%
  group_by(season, team)%>%
  summarise(j = n(),
            pts = sum(Pts),
            diff_but = (sum(BP) - sum(BC)),
            diff_t_ca = (sum(TCP, na.rm = T) - sum(TCC, na.rm = T)),
            diff_t = (sum(TP, na.rm = T) - sum(TC, na.rm = T)), 
            but_p = sum(BP),
            but_c = sum(BC),
            tir_ca_p = sum(TCP, na.rm = T),
            tir_ca_c = sum(TCC, na.rm = T),
            tir_p = sum(TP, na.rm = T),
            tir_c = sum(TC, na.rm = T)) %>%
  ungroup() %>%
  arrange((season), desc(pts), desc(diff_but))
# Minimal example: 200 uniform draws between 15 and 100, truncated to integers.
df <- tibble(a = as.integer(runif(n = 200, min = 15, max = 100)))

# Log of each value relative to the mean of the whole (ungrouped) column.
df <- mutate(df, norm_a = log(a / mean(a)))
# Per-season, per-team totals: row count (j), summed Pts, sums of BP/BC,
# TP/TC and TCP/TCC, and the for-minus-against differences of each pair.
l1_0517 <- tmp3 %>%
  group_by(season, team) %>%
  summarise(
    j = n(),
    pts = sum(Pts),
    diff_but = sum(BP) - sum(BC),
    diff_t_ca = sum(TCP, na.rm = TRUE) - sum(TCC, na.rm = TRUE),
    diff_t = sum(TP, na.rm = TRUE) - sum(TC, na.rm = TRUE),
    but_p = sum(BP),
    but_c = sum(BC),
    tir_ca_p = sum(TCP, na.rm = TRUE),
    tir_ca_c = sum(TCC, na.rm = TRUE),
    tir_p = sum(TP, na.rm = TRUE),
    tir_c = sum(TC, na.rm = TRUE)
  ) %>%
  # summarise() leaves the table grouped by season; drop that grouping so
  # later mutate() calls (e.g. mean(but_p)) are computed over all rows.
  ungroup()

# Order by season, then by points and goal difference, both descending.
l1_0517 <- arrange(l1_0517, season, desc(pts), desc(diff_but))