如何在R中找出在一列或多列上存在显著差异的行对?

如何在R中找到与一个或多个有实质性差异的行对?,r,R,我有一个df,显示每小时通过路段id的车辆百分比。 测量值进一步分为方向r 最后一列是小时记录1到24 我感兴趣的是找到id,在那里,小时值之间存在着很大的差异,即早上/晚上的交通量因方向r而不同 我不知道如何有效地处理这个问题。首先需要通过id创建一个组,然后将行相互减去,查看是否有任何值大于(例如)2 所需输出是一个向量,其id为差值大于2至少一小时的向量 数据: structure(list(id = c("Hochbergerstrasse 55", "Ho

我有一个数据框 df,显示每小时通过各路段(`id`)的车辆百分比。测量值进一步按方向(`r`)划分。

最后的各列是第 `1` 到第 `24` 小时的记录。

我想找出这样的 `id`:其两个方向在某些小时的取值存在较大差异,也就是说,早上/晚上的交通量因方向 `r` 而不同。

我不知道如何有效地处理这个问题。首先需要按 `id` 分组,然后将组内的两行相减,查看是否有任何差值大于(例如)2。

所需输出是一个向量,包含至少有一个小时的差值大于2的那些 id。

数据:

structure(list(id = c("Hochbergerstrasse 55", "Hochbergerstrasse 55", 
"Johanniterbrücke", "Johanniterbrücke", "Wettsteinbrücke", "Wettsteinbrücke", 
"Viaduktstrasse 60 (Rialto)", "Viaduktstrasse 60 (Rialto)", "Dorenbachviadukt", 
"Dorenbachviadukt", "Wasgenring 62", "Wasgenring 62", "Äussere Baselstrasse 381 (Riehen)", 
"Äussere Baselstrasse 381 (Riehen)", "Bäumlihofstrasse (Riehen)", 
"Bäumlihofstrasse (Riehen)", "Grenzacherstrasse 511", "Grenzacherstrasse 511", 
"St. Alban-Anlage 72", "St. Alban-Anlage 72"), type = c("13", 
"13", "12", "12", "13", "13", "13", "13", "12", "12", "12", "12", 
"13", "13", "12", "12", "14", "14", "13", "13"), source = c("bs", 
"bs", "bs", "bs", "bs", "bs", "bs", "bs", "bs", "bs", "bs", "bs", 
"bs", "bs", "bs", "bs", "bs", "bs", "bs", "bs"), r = c("1", "2", 
"1", "2", "1", "2", "1", "2", "1", "2", "1", "2", "1", "2", "1", 
"2", "1", "2", "1", "2"), `1` = c("0.85003673000000002", "0.84919173299999995", 
"1.5073329710000001", "1.6612883949999999", "1.9605881759999999", 
"1.8001595079999999", "1.8975332069999999", "1.446019935", "1.0564147239999999", 
"0.87236867100000004", "0.93815149399999997", "1.1354737669999999", 
"0.99779897299999998", "1.1513463319999999", "0.72833211899999994", 
"1.089378559", "0.62853551200000002", "0.955223881", "1.446208113", 
"0.99173553699999994"), `2` = c("0.53520831099999999", "0.54225496200000001", 
"1.0456273760000001", "1.087606071", "1.5704711410000001", "1.2988492650000001", 
"1.157495256", "0.92657588099999999", "0.62077978700000003", 
"0.47411340800000001", "0.53277739199999996", "0.67867397500000004", 
"0.58694057200000005", "0.74280408499999995", "0.38844379699999998", 
"0.61896509", "0.251414205", "0.52537313399999996", "0.88183421500000003", 
"0.62442607900000002"), `3` = c("0.461748347", "0.34786167400000001", 
"0.77403584999999997", "0.75295804899999996", "1.1903571070000001", 
"0.86589950999999998", "0.64516129", "0.61771725399999999", "0.37028969699999997", 
"0.331879386", "0.32429928200000002", "0.45679979100000001", 
"0.38151137200000002", "0.46425255300000001", "0.26705510999999998", 
"0.396137658", "0.163419233", "0.29850746299999997", "0.59964726599999996", 
"0.42240587699999999"), `4` = c("0.52471403100000003", "0.40924902800000001", 
"0.62466051099999997", "0.62148918399999997", "1.050315095", 
"0.67221146200000004", "0.49335863400000002", "0.56156114000000001", 
"0.33761707699999999", "0.35084392199999997", "0.31271716500000002", 
"0.45679979100000001", "0.35216434299999999", "0.40854224700000003", 
"0.26705510999999998", "0.29710324300000002", "0.163419233", 
"0.214925373", "0.47619047599999997", "0.36730945799999998"), 
    `5` = c("0.59817399500000001", "0.58317986499999996", "0.59750135800000004", 
    "0.58563403800000002", "0.99029708900000002", "0.64942463299999997", 
    "0.58823529399999996", "0.82830268100000004", "0.37028969699999997", 
    "0.65427650299999995", "0.64859856400000004", "0.77003393399999998", 
    "0.64563462900000002", "0.38997214499999999", "0.50983248400000003", 
    "0.32186184699999998", "0.414833438", "0.26268656699999998", 
    "0.47619047599999997", "0.42240587699999999"), `6` = c("1.122888026", 
    "1.432371598", "1.004888647", "1.0278474959999999", "1.1503451039999999", 
    "0.79753902200000004", "0.85388994299999998", "1.68468342", 
    "0.686125027", "1.7068082689999999", "1.274032893", "1.761942052", 
    "1.555392517", "0.81708449400000005", "1.723719349", "0.74275810799999997", 
    "1.5713387809999999", "0.93134328399999999", "0.74074074099999998", 
    "0.84481175399999997"), `7` = c("3.7464581799999999", "3.5297728670000001", 
    "3.2455187400000001", "2.8445081870000002", "2.8208462540000001", 
    "1.8229463370000001", "2.2580645160000001", "4.0572792360000003", 
    "2.6682639950000002", "5.1204248059999999", "3.1040074130000002", 
    "5.3771861129999996", "4.3286867200000003", "2.2841225629999999", 
    "4.8798252", "1.78261946", "4.9780012569999998", "2.2567164179999999", 
    "1.7989417990000001", "3.2323232320000002"), `8` = c("4.701437716", 
    "4.2766523430000003", "5.4454101030000004", "4.637265448", 
    "5.3315994800000004", "3.6572860889999999", "3.7950664139999999", 
    "5.952548084", "4.5306033540000001", "6.8367153419999998", 
    "5.0961315730000001", "6.3038371179999997", "6.4563462950000003", 
    "3.8068709379999999", "7.7688759410000001", "3.49096311", 
    "8.6989314899999997", "3.4388059700000002", "3.439153439", 
    "8.0808080810000007"), `9` = c("5.4465316399999999", "4.0515653780000003", 
    "5.4861488319999996", "5.3424166370000004", "5.6917075119999998", 
    "4.8763814520000004", "4.5540796959999996", "5.7419626560000001", 
    "4.6286212149999999", "6.4384600799999996", "5.5478341440000003", 
    "5.3249804230000004", "6.4563462950000003", "4.326833798", 
    "6.8220441850000002", "4.4317900469999998", "7.5675675680000003", 
    "4.4417910450000004", "4.2680776009999999", "9.1643709829999995"
    ), `10` = c("5.6774058140000001", "4.2561898920000001", "5.3231939160000001", 
    "5.2946097769999998", "5.3516054820000001", "4.944741939", 
    "4.8007590130000004", "5.7981187700000003", "4.7593116970000002", 
    "5.8410771859999997", "5.2582812140000001", "4.6202036020000001", 
    "5.3705062359999998", "4.8282265549999996", "5.8266569559999999", 
    "5.0507551370000003", "5.9459459460000001", "4.7164179099999997", 
    "4.4444444440000002", "6.299357208"), `11` = c("5.9187742679999999", 
    "5.2383875590000004", "5.6626833239999996", "5.677064659", 
    "5.6216865059999996", "5.5599863279999999", "5.4269449720000003", 
    "5.9946651690000001", "5.4563275969999996", "5.9927934760000001", 
    "5.6057447299999996", "4.9725920129999999", "5.5612619219999999", 
    "5.682451253", "5.8994901679999998", "5.8182718490000003", 
    "6.0339409179999999", "5.3850746269999998", "5.343915344", 
    "5.7667584940000003"), `12` = c("6.391016896", "6.1387354209999998", 
    "5.7577403589999996", "6.2268435520000001", "5.6817045110000004", 
    "6.2435912040000003", "5.8823529409999997", "6.1912115679999999", 
    "6.2622522329999999", "6.2582969850000003", "6.150104239", 
    "5.768728791", "5.9280997800000002", "6.3509749299999996", 
    "6.2151007529999998", "6.6105471649999998", "6.0842237590000003", 
    "6.2328358210000001", "6.4373897710000003", "5.7667584940000003"
    ), `13` = c("6.6533739110000001", "6.2819725799999997", "5.4454101030000004", 
    "5.9400023900000001", "5.1015304590000001", "5.93596901", 
    "5.8064516130000001", "5.5875333429999996", "6.1206708780000003", 
    "5.6229850179999996", "5.8605513089999999", "5.6382145650000002", 
    "5.7079970649999998", "6.5181058500000004", "5.753823744", 
    "6.5362713540000001", "5.6065367689999999", "6.6626865669999997", 
    "6.331569665", "5.1974288340000001"), `14` = c("6.8422709619999997", 
    "6.813996317", "6.3145029880000001", "5.8921955300000004", 
    "6.1118335500000001", "5.9131821809999998", "5.920303605", 
    "6.2473676820000001", "6.2295796120000002", "6.7324103930000003", 
    "6.5323141070000004", "5.8470373269999998", "6.2949376380000004", 
    "6.4252553389999996", "6.3850449139999998", "6.2391681109999997", 
    "6.599622879", "6.1492537309999999", "5.9082892420000004", 
    "6.574839302"), `15` = c("7.3145135899999998", "6.6809903830000001", 
    "6.6539923950000004", "6.5853950040000004", "6.3018905670000001", 
    "6.6309673010000001", "6.451612903", "6.4860311670000002", 
    "6.6325419300000004", "6.6565522469999996", "6.81028492", 
    "5.8992430169999999", "6.7644900950000002", "6.9637883010000001", 
    "6.7006554989999998", "6.981926219", "6.6499057199999996", 
    "6.7104477610000002", "6.4197530860000001", "6.4279155189999999"
    ), `16` = c("7.1361108199999999", "7.4176386330000001", "6.5996740899999997", 
    "6.6451535799999997", "6.33189957", "6.7676882760000003", 
    "6.6034155600000002", "6.4158360239999999", "6.8285776519999999", 
    "6.3910487390000004", "6.7523743339999998", "6.2385800050000002", 
    "6.8378576669999998", "6.9823584030000001", "6.6035445499999996", 
    "6.9076504090000004", "6.7881835319999997", "6.8776119400000004", 
    "6.8077601410000002", "6.1157024790000003"), `17` = c("7.2620421869999996", 
    "8.5635359120000007", "7.1292775669999999", "7.5176287799999999", 
    "6.5019505850000003", "7.4740799820000001", "7.3244781779999997", 
    "6.5843043659999996", "8.1245915919999998", "6.6849990520000002", 
    "8.4433634469999994", "7.0347167839999996", "7.1606749819999997", 
    "7.8737233050000004", "6.8220441850000002", "8.1703391930000002", 
    "7.2030169700000002", "8.3940298510000009", "8.2716049379999994", 
    "6.5197428830000002"), `18` = c("7.3984678349999999", "8.3179864949999995", 
    "7.305812059", "7.4100633440000001", "6.6119835949999999", 
    "8.089324371", "7.9506641370000004", "6.7527727080000002", 
    "8.5928991509999992", "6.6944813200000004", "8.6518415569999991", 
    "7.3610023489999996", "6.6764490099999998", "8.4493964720000001", 
    "6.6763777620000004", "9.2597177520000002", "6.5870521679999996", 
    "9.8029850750000005", "9.9470899470000003", "6.8319559229999998"
    ), `19` = c("6.8002938400000001", "7.0493145080000001", "6.5046170559999998", 
    "6.4658778530000003", "6.2118635590000002", "7.4285063229999997", 
    "7.5332068310000002", "5.9806261410000001", "7.9721193640000001", 
    "6.0496870850000004", "6.9840166779999997", "6.9694596710000001", 
    "6.0308143799999998", "8.0408542250000004", "5.9480456420000003", 
    "8.5169596429999999", "5.7825267130000002", "9.0507462689999993", 
    "8.3421516750000002", "6.2075298439999997"), `20` = c("5.1317032219999996", 
    "5.6374053609999999", "5.1738185769999996", "5.1392374810000003", 
    "5.0715214560000001", "5.411871938", "5.9392789370000001", 
    "4.7592306610000001", "5.8592899149999997", "4.5989000569999998", 
    "4.89923558", "5.4815974939999998", "4.8422597209999996", 
    "5.9424326829999998", "4.5399368779999998", "5.7687546420000002", 
    "4.4626021370000002", "5.755223881", "5.4673721339999997", 
    "4.775022957"), `21` = c("3.431629762", "4.7472887249999998", 
    "3.9244975559999999", "4.003824549", "3.9611883570000002", 
    "4.1358095019999999", "4.3643263760000002", "3.608030324", 
    "4.2147680239999996", "3.252417978", "3.5093815149999998", 
    "3.9937353170000001", "3.6537050619999998", "3.9554317550000002", 
    "3.083272639", "3.6642733349999999", "2.853551226", "3.665671642", 
    "3.8624338620000001", "3.287419651"), `22` = c("2.529121629", 
    "2.895436873", "3.1233025529999998", "3.2389147839999999", 
    "3.3610083020000001", "3.2699099920000001", "3.4535104360000002", 
    "3.0183911270000001", "3.103898933", "2.6645173529999999", 
    "2.6638869590000001", "3.093187157", "2.9933969189999998", 
    "2.9340761369999999", "2.5734401550000001", "2.7234463980000001", 
    "2.1244500309999998", "2.7701492540000001", "3.0511463839999999", 
    "2.387511478"), `23` = c("2.1723160880000001", "2.568037651", 
    "3.0961433999999999", "2.9640253379999999", "3.3410023010000001", 
    "3.1901560899999999", "3.548387097", "2.7095324999999999", 
    "2.7553909820000002", "2.2757443579999999", "2.4785730830000001", 
    "2.8321587049999999", "2.7439471750000002", "2.7112349120000001", 
    "2.2335518329999999", "2.7482050010000001", "1.8101822750000001", 
    "2.7104477610000002", "2.980599647", "2.1671258029999998"
    ), `24` = c("1.3537622", "1.370984244", "2.2542096690000002", 
    "2.4381498750000001", "2.6808042410000001", "2.5635182859999999", 
    "2.7514231499999999", "2.0496981609999998", "1.818775866", 
    "1.498198369", "1.62149641", "1.983816236", "1.672780631", 
    "1.9498607240000001", "1.383831027", "1.8321366670000001", 
    "1.03079824", "1.7910447759999999", "2.2574955910000001", 
    "1.5243342520000001")), row.names = c("X2", "X3", "X4", "X5", 
"X6", "X7", "X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15", 
"X16", "X17", "X18", "X19", "X20", "X21"), class = "data.frame")

我不确定是否正确理解了你的问题,但以下是我的解决思路。如果我理解正确,你想知道一天中哪些小时 `r=1` 与 `r=2` 之间的差异最大?那么我会这样做(假设你的数据存储在名为 `db` 的 `data.frame`/`tibble` 中):

library(tidyverse)
db %>%
  # Stack the hourly columns into long format: one row per id, direction and
  # hour. `pivot_longer()` replaces the superseded `gather()`.
  pivot_longer(
    cols = -c(id, type, source, r),
    names_to = "hour",
    values_to = "perc_pass"
  ) %>%
  # Relabel the directions so they are usable as column names after widening.
  # `r` is stored as character, so compare against the string "1".
  mutate(r = if_else(r == "1", "dir_1", "dir_2")) %>%
  # Widen again: one column with the percentage for direction 1 and one for
  # direction 2, per id and hour.
  pivot_wider(names_from = r, values_from = perc_pass) %>%
  # The percentages are stored as character, so convert before subtracting.
  mutate(diff_dir = as.numeric(dir_2) - as.numeric(dir_1)) %>%
  # `pivot_longer()` emits the rows id by id; sort numerically by hour so the
  # result is listed hour by hour (hour itself stays a character column).
  arrange(as.integer(hour)) %>%
  # Keep the hours where the directions differ by more than 2 percentage
  # points; the threshold can be anything, really.
  filter(abs(diff_dir) > 2)
# To get only the vector of affected ids, append to the pipeline:
#   %>% pull(id) %>% unique()
然后产生如下结果:

# A tibble: 22 x 7
   id                               type  source hour  dir_1             dir_2             diff_dir
   <chr>                            <chr> <chr>  <chr> <chr>             <chr>                <dbl>
 1 Dorenbachviadukt                 12    bs     7     2.66826399500000~ 5.12042480599999~     2.45
 2 Wasgenring 62                    12    bs     7     3.10400741300000~ 5.37718611299999~     2.27
 3 Äussere Baselstrasse 381 (Riehe~ 13    bs     7     4.32868672000000~ 2.28412256299999~    -2.04
 4 Bäumlihofstrasse (Riehen)        12    bs     7     4.8798252         1.78261946           -3.10
 5 Grenzacherstrasse 511            14    bs     7     4.97800125699999~ 2.25671641799999~    -2.72
 6 Viaduktstrasse 60 (Rialto)       13    bs     8     3.79506641399999~ 5.952548084           2.16
 7 Dorenbachviadukt                 12    bs     8     4.53060335400000~ 6.83671534199999~     2.31
 8 Äussere Baselstrasse 381 (Riehe~ 13    bs     8     6.45634629500000~ 3.80687093799999~    -2.65
 9 Bäumlihofstrasse (Riehen)        12    bs     8     7.76887594100000~ 3.49096311           -4.28
10 Grenzacherstrasse 511            14    bs     8     8.69893148999999~ 3.43880597000000~    -5.26
# ... with 12 more rows
# A tibble: 22 x 7
   id                               type  source hour  dir_1             dir_2             diff_dir
 1 Dorenbachviadukt                 12    bs     7     2.66826399500000~ 5.12042480599999~     2.45
 2 Wasgenring 62                    12    bs     7     3.10400741300000~ 5.37718611299999~     2.27
 3 Äussere Baselstrasse 381 (Riehe~ 13    bs     7     4.32868672000000~ 2.28412256299999~    -2.04
 4 Bäumlihofstrasse (Riehen)        12    bs     7     4.8798252         1.78261946           -3.10
 5 Grenzacherstrasse 511            14    bs     7     4.97800125699999~ 2.25671641799999~    -2.72
 6 Viaduktstrasse 60 (Rialto)       13    bs     8     3.79506641399999~ 5.952548084           2.16
 7 Dorenbachviadukt                 12    bs     8     4.53060335400000~ 6.83671534199999~     2.31
 8 Äussere Baselstrasse 381 (Riehe~ 13    bs     8     6.45634629500000~ 3.80687093799999~    -2.65
 9 Bäumlihofstrasse (Riehen)        12    bs     8     7.76887594100000~ 3.49096311           -4.28
10 Grenzacherstrasse 511            14    bs     8     8.69893148999999~ 3.43880597000000~    -5.26
# ... with 12 more rows

我们可以看到哪些街道、类型、来源和时间在交通方向上的差异超过2个百分点。

编辑:我错过了另一个答案……这基本上是一样的,只是使用base-R而不是tidyverse函数

我是否正确理解,您需要每小时方向之间的差异?如果是,您可以执行以下操作。“dat”是我为您的数据结构提供的名称

# Convert the hourly traffic shares (columns 5 to 28) from character to
# numeric. lapply() works column by column and avoids the detour through a
# character matrix that apply() takes on a data frame.
hour_cols <- 5:28
dat[hour_cols] <- lapply(dat[hour_cols], as.numeric)

# Split by direction and align the direction-2 rows to the direction-1 rows by
# id, so the subtraction below pairs matching road segments even if the rows
# are not strictly alternating in `dat`.
dir_1 <- dat[dat$r == 1, ]
dir_2 <- dat[dat$r == 2, ]
stopifnot(
  anyDuplicated(dir_1$id) == 0L,
  anyDuplicated(dir_2$id) == 0L,
  setequal(dir_1$id, dir_2$id)
)
dir_2 <- dir_2[match(dir_1$id, dir_2$id), ]

# New data frame with the per-hour difference (r = 1 minus r = 2), keeping the
# identifying columns id, type and source.
xdat <- cbind(dir_1[, 1:3], dir_1[, hour_cols] - dir_2[, hour_cols])

# Reshape the 24 hour columns to long format: one row per id and hour.
xdat <- reshape(
  data = xdat, direction = "long",
  idvar = c("id", "type", "source"),
  timevar = "hour", varying = 4:27,
  v.names = "trafficDiff",
  times = colnames(xdat)[4:27]
)
rownames(xdat) <- NULL

# Restrict to absolute differences >= 2.
xdat[abs(xdat$trafficDiff) >= 2, ]

你是在寻找一天中某个特定的时间吗?你的问题是只针对早上/晚上,还是针对一天中所有小时之间的差异?例如,凌晨1点与其他所有小时比较,凌晨2点与其他所有小时比较?——早上/晚上只是一个说明性的例子,因为这两个时段出现差异的可能性最大。但我对任何小时都感兴趣。差异最大的是wh……(原文在此处截断)——非常感谢您的回答!它也非常有效 :)
                                   id type source hour trafficDiff
65                   Dorenbachviadukt   12     bs    7   -2.452161
66                      Wasgenring 62   12     bs    7   -2.273179
67  Äussere Baselstrasse 381 (Riehen)   13     bs    7    2.044564
68          Bäumlihofstrasse (Riehen)   12     bs    7    3.097206
69              Grenzacherstrasse 511   14     bs    7    2.721285
74         Viaduktstrasse 60 (Rialto)   13     bs    8   -2.157482
75                   Dorenbachviadukt   12     bs    8   -2.306112
77  Äussere Baselstrasse 381 (Riehen)   13     bs    8    2.649475
78          Bäumlihofstrasse (Riehen)   12     bs    8    4.277913
79              Grenzacherstrasse 511   14     bs    8    5.260126
80                St. Alban-Anlage 72   13     bs    8   -4.641655
87  Äussere Baselstrasse 381 (Riehen)   13     bs    9    2.129512
88          Bäumlihofstrasse (Riehen)   12     bs    9    2.390254
89              Grenzacherstrasse 511   14     bs    9    3.125777
90                St. Alban-Anlage 72   13     bs    9   -4.896293
178         Bäumlihofstrasse (Riehen)   12     bs   18   -2.583340
179             Grenzacherstrasse 511   14     bs   18   -3.215933
180               St. Alban-Anlage 72   13     bs   18    3.115134
187 Äussere Baselstrasse 381 (Riehen)   13     bs   19   -2.010040
188         Bäumlihofstrasse (Riehen)   12     bs   19   -2.568914
189             Grenzacherstrasse 511   14     bs   19   -3.268220
190               St. Alban-Anlage 72   13     bs   19    2.134622