Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/66.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 重新编码分类变量_R_Grouping - Fatal编程技术网

R 重新编码分类变量

R 重新编码分类变量,r,grouping,R,Grouping,我无法把变量 Text_General_Code 中的现有类别归并成更大的“类别”。我尝试单独处理 Text_General_Code,但在我的报告文件中它给出的类别远不止八个。 library(ggplot2) library(lubridate) library(zoo) library(dplyr) library(knitr) library(plotly) # Read csv in R ## pdx = read.csv("https://cyo.arringtonadventures.co

我无法把变量 Text_General_Code 中的现有类别归并成更大的“类别”。

我尝试单独处理 Text_General_Code,但在我的报告文件中它给出的类别远不止八个。

library(ggplot2)
library(lubridate)
library(zoo)
library(dplyr)
library(knitr)
library(plotly)

# Read the crime data from the remote CSV.
# Strings are kept as character (not factor) regardless of the R version, so
# the text columns can be compared and recoded directly later on.
pdx <- read.csv(
  "https://cyo.arringtonadventures.com/crime/crime.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
head(pdx)

# Add a constant Count column (value 1 on every row).
pdx$Count <- 1

# Convert Date from factor to date
#pdx$Date <- mdy_hms(pdx$Dispatch_Date_Time)

# Year is taken as the first four characters of Dispatch_Date.
# NOTE(review): assumes Dispatch_Date begins with a 4-digit year - confirm
# against the CSV.
pdx$Year <- substring(pdx$Dispatch_Date, 1, 4)

# Rename the first column to "District".
# NOTE(review): this is positional; presumably column 1 is Dc_Dist - verify.
colnames(pdx)[1] <- "District"

# Drop all variables we are not interested in
#select(pdx, -2,-3,-5,-7,-8,-9,-11,-12,-13,-14)

# Group Text_General_Code into broader categories, stored in pdx$Category.
#
# Every comparison is made against Text_General_Code. The previous version
# compared two of the codes against pdx$Type instead; if that column does not
# exist the comparison is zero-length, the whole `|` chain becomes zero-length,
# and no row is ever labelled "Others".
#
# Codes are normalised once (character, trimmed, upper case) so that
# differences in letter case or stray whitespace do not prevent a match.
crime_code <- toupper(trimws(as.character(pdx$Text_General_Code)))

# Category label -> the codes that belong to it.
# NOTE(review): these code spellings are kept exactly as written before
# (including "GAMBLINGS"); check them against the values actually present,
# e.g. with table(pdx$Text_General_Code).
category_codes <- list(
  "Theft" = c("THEFT", "MOTOR VEHICLE THEFT"),
  "Battery" = "BATTERY",
  "Criminal damage" = "CRIMINAL DAMAGE",
  "Narcotics" = c("NARCOTICS", "OTHER NARCOTIC VIOLATION"),
  "Assault" = "ASSAULT",
  "Burglary" = "BURGLARY",
  # Label now uses the same capitalisation as the other categories.
  "Robbery" = "ROBBERY",
  "Others" = c(
    "ARSON", "CONCEALED CARRY LICENSE VIOLATION",
    "CRIMINAL TRESPASS", "GAMBLINGS",
    "HUMAN TRAFFICKING", "INTERFERENCE WITH PUBLIC OFFICER",
    "INTIMIDATION", "KIDNAPPING", "LIQUOR LAW VIOLATION",
    "NON-CRIMINAL", "NON - CRIMINAL",
    "OBSCENITY", "OFFENSE INVOLVING CHILDREN",
    "PROSTITUTION", "PUBLIC INDECENCY",
    "PUBLIC PEACE VIOLATION", "STALKING",
    "WEAPONS VIOLATION", "HOMICIDE",
    "CRIM SEXUAL ASSAULT", "SEX OFFENSE",
    "DECEPTIVE PRACTICE", "OTHER OFFENSE"
  )
)

# Create the column up front so it always has one value per row; rows whose
# code is not listed above stay NA.
if (!"Category" %in% names(pdx)) {
  pdx$Category <- NA_character_
}

# %in% never yields NA, so rows with a missing code are simply left unmatched.
for (category_label in names(category_codes)) {
  in_category <- crime_code %in% category_codes[[category_label]]
  pdx$Category[in_category] <- category_label
}
library(ggplot2)
library(lubridate)
library(zoo)
library(dplyr)
library(knitr)
library(plotly)
# 在 R 中读取 csv
## 
pdx = read.csv("https://cyo.arringtonadventures.com/crime/crime.csv",header = T)
head(pdx)
# 创建一个值为 1 的变量 Count

pdx$Count <- 1

首先,在 read.csv 语句中添加 stringsAsFactors = FALSE,这样读入的字符串列就不会被转换成因子。此外,统一 Text_General_Code 字段的大小写也会有帮助:

    # Re-read the data with strings kept as character, then put
    # Text_General_Code into sentence case (first letter upper case, the rest
    # lower case) so every value follows one capitalisation.
    # str_to_sentence() lives in stringr, which is not attached by the
    # library() calls on this page, so it is called with its namespace.
    # Later comparisons must use the sentence-case spelling or be
    # case-insensitive.
    pdx <- read.csv("https://cyo.arringtonadventures.com/crime/crime.csv",
                    header = TRUE, stringsAsFactors = FALSE) %>%
      mutate(Text_General_Code = stringr::str_to_sentence(Text_General_Code))
然后统计 Text_General_Code 中各个取值出现的次数,并把结果保存到一个可以查看的对象中(假设您使用的是 RStudio):


这是什么编程语言?你需要用所使用语言的名称来标记你的问题。
你也可以在 case_when 链的末尾使用 TRUE ~ "Others",这样就不必逐一列出所有想重新编码为 Others 的取值(它实际上相当于 case_when 末尾的 else)。
嗯 @emily kothe,我一直在用 case_when,却不知道可以写 TRUE ~ "Others",太棒了!
感谢大家回答我的问题……这些回答帮我解决了问题。我是 R 编程环境的新手。
   # Tabulate how often each Text_General_Code value occurs so the exact
   # spellings can be inspected before recoding.
   tgc <- pdx %>%
     count(Text_General_Code)
   # Open the table in the data viewer. The viewer function is utils::View()
   # with a capital V; a lower-case view() is not provided by base R or by any
   # package attached on this page.
   View(tgc)
# Collapse Text_General_Code into broader groups in a new `category` column.
#
# Codes are compared in lower case, so the recode works whether the column
# still has its original capitalisation or has been sentence-cased first.
# Missing codes stay NA; any other code not listed falls through to "Others"
# (the final TRUE branch acts as the "else" of the chain).
pdx <- pdx %>%
  mutate(
    category = case_when(
      is.na(Text_General_Code) ~ NA_character_,
      tolower(Text_General_Code) %in%
        c("thefts", "motor vehicle theft", "theft from vehicle") ~ "Theft",
      tolower(Text_General_Code) %in%
        c("robbery firearm", "robbery no firearm") ~ "Robbery",
      TRUE ~ "Others"
    )
  )
# Cross-tabulate the new grouping against the original codes to check that
# each code landed in the intended category.
count(pdx, category, Text_General_Code)