需要为r choropleth映射排序数据的帮助吗_R_Data Manipulation_Data Cleaning_Choropleth_Choroplethr

需要为r choropleth映射排序数据的帮助吗

需要为r choropleth映射排序数据的帮助吗,r,data-manipulation,data-cleaning,choropleth,choroplethr,R,Data Manipulation,Data Cleaning,Choropleth,Choroplethr,我正在为r choropleth地图的数据排序寻求帮助我正试图创建一个带有R的choropleth地图，使用zipcode和盗窃事件但是，map函数没有正常执行，或者我输入的数据类型不正确问题是，当我创建贴图时，颜色渐变输出不正确：顺序不对以下是当前输出的外观：你可以看到它是1，11，12，13，14，15。。。2，3，4 我希望它从1（最小值）数到15（最大值）除了最后一个函数（在“sortedData_”上调用的choropleth zipcode map函数）之外，几乎所有功能

我正在为r choropleth地图的数据排序寻求帮助

我正试图创建一个带有R的choropleth地图，使用zipcode和盗窃事件

但是，map函数没有正常执行，或者我输入的数据类型不正确

问题是，当我创建贴图时，颜色渐变输出不正确：顺序不对

以下是当前输出的外观：

你可以看到它是1，11，12，13，14，15。。。2，3，4

我希望它从1（最小值）数到15（最大值）

除了最后一个函数（在“sortedData_”上调用的choropleth zipcode map函数）之外，几乎所有功能都可以正常工作

谢谢你的帮助

代码如下：

library(choroplethr)
library(choroplethrMaps)
library(ggplot2)
# # use the devtools package from CRAN to install choroplethrZip from github
# # install.packages("devtools")
library(devtools)
# install_github('arilamstein/choroplethrZip')
library(choroplethrZip)
library(data.table)

# These are needed:
library(readr)
detach("package:plyr", unload=TRUE) # <- might not be necessary. 
library(dplyr)
#
# Import & inspect 2014 neighborhood economic data
#
austin2014_data_raw <- read_csv('https://data.austintexas.gov/resource/hcnj-rei3.csv', na = '')
glimpse(austin2014_data_raw)
nrow(austin2014_data_raw)
# # View(austin2014_data_raw)

# Clean it: Remove the first row, which has no zip code (this row is the average data for all of Austin)
austin2014_data <- austin2014_data_raw[-1,]
# # View(austin2014_data)
nrow(austin2014_data) # now there's two less rows. <-- Solution: on line above, check only by zipcode: na.omit(data_raw$Zipcode)
              # or, get the alternative from StackOverflow

#
# Grab the zip code data separately
# (when I tried to grab it just by doing: austin2014_data$`Zip Code` the outputted column looks weird)

zipCodesOfData <- fread('https://data.austintexas.gov/resource/hcnj-rei3.csv') %>%
  mutate(`Zip Code` = ifelse(`Zip Code` == "", NA, `Zip Code`)) %>%
  na.omit() %>% 
  select(`Zip Code`)
nrow(zipCodesOfData)
# View(zipCodesOfData)

# Rename it from "Zip Code" to "ZipCode". Recombine it with the dataset. 
names(zipCodesOfData) <- "ZipCode"
austin2014_data <- cbind(austin2014_data, zipCodesOfData)
# View(austin2014_data)

# Select a few columns for our neighborhood economic data subset
columnSelection <- c("ZipCode", "Population below poverty level", "Median household income", "Unemployment", "Median rent", "Percentage of rental units in poor condition")
austin2014_EconData_selection <- subset(austin2014_data, select=columnSelection)
names(austin2014_EconData_selection)
# View(austin2014_EconData_selection)

# Reset row number index since we removed the first row (r kept the original index, now we're gonna reset it)
rownames(austin2014_EconData_selection) <- 1:nrow(austin2014_EconData_selection)
# View(austin2014_EconData_selection)


#
# Import crime data
#

# Import data
austinCrime2014_data_raw <- read_csv('https://data.austintexas.gov/resource/7g8v-xxja.csv', na = '')
glimpse(austinCrime2014_data_raw)
nrow(austinCrime2014_data_raw)

# How many unique zipcodes?
length(unique(austinCrime2014_data_raw$`GO Location Zip`))
# # View(austinCrime2014_data_raw)

# Select and rename required columns
columnSelection_Crime <- c("GO Location Zip", "GO Highest Offense Desc", "Highest NIBRS/UCR Offense Description")
austinCrime_dataset <- select(austinCrime2014_data_raw, one_of(columnSelection_Crime))
names(austinCrime_dataset) <- c("zipcode", "highestOffenseDesc", "NIBRS_OffenseDesc")
glimpse(austinCrime_dataset)
nrow(austinCrime_dataset)

# Filter crime data by zipcodes available in the neighborhood economic data subset
length(unique(austinCrime2014_data_raw$`GO Location Zip`)) # 36
length(unique(austin2014_EconData_selection$ZipCode))

austinCrime2014_data_selected_zips <- filter(austinCrime_dataset, zipcode %in% austin2014_EconData_selection$ZipCode)
glimpse(austinCrime2014_data_selected_zips)
length(unique(austinCrime2014_data_selected_zips$zipcode)) # 31
nrow(austinCrime2014_data_selected_zips)
typeof(austinCrime2014_data_selected_zips)



#
# Convert our crime data subset from string/char data into factorized data so we can see levels
#

# let's make the character data columns c("highestOffenseDesc", "NIBRS_OffenseDesc") into factors so we can check its levels
glimpse(austinCrime2014_data_selected_zips) # characters
cols <- c("highestOffenseDesc", "NIBRS_OffenseDesc") # columns with character datatype to convert to factor datatype
austinCrime2014_data_selected_zips[cols] <- lapply(austinCrime2014_data_selected_zips[cols], factor)
glimpse(austinCrime2014_data_selected_zips) # factors

# View(austinCrime2014_data_selected_zips)

levels(austinCrime2014_data_selected_zips$highestOffenseDesc) #--> looks good
levels(austinCrime2014_data_selected_zips$NIBRS_OffenseDesc) # output is weird:  "Burglary / \nBreaking & Entering" "Robbery"

typeof(austinCrime2014_data_selected_zips)

## Shortening the entry "Burglary / Breaking & Entering" to just "Burglary
columnWithStringReplaced<- gsub("Burglary / \nBreaking & Entering", "Burglary", austinCrime2014_data_selected_zips$NIBRS_OffenseDesc)
austinCrime2014_data_selected_zips <- cbind(austinCrime2014_data_selected_zips, columnWithStringReplaced)
typeof(austinCrime2014_data_selected_zips)
austinCrime2014_data_selected_zips <- austinCrime2014_data_selected_zips[,-3] #remove old NIBRS column
glimpse(austinCrime2014_data_selected_zips)
# View(austinCrime2014_data_selected_zips)
names(austinCrime2014_data_selected_zips) <- c("zipcode", "highestOffenseDesc)", "NIBRS_OffenseDesc") # make all columns have the right same
glimpse(austinCrime2014_data_selected_zips)

# # View our data
# View(austin2014_EconData_selection)
# View(austinCrime2014_data_selected_zips)
levels(austinCrime2014_data_selected_zips$highestOffenseDesc) #--> looks good
levels(austinCrime2014_data_selected_zips$NIBRS_OffenseDesc) #--> looks good
glimpse(austinCrime2014_data_selected_zips)

#
# Next step: counting NIBRS crimes per zipcode
#
zipCrimeCountNIBRS <- austinCrime2014_data_selected_zips[,-2]
glimpse(zipCrimeCountNIBRS)


# This gives us count of NIBRS_OffenseDesc per Zipcode
zipCrimeCountNIBRS = zipCrimeCountNIBRS %>% 
  group_by(zipcode, NIBRS_OffenseDesc) %>% 
  mutate(occ = n())

# View(zipCrimeCountNIBRS)

# filter and group the crimes (burglary, robbery) by the zipcodes present
# this is using dplyr
rob_and_burg_perZip <- zipCrimeCountNIBRS %>%
  group_by(zipcode, occ, NIBRS_OffenseDesc) %>%
  summarise() %>%
  select(zipcode=zipcode, occ, NIBRS_OffenseDesc)

# View(rob_and_burg_perZip)

# lets check unique zipcodes
length(unique(rob_and_burg_perZip$zipcode)) # 31 zipcodes present
print(nrow(rob_and_burg_perZip)/2) # but some don't have both crimes accounted for

# select out all zipcodes with robbery -- something like this:
# select zipcode, NIBRS_OffenseDesc
# where NIBRS_OffenseDesc = robbery

robberyPerZip <- filter(rob_and_burg_perZip, NIBRS_OffenseDesc == "Robbery")
# View(robberyPerZip) # has 30 rows

# need to diff the zips and tack on ones not shown

# I found this function here: http://stackoverflow.com/questions/21574214/finding-elements-that-do-not-overlap-between-two-vectors
mysetdiff<-function (x, y, multiple=FALSE) 
{
  x <- as.vector(x)
  y <- as.vector(y)
  if (length(x) || length(y)) {
    if (!multiple) {
      unique( x[match(x, y, 0L) == 0L])  
    }else  x[match(x, y, 0L) == 0L] 
  } else x
}

# This shows the one zipcode missing in robberyPerZip$zipcode
diff_zip_Robbery <- mysetdiff(rob_and_burg_perZip$zipcode, robberyPerZip$zipcode)
print(diff_zip_Robbery)

# need to create list: 1 row, 3 columns, then rbind it
typeof(robberyPerZip)
# View(robberyPerZip)
list_to_append <- list(diff_zip_Robbery, 0, "Robbery")
names(list_to_append) <- c("zipcode", "occ", "NIBRS_OffenseDesc")
robberyPerZip_done <- rbind(robberyPerZip, list_to_append)
# View(robberyPerZip_done)

##########
#### Same thing as last step, but with burglary instead of robbery:
# select out all zipcodes with burglary
burglaryPerZip <- filter(rob_and_burg_perZip, NIBRS_OffenseDesc == "Burglary")
# View(burglaryPerZip) # has 28 rows

# This shows the three zipcodes missing in burglaryPerZip$zipcode
diff_zip_Burglary <- mysetdiff(rob_and_burg_perZip$zipcode, burglaryPerZip$zipcode)
print(diff_zip_Burglary)

newMatrix <- cbind(diff_zip_Burglary, c(0,0,0), c("Burglary", "Burglary", "Burglary"))
colnames(newMatrix) <- c("zipcode", "occ", "NIBRS_OffenseDesc")
# View(newMatrix)

## make both (a matrix, newMatrix) and (a list, burglaryPerZip) into data frames in order to combine them
mat_df <- data.frame(newMatrix)
burg_df <- data.frame(burglaryPerZip)

# doesn't work b/c they're different object types:
# burglaryPerZip_done <- rbind(burglaryPerZip, newMatrix)

# but since we converted into dataframe, this works:
burglaryPerZip_done <- rbind(burg_df, mat_df)
class(burglaryPerZip_done)
nrow(burglaryPerZip_done)

burglaryPerZip_done <- as.list(burglaryPerZip_done) # --> convert back into a list

## Now we have a list of robberies and burglaries per zipcode, including those with zero per zipcode:
# View(burglaryPerZip_done)
# View(robberyPerZip_done)

# Need to provide list of 'region' (i.e. zipcode) and 'value' (i.e. numeric value to map)
  ## Burglary:
burglary_occuranceByZip <- burglaryPerZip_done[-3] #take off the crime description column
names(burglary_occuranceByZip) <- c("region", "value") # rename to region, value
burglary_occuranceByZip <- data.frame(burglary_occuranceByZip) # make it a dataframe
sortedData_BurglaryPerZip <- burglary_occuranceByZip[order(as.numeric(as.character(burglary_occuranceByZip$value))),] # order by value 

class(sortedData_BurglaryPerZip)
glimpse(sortedData_BurglaryPerZip)
sortedData_BurglaryPerZip

  ## Robbery:
Robbery_occuranceByZip <- robberyPerZip_done[-3] #take off the crime description column
names(Robbery_occuranceByZip) <- c("region", "value") # rename to region, value
Robbery_occuranceByZip <- data.frame(Robbery_occuranceByZip) # make it a dataframe
sortedData_RobberyPerZip <- Robbery_occuranceByZip[order(as.numeric(as.character(Robbery_occuranceByZip$value))),] # order by value 

class(sortedData_RobberyPerZip)
glimpse(sortedData_RobberyPerZip)
sortedData_RobberyPerZip

# Now we're ready to map.
# First, let's map raw numbers of crimes per zipcode
# Then, let's divide crimes by population and map that as well.
# burglary_occuranceByZip$value <- as.numeric(as.character(sortedData_BurglaryPerZip$value))
# burglary_occuranceByZip$region <- as.numeric(as.character(sortedData_BurglaryPerZip$region))
# class(sortedData_BurglaryPerZip)
# class(sortedData_BurglaryPerZip$value)
# glimpse(sortedData_BurglaryPerZip)
# View(sortedData_BurglaryPerZip)
zip_choropleth(burglary_occuranceByZip, 
               zip_zoom = sortedData_BurglaryPerZip$region, 
               title      = "Burglary occurances by zipcode",
               legend     = "Burglaries",
               ) + coord_map()



## Robberies map:
sortedData_RobberyPerZip$region <- as.character(sortedData_RobberyPerZip$region)
class(sortedData_RobberyPerZip$value)
zip_choropleth(sortedData_RobberyPerZip, 
               zip_zoom = sortedData_RobberyPerZip$region, 
               title      = "Burglary occurances by zipcode",
               legend     = "Burglaries",
) + coord_map()

## Burglaries map:
print(sortedData_BurglaryPerZip$value) # Value is sorted
sortedData_BurglaryPerZip$region <- as.character(sortedData_BurglaryPerZip$region)
sortedData_BurglaryPerZip$value <- as.character(sortedData_BurglaryPerZip$value)
class(sortedData_BurglaryPerZip$value)
zip_choropleth(sortedData_BurglaryPerZip, 
               zip_zoom = sortedData_BurglaryPerZip$region, 
               title      = "Burglary occurances by zipcode",
               legend     = "Burglaries",
) + coord_map()
    ### Yet this map's legend and order of shading is out of order.

库（choroplethr）
图书馆（choroplethrMaps）
图书馆（GG2）
##使用CRAN的devtools包从github安装choroplethrZip
##安装程序包（“devtools”）
图书馆（devtools）
#安装github（'arilamstein/choroplethrZip'））
图书馆（choroplethrZip）
库（数据表）
#这些是需要的：
图书馆（readr）
分离（“package:plyr”，unload=TRUE）#包括使用function dput的样本数据会有所帮助。还有苏格斯在看。从排序序列1、11、12、2可以快速猜测，在某个点上，这是一个字符，而它应该是一个数字。如果这来自数据集，请检查列的每个值是否都是数字。在正确的位置应用as.numeric
或as.integer
可能会纠正错误。谢谢。我运行了这个：“Sortedata_防盗码Zip$value足够有趣了，我想解决我问题的是这个：“Sortedata_防盗码Zip$value