--- title: "Ferguson effect - cleaning" author: "Tyler Lane" date: "26/06/2019" output: pdf_document editor_options: chunk_output_type: console --- ```{r setup, echo=FALSE} knitr::opts_chunk$set(echo = TRUE) library("tidyverse") library("lubridate") library("zoo") library("readxl") library("janitor") load("ucr.rda") load("combined ucr.rda") # create negate %in% function `%!in%` <- negate(`%in%`) ``` # protested deaths data ```{r, warning = FALSE} protested <- bind_rows( # washington post shootings read_xlsx("protested shootings.xlsx") %>% filter(protest == "yes") %>% mutate(first = excel_numeric_to_date(date), second = excel_numeric_to_date(date2 %>% as.numeric()), third = excel_numeric_to_date(date3 %>% as.numeric())) %>% select(name, first, second, third, city, state, link, reason), # other identified shootings read_csv("non-wapo events.csv") %>% filter(protest == "yes") %>% mutate(first = dmy(date), second = dmy(date2), third = NA) %>% select(name, first, second, third, city, state, link, reason)) %>% # manipulate to combine pivot_longer(-c(name, city, state, link, reason), names_to = "incident", values_to = "event") %>% filter(!is.na(event)) %>% mutate(wiki = if_else(str_detect(link, "wikipedia"), 1, 0), qtr = as.yearqtr(event), name = str_replace(name, " ", " "), # unique events variable city_name = paste(city, name)) %>% filter(qtr <= "2016 Q4") %>% arrange(city) save(protested, file = "final protested list.rda") # protested events to match to claims data match_protested <- protested %>% mutate(city = ifelse(city %in% c("Albuquerque", "Atlanta", "Aurora", "Austin", "Bakersfield", "Baltimore", "Baton Rouge", "Charlotte", "Chicago", "Cincinnati", "Cleveland", "Columbus", "Denver", "Des Moines", "Detroit", "Durham", "Fresno", "Houston", "Indianapolis", "Jacksonville", "Long Beach", "Los Angeles", "Louisville", "Madison", "Memphis", "Milwaukee", "Montgomery", "New Orleans", "New York", "Oakland", "Philadelphia", "Raleigh", "Sacramento", "San Antonio", "San Diego", "San Francisco", "Seattle", "Stockton", "Tampa", "Tulsa", "Wichita"), city, case_when( city == "Anaheim" ~ "Anaheim-Santa Ana", city %in% c("Arlington", "Fort Worth", "Grapevine") ~ "Dallas-Fort Worth-Arlington", city == "El Cajon" ~ "San Diego", city %in% c("Falcon Heights", "Minneapolis", "St. Paul") ~ "Minneapolis-St Paul", city %in% c("Ferguson", "St. Louis") ~ "St Louis", city == "Hampstead" ~ "Houston", city %in% c("Mesa", "Tempe") ~ "Greater Phoenix", city == "North Charleston" ~ "Charleston-North Charleston", city == "Washington" ~ "Washington DC" ))) %>% select(name, city, state, wiki) %>% distinct ``` # import raw data ```{r} # # prep and combine annual data sets, save for easy upload # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2011.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2012.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2013.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2014.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2015.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2016.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2017.rda") # load("ucr_offenses_known_monthly_1960_2018_rda/offenses_known_monthly_2018.rda") # # # u <- rbind(offenses_known_monthly_2011, # offenses_known_monthly_2012, # offenses_known_monthly_2013, # offenses_known_monthly_2014, # offenses_known_monthly_2015, # offenses_known_monthly_2016, # offenses_known_monthly_2017, # offenses_known_monthly_2018) # # save(u, file = "combined ucr.rda") ``` # ucr data ```{r} s_crime <- u %>% mutate( city = case_when( str_detect(agency_name, "albuquerque|university of new mexico") & state == "new mexico" ~ "Albuquerque", str_detect(agency_name, "anaheim|garden grove|orange|santa ana") & state == "california" ~ "Anaheim-Santa Ana", str_detect(census_name, "atlanta") & state == "georgia" ~ "Atlanta", agency_name == "aurora" & state == "colorado" ~ "Aurora", agency_name %in% c("austin", "isd: austin") & state == "texas" ~ "Austin", str_detect(agency_name, "bakersfield") & state == "california" ~ "Bakersfield", agency_name %in% c("baltimore", "bal wash intl airport", "university of baltimore", "univ of md:baltimore cit") & state == "maryland" ~ "Baltimore", agency_name %in% c("baton rouge", "la st univ: baton rouge", "southern u &a&m c: btn r", "state police, baton roug") & state == "louisiana" ~ "Baton Rouge", str_detect(agency_name, "charleston") & state == "south carolina" ~ "Charleston-North Charleston", str_detect(agency_name, "charlotte") & state == "north carolina" ~ "Charlotte", agency_name == "chicago" & state == "illinois" ~ "Chicago", str_detect(agency_name, "cincinnati") & state == "ohio" ~ "Cincinnati", (agency_name == "cleveland state univ" | census_name == "cleveland city") & state == "ohio" ~ "Cleveland", agency_name %in% c("columbus", "oh state univ: columbus", "columbus state comm coll") & state == "ohio" ~ "Columbus", str_detect(agency_name, "corpus christi") & state == "texas" ~ "Corpus Christi", str_detect(agency_name, "arlington|dallas|fort worth") & agency_name %!in% c("hos dis:dallas county", "lake dallas") & state == "texas" ~ "Dallas-Fort Worth-Arlington", str_detect(agency_name, "denver") & state == "colorado" ~ "Denver", # not enough data # str_detect(census_name, "des moines") & # state == "iowa" ~ "Des Moines", agency_name == "detroit" & state == "michigan" ~ "Detroit", agency_name %in% c("duke university", "durham") & state == "north carolina" ~ "Durham", str_detect(agency_name, "fresno") & state == "california" ~ "Fresno", str_detect(agency_name, "houston") & state == "texas" ~ "Houston", str_detect(agency_name, "indianapolis|purdue") & agency_name != "indianapolis intrntl apt" & state == "indiana" ~ "Indianapolis", # florida data not in dataset # agency_name == "jacksonville" & # state == "florida" ~ "Jacksonville", agency_name %in% c("los angeles", "los angeles city college", "ca st un: los angeles", "fbi, los angeles", "un of ca: los angeles") & state == "california" ~ "Los Angeles", str_detect(agency_name, "long beach") & state == "california" ~ "Long Beach", # data not in dataset # agency_name == "louisville" & # state == "kentucky" ~ "Louisville", # not enough data # agency_name == "madison" & # state == "wisconsin" ~ "Madison", str_detect(agency_name, "memphis") & agency_name != "memphis-shelby cnty a a" & state == "tennessee" ~ "Memphis", # florida data not in dataset # census_name == "miami city" & # state == "florida" ~ "Miami", agency_name %in% c("milwaukee", "univ of wi: milwaukee ") & state == "wisconsin" ~ "Milwaukee", str_detect(agency_name, "minneapolis|st. paul") & state == "minnesota" ~ "Minneapolis-St Paul", # not enough data # str_detect(census_name, "montgomery") & # state == "alabama" ~ "Montgomery", str_detect(agency_name, "nashville") & agency_name == "tennessee" ~ "Nashville", agency_name %in% c("new orleans", "tulane university", "southeastern louisiana u", "southern u &a&m c: new o", "la st u: hsc, n orleans") & state == "louisiana" ~ "New Orleans", census_name == "new york city" & state == "new york" ~ "New York", agency_name == "oakland" & state == "california" ~ "Oakland", str_detect(agency_name, "oklahoma city") & state == "oklahoma" ~ "Oklahoma City", agency_name == "philadelphia" & state == "pennsylvania" ~ "Philadelphia", agency_name %in% c("phoenix", "mesa", "tempe", "chandler", "scottsdale", "glendale", "gilbert", "peoria") & state == "arizona" ~ "Greater Phoenix", # raleigh data not in dataset # agency_name == "raleigh" & # state == "north carolina" ~ "Raleigh", agency_name %in% c("sacramento", "ca st un: sacramento", "calif st pol sacramento") & state == "california" ~ "Sacramento", str_detect(agency_name, "san antonio") & state == "texas" ~ "San Antonio", agency_name %in% c("san diego", "san diego st university", "un of ca: san diego") & state == "california" ~ "San Diego", agency_name %in% c("san francisco", "san francisco st univ", "un of ca: san francisco") & state == "california" ~ "San Francisco", str_detect(agency_name, "santa ana") & state == "california" ~ "Santa Ana", agency_name %in% c("seattle", "university of washington") & state == "washington" ~ "Seattle", agency_name %in% c("st. louis", "univ of mo: st. louis", "washington university") & state == "missouri" ~ "St Louis", str_detect(agency_name, "stockton") & state == "california" ~ "Stockton", # florida data not in dataset # agency_name == "tampa" & # state == "florida" ~ "Tampa", str_detect(agency_name, "tulsa") & state == "oklahoma" ~ "Tulsa", state == "district of columbia" ~ "Washington DC", str_detect(agency_name, "wichita") & state == "kansas" ~ "Wichita")) %>% filter(!is.na(city)) %>% mutate(date = date(date), qtr = as.yearqtr(date), year = year(qtr), homicide = actual_murder, assault = actual_assault_aggravated) %>% select(city, qtr, year, homicide, assault, population) s_pop <- s_crime %>% select(city, year, population) %>% distinct() %>% group_by(city, year) %>% summarise(pop = sum(population)) s <- s_crime %>% group_by(year, qtr, city) %>% summarise(homicide = sum(homicide), assault = sum(assault)) %>% left_join(s_pop, by = c("year", "city")) %>% mutate(hom_rate = homicide / pop * 10 ^ 5, assault_rate = assault / pop * 10 ^ 5) %>% ungroup() %>% select(city, qtr, homicide, assault, pop, hom_rate, assault_rate) %>% arrange(city, qtr) ``` # louisville data ```{r} # source: https://data.louisvilleky.gov/dataset/crime-reports louisville <- bind_rows( read_csv("louisville data/Crime_Data_2012.csv"), read_csv("louisville data/Crime_Data_2013.csv"), read_csv("louisville data/Crime_Data_2014.csv"), read_csv("louisville data/Crime_Data_2015.csv"), read_csv("louisville data/Crime_Data_2016_39.csv"), read_csv("louisville data/Crime_Data_2017_9.csv"), read_csv("louisville data/Crime_Data_2018.csv")) %>% clean_names() %>% filter(str_detect(uor_desc, "ASSAULT - 1ST|ASSAULT - 2ND") | (crime_type == "HOMICIDE"), uor_desc != "JUSTIFIABLE HOMICIDE", att_comp == "COMPLETED") %>% mutate(qtr = if_else(is.na(date_occured), date(date_reported), date(date_occured)) %>% as.yearqtr(), crime_type = recode(crime_type, OTHER = "ASSAULT")) %>% filter(qtr >= 2012) %>% group_by(qtr) %>% count(crime_type) %>% pivot_wider(names_from = "crime_type", values_from = "n") %>% clean_names() %>% mutate(city = "Louisville", year = year(qtr), assault = as.numeric(assault), homicide = as.numeric(homicide)) louisville # population data source: https://worldpopulationreview.com/us-cities/louisville-population/ l_pop <- tibble::tribble( ~`Year `, ~Total.Population, ~Growth, ~Annual.Growth.Rate, 2020L, 1306285, 4492, "0.34%", 2019L, 1301793, 4492, "0.35%", 2018L, 1297301, 4492, "0.35%", 2017L, 1292809, 7628, "0.59%", 2016L, 1285181, 6204, "0.48%", 2015L, 1278977, 6894, "0.54%", 2014L, 1272083, 7012, "0.55%", 2013L, 1265071, 12006, "0.95%", 2012L, 1253065, 8420, "0.67%", 2011L, 1244645, 7010, "0.56%" ) %>% clean_names() %>% transmute(year, pop = total_population) louisville <- left_join(louisville, l_pop, by = "year") %>% mutate(hom_rate = homicide / pop * 10e4, assault_rate = assault / pop * 10e4) %>% select(city, qtr, homicide, assault, pop, hom_rate, assault_rate) ``` # raleigh NC data ```{r} # source: http://data-ral.opendata.arcgis.com/datasets/raleigh-police-incidents-nibrs?geometry=-80.029%2C35.408%2C-77.219%2C36.188&selectedAttribute=reported_year # data are in two separate files raleigh <- rbind( # first file, up to 2014 (read_csv("raleigh data/Raleigh_Police_Incidents_(SRS).csv") %>% clean_names() %>% mutate(datetime = as.POSIXct(inc_datetime / 1000, origin = "1970-01-01"), date = date(datetime), qtr = as.yearqtr(date)) %>% filter(str_detect(lcr_desc, "ASSAULT WITH FIREARM|ASSAULT WITH HANDS|ASSAULT WITH KNIFE|ASSAULT WITH OTHER|Assault/Firearm|ASSAULT/LEO - FIREARM|ASSAULT/LEO - HANDS|ASSAULT/LEO - OTHER|Homicide/Murder|MURDER"), qtr >= "2012 Q2") %>% mutate(crime = if_else(str_detect(lcr_desc, "ASSAULT|Assault"), "assault", "homicide")) %>% select(qtr, crime)), # second file, up to (read_csv("raleigh data/Raleigh_Police_Incidents__NIBRS_.csv") %>% mutate(date = date(reported_date), qtr = as.yearqtr(date)) %>% filter(crime_category == "MURDER" | crime_description %in% c("Assault/Aggravated", "Assault/Firearm", "Assault/Hands (Aggravated)", "Assault/Knife", "Child Abuse/Aggravated"), crime_description != "Homicide/Justifiable Homicide", qtr <= "2018 Q4") %>% mutate(crime = if_else(crime_category == "MURDER", "homicide", "assault")) %>% select(qtr, crime)) ) %>% # combine group_by(qtr) %>% count(crime) %>% pivot_wider(names_from = "crime", values_from = "n") %>% clean_names() %>% mutate(city = "Raleigh", year = year(qtr)) # population data from https://worldpopulationreview.com/us-cities/raleigh-population/ r_pop <- tibble::tribble( ~`Year `, ~Total.Population, ~Growth, ~Annual.Growth.Rate, 2020L, 1418936, 28198, "1.99%", 2019L, 1390738, 28198, "2.03%", 2018L, 1362540, 28198, "2.07%", 2017L, 1334342, 30497, "2.29%", 2016L, 1303845, 32649, "2.50%", 2015L, 1271196, 29960, "2.36%", 2014L, 1241236, 27908, "2.25%", 2013L, 1213328, 25642, "2.11%", 2012L, 1187686, 25590, "2.15%", 2011L, 1162096, 24715, "2.13%" ) %>% clean_names() %>% transmute(year, pop = total_population) raleigh <- left_join(raleigh, r_pop, by = "year") %>% mutate(hom_rate = homicide / pop * 10e4, assault_rate = assault / pop * 10e4) %>% select(city, qtr, homicide, assault, pop, hom_rate, assault_rate) ``` # tampa ```{r} # source: request to tampa police # combining homicide and assault data tampa <- # homicide data read_csv("tampa data/tabula-Tyler 090.csv") %>% clean_names() %>% filter(report_date != "Report Date", primary_offense %!in% c("JUSTIFIABLE HOMICIDE", "MANSLAUGHTER - NEGLIGENT", "DUI-MANSLAUGHTER")) %>% mutate(qtr = mdy(report_date) %>% as.yearqtr()) %>% group_by(qtr) %>% summarise(homicide = n()) %>% left_join( # assault data read_csv("tampa data/tabula-Tyler 130.csv") %>% clean_names() %>% filter(report_date != "Report Date", primary_offense %!in% c("AGG ASSAULT", "AGG BATTERY")) %>% mutate(qtr = mdy(report_date) %>% as.yearqtr()) %>% group_by(qtr) %>% summarise(assault = n())) %>% transmute(qtr, homicide, assault, year = year(qtr)) # population, from t_pop <- tibble::tribble( ~Year, ~Total.Population, ~Growth, ~Annual.Growth.Rate, 2020L, 3245540, 51440, "1.58%", 2019L, 3194100, 51440, "1.61%", 2018L, 3142660, 51440, "1.64%", 2017L, 3091220, 57000, "1.84%", 2016L, 3034220, 64970, "2.14%", 2015L, 2969250, 56370, "1.90%", 2014L, 2912880, 42520, "1.46%", 2013L, 2870360, 24640, "0.86%", 2012L, 2845720, 16970, "0.60%", 2011L, 2828750, 40380, "1.43%" ) %>% clean_names() %>% transmute(year, pop = total_population) tampa <- left_join(tampa, t_pop, by = "year") %>% mutate(hom_rate = homicide / pop * 10e4, assault_rate = assault / pop * 10e4, city = "Tampa") %>% select(city, qtr, homicide, assault, pop, hom_rate, assault_rate) ``` # merge city data ```{r} df <- bind_rows(s, louisville, raleigh, tampa) # save files save(df, file = "crime.rda") ```