--- title: "victimisation" author: "Tyler Lane" date: "30/08/2020" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(janitor) library(see) library(magrittr) library(forecast) library(scales) library(broom) library(ggpubr) ``` # functions ```{r} # autocorrelation check autocor <- function(x) { par(mfrow = c(1, 2)) ggarrange( ggAcf(residuals(x), lag.max = 4) + theme_bw() + labs(title = "ACF", y = NULL, x = NULL), ggPacf(residuals(x), lag.max = 4) + theme_bw() + labs(title = "PACF", y = NULL, x = NULL), ncol = 2) } # function to check models look <- function(x) { x %>% tidy(conf.int = TRUE, conf.level = 0.99) %>% clean_names() %>% transmute(term, `est (99% ci)` = paste0(round(estimate * 100, 1), " (", round(conf_low * 100, 1), ",", round(conf_high * 100, 1), ")"), p = round(p_value, 3), sig = case_when(p_value <= .001 ~ "***", p_value <= .01 ~ "**", p_value <= .05 ~ "*", p_value >= .05 ~ "")) %>% return() } ``` # clean data, plot ```{r} d <- read_csv("NVAT_Report_14-May-21_11_00_45PM.csv", skip = 9) %>% clean_names() %>% transmute(reported = replace_na(x4, "All"), x1993, x1994, x1995, x1996, x1997, x1998, x1999, x2000, x2001, x2002, x2003, x2004, x2005, x2006, x2007, x2008, x2009, x2010, x2011, x2012, x2013, x2014, x2015, x2016, x2017, x2018, x2019) %>% filter(!is.na(x1993)) %>% mutate(group = c("Total", rep(c("White respondents", "Black respondents", "Other respondents"), each = 4))) %>% mutate_at(vars(contains("x")), funs(as.numeric)) %>% pivot_longer(cols = c(x1993:x2019), names_to = "year", values_to = "count") %>% pivot_wider(names_from = reported, values_from = count) %>% clean_names() %>% mutate(report_rate = yes_reported_to_the_police / all * 100, year = as.numeric(str_remove(year, "[x]")), group = fct_relevel(group, "White respondents", "Black respondents", "Other respondents", "Total"), ferguson = ifelse(year < 2014, "Pre-Ferguson", "Post-Ferguson") %>% fct_relevel("Pre-Ferguson", "Post-Ferguson")) # make total dataset total <- d %>% filter(group != "Total") %>% group_by(year) %>% summarise(all = sum(all), reported = sum(yes_reported_to_the_police)) %>% mutate(report_rate = reported / all * 100) # plot by ethnic group reporting_plot <- d %>% ggplot(aes(year, report_rate, colour = ferguson)) + geom_vline(xintercept = 2013.5, linetype = "dashed") + geom_point() + geom_smooth(method = "lm") + scale_y_continuous(limits = c(0, 100), labels = percent_format(scale = 1)) + scale_x_continuous(breaks = c(1995, 2005, 2013.5), labels = c("1995", "2005", "Micheal Brown\nkilled in Ferguson")) + facet_grid( ~ group, switch = "y") + labs(y = "% reported to police", x = NULL, caption = "Source: NCVS [National Crime Victimization Survey] Victimisation Analysis Tool (NVAT); https://www.bjs.gov/index.cfm?ty=nvat") + see::theme_lucid() + scale_colour_brewer(type = "qual") + theme(legend.title = element_blank()) ggsave("sf3 report plot.png", reporting_plot, height = 7, width = 9) # check standard deviations of report rate d_sum <- d %>% filter(ferguson == "Pre-Ferguson") %>% group_by(group) %>% summarise(mean = mean(report_rate), sd = sd(report_rate)) d_sum ``` # test effect, all ```{r} assault_total <- total %>% mutate(level = ifelse(year >= 2014, 1, 0)) %>% group_by(level) %>% mutate(time = ifelse(level == 1, row_number() %>% as.numeric(), rev(row_number() %>% as.numeric() - 1) * -1), trend = ifelse(level == 1, time, 0)) assault_model <- lm(log(report_rate) ~ level + time + trend, assault_total %>% filter(year != 2014)) look(assault_model) # check autocorrelation autocor(assault_model) ## no evidence of autocorrelation # add predicted values assault_fitted <- bind_cols(assault_total, # fitted values predict(assault_model, assault_total, interval = "confidence") %>% exp() %>% as_tibble() %>% # counterfactual values mutate(counter = predict(assault_model, assault_total %>% mutate(level = 0, trend = 0)) %>% exp(), group = "All respondents")) ``` # test effect, black respondents ```{r} black_assault <- d %>% filter(group == "Black respondents") %>% mutate(level = ifelse(year >= 2014, 1, 0)) %>% group_by(level) %>% mutate(time = ifelse(level == 1, row_number() %>% as.numeric(), rev(row_number() %>% as.numeric() - 1) * -1), trend = ifelse(level == 1, time, 0)) black_assault_model <- lm(log(report_rate) ~ level + time + trend, black_assault %>% filter(year != 2014)) look(black_assault_model) # check autocorrelation autocor(black_assault_model) ## no evidence of autocorrelation # add predicted values black_fitted <- bind_cols(black_assault, # fitted values predict(black_assault_model, black_assault, interval = "confidence") %>% exp() %>% as_tibble() %>% # counterfactual values mutate(counter = predict(black_assault_model, black_assault %>% mutate(level = 0, trend = 0)) %>% exp())) ``` # combined results ```{r} results <- bind_rows( look(assault_model) %>% mutate(Respondents = "All respondents"), look(black_assault_model) %>% mutate(Respondents = "Black respondents")) %>% filter(term %in% c("level", "trend")) %>% mutate(term = recode(term, "level" = "Acute", "trend" = "Gradual")) %>% write_csv(file = "st3 ncvs.csv") ``` # combine its data, plot ```{r} assault_plot <- # combine bind_rows(assault_fitted, black_fitted) %>% mutate(group = fct_relevel(group, "All respondents", "Black respondents")) %>% # plot data ggplot(aes(year, report_rate, ymin = lwr, ymax = upr)) + geom_vline(xintercept = 2014, size = 3, alpha = .5, colour = "#95a5a6") + geom_ribbon(data = . %>% filter(year <= 2013), alpha = .3, colour = "#c83200", colour = "blue") + geom_ribbon(data = . %>% filter(year > 2014), alpha = .3, colour = "#c83200", colour = "blue") + geom_point(colour = "#c83200") + geom_line(data = . %>% filter(year <= 2013), aes(year, fit), alpha = .5, size = 1.2, colour = "#c83200") + geom_line(data = . %>% filter(year >= 2015), aes(year, fit), alpha = .5, size = 1.2, colour = "#c83200") + geom_line(data = . %>% filter(year >= 2015), mapping = aes(year, counter), size = 1.2, linetype = "dashed", alpha = .5, colour = "#c83200") + labs(x = NULL, y = "Aggravated assaults reported to police") + coord_cartesian(ylim = c(0, 100)) + scale_y_continuous(labels = percent_format(scale = 1)) + scale_x_continuous(breaks = c(1995, 2005, 2014), labels = c("1995", "2005", "Micheal Brown\nkilled in Ferguson")) + facet_wrap( ~ group, ncol = 1) + theme_lucid() + theme(legend.title = element_blank(), axis.text = element_text(size = 8), text = element_text(size = 12)) ggsave("sf2 reporting its.png") ```