#attach packages
library(tidyverse)
library(knitr)
library(readxl)
library(zoo)
covid = read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
head(covid, 5)
## # A tibble: 5 x 6
## date county state fips cases deaths
## <date> <chr> <chr> <chr> <dbl> <dbl>
## 1 2020-01-21 Snohomish Washington 53061 1 0
## 2 2020-01-22 Snohomish Washington 53061 1 0
## 3 2020-01-23 Snohomish Washington 53061 1 0
## 4 2020-01-24 Cook Illinois 17031 1 0
## 5 2020-01-24 Snohomish Washington 53061 1 0
countydata = covid %>%
filter(state == "California") %>%
group_by(county) %>%
mutate(newCases = cases - lag(cases)) %>%
ungroup()
countydata2= countydata %>%
filter(date == max(date))
(most_new_cases = countydata2 %>%
slice_max(newCases, n = 5) %>%
select(county, newCases))
## # A tibble: 5 x 2
## county newCases
## <chr> <dbl>
## 1 Los Angeles 809
## 2 San Diego 265
## 3 Orange 185
## 4 Fresno 159
## 5 San Bernardino 156
(most_cumulative_cases = countydata2 %>%
slice_max(cases, n = 5) %>%
select(county, cases))
## # A tibble: 5 x 2
## county cases
## <chr> <dbl>
## 1 Los Angeles 253985
## 2 Riverside 55073
## 3 Orange 52121
## 4 San Bernardino 50699
## 5 San Diego 42742
knitr::kable(most_new_cases,
caption = "Most New Cases California Counties",
col.names = c("County", "New Cases"),
format.args = list(big.mark = ","))
County | New Cases |
---|---|
Los Angeles | 809 |
San Diego | 265 |
Orange | 185 |
Fresno | 159 |
San Bernardino | 156 |
knitr::kable(most_cumulative_cases,
caption = "Most Cumulative Cases California Counties",
col.names = c("County", "Total Cases"),
format.args = list(big.mark = ","))
County | Total Cases |
---|---|
Los Angeles | 253,985 |
Riverside | 55,073 |
Orange | 52,121 |
San Bernardino | 50,699 |
San Diego | 42,742 |
library(readxl)
pop <- read_excel("~/github/geog-176A-labs/data/PopulationEstimates.xls", skip=2)
names(pop)
## [1] "FIPStxt" "State"
## [3] "Area_Name" "Rural-urban_Continuum Code_2003"
## [5] "Rural-urban_Continuum Code_2013" "Urban_Influence_Code_2003"
## [7] "Urban_Influence_Code_2013" "Economic_typology_2015"
## [9] "CENSUS_2010_POP" "ESTIMATES_BASE_2010"
## [11] "POP_ESTIMATE_2010" "POP_ESTIMATE_2011"
## [13] "POP_ESTIMATE_2012" "POP_ESTIMATE_2013"
## [15] "POP_ESTIMATE_2014" "POP_ESTIMATE_2015"
## [17] "POP_ESTIMATE_2016" "POP_ESTIMATE_2017"
## [19] "POP_ESTIMATE_2018" "POP_ESTIMATE_2019"
## [21] "N_POP_CHG_2010" "N_POP_CHG_2011"
## [23] "N_POP_CHG_2012" "N_POP_CHG_2013"
## [25] "N_POP_CHG_2014" "N_POP_CHG_2015"
## [27] "N_POP_CHG_2016" "N_POP_CHG_2017"
## [29] "N_POP_CHG_2018" "N_POP_CHG_2019"
## [31] "Births_2010" "Births_2011"
## [33] "Births_2012" "Births_2013"
## [35] "Births_2014" "Births_2015"
## [37] "Births_2016" "Births_2017"
## [39] "Births_2018" "Births_2019"
## [41] "Deaths_2010" "Deaths_2011"
## [43] "Deaths_2012" "Deaths_2013"
## [45] "Deaths_2014" "Deaths_2015"
## [47] "Deaths_2016" "Deaths_2017"
## [49] "Deaths_2018" "Deaths_2019"
## [51] "NATURAL_INC_2010" "NATURAL_INC_2011"
## [53] "NATURAL_INC_2012" "NATURAL_INC_2013"
## [55] "NATURAL_INC_2014" "NATURAL_INC_2015"
## [57] "NATURAL_INC_2016" "NATURAL_INC_2017"
## [59] "NATURAL_INC_2018" "NATURAL_INC_2019"
## [61] "INTERNATIONAL_MIG_2010" "INTERNATIONAL_MIG_2011"
## [63] "INTERNATIONAL_MIG_2012" "INTERNATIONAL_MIG_2013"
## [65] "INTERNATIONAL_MIG_2014" "INTERNATIONAL_MIG_2015"
## [67] "INTERNATIONAL_MIG_2016" "INTERNATIONAL_MIG_2017"
## [69] "INTERNATIONAL_MIG_2018" "INTERNATIONAL_MIG_2019"
## [71] "DOMESTIC_MIG_2010" "DOMESTIC_MIG_2011"
## [73] "DOMESTIC_MIG_2012" "DOMESTIC_MIG_2013"
## [75] "DOMESTIC_MIG_2014" "DOMESTIC_MIG_2015"
## [77] "DOMESTIC_MIG_2016" "DOMESTIC_MIG_2017"
## [79] "DOMESTIC_MIG_2018" "DOMESTIC_MIG_2019"
## [81] "NET_MIG_2010" "NET_MIG_2011"
## [83] "NET_MIG_2012" "NET_MIG_2013"
## [85] "NET_MIG_2014" "NET_MIG_2015"
## [87] "NET_MIG_2016" "NET_MIG_2017"
## [89] "NET_MIG_2018" "NET_MIG_2019"
## [91] "RESIDUAL_2010" "RESIDUAL_2011"
## [93] "RESIDUAL_2012" "RESIDUAL_2013"
## [95] "RESIDUAL_2014" "RESIDUAL_2015"
## [97] "RESIDUAL_2016" "RESIDUAL_2017"
## [99] "RESIDUAL_2018" "RESIDUAL_2019"
## [101] "GQ_ESTIMATES_BASE_2010" "GQ_ESTIMATES_2010"
## [103] "GQ_ESTIMATES_2011" "GQ_ESTIMATES_2012"
## [105] "GQ_ESTIMATES_2013" "GQ_ESTIMATES_2014"
## [107] "GQ_ESTIMATES_2015" "GQ_ESTIMATES_2016"
## [109] "GQ_ESTIMATES_2017" "GQ_ESTIMATES_2018"
## [111] "GQ_ESTIMATES_2019" "R_birth_2011"
## [113] "R_birth_2012" "R_birth_2013"
## [115] "R_birth_2014" "R_birth_2015"
## [117] "R_birth_2016" "R_birth_2017"
## [119] "R_birth_2018" "R_birth_2019"
## [121] "R_death_2011" "R_death_2012"
## [123] "R_death_2013" "R_death_2014"
## [125] "R_death_2015" "R_death_2016"
## [127] "R_death_2017" "R_death_2018"
## [129] "R_death_2019" "R_NATURAL_INC_2011"
## [131] "R_NATURAL_INC_2012" "R_NATURAL_INC_2013"
## [133] "R_NATURAL_INC_2014" "R_NATURAL_INC_2015"
## [135] "R_NATURAL_INC_2016" "R_NATURAL_INC_2017"
## [137] "R_NATURAL_INC_2018" "R_NATURAL_INC_2019"
## [139] "R_INTERNATIONAL_MIG_2011" "R_INTERNATIONAL_MIG_2012"
## [141] "R_INTERNATIONAL_MIG_2013" "R_INTERNATIONAL_MIG_2014"
## [143] "R_INTERNATIONAL_MIG_2015" "R_INTERNATIONAL_MIG_2016"
## [145] "R_INTERNATIONAL_MIG_2017" "R_INTERNATIONAL_MIG_2018"
## [147] "R_INTERNATIONAL_MIG_2019" "R_DOMESTIC_MIG_2011"
## [149] "R_DOMESTIC_MIG_2012" "R_DOMESTIC_MIG_2013"
## [151] "R_DOMESTIC_MIG_2014" "R_DOMESTIC_MIG_2015"
## [153] "R_DOMESTIC_MIG_2016" "R_DOMESTIC_MIG_2017"
## [155] "R_DOMESTIC_MIG_2018" "R_DOMESTIC_MIG_2019"
## [157] "R_NET_MIG_2011" "R_NET_MIG_2012"
## [159] "R_NET_MIG_2013" "R_NET_MIG_2014"
## [161] "R_NET_MIG_2015" "R_NET_MIG_2016"
## [163] "R_NET_MIG_2017" "R_NET_MIG_2018"
## [165] "R_NET_MIG_2019"
p2=pop %>%
select(fips=FIPStxt, state= State, county=Area_Name, pop2019=POP_ESTIMATE_2019) %>%
filter(state=="CA") %>%
group_by(county) %>%
slice_max(pop2019, n=1) %>%
select(fips, pop2019)
covidpop = inner_join(countydata, p2, by="fips")
count(covidpop)
## # A tibble: 1 x 1
## n
## <int>
## 1 10470
head(covidpop)
## # A tibble: 6 x 9
## date county.x state fips cases deaths newCases county.y pop2019
## <date> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2020-01-25 Orange Califo~ 06059 1 0 NA Orange Coun~ 3.18e6
## 2 2020-01-26 Los Angel~ Califo~ 06037 1 0 NA Los Angeles~ 1.00e7
## 3 2020-01-26 Orange Califo~ 06059 1 0 0 Orange Coun~ 3.18e6
## 4 2020-01-27 Los Angel~ Califo~ 06037 1 0 0 Los Angeles~ 1.00e7
## 5 2020-01-27 Orange Califo~ 06059 1 0 0 Orange Coun~ 3.18e6
## 6 2020-01-28 Los Angel~ Califo~ 06037 1 0 0 Los Angeles~ 1.00e7
covidpop2=covidpop %>%
filter(date==max(date))
(most_new_cases_capita = covidpop2 %>%
mutate(CPC=cases/pop2019) %>%
slice_max(CPC, n = 5) %>%
select(county.x, CPC))
## # A tibble: 5 x 2
## county.x CPC
## <chr> <dbl>
## 1 Imperial 0.0622
## 2 Kings 0.0464
## 3 Kern 0.0341
## 4 Tulare 0.0324
## 5 Merced 0.0308
(most_cumulative_cases_capita = covidpop2 %>%
mutate(NCPC=newCases/pop2019) %>%
slice_max(NCPC, n = 5) %>%
select(county.x, NCPC))
## # A tibble: 5 x 2
## county.x NCPC
## <chr> <dbl>
## 1 Kings 0.000262
## 2 San Benito 0.000239
## 3 Monterey 0.000203
## 4 Lake 0.000171
## 5 Fresno 0.000159
knitr::kable(most_new_cases_capita,
caption = "Most New Cases per Capita in California Counties",
col.names = c("County", "New Cases per Capita"),
format.args = list(big.mark = ","))
County | New Cases per Capita |
---|---|
Imperial | 0.0622134 |
Kings | 0.0464038 |
Kern | 0.0341423 |
Tulare | 0.0324199 |
Merced | 0.0307584 |
knitr::kable(most_cumulative_cases_capita,
caption = "Most Cumulative Cases per Capita in California Counties",
col.names = c("County", "Total Cases per Capita"),
format.args = list(big.mark = ","))
County | Total Cases per Capita |
---|---|
Kings | 0.0002615 |
San Benito | 0.0002388 |
Monterey | 0.0002027 |
Lake | 0.0001708 |
Fresno | 0.0001591 |
library(dplyr)
covidpop3=covidpop %>%
filter(date >= max(date)-14) %>%
group_by(county.x, fips) %>%
summarize(newCases=sum(newCases)) %>%
inner_join(p2, by="fips") %>%
select(county,newCases,pop2019)
print((sum(covidpop3$newCases)/sum(covidpop3$pop2019))*100000)
## [1] 154.1042
The total number of new cases for the state of California is
print(sum(most_cumulative_cases$cases))
## [1] 454620
The total number of new cases for the state of California is
print(sum(most_new_cases$newCases))
## [1] 1574
The safest counties that have not yet broken the COVID-19 guidelines include:
safe_county=covidpop3 %>%
mutate(guideline=(newCases/pop2019)*100000) %>%
filter(guideline<100) %>%
pull(county) ->
safe_counties
print(safe_counties)
## [1] "Alpine County" "Del Norte County" "El Dorado County" "Humboldt County"
## [5] "Inyo County" "Lassen County" "Mariposa County" "Mono County"
## [9] "Nevada County" "Placer County" "Plumas County" "Shasta County"
## [13] "Sierra County" "Siskiyou County" "Solano County" "Tehama County"
## [17] "Trinity County" "Tuolumne County"
state.of.interest=c("New York", "California", "Louisiana", "Florida")
covid %>%
filter(state==state.of.interest) %>%
group_by(state, date) %>%
summarise(cases = sum(cases)) %>%
ungroup() %>%
mutate(newCases = cases - lag(cases),
roll7 = rollmean(newCases, 7, fill = NA, align="right")) %>%
ggplot(aes(x = date, y = newCases, color = state)) +
geom_line(size = 1) +
facet_wrap(~state) +
ggthemes::theme_solarized() +
theme(legend.position = 'NA') +
labs(title = "Daily New Case Counts",
subtitle = "Data Source: NY-Times",
x = "Date",
y = "New Cases")
state.of.interest=c("New York", "California", "Louisiana", "Florida")
p3=pop %>%
select(fips=FIPStxt, state=Area_Name, pop2019=POP_ESTIMATE_2019) %>%
group_by(state) %>%
slice_max(pop2019, n=1) %>%
select(fips, pop2019)
percapdata=covid %>%
filter(state %in% c("New York", "California", "Louisiana", "Florida" )) %>%
group_by(state, date) %>%
summarize(cases=sum(cases), .groups="drop") %>%
mutate(newCases = cases - lag(cases)) %>%
group_by(state) %>%
right_join(p3, by="state") %>%
mutate(percap= newCases/pop2019,
roll7 = rollmean(percap, 7, fill = NA, align="right"))
#making the second per capita graph
percapdata %>%
ggplot(aes(x = date, y = percap, color = state)) +
geom_line(size = 1) +
facet_wrap(~state) +
ggthemes::theme_solarized() +
theme(legend.position = 'NA') +
labs(title = "Daily New Case Counts",
subtitle = "Data Source: NY-Times",
x = "Date",
y = "New Cases per Capita")
Scaling by population makes some states appear much better, and makes others appear worse. This is because it places the new case counts in a ratio with the total population, so we get a better idea of what percentage of the population actually has covid.