Question 1

#attach packages
library(tidyverse)
library(knitr)
library(readxl)
library(zoo)

covid = read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')

head(covid, 5)
## # A tibble: 5 x 6
##   date       county    state      fips  cases deaths
##   <date>     <chr>     <chr>      <chr> <dbl>  <dbl>
## 1 2020-01-21 Snohomish Washington 53061     1      0
## 2 2020-01-22 Snohomish Washington 53061     1      0
## 3 2020-01-23 Snohomish Washington 53061     1      0
## 4 2020-01-24 Cook      Illinois   17031     1      0
## 5 2020-01-24 Snohomish Washington 53061     1      0
countydata = covid %>% 
  filter(state == "California") %>% 
  group_by(county) %>% 
  mutate(newCases = cases - lag(cases)) %>% 
  ungroup()
countydata2= countydata %>% 
  filter(date == max(date))

(most_new_cases = countydata2 %>% 
  slice_max(newCases, n = 5) %>% 
  select(county, newCases))
## # A tibble: 5 x 2
##   county         newCases
##   <chr>             <dbl>
## 1 Los Angeles         809
## 2 San Diego           265
## 3 Orange              185
## 4 Fresno              159
## 5 San Bernardino      156
(most_cumulative_cases = countydata2 %>% 
  slice_max(cases, n = 5) %>% 
  select(county, cases))
## # A tibble: 5 x 2
##   county          cases
##   <chr>           <dbl>
## 1 Los Angeles    253985
## 2 Riverside       55073
## 3 Orange          52121
## 4 San Bernardino  50699
## 5 San Diego       42742
knitr::kable(most_new_cases, 
             caption = "Most New Cases California Counties",
             col.names = c("County", "New Cases"),
             format.args = list(big.mark = ","))
Most New Cases California Counties
County New Cases
Los Angeles 809
San Diego 265
Orange 185
Fresno 159
San Bernardino 156
knitr::kable(most_cumulative_cases, 
             caption = "Most Cumulative Cases California Counties",
             col.names = c("County", "Total Cases"),
             format.args = list(big.mark = ","))
Most Cumulative Cases California Counties
County Total Cases
Los Angeles 253,985
Riverside 55,073
Orange 52,121
San Bernardino 50,699
San Diego 42,742
library(readxl)
pop <- read_excel("~/github/geog-176A-labs/data/PopulationEstimates.xls", skip=2)
names(pop)
##   [1] "FIPStxt"                         "State"                          
##   [3] "Area_Name"                       "Rural-urban_Continuum Code_2003"
##   [5] "Rural-urban_Continuum Code_2013" "Urban_Influence_Code_2003"      
##   [7] "Urban_Influence_Code_2013"       "Economic_typology_2015"         
##   [9] "CENSUS_2010_POP"                 "ESTIMATES_BASE_2010"            
##  [11] "POP_ESTIMATE_2010"               "POP_ESTIMATE_2011"              
##  [13] "POP_ESTIMATE_2012"               "POP_ESTIMATE_2013"              
##  [15] "POP_ESTIMATE_2014"               "POP_ESTIMATE_2015"              
##  [17] "POP_ESTIMATE_2016"               "POP_ESTIMATE_2017"              
##  [19] "POP_ESTIMATE_2018"               "POP_ESTIMATE_2019"              
##  [21] "N_POP_CHG_2010"                  "N_POP_CHG_2011"                 
##  [23] "N_POP_CHG_2012"                  "N_POP_CHG_2013"                 
##  [25] "N_POP_CHG_2014"                  "N_POP_CHG_2015"                 
##  [27] "N_POP_CHG_2016"                  "N_POP_CHG_2017"                 
##  [29] "N_POP_CHG_2018"                  "N_POP_CHG_2019"                 
##  [31] "Births_2010"                     "Births_2011"                    
##  [33] "Births_2012"                     "Births_2013"                    
##  [35] "Births_2014"                     "Births_2015"                    
##  [37] "Births_2016"                     "Births_2017"                    
##  [39] "Births_2018"                     "Births_2019"                    
##  [41] "Deaths_2010"                     "Deaths_2011"                    
##  [43] "Deaths_2012"                     "Deaths_2013"                    
##  [45] "Deaths_2014"                     "Deaths_2015"                    
##  [47] "Deaths_2016"                     "Deaths_2017"                    
##  [49] "Deaths_2018"                     "Deaths_2019"                    
##  [51] "NATURAL_INC_2010"                "NATURAL_INC_2011"               
##  [53] "NATURAL_INC_2012"                "NATURAL_INC_2013"               
##  [55] "NATURAL_INC_2014"                "NATURAL_INC_2015"               
##  [57] "NATURAL_INC_2016"                "NATURAL_INC_2017"               
##  [59] "NATURAL_INC_2018"                "NATURAL_INC_2019"               
##  [61] "INTERNATIONAL_MIG_2010"          "INTERNATIONAL_MIG_2011"         
##  [63] "INTERNATIONAL_MIG_2012"          "INTERNATIONAL_MIG_2013"         
##  [65] "INTERNATIONAL_MIG_2014"          "INTERNATIONAL_MIG_2015"         
##  [67] "INTERNATIONAL_MIG_2016"          "INTERNATIONAL_MIG_2017"         
##  [69] "INTERNATIONAL_MIG_2018"          "INTERNATIONAL_MIG_2019"         
##  [71] "DOMESTIC_MIG_2010"               "DOMESTIC_MIG_2011"              
##  [73] "DOMESTIC_MIG_2012"               "DOMESTIC_MIG_2013"              
##  [75] "DOMESTIC_MIG_2014"               "DOMESTIC_MIG_2015"              
##  [77] "DOMESTIC_MIG_2016"               "DOMESTIC_MIG_2017"              
##  [79] "DOMESTIC_MIG_2018"               "DOMESTIC_MIG_2019"              
##  [81] "NET_MIG_2010"                    "NET_MIG_2011"                   
##  [83] "NET_MIG_2012"                    "NET_MIG_2013"                   
##  [85] "NET_MIG_2014"                    "NET_MIG_2015"                   
##  [87] "NET_MIG_2016"                    "NET_MIG_2017"                   
##  [89] "NET_MIG_2018"                    "NET_MIG_2019"                   
##  [91] "RESIDUAL_2010"                   "RESIDUAL_2011"                  
##  [93] "RESIDUAL_2012"                   "RESIDUAL_2013"                  
##  [95] "RESIDUAL_2014"                   "RESIDUAL_2015"                  
##  [97] "RESIDUAL_2016"                   "RESIDUAL_2017"                  
##  [99] "RESIDUAL_2018"                   "RESIDUAL_2019"                  
## [101] "GQ_ESTIMATES_BASE_2010"          "GQ_ESTIMATES_2010"              
## [103] "GQ_ESTIMATES_2011"               "GQ_ESTIMATES_2012"              
## [105] "GQ_ESTIMATES_2013"               "GQ_ESTIMATES_2014"              
## [107] "GQ_ESTIMATES_2015"               "GQ_ESTIMATES_2016"              
## [109] "GQ_ESTIMATES_2017"               "GQ_ESTIMATES_2018"              
## [111] "GQ_ESTIMATES_2019"               "R_birth_2011"                   
## [113] "R_birth_2012"                    "R_birth_2013"                   
## [115] "R_birth_2014"                    "R_birth_2015"                   
## [117] "R_birth_2016"                    "R_birth_2017"                   
## [119] "R_birth_2018"                    "R_birth_2019"                   
## [121] "R_death_2011"                    "R_death_2012"                   
## [123] "R_death_2013"                    "R_death_2014"                   
## [125] "R_death_2015"                    "R_death_2016"                   
## [127] "R_death_2017"                    "R_death_2018"                   
## [129] "R_death_2019"                    "R_NATURAL_INC_2011"             
## [131] "R_NATURAL_INC_2012"              "R_NATURAL_INC_2013"             
## [133] "R_NATURAL_INC_2014"              "R_NATURAL_INC_2015"             
## [135] "R_NATURAL_INC_2016"              "R_NATURAL_INC_2017"             
## [137] "R_NATURAL_INC_2018"              "R_NATURAL_INC_2019"             
## [139] "R_INTERNATIONAL_MIG_2011"        "R_INTERNATIONAL_MIG_2012"       
## [141] "R_INTERNATIONAL_MIG_2013"        "R_INTERNATIONAL_MIG_2014"       
## [143] "R_INTERNATIONAL_MIG_2015"        "R_INTERNATIONAL_MIG_2016"       
## [145] "R_INTERNATIONAL_MIG_2017"        "R_INTERNATIONAL_MIG_2018"       
## [147] "R_INTERNATIONAL_MIG_2019"        "R_DOMESTIC_MIG_2011"            
## [149] "R_DOMESTIC_MIG_2012"             "R_DOMESTIC_MIG_2013"            
## [151] "R_DOMESTIC_MIG_2014"             "R_DOMESTIC_MIG_2015"            
## [153] "R_DOMESTIC_MIG_2016"             "R_DOMESTIC_MIG_2017"            
## [155] "R_DOMESTIC_MIG_2018"             "R_DOMESTIC_MIG_2019"            
## [157] "R_NET_MIG_2011"                  "R_NET_MIG_2012"                 
## [159] "R_NET_MIG_2013"                  "R_NET_MIG_2014"                 
## [161] "R_NET_MIG_2015"                  "R_NET_MIG_2016"                 
## [163] "R_NET_MIG_2017"                  "R_NET_MIG_2018"                 
## [165] "R_NET_MIG_2019"
p2=pop %>% 
  select(fips=FIPStxt, state= State, county=Area_Name, pop2019=POP_ESTIMATE_2019) %>% 
  filter(state=="CA") %>% 
  group_by(county) %>% 
  slice_max(pop2019, n=1) %>% 
  select(fips, pop2019)
  
covidpop = inner_join(countydata, p2, by="fips")
count(covidpop)
## # A tibble: 1 x 1
##       n
##   <int>
## 1 10470
head(covidpop)
## # A tibble: 6 x 9
##   date       county.x   state   fips  cases deaths newCases county.y     pop2019
##   <date>     <chr>      <chr>   <chr> <dbl>  <dbl>    <dbl> <chr>          <dbl>
## 1 2020-01-25 Orange     Califo~ 06059     1      0       NA Orange Coun~  3.18e6
## 2 2020-01-26 Los Angel~ Califo~ 06037     1      0       NA Los Angeles~  1.00e7
## 3 2020-01-26 Orange     Califo~ 06059     1      0        0 Orange Coun~  3.18e6
## 4 2020-01-27 Los Angel~ Califo~ 06037     1      0        0 Los Angeles~  1.00e7
## 5 2020-01-27 Orange     Califo~ 06059     1      0        0 Orange Coun~  3.18e6
## 6 2020-01-28 Los Angel~ Califo~ 06037     1      0        0 Los Angeles~  1.00e7
covidpop2=covidpop %>% 
  filter(date==max(date))

(most_new_cases_capita = covidpop2 %>% 
  mutate(CPC=cases/pop2019) %>% 
  slice_max(CPC, n = 5) %>% 
  select(county.x, CPC))
## # A tibble: 5 x 2
##   county.x    CPC
##   <chr>     <dbl>
## 1 Imperial 0.0622
## 2 Kings    0.0464
## 3 Kern     0.0341
## 4 Tulare   0.0324
## 5 Merced   0.0308
(most_cumulative_cases_capita = covidpop2 %>% 
  mutate(NCPC=newCases/pop2019) %>% 
  slice_max(NCPC, n = 5) %>% 
  select(county.x, NCPC))
## # A tibble: 5 x 2
##   county.x       NCPC
##   <chr>         <dbl>
## 1 Kings      0.000262
## 2 San Benito 0.000239
## 3 Monterey   0.000203
## 4 Lake       0.000171
## 5 Fresno     0.000159
knitr::kable(most_new_cases_capita, 
             caption = "Most New Cases per Capita in California Counties",
             col.names = c("County", "New Cases per Capita"),
             format.args = list(big.mark = ","))
Most New Cases per Capita in California Counties
County New Cases per Capita
Imperial 0.0622134
Kings 0.0464038
Kern 0.0341423
Tulare 0.0324199
Merced 0.0307584
knitr::kable(most_cumulative_cases_capita, 
             caption = "Most Cumulative Cases per Capita in California Counties",
             col.names = c("County", "Total Cases per Capita"),
             format.args = list(big.mark = ","))
Most Cumulative Cases per Capita in California Counties
County Total Cases per Capita
Kings 0.0002615
San Benito 0.0002388
Monterey 0.0002027
Lake 0.0001708
Fresno 0.0001591
library(dplyr)
covidpop3=covidpop %>% 
  filter(date >= max(date)-14) %>% 
  group_by(county.x, fips) %>% 
  summarize(newCases=sum(newCases)) %>% 
  inner_join(p2, by="fips") %>% 
  select(county,newCases,pop2019)
print((sum(covidpop3$newCases)/sum(covidpop3$pop2019))*100000)
## [1] 154.1042

The total number of new cases for the state of California is

print(sum(most_cumulative_cases$cases))
## [1] 454620

The total number of new cases for the state of California is

print(sum(most_new_cases$newCases))
## [1] 1574

The safest counties that have not yet broken the COVID-19 guidelines include:

safe_county=covidpop3 %>%
  mutate(guideline=(newCases/pop2019)*100000) %>% 
  filter(guideline<100) %>% 
  pull(county) ->
  safe_counties
  
print(safe_counties)
##  [1] "Alpine County"    "Del Norte County" "El Dorado County" "Humboldt County" 
##  [5] "Inyo County"      "Lassen County"    "Mariposa County"  "Mono County"     
##  [9] "Nevada County"    "Placer County"    "Plumas County"    "Shasta County"   
## [13] "Sierra County"    "Siskiyou County"  "Solano County"    "Tehama County"   
## [17] "Trinity County"   "Tuolumne County"

Question 2

state.of.interest=c("New York", "California", "Louisiana", "Florida")
covid %>%
  filter(state==state.of.interest) %>%
  group_by(state, date) %>%
  summarise(cases = sum(cases)) %>%
  ungroup() %>%
  mutate(newCases = cases - lag(cases),
         roll7 = rollmean(newCases, 7, fill = NA, align="right")) %>%
  ggplot(aes(x = date, y = newCases, color = state)) +
  geom_line(size = 1) +
  facet_wrap(~state) +
  ggthemes::theme_solarized() +
  theme(legend.position = 'NA') +
  labs(title = "Daily New Case Counts",
       subtitle = "Data Source: NY-Times",
       x = "Date",
       y = "New Cases")

state.of.interest=c("New York", "California", "Louisiana", "Florida")
p3=pop %>% 
  select(fips=FIPStxt, state=Area_Name, pop2019=POP_ESTIMATE_2019) %>% 
  group_by(state) %>% 
  slice_max(pop2019, n=1) %>% 
  select(fips, pop2019)

percapdata=covid %>%
  filter(state %in% c("New York", "California", "Louisiana", "Florida" )) %>%
  group_by(state, date) %>% 
  summarize(cases=sum(cases), .groups="drop") %>% 
  mutate(newCases = cases - lag(cases)) %>% 
  group_by(state) %>% 
  right_join(p3, by="state") %>% 
  mutate(percap= newCases/pop2019,
         roll7 = rollmean(percap, 7, fill = NA, align="right"))
#making the second per capita graph
percapdata %>% 
  ggplot(aes(x = date, y = percap, color = state)) +
  geom_line(size = 1) +
  facet_wrap(~state) +
  ggthemes::theme_solarized() +
  theme(legend.position = 'NA') +
  labs(title = "Daily New Case Counts",
       subtitle = "Data Source: NY-Times",
       x = "Date",
       y = "New Cases per Capita")

Scaling by population makes some states appear much better, and makes others appear worse. This is because it places the new case counts in a ratio with the total population, so we get a better idea of what percentage of the population actually has covid.