Faculty Sites

!DOCTYPE html PUBLIC “-//W3C//DTD XHTML 1.0 Transitional//EN” “http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd”>

## DATA SCIENCE IN A PANDEMIC

## Professor Dennis F.X. Mathaisel

## This script entails a data visualization, referenced as Figures 2 and 3 in the paper. # The script has been originally produced by Peter Ellis

# The dataset utilized are from Johns Hopkins University

# The script utilizes the “coronavirus” package developed by Rami Krispin

# The script has been modified by Veo Chae under direction of Professor Mathaisel

#————— Setup——————–

devtools::install_github(“RamiKrispin/coronavirus”) install.packages(“coronavirus”)

library(coronavirus) library(tidyverse) library(scales)

#package “ggpubr” was added to create a grid for 2 graphs (Edit: Chae) install.packages(“ggpubr”)

library(ggpubr)

the_caption = “Source: WHO and many others via Johns Hopkins University and Rami Krispin’s coronavirus R package.\nAnalysis by http://freerangestats.info”

#Changed from Top 8 –> Top 10(Edit: Chae) top_countries <- coronavirus %>%

filter(type == “confirmed”) %>% group_by(country) %>% summarise(cases = sum(cases)) %>% top_n(10, wt = cases)

#—————————global total——————-

#Non China is being assessed as China is the starting point of the pandemic. first_non_china_d <- coronavirus %>%

filter(country != “China” & type == “death” & cases > 0) %>% arrange(date) %>%

slice(1) %>% pull(date)

#Alteration from Italy –> US(Edit: Chae) first_US_d <- coronavirus %>%

filter(country == “US” & type == “death” & cases > 0) %>% arrange(date) %>%

slice(1) %>% pull(date)

#grouping in order to create a cumulative worldwide fatality rate over time d1 <- coronavirus %>%

group_by(date, type) %>% summarise(cases = sum(cases)) %>% arrange(date) %>%

spread(type, cases) %>%

ungroup() %>%

mutate(cfr_today = death / confirmed,

cfr_cumulative_1 = cumsum(death) / cumsum(confirmed))

d1b <- d1 %>%

filter(date %in% c(first_US_d, first_non_china_d)) ac <- “steelblue”

#Observing the first global date when passing 10,000 and 100,000 cases of confirmed cases. d1c <- d1 %>%

mutate(cc = cumsum(confirmed)) %>% summarise(`10000` = min(date[cc > 10000]),

`100000` = min(date[cc > 100000])) %>% gather(variable, date) %>%

left_join(d1, by = “date”) %>%

mutate(label = paste0(format(as.numeric(variable), big.mark = “,”, scientific = FALSE), “\ncases”))

#Assigned the graph with name “GLOBAL” in order to put together 2 graphs at the end (Edit: Chae) GLOBAL <- d1 %>%

ggplot(aes(d1,x = date, y = cfr_cumulative_1)) + geom_line() +

scale_y_continuous(label = percent_format(accuracy = 0.1)) + expand_limits(y = 0) +

geom_point(data = d1b, colour = ac, shape = 1, size = 2) +

#added a geom_point to point out when was the date when the 10,000 and 100,000 confirmed cases took place within the graph (Edit: Chae)

geom_point(data = d1c, color = “black”, shape = 1, size = 2)+ annotate(“text”, x = first_US_d,

y = filter(d1, date == first_US_d)$cfr_cumulative_1 – 0.001, label = “First death in US”,

hjust = 0, size = 2, colour = ac) +

#added annotation so that the date of the First US death will be presented (Edit: Chae) annotate(“text”, x = first_US_d,

y = filter(d1, date == first_US_d)$cfr_cumulative_1 – 0.002, label = first_US_d,

hjust = 0, size = 2, colour = ac) + annotate(“text”, x = first_non_china_d,

y = filter(d1, date == first_non_china_d)$cfr_cumulative_1 + 0.001, label = “First death outside China”,

hjust = -.15, size = 2, colour = ac) +

#added annotation so that the date of the First Non-US death will be presented (Edit: Chae) annotate(“text”, x = first_non_china_d,

y = filter(d1, date == first_non_china_d)$cfr_cumulative_1, label = first_non_china_d,

hjust = -.2, size = 2, colour = ac) +

#changed the text color from grey –> black + nudge_y to to 0.003 (Edit: Chae) geom_text(data = d1c, aes(label = label),

size = 2, colour = “black”,

hjust = 1, lineheight = 0.9, nudge_y = 0.003) + labs(x = “”,

y = “Observed case fatality rate”,

#deleted the outdated title and adjusted to the title below for up to date use (Edit: Chae) title = “Global case fatality rate of COVID-19”)+

#line 75 was added to center the graph Title(Edit: Chae) theme(plot.title = element_text(hjust=0.5))

#—————–Country-specific totals————————

#calculating country specific fatality rate and merge with the Top 10 countries list d2 <- coronavirus %>%

group_by(date, country, type) %>% summarise(cases = sum(cases)) %>% group_by(date, country) %>% spread(type, cases) %>% arrange(date) %>% group_by(country) %>%

mutate(cfr_cumulative = cumsum(death) / cumsum(confirmed)) %>% filter(!is.na(cfr_cumulative)) %>%

ungroup() %>% inner_join(top_countries, by = “country”)

#Assigned the graph with name “COUNTRY” in order to place it in grid at the end (Edit: Chae) COUNTRY <- d2 %>%

ggplot(aes(x = date, y = cfr_cumulative, colour = country)) + geom_line() +

geom_text(data = filter(d2, date == max(date)), aes(label = country), hjust = 0, check_overlap = FALSE, size = 3) +

scale_y_continuous(label = percent_format(accuracy = 1), limits = c(0, 0.16)) + #Pallete Color was Changed from Set 2 to Set 3 as Set 2 only included 8 colors

#Top 8 countries were changed to Top 10 in the beginning, thus required a new pallete (Edit: Chae) scale_colour_brewer(palette = “Set3”) +

expand_limits(x = max(d2$date) + 10) + labs(caption = the_caption,

x = “”,

#deleted the y axis for better reading when placed on grid at the end (Edit: Chae) y = “”,

#The title was adjusted so that it can be used with the up to date information (Edit: Chae) title = “Country-specific case fatality rate of COVID-19”) +

#line 104 was added to center the graph Title (Edit: Chae) theme(plot.title = element_text(hjust=0.5))+

#Caption Size was adjusted (Edit: Chae) theme(plot.caption = element_text(size = 4))+

#legend was added as the marking on the graph itself was hard to read (Edit: Chae) theme(legend.position = “bottom”)+

theme(legend.title = element_text( size=5), legend.text=element_text(size=5)) #————creating grid for the 2 graphs —————

#placed the “GLOBAL” and “COUNTRY” graphs into a grid so that it is easier to view (Edit: Chae) ggarrange(GLOBAL,COUNTRY)

install.packages(“renderLeaflet”) library(renderLeaflet)

output$global_fatal <- renderLeaflet(GLOBAL) output$global_Top_10 <- renderLeaflet(COUNTRY)

# END OF SCRIPT