## DATA SCIENCE IN A PANDEMIC

## Professor Dennis F.X. Mathaisel

## This script produces the data visualizations referenced as Figures 4 and 5 in the paper.

## Script Generated by: Dennis Mathaisel, and Nicholas Widjaja under direction of Professor Mathaisel

## Data Source: https://ourworldindata.org/coronavirus

# Set the working directory
# NOTE: this absolute path is machine-specific; point it at the folder that
# contains "COVID-19 Newcovid.csv" before running the script.
setwd("C:/Docs/Papers/COVID-19/Journal Papers/1st Paper Data Science in a Pandemic/Data Science Journal/Revision Data Science Journal/Scripts/1st Paper Scripts Repository/Data")

# Read the data file (the file name is resolved against the working directory)
covid <- read.csv("COVID-19 Newcovid.csv")
# Interactive alternative:
# covid <- read.csv(file.choose(), header = TRUE)

# Libraries
# ggvis is installed only when it is missing, so re-running the script does
# not reinstall the package every time.
if (!requireNamespace("ggvis", quietly = TRUE)) {
  install.packages("ggvis", dependencies = TRUE)
}
library(ggvis)

library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)
library(tidyr)
library(Hmisc)

# Checking structure of the data
str(covid)

# Checking each expected column for missing (NA) values.
# `covid[[col]]` is NULL when a column is absent and anyNA(NULL) is FALSE, so
# an absent column reports FALSE instead of stopping the script.
na_check_cols <- c(
  "date", "day", "month", "cases", "deaths",
  "country", "geoId", "countrycode", "popData2018"
)
vapply(na_check_cols, function(col) anyNA(covid[[col]]), logical(1))

# Check that the data is a data frame
is.data.frame(covid)

# Attributes of the data frame
names(covid)
dim(covid)
class(covid)
length(covid)
attributes(covid)

# Summary of data
summary(covid)

# Working copy of the data
# The original re-read the same CSV and then immediately overwrote the result
# with `covid`; only the copy has any effect, so the redundant read is dropped.
newcovid <- covid

# Change format for date: parse month/day/four-digit-year text into Date values
newcovid$date <- as.Date(newcovid$date, format = "%m/%d/%Y")
str(newcovid)

# Filtering the most covid cases
# Record is a running row number. It is assigned in one vectorised step
# instead of a row-by-row loop that also rebuilt `topcovid` on every pass.
newcovid$Record <- seq_len(nrow(newcovid))

# Keep the columns used by the plots and restrict to the five geoId codes
topcovid <- newcovid %>%
  select(
    date, cases, deaths, country, geoId, countrycode,
    Total.cases, Total.deaths
  ) %>%
  filter(geoId %in% c("IT", "US", "CN", "ES", "DE"))

topcovid
str(topcovid)

######################################################################### ############################################

# Graph 1

# Cases of Covid Past 3 Months
# Record is a running row number on topcovid (vectorised; no loop needed).
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 2020-01-01
Subset1 <- topcovid %>%
  select(
    date, cases, deaths, country, geoId, countrycode,
    Total.cases, Total.deaths
  ) %>%
  filter(date > as.Date("2020-01-01"))

# Plot 1: one line per country of Total.cases over time
Subset1 %>%
  ggplot(aes(x = date, y = Total.cases, group = country, color = country)) +
  geom_line() +
  ggtitle("Cases of Covid Past 3 Months") +
  theme_ipsum() +
  ylab("Cases")

# Cases of Covid Past 2 Months
# Record is a running row number on topcovid (vectorised; no loop needed).
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 2020-02-01
Subset2 <- topcovid %>%
  select(
    date, cases, deaths, country, geoId, countrycode,
    Total.cases, Total.deaths
  ) %>%
  filter(date > as.Date("2020-02-01"))

# Plot 2: one line per country of Total.cases over time
Subset2 %>%
  ggplot(aes(x = date, y = Total.cases, group = country, color = country)) +
  geom_line() +
  ggtitle("Cases of Covid Past 2 Months") +
  theme_ipsum() +
  ylab("Cases")

# Cases of Covid Past 1 Month
# Record is a running row number on newcovid (vectorised; no loop needed).
newcovid$Record <- seq_len(nrow(newcovid))

# Rows dated after 2020-03-01.
# NOTE(review): this subset is taken from newcovid (all countries) and the plot
# uses the `cases` column, whereas the 3- and 2-month plots use topcovid and
# Total.cases -- confirm this difference is intended.
Subset3 <- newcovid %>%
  select(date, cases, deaths, country, geoId, countrycode) %>%
  filter(date > as.Date("2020-03-01"))

# Diagnostic output: raw date column, the subset itself, and its structure
covid$date
Subset3
str(Subset3)

# Plot 3: one line per country of `cases` over time
Subset3 %>%
  ggplot(aes(x = date, y = cases, group = country, color = country)) +
  geom_line() +
  ggtitle("Cases of Covid Past 1 Month") +
  theme_ipsum() +
  ylab("Cases")

######################################################################### ############################################

# Graph 2
# 100%-stacked bar per country splitting Total.cases into deaths and the
# remainder, using the rows dated 2020-04-01.
covidGraph2 <- subset(
  topcovid,
  date == as.Date("2020-04-01") &
    countrycode %in% c("CHN", "DEU", "ITA", "ESP", "USA"),
  select = c(date, country, Total.cases, Total.deaths)
)

# Total cases minus deaths
testedPos <- covidGraph2$Total.cases - covidGraph2$Total.deaths

# Two rows per country, one per case type. rbind() + c() interleaves the two
# value vectors so they line up with `country` repeated each = 2.
country <- rep(covidGraph2$country, each = 2)
# Repeat the label pair once per matched row rather than a fixed 5 times, so
# the columns stay the same length if a country has no row for this date.
caseType <- rep(
  c("Alive & Tested Positive", "Deaths"),
  times = nrow(covidGraph2)
)
totalCases <- c(rbind(testedPos, covidGraph2$Total.deaths))
data <- data.frame(country, caseType, totalCases)

ggplot(data, aes(fill = caseType, y = totalCases, x = country)) +
  geom_bar(position = "fill", stat = "identity") +
  ggtitle("Cases:Death ratio")

######################################################################### ############################################

# Graph 3

# Calculations: cases per million population
# cases / (population / 1000000), rounded to 2 decimals.
# The three vectors below are in the same order:
# Spain, Italy, Germany, United States, China.
graph3_countries <- c("Spain", "Italy", "Germany", "USA", "China")
graph3_cases <- c(102136, 110574, 73522, 216721, 82395)
graph3_population <- c(47000000, 60000000, 84000000, 331000000, 1439000000)

# Barplot data: computed from the inputs instead of hand-copied results
# (yields 2173.11, 1842.90, 875.26, 654.75, 57.26).
barplot1 <- data.frame(
  Country = graph3_countries,
  Cases = round(graph3_cases / (graph3_population / 1000000), digits = 2)
)

# Barplot: horizontal bars ordered by Cases, each labelled with its value
ggplot(barplot1, aes(x = reorder(Country, Cases), y = Cases, label = Cases)) +
  geom_bar(aes(fill = Country), stat = "identity") +
  coord_flip() +
  geom_text(size = 4) +
  ylab("Cases per million population") +
  xlab("Country") +
  ggtitle("Cases per Million Population for Top 5 Countries")

######################################################################### ############################################

# Graph 4

# Scatter plot of cases (x) against deaths (y); point fill keyed by country
topcovid %>%
  ggvis(x = ~cases, y = ~deaths) %>%
  layer_points(fill = ~factor(country))

######################################################################### ############################################

# Graph 5

# Interactive Density Plot
# Density of `cases` in Subset3; the slider feeds layer_densities()'s `adjust`
# argument (range 0.1 to 1, step 0.1, starting at 1).
Subset3 %>%
  ggvis(~cases) %>%
  layer_densities(
    adjust = input_slider(.1, 1, value = 1, step = .1, label = "Adjustment")
  )

# END OF SCRIPT