## DATA SCIENCE IN A PANDEMIC
## Professor Dennis F.X. Mathaisel
## This script entails data visualizations, referenced as Figures 4 and 5 in the paper.
## Script Generated by: Dennis Mathaisel, and Nicholas Widjaja under direction of Professor Mathaisel
## Data Source: https://ourworldindata.org/coronavirus
# Set the working directory so the relative CSV path below resolves.
# NOTE(review): this absolute path is machine-specific; adjust it (or use the
# file.choose() alternative below) when running elsewhere.
setwd("C:/Docs/Papers/COVID-19/Journal Papers/1st Paper Data Science in a Pandemic/Data Science Journal/Revision Data Science Journal/Scripts/1st Paper Scripts Repository/Data")

# Read the COVID-19 data set into a data frame.
covid <- read.csv("COVID-19 Newcovid.csv")
# Alternative: pick the file interactively instead of relying on setwd().
# covid <- read.csv(file.choose(), header = TRUE)
# Libraries
# ggvis is required by Graph 4 and Graph 5; install it only when it is missing
# so that re-running the script does not reinstall the package every time.
if (!requireNamespace("ggvis", quietly = TRUE)) {
  install.packages("ggvis", dependencies = TRUE)
}
library(ggvis)
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)
library(tidyr)
library(Hmisc)
# Checking structure of Data
str(covid)

# Checking Missing NA of Data: one TRUE/FALSE per column of interest.
# `[[` returns NULL for an absent column, and anyNA(NULL) is FALSE, which
# matches what the individual anyNA(covid$col) calls would report.
na_check_columns <- c(
  "date", "day", "month", "cases", "deaths",
  "country", "geoId", "countrycode", "popData2018"
)
vapply(
  na_check_columns,
  function(column_name) anyNA(covid[[column_name]]),
  logical(1)
)

# Check if Data is Dataframe
is.data.frame(covid)

# Attributes of Data Frame
names(covid)
dim(covid)
class(covid)
length(covid)
attributes(covid)

# Summary of data
summary(covid)
# Working copy of the data.
# The original script re-read "COVID-19 Newcovid.csv" into newcovid and then
# immediately overwrote it with covid; only the copy has any effect, so the
# redundant read is dropped.
newcovid <- covid

# Change format for date: parse month/day/4-digit-year strings into Date.
newcovid$date <- as.Date(newcovid$date, format = "%m/%d/%Y")
str(newcovid)
# Filtering the most covid cases
# Number every row of newcovid (1..n). This replaces the row-by-row loop,
# which also rebuilt topcovid on every iteration.
newcovid$Record <- seq_len(nrow(newcovid))

# Keep only the columns used by the graphs, restricted to the five countries
# of interest, identified by geoId.
topcovid <- newcovid %>%
  select(
    date, cases, deaths, country, geoId, countrycode,
    Total.cases, Total.deaths
  ) %>%
  filter(geoId %in% c("IT", "US", "CN", "ES", "DE"))

topcovid
str(topcovid)
######################################################################### ############################################
# Graph 1
# Cases of Covid Past 3 Months
# Number the rows of topcovid (1..n) in one vectorized step instead of a loop.
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 2020-01-01.
Subset1 <- topcovid %>%
  select(
    date, cases, deaths, country, geoId, countrycode,
    Total.cases, Total.deaths
  ) %>%
  filter(date > as.Date("2020-01-01"))

# Plot 1: Total.cases over time, one line per country.
ggplot(Subset1, aes(x = date, y = Total.cases, group = country, color = country)) +
  geom_line() +
  ggtitle("Cases of Covid Past 3 Months") +
  theme_ipsum() +
  ylab("Cases")
# Cases of Covid Past 2 Months
# Number the rows of topcovid (1..n) in one vectorized step instead of a loop.
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 2020-02-01.
Subset2 <- topcovid %>%
  select(
    date, cases, deaths, country, geoId, countrycode,
    Total.cases, Total.deaths
  ) %>%
  filter(date > as.Date("2020-02-01"))

# Plot 2: Total.cases over time, one line per country.
ggplot(Subset2, aes(x = date, y = Total.cases, group = country, color = country)) +
  geom_line() +
  ggtitle("Cases of Covid Past 2 Months") +
  theme_ipsum() +
  ylab("Cases")
# Cases of Covid Past 1 Month
# Number the rows of newcovid (1..n) in one vectorized step instead of a loop.
newcovid$Record <- seq_len(nrow(newcovid))

# Rows dated after 2020-03-01.
# NOTE(review): unlike Subset1 and Subset2, this subset is taken from newcovid
# (not topcovid), so it is not restricted to the five selected countries.
# Confirm that this is intended.
Subset3 <- newcovid %>%
  select(date, cases, deaths, country, geoId, countrycode) %>%
  filter(date > as.Date("2020-03-01"))

# Inspect the raw date column and the resulting subset.
covid$date
Subset3
str(Subset3)

# Plot 3: cases over time, one line per country.
ggplot(Subset3, aes(x = date, y = cases, group = country, color = country)) +
  geom_line() +
  ggtitle("Cases of Covid Past 1 Month") +
  theme_ipsum() +
  ylab("Cases")
######################################################################### ############################################
# Graph 2
# Stacked (100%) bar chart splitting each country's Total.cases on 2020-04-01
# into deaths and the remainder (Total.cases - Total.deaths).
covidGraph2 <- subset(
  topcovid,
  date == as.Date("2020-04-01") &
    countrycode %in% c("CHN", "DEU", "ITA", "ESP", "USA"),
  select = c(date, country, Total.cases, Total.deaths)
)

# Cases that did not end in death.
testedPos <- covidGraph2$Total.cases - covidGraph2$Total.deaths

# Build a long-format table with two rows per country, in the order
# (Alive & Tested Positive, Deaths). The repeat count follows the number of
# rows actually selected instead of a hard-coded 5, so the vectors stay
# aligned even if a country has no row for that date.
country <- rep(covidGraph2$country, each = 2)
caseType <- rep(
  c("Alive & Tested Positive", "Deaths"),
  times = nrow(covidGraph2)
)
# rbind() + c() interleaves the two vectors: pos1, death1, pos2, death2, ...
totalCases <- c(rbind(testedPos, covidGraph2$Total.deaths))
data <- data.frame(country, caseType, totalCases)

ggplot(data, aes(fill = caseType, y = totalCases, x = country)) +
  geom_bar(position = "fill", stat = "identity") +
  ggtitle("Cases:Death ratio")
######################################################################### ############################################
# Graph 3
# Calculations: cases per million population = cases / (population / 1000000)
# The case counts and populations below are the figures from the original
# per-country calculation lines (Spain, Italy, Germany, United States, China).
# Computing the rates here, instead of hand-copying the rounded results,
# keeps the plotted values tied to their inputs.
total_cases <- c(
  Spain = 102136,
  Italy = 110574,
  Germany = 73522,
  USA = 216721,
  China = 82395
)
population <- c(
  Spain = 47000000,
  Italy = 60000000,
  Germany = 84000000,
  USA = 331000000,
  China = 1439000000
)

# Barplot data: one row per country with its cases per million, 2 decimals.
barplot1 <- data.frame(
  Country = names(total_cases),
  Cases = round(
    unname(total_cases) / (unname(population) / 1000000),
    digits = 2
  )
)
# Barplot: horizontal bars of cases per million, ordered by value, with the
# numeric value drawn as a text label on each bar.
ggplot(barplot1, aes(x = reorder(Country, Cases), y = Cases, label = Cases)) +
  geom_bar(aes(fill = Country), stat = "identity") +
  coord_flip() +
  geom_text(size = 4) +
  ylab("Cases per million population") +
  xlab("Country") +
  ggtitle("Cases per Million Population for Top 5 Countries")
######################################################################### ############################################
# Graph 4
# ScatterPlot: cases (x) against deaths (y) for every row of topcovid, with
# the point fill mapped to country.
layer_points(
  ggvis(topcovid, ~cases, ~deaths),
  fill = ~factor(country)
)
######################################################################### ############################################
# Graph 5
# Interactive Density Plot: density of cases in Subset3, with a slider
# (0.1 to 1, step 0.1, initial value 1) that controls the `adjust` argument
# of the density layer.
Subset3 %>%
  ggvis(~cases) %>%
  layer_densities(
    adjust = input_slider(.1, 1, value = 1, step = .1, label = "Adjustment")
  )
# END OF SCRIPT
# © 2024 Babson College. All rights reserved.