# Figures 2 and 3: Line Charts

# DATA SCIENCE IN A PANDEMIC
## Professor Dennis F.X. Mathaisel
## This script produces the line chart visualizations referenced as Figures 2 and 3 in the paper.
## Script developed by Abdullah Zahid under the direction of Professor Mathaisel.
## The dataset was taken from https://ourworldindata.org/coronavirus

#Libraries

# Install a package only when it is not already available, so that re-running
# the script does not download and reinstall everything each time.
# Extra arguments (e.g. dependencies = TRUE) are forwarded to install.packages().
install_if_missing <- function(pkg, ...) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, ...)
  }
}

# Same packages and options as before; `dependencies` is now spelled out in
# full instead of relying on partial matching of `dep`.
install_if_missing("ggvis", dependencies = TRUE)
install_if_missing("lattice", dependencies = TRUE)
install_if_missing("RJSplot", dependencies = TRUE)
install_if_missing("hrbrthemes")
install_if_missing("Hmisc")

library(ggvis)

library(ggplot2)

library(dplyr)

library(hrbrthemes)

library(viridis)

library(tidyr)

library(Hmisc)

# Set the working directory to the folder that holds the data file.
# NOTE(review): this is an absolute, machine-specific path; adjust it (or use
# the file.choose() alternative below) when running on another computer.
setwd("C:/Docs/Papers/COVID-19/Journal Papers/1st Paper Data Science in a Pandemic/Data Science Journal/Revision Data Science Journal/Scripts/1st Paper Scripts Repository/Data")

# Read the COVID-19 data file from the working directory into a data frame.
covid <- read.csv("owid-covid-data.csv")

# Alternative: choose the file interactively instead of relying on the path.
# covid <- read.csv(file.choose())

# Check the structure of the data: str() prints each column's name, type,
# and first few values.

str(covid)

# Check that the data is a data frame (prints TRUE or FALSE).

is.data.frame(covid)

# Attributes of the data frame.
# names(): column names.

names(covid)

# dim(): number of rows and number of columns.

dim(covid)

# class(): the object's class vector.

class(covid)

# length(): for a data frame this is the number of columns.

length(covid)

# attributes(): every attribute stored on the object. For a data frame this
# includes the row names, so the printed output can be very long.

attributes(covid)

# Per-column summary of the data.

summary(covid)

# Convert the date column from text to Date.
# The original call accepted only month/day/year text ("%m/%d/%Y"); text in any
# other layout was turned into NA, and the later `date > ...` filters then drop
# every row. tryFormats tries month/day/year first and falls back to ISO
# year-month-day; as.Date() raises an error if neither layout matches, instead
# of silently producing NA.
covid$date <- as.Date(
  as.character(covid$date),
  tryFormats = c("%m/%d/%Y", "%Y-%m-%d")
)

# Confirm that `date` is now of class Date.
str(covid)

# Filter to the countries with the most COVID cases.

# Number every row of the full data set (1..n) in one vectorised step.
covid$Record <- seq_len(nrow(covid))

# ISO codes of the ten countries compared in the figures.
top_iso_codes <- c(
  "BRA", "FRA", "DEU", "IND", "ITA", "RUS", "ESP", "TUR", "GBR", "USA"
)

# Keep only the columns used by the charts and only the rows for those
# countries. The original rebuilt this same table once per row of `covid`
# inside a for loop; the result is identical, but it is now computed once.
topcovid <- covid %>%
  select(
    iso_code, location, date,
    total_cases, new_cases, total_deaths, new_deaths
  ) %>%
  filter(iso_code %in% top_iso_codes)

#topcovid

str(topcovid)

# Missing values

# Columns used by the charts.
chart_columns <- c(
  "iso_code", "location", "date",
  "total_cases", "new_cases", "total_deaths", "new_deaths"
)

# Report, per column, whether any value is missing (TRUE = has NA).
vapply(topcovid[chart_columns], anyNA, logical(1))

# Removing missing values
# The original called na.omit() on each column without storing the result,
# which only printed the columns and left topcovid unchanged. Here the rows
# that have an NA in any of the chart columns are actually dropped.
topcovid <- topcovid[complete.cases(topcovid[chart_columns]), ]

# Cases of Covid since October

# Number the rows of topcovid (1..n) in one vectorised step.
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 30 September 2020. The original recomputed this same subset
# once per row of topcovid inside a for loop; it is now computed once.
Subset1 <- topcovid %>%
  select(
    iso_code, location, date,
    total_cases, new_cases, total_deaths, new_deaths
  ) %>%
  filter(date > as.Date("2020-09-30"))

#####################################################################################################################

# Graph 1
# Line graphs showing trends since October 2020

# Plot 1: one line per location, total_cases against date.
Subset1 %>%
  ggplot(aes(x = date, y = total_cases, group = location, color = location)) +
  geom_line() +
  ggtitle("Cases of Covid Since October") +
  theme_ipsum() +
  ylab("Cases")

# Cases of Covid since November

# Number the rows of topcovid (1..n) in one vectorised step.
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 31 October 2020. The original recomputed this same subset
# once per row of topcovid inside a for loop; it is now computed once.
Subset2 <- topcovid %>%
  select(
    iso_code, location, date,
    total_cases, new_cases, total_deaths, new_deaths
  ) %>%
  filter(date > as.Date("2020-10-31"))

# Plot 2: one line per location, total_cases against date.
Subset2 %>%
  ggplot(aes(x = date, y = total_cases, group = location, color = location)) +
  geom_line() +
  ggtitle("Cases of Covid Since November") +
  theme_ipsum() +
  ylab("Cases")

# Cases of Covid since December

# Number the rows of topcovid (1..n) in one vectorised step.
topcovid$Record <- seq_len(nrow(topcovid))

# Rows dated after 30 November 2020. The original recomputed this same subset
# once per row of topcovid inside a for loop; it is now computed once.
Subset3 <- topcovid %>%
  select(
    iso_code, location, date,
    total_cases, new_cases, total_deaths, new_deaths
  ) %>%
  filter(date > as.Date("2020-11-30"))

# Plot 3: one line per location, total_cases against date.
Subset3 %>%
  ggplot(aes(x = date, y = total_cases, group = location, color = location)) +
  geom_line() +
  ggtitle("Cases of Covid Since December") +
  theme_ipsum() +
  ylab("Cases")

#####################################################################################################################

# Graph 2
# Graph showing the ratio of cases to deaths; the author considered this the
# least significant of the plots.

# NOTE(review): the beginning of the original subset() call was lost from the
# script; only its tail (the last iso_code comparisons and the select=
# argument) survived. The statement below is a reconstruction: the country list
# is assumed to be the same ten ISO codes used elsewhere in the script, and the
# snapshot date is assumed to be the most recent date in the data. Confirm both
# against the published figure.
graph2_iso_codes <- c(
  "BRA", "FRA", "DEU", "IND", "ITA", "RUS", "ESP", "TUR", "GBR", "USA"
)
graph2_date <- max(covid$date[covid$iso_code %in% graph2_iso_codes], na.rm = TRUE)

covidGraph2 <- subset(
  covid,
  date == graph2_date & iso_code %in% graph2_iso_codes,
  select = c(date, location, total_cases, total_deaths)
)

# Cases that did not end in death (the original used an en dash here, which is
# not a valid minus operator in R).
testedPos <- covidGraph2$total_cases - covidGraph2$total_deaths

# Two rows per country: one for each case type.
country <- rep(covidGraph2$location, each = 2)

# Repeat the pair of labels once per country instead of a hard-coded 5, so the
# labels always line up with `country` and `totalCases`.
caseType <- rep(c("Alive & Tested Positive", "Deaths"), times = nrow(covidGraph2))

# rbind() + c() interleaves the two vectors: alive, deaths, alive, deaths, ...
totalCases <- c(rbind(testedPos, covidGraph2$total_deaths))

data <- data.frame(country, caseType, totalCases)

# Stacked bars scaled to 100% per country.
ggplot(data, aes(fill = caseType, y = totalCases, x = country)) +
  geom_bar(position = "fill", stat = "identity") +
  ggtitle("Cases:Death ratio")

#####################################################################################################################

# Graph 3
# Barplot showing cases per million of population

# Inputs for the calculation: total case count and population per country
# (the same figures the original typed into ten separate round() calls).
cases_per_million <- data.frame(
  Country = c(
    "Brazil", "Spain", "Italy", "Germany", "USA",
    "India", "France", "Turkey", "United Kingdom", "Russia"
  ),
  total_cases = c(
    7961673, 2024904, 2220361, 1886561, 21574043,
    10413417, 2763370, 2296102, 2898052, 3297833
  ),
  population = c(
    209500000, 46940000, 60000000, 84000000, 331000000,
    1353000000, 66990000, 82000000, 66650000, 144500000
  )
)

# Cases per million population: cases / (population / 1000000), rounded to two
# decimals. (The original comment said 100000, but the code divided by
# 1000000.) Computing the column here replaces the values that were previously
# copied by hand from the console into the bar chart data.
cases_per_million$Cases <- round(
  cases_per_million$total_cases / (cases_per_million$population / 1000000),
  digits = 2
)

# Show the inputs and the computed rates.
cases_per_million

# Data for the bar chart: same two columns as before (Country, Cases).
barplot1 <- cases_per_million[, c("Country", "Cases")]

# Horizontal bar chart, countries ordered by cases per million, with the value
# printed on each bar.
ggplot(barplot1, aes(x = reorder(Country, Cases), y = Cases, label = Cases)) +
  geom_bar(aes(fill = Country), stat = "identity") +
  coord_flip() +
  geom_text(size = 4) +
  ylab("Cases per million population") +
  xlab("Location") +
  ggtitle("Cases per Million Population for Top 10 Countries")

#####################################################################################################################

# Graph 4
# Scatterplot of total cases against total deaths, with points filled by
# country.
# Known issue (author's note): the plot did not render correctly; please check
# whether it can be corrected. A solution is still being worked on.

# ScatterPlot
layer_points(
  ggvis(topcovid, ~total_cases, ~total_deaths),
  fill = ~factor(location)
)

#####################################################################################################################

# END OF SCRIPT

# Faculty Sites

# Dennis F.X. Mathaisel