## DATA SCIENCE IN A PANDEMIC
## Professor Dennis F.X. Mathaisel
## This script generates a series of violin plots, referenced as Figures 6, 7 and 8 in the paper.
## Script Generated by Abdullah Zahid under the direction of Professor Mathaisel
# Packages Utilized ----
# Each library() call must sit on its own line: anything that follows a `#`
# on the same line is treated as a comment and never executed.
# The attach order is kept as originally listed; packages attached later take
# precedence when exported names collide, so plotly (used for the violin plot
# below) is attached last.
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)
library(Hmisc)
library(tidyr)
library(ggvis)
library(plotly)
# Read File: The file is provided within the "Data" section of the repository.
# The path is relative, so the script must be run from the directory that
# contains the CSV. String literals need straight quotes; typographic
# ("curly") quotes are not valid R string delimiters.
covid <- read.csv("Asia Top 5 Cases.csv")
# Check internal structure and summary statistics of the "covid" dataset
str(covid)
summary(covid)

# Checking for missing values within the "covid" dataset.
# Each call returns TRUE if the given column contains at least one NA.
# One call per line: several calls on the same line without a separator is a
# syntax error in R.
anyNA(covid$date)
anyNA(covid$day)
anyNA(covid$month)
anyNA(covid$cases)
anyNA(covid$deaths)
anyNA(covid$countries)
anyNA(covid$geoId)
anyNA(covid$countrycode)
anyNA(covid$popData2018)
anyNA(covid$continent)
# 1. Violin graph of the number of confirmed cases in:
#    China, India, Iran, Israel, Turkey
df <- covid

# Build the violin plot with plotly:
#   - x / split by `countries` so each country gets its own violin (trace),
#   - y is the `cases` column,
#   - `box` overlays a miniature box plot inside each violin,
#   - `meanline` draws a line at each distribution's mean.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
fig <- df %>%
  plot_ly(
    x = ~countries,
    y = ~cases,
    split = ~countries,
    type = "violin",
    box = list(visible = TRUE),
    meanline = list(visible = TRUE)
  )

# Axis titles; the horizontal zero line on the y axis is hidden.
fig <- fig %>%
  layout(
    xaxis = list(title = "Asia COVID Cases"),
    yaxis = list(title = "Number of Cases", zeroline = FALSE)
  )

# Print the figure so it renders in the viewer / output.
fig
# © 2024 Babson College. All rights reserved.