## Faculty Sites

## DATA SCIENCE IN A PANDEMIC

## Professor Dennis F.X. Mathaisel


## This script generates a series of violin plots, referenced as Figures 6, 7 and 8 in the paper.

## Script Generated by Abdullah Zahid under the direction of Professor Mathaisel


# Packages Utilized
# Each library() call must sit on its own line: a `#` comments out everything
# up to the end of the line, so calls placed after the heading never run.
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)
library(Hmisc)
library(tidyr)
library(ggvis)
library(plotly)


# Read File: The file is provided within the "Data" section of the repository.
# The path is relative, so the CSV has to be in the current working directory.
covid <- read.csv("Asia Top 5 Cases.csv")


# Check internal structure and summary statistics of the "covid" dataset
str(covid)

summary(covid)


# Checking for Missing Values within the "covid" dataset.
# Every listed column is tested on its own; the result is a named logical
# vector that is TRUE for each column containing at least one NA.
na_check_columns <- c(
  "date", "day", "month", "cases", "deaths", "countries",
  "geoId", "countrycode", "popData2018", "continent"
)

vapply(
  na_check_columns,
  function(column) anyNA(covid[[column]]),
  logical(1)
)


# 1. Violin Graph entailing information on Number of Confirmed Cases in:
#    China, India, Iran, Israel, Turkey

df <- covid

# Build the violin plot: `countries` on the x axis, `cases` on the y axis,
# and `split = ~countries` so each country is drawn as its own violin.
# The box plot and the mean line are both switched on inside each violin.
fig <- df %>%
  plot_ly(
    x = ~countries,
    y = ~cases,
    split = ~countries,
    type = "violin",
    box = list(visible = TRUE),
    meanline = list(visible = TRUE)
  )

# Axis titles; the horizontal zero line on the y axis is turned off.
fig <- fig %>%
  layout(
    xaxis = list(title = "Asia COVID Cases"),
    yaxis = list(title = "Number of Cases", zeroline = FALSE)
  )

# Evaluating the object at top level displays the figure.
fig