The following excerpt are the processing and data exploration steps completed for the Influenza Analysis
.
#load needed packages. make sure they are installed.
library(readxl) #for loading Excel files
library(dplyr) #for data processing
library(here) #to set paths
#path to data
<- here::here("data","raw_data","SympAct_Any_Pos.Rds")
data_location
#load data.
<- readRDS(data_location)
rawdata
#take a look at the data
::glimpse(rawdata) dplyr
## Rows: 735
## Columns: 63
## $ DxName1 <fct> "Influenza like illness - Clinical Dx", "Acute tonsi~
## $ DxName2 <fct> NA, "Influenza like illness - Clinical Dx", "Acute p~
## $ DxName3 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Fever, unspecified"~
## $ DxName4 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Other fatigue", NA,~
## $ DxName5 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Headache", NA, NA, ~
## $ Unique.Visit <chr> "340_17632125", "340_17794836", "342_17737773", "342~
## $ ActivityLevel <int> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, ~
## $ ActivityLevelF <fct> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, ~
## $ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y~
## $ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, ~
## $ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, ~
## $ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, ~
## $ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes~
## $ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes~
## $ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi~
## $ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve~
## $ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes~
## $ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, ~
## $ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No~
## $ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N~
## $ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, ~
## $ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,~
## $ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye~
## $ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y~
## $ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,~
## $ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y~
## $ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y~
## $ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,~
## $ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, ~
## $ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, ~
## $ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N~
## $ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, ~
## $ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N~
## $ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N~
## $ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, ~
## $ RapidFluA <fct> Presumptive Negative For Influenza A, NA, Presumptiv~
## $ RapidFluB <fct> Presumptive Negative For Influenza B, NA, Presumptiv~
## $ PCRFluA <fct> NA, NA, NA, NA, NA, NA, Influenza A Not Detected, N~
## $ PCRFluB <fct> NA, NA, NA, NA, NA, NA, Influenza B Not Detected, N~
## $ TransScore1 <dbl> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5~
## $ TransScore1F <fct> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5~
## $ TransScore2 <dbl> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4~
## $ TransScore2F <fct> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4~
## $ TransScore3 <dbl> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3~
## $ TransScore3F <fct> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3~
## $ TransScore4 <dbl> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4~
## $ TransScore4F <fct> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4~
## $ ImpactScore <int> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9~
## $ ImpactScore2 <int> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,~
## $ ImpactScore3 <int> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, ~
## $ ImpactScoreF <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9~
## $ ImpactScore2F <fct> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,~
## $ ImpactScore3F <fct> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, ~
## $ ImpactScoreFD <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9~
## $ TotalSymp1 <dbl> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1~
## $ TotalSymp1F <fct> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1~
## $ TotalSymp2 <dbl> 8, 10, 17, 16, 11, 14, 10, 11, 13, 10, 14, 19, 13, 1~
## $ TotalSymp3 <dbl> 8, 9, 16, 15, 11, 14, 10, 10, 12, 9, 13, 18, 12, 16,~
str(rawdata)
## 'data.frame': 735 obs. of 63 variables:
## $ DxName1 : Factor w/ 92 levels "Acute bronchitis, unspecified",..: 57 16 57 57 9 57 39 17 57 57 ...
## $ DxName2 : Factor w/ 142 levels "14 weeks gestation of pregnancy",..: NA 69 11 129 69 NA 69 60 12 NA ...
## $ DxName3 : Factor w/ 92 levels "Abnormal weight loss",..: NA NA NA NA NA NA NA NA 38 NA ...
## $ DxName4 : Factor w/ 41 levels "Acute bronchitis, unspecified",..: NA NA NA NA NA NA NA NA 30 NA ...
## $ DxName5 : Factor w/ 5 levels "Acute suppurative otitis media without spontaneous rupture of ear drum, right ear",..: NA NA NA NA NA NA NA NA 3 NA ...
## $ Unique.Visit : chr "340_17632125" "340_17794836" "342_17737773" "342_17806002" ...
## $ ActivityLevel : int 10 6 2 2 5 3 4 0 0 5 ...
## ..- attr(*, "label")= chr "Activity Level"
## $ ActivityLevelF : Factor w/ 11 levels "0","1","2","3",..: 11 7 3 3 6 4 5 1 1 6 ...
## $ SwollenLymphNodes: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 2 1 ...
## ..- attr(*, "label")= chr "Swollen Lymph Nodes"
## $ ChestCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## ..- attr(*, "label")= chr "Chest Congestion"
## $ ChillsSweats : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 2 2 2 1 ...
## ..- attr(*, "label")= chr "Chills/Sweats"
## $ NasalCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## ..- attr(*, "label")= chr "Nasal Congestion"
## $ CoughYN : Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Cough"
## $ Sneeze : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 2 1 2 1 1 ...
## ..- attr(*, "label")= chr "Sneeze"
## $ Fatigue : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Fatigue"
## $ SubjectiveFever : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 1 ...
## ..- attr(*, "label")= chr "Subjective Fever"
## $ Headache : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 2 2 2 ...
## ..- attr(*, "label")= chr "Headache"
## $ Weakness : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 3 3 2 4 3 3 ...
## $ WeaknessYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Weakness"
## $ CoughIntensity : Factor w/ 4 levels "None","Mild",..: 4 4 2 3 1 3 4 3 3 3 ...
## ..- attr(*, "label")= chr "Cough Severity"
## $ CoughYN2 : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 2 2 2 2 ...
## $ Myalgia : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 2 3 2 4 3 2 ...
## $ MyalgiaYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Myalgia"
## $ RunnyNose : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 1 2 2 2 2 ...
## ..- attr(*, "label")= chr "Runny Nose"
## $ AbPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Abdominal Pain"
## $ ChestPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 2 1 1 1 ...
## ..- attr(*, "label")= chr "Chest Pain"
## $ Diarrhea : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 1 1 ...
## $ EyePn : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Eye Pain"
## $ Insomnia : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 2 2 2 ...
## ..- attr(*, "label")= chr "Sleeplessness"
## $ ItchyEye : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Itchy Eyes"
## $ Nausea : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 1 2 2 ...
## $ EarPn : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Ear Pain"
## $ Hearing : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Loss of Hearing"
## $ Pharyngitis : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 1 1 ...
## ..- attr(*, "label")= chr "Sore Throat"
## $ Breathless : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 1 1 1 2 ...
## ..- attr(*, "label")= chr "Breathlessness"
## $ ToothPn : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 1 ...
## ..- attr(*, "label")= chr "Tooth Pain"
## $ Vision : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Blurred Vision"
## $ Vomit : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## ..- attr(*, "label")= chr "Vomiting"
## $ Wheeze : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 1 1 ...
## ..- attr(*, "label")= chr "Wheezing"
## $ BodyTemp : num 98.3 100.4 100.8 98.8 100.5 ...
## $ RapidFluA : Factor w/ 2 levels "Positive for Influenza A",..: 2 NA 2 2 NA NA NA 1 2 2 ...
## $ RapidFluB : Factor w/ 2 levels "Positive for Influenza B",..: 2 NA 2 2 NA NA NA 2 2 2 ...
## $ PCRFluA : Factor w/ 4 levels " Influenza A Detected",..: NA NA NA NA NA NA 2 NA NA NA ...
## $ PCRFluB : Factor w/ 3 levels " Influenza B Detected",..: NA NA NA NA NA NA 2 NA NA NA ...
## $ TransScore1 : num 1 3 4 5 0 2 2 5 4 4 ...
## $ TransScore1F : Factor w/ 6 levels "0","1","2","3",..: 2 4 5 6 1 3 3 6 5 5 ...
## ..- attr(*, "label")= chr "Infectiousness Score"
## $ TransScore2 : num 1 2 3 4 0 2 2 4 3 3 ...
## $ TransScore2F : Factor w/ 5 levels "0","1","2","3",..: 2 3 4 5 1 3 3 5 4 4 ...
## ..- attr(*, "label")= chr "Infectiousness Score"
## $ TransScore3 : num 1 1 2 3 0 2 2 3 2 2 ...
## $ TransScore3F : Factor w/ 4 levels "0","1","2","3": 2 2 3 4 1 3 3 4 3 3 ...
## ..- attr(*, "label")= chr "Infectiousness Score"
## $ TransScore4 : num 0 2 4 4 0 1 1 4 3 3 ...
## $ TransScore4F : Factor w/ 5 levels "0","1","2","3",..: 1 3 5 5 1 2 2 5 4 4 ...
## $ ImpactScore : int 7 8 14 12 11 12 8 7 10 7 ...
## $ ImpactScore2 : int 6 7 13 11 10 11 7 6 9 6 ...
## $ ImpactScore3 : int 3 4 9 7 6 7 3 3 6 4 ...
## $ ImpactScoreF : Factor w/ 21 levels "0","1","2","3",..: 8 9 15 13 12 13 9 8 11 8 ...
## ..- attr(*, "label")= chr "Morbidity Score"
## $ ImpactScore2F : Factor w/ 19 levels "0","1","2","3",..: 7 8 14 12 11 12 8 7 10 7 ...
## ..- attr(*, "label")= chr "Morbidity Score"
## $ ImpactScore3F : Factor w/ 15 levels "0","1","2","3",..: 4 5 10 8 7 8 4 4 7 5 ...
## ..- attr(*, "label")= chr "Morbidity Score"
## $ ImpactScoreFD : Factor w/ 17 levels "2","3","4","5",..: 6 7 13 11 10 11 7 6 9 6 ...
## $ TotalSymp1 : num 8 11 18 17 11 14 10 12 14 11 ...
## $ TotalSymp1F : Factor w/ 19 levels "5","6","7","8",..: 4 7 14 13 7 10 6 8 10 7 ...
## $ TotalSymp2 : num 8 10 17 16 11 14 10 11 13 10 ...
## $ TotalSymp3 : num 8 9 16 15 11 14 10 10 12 9 ...
summary(rawdata)
## DxName1
## Influenza like illness - Clinical Dx :328
## Influenza - Virus Identified :131
## Fever, unspecified :101
## Cough : 66
## Acute pharyngitis, unspecified : 50
## Acute upper respiratory infection, unspecified: 22
## (Other) : 37
## DxName2
## Influenza - Virus Identified :126
## Influenza like illness - Clinical Dx:115
## Fever, unspecified : 45
## Cough : 41
## Acute pharyngitis, unspecified : 31
## (Other) : 97
## NA's :280
## DxName3
## Influenza - Virus Identified : 23
## Influenza like illness - Clinical Dx: 14
## Cough : 10
## Fever, unspecified : 6
## Acute pharyngitis, unspecified : 4
## (Other) : 52
## NA's :626
## DxName4
## Influenza - Virus Identified : 3
## Acute upper respiratory infection, unspecified: 2
## Encounter for immunization : 2
## Influenza like illness - Clinical Dx : 2
## Acute pharyngitis, unspecified : 1
## (Other) : 9
## NA's :716
## DxName5
## Acute suppurative otitis media without spontaneous rupture of ear drum, right ear : 0
## Encounter for immunization : 0
## Headache : 1
## Other infectious mononucleosis without complication : 0
## Strain of other flexor muscle, fascia and tendon at forearm level, right arm, subsequent encounter: 0
## NA's :734
##
## Unique.Visit ActivityLevel ActivityLevelF SwollenLymphNodes
## Length:735 Min. : 0.000 3 :125 No :421
## Class :character 1st Qu.: 3.000 5 : 97 Yes:314
## Mode :character Median : 4.000 4 : 95
## Mean : 4.463 2 : 80
## 3rd Qu.: 6.000 7 : 68
## Max. :10.000 6 : 66
## (Other):204
## ChestCongestion ChillsSweats NasalCongestion CoughYN Sneeze Fatigue
## No :326 No :131 No :170 No : 75 No :340 No : 64
## Yes:409 Yes:604 Yes:565 Yes:660 Yes:395 Yes:671
##
##
##
##
##
## SubjectiveFever Headache Weakness WeaknessYN CoughIntensity CoughYN2
## No :230 No :115 None : 49 No : 49 None : 47 No : 47
## Yes:505 Yes:620 Mild :224 Yes:686 Mild :156 Yes:688
## Moderate:341 Moderate:360
## Severe :121 Severe :172
##
##
##
## Myalgia MyalgiaYN RunnyNose AbPain ChestPain Diarrhea EyePn
## None : 79 No : 79 No :211 No :642 No :501 No :636 No :622
## Mild :214 Yes:656 Yes:524 Yes: 93 Yes:234 Yes: 99 Yes:113
## Moderate:327
## Severe :115
##
##
##
## Insomnia ItchyEye Nausea EarPn Hearing Pharyngitis Breathless
## No :316 No :553 No :477 No :573 No :705 No :121 No :438
## Yes:419 Yes:182 Yes:258 Yes:162 Yes: 30 Yes:614 Yes:297
##
##
##
##
##
## ToothPn Vision Vomit Wheeze BodyTemp
## No :569 No :716 No :656 No :514 Min. : 97.20
## Yes:166 Yes: 19 Yes: 79 Yes:221 1st Qu.: 98.20
## Median : 98.50
## Mean : 98.94
## 3rd Qu.: 99.30
## Max. :103.10
## NA's :5
## RapidFluA
## Positive for Influenza A :169
## Presumptive Negative For Influenza A:159
## NA's :407
##
##
##
##
## RapidFluB PCRFluA
## Positive for Influenza B : 26 Influenza A Detected :120
## Presumptive Negative For Influenza B:302 Influenza A Not Detected: 33
## NA's :407 Assay Invalid : 0
## Indeterminate : 1
## NA's :581
##
##
## PCRFluB TransScore1 TransScore1F TransScore2
## Influenza B Detected : 9 Min. :0.000 0: 13 Min. :0.000
## Influenza B Not Detected:145 1st Qu.:3.000 1: 53 1st Qu.:2.000
## Assay Invalid : 0 Median :4.000 2:107 Median :3.000
## NA's :581 Mean :3.473 3:157 Mean :2.917
## 3rd Qu.:5.000 4:210 3rd Qu.:4.000
## Max. :5.000 5:195 Max. :4.000
##
## TransScore2F TransScore3 TransScore3F TransScore4 TransScore4F
## 0: 13 Min. :0.000 0: 24 Min. :0.000 0: 50
## 1: 89 1st Qu.:1.000 1:166 1st Qu.:2.000 1:103
## 2:138 Median :2.000 2:222 Median :3.000 2:154
## 3:201 Mean :2.148 3:323 Mean :2.576 3:230
## 4:294 3rd Qu.:3.000 3rd Qu.:4.000 4:198
## Max. :3.000 Max. :4.000
##
## ImpactScore ImpactScore2 ImpactScore3 ImpactScoreF ImpactScore2F
## Min. : 2.000 Min. : 2.000 Min. : 0.00 8 :105 7 :107
## 1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 3.00 9 :104 8 :102
## Median : 9.000 Median : 8.000 Median : 5.00 10 : 88 9 : 90
## Mean : 9.514 Mean : 8.581 Mean : 5.06 7 : 84 10 : 86
## 3rd Qu.:11.000 3rd Qu.:10.000 3rd Qu.: 7.00 11 : 82 6 : 85
## Max. :18.000 Max. :17.000 Max. :13.00 12 : 58 11 : 59
## (Other):214 (Other):206
## ImpactScore3F ImpactScoreFD TotalSymp1 TotalSymp1F TotalSymp2
## 4 :134 8 :105 Min. : 5.00 12 : 86 Min. : 4.00
## 5 :112 9 :104 1st Qu.:11.00 13 : 84 1st Qu.:10.00
## 3 :108 10 : 88 Median :13.00 14 : 80 Median :12.00
## 6 :102 7 : 84 Mean :12.99 11 : 72 Mean :12.43
## 7 : 66 11 : 82 3rd Qu.:15.00 10 : 62 3rd Qu.:15.00
## 2 : 64 12 : 58 Max. :23.00 15 : 61 Max. :22.00
## (Other):149 (Other):214 (Other):290
## TotalSymp3
## Min. : 3.00
## 1st Qu.:10.00
## Median :12.00
## Mean :11.66
## 3rd Qu.:14.00
## Max. :21.00
##
#remove variable Score, Total, FluA, FluB, Dxname, or activity in name
#chose to use direct pipes since the actions were so similar and I didn't wnat to repeat myself too much in the comments
##select(-contains("")) == removes columns with the whatever is inside the "quotes".
<-rawdata%>%
rawdata1select(-contains("Score"))%>% #removed cols containing Score
select(-contains("Total"))%>% #removed cols containing Total
select(-contains("FluA"))%>% #removed cols containing FluA
select(-contains("FluB"))%>% #removed cols containing FluB
select(-contains("DxName"))%>% #removed cols containing DxName
select(-contains("Activity"))%>% #removed cols containing Activity
select(-c(Unique.Visit)) # -c removes exactly named cols, remove col Unique.Visit
#take a look at cleaned data removed cols directed by assessment page
glimpse(rawdata1)
## Rows: 735
## Columns: 32
## $ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y~
## $ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, ~
## $ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, ~
## $ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, ~
## $ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes~
## $ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes~
## $ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi~
## $ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve~
## $ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes~
## $ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, ~
## $ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No~
## $ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N~
## $ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, ~
## $ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,~
## $ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye~
## $ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y~
## $ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,~
## $ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y~
## $ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y~
## $ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,~
## $ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, ~
## $ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, ~
## $ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N~
## $ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, ~
## $ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N~
## $ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N~
## $ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, ~
str(rawdata1)
## 'data.frame': 735 obs. of 32 variables:
## $ SwollenLymphNodes: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 2 1 ...
## ..- attr(*, "label")= chr "Swollen Lymph Nodes"
## $ ChestCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## ..- attr(*, "label")= chr "Chest Congestion"
## $ ChillsSweats : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 2 2 2 1 ...
## ..- attr(*, "label")= chr "Chills/Sweats"
## $ NasalCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## ..- attr(*, "label")= chr "Nasal Congestion"
## $ CoughYN : Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Cough"
## $ Sneeze : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 2 1 2 1 1 ...
## ..- attr(*, "label")= chr "Sneeze"
## $ Fatigue : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Fatigue"
## $ SubjectiveFever : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 1 ...
## ..- attr(*, "label")= chr "Subjective Fever"
## $ Headache : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 2 2 2 ...
## ..- attr(*, "label")= chr "Headache"
## $ Weakness : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 3 3 2 4 3 3 ...
## $ WeaknessYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Weakness"
## $ CoughIntensity : Factor w/ 4 levels "None","Mild",..: 4 4 2 3 1 3 4 3 3 3 ...
## ..- attr(*, "label")= chr "Cough Severity"
## $ CoughYN2 : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 2 2 2 2 ...
## $ Myalgia : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 2 3 2 4 3 2 ...
## $ MyalgiaYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## ..- attr(*, "label")= chr "Myalgia"
## $ RunnyNose : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 1 2 2 2 2 ...
## ..- attr(*, "label")= chr "Runny Nose"
## $ AbPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Abdominal Pain"
## $ ChestPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 2 1 1 1 ...
## ..- attr(*, "label")= chr "Chest Pain"
## $ Diarrhea : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 1 1 ...
## $ EyePn : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Eye Pain"
## $ Insomnia : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 2 2 2 ...
## ..- attr(*, "label")= chr "Sleeplessness"
## $ ItchyEye : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Itchy Eyes"
## $ Nausea : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 1 2 2 ...
## $ EarPn : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Ear Pain"
## $ Hearing : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Loss of Hearing"
## $ Pharyngitis : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 1 1 ...
## ..- attr(*, "label")= chr "Sore Throat"
## $ Breathless : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 1 1 1 2 ...
## ..- attr(*, "label")= chr "Breathlessness"
## $ ToothPn : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 1 ...
## ..- attr(*, "label")= chr "Tooth Pain"
## $ Vision : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Blurred Vision"
## $ Vomit : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## ..- attr(*, "label")= chr "Vomiting"
## $ Wheeze : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 1 1 ...
## ..- attr(*, "label")= chr "Wheezing"
## $ BodyTemp : num 98.3 100.4 100.8 98.8 100.5 ...
summary(rawdata1)
## SwollenLymphNodes ChestCongestion ChillsSweats NasalCongestion CoughYN
## No :421 No :326 No :131 No :170 No : 75
## Yes:314 Yes:409 Yes:604 Yes:565 Yes:660
##
##
##
##
##
## Sneeze Fatigue SubjectiveFever Headache Weakness WeaknessYN
## No :340 No : 64 No :230 No :115 None : 49 No : 49
## Yes:395 Yes:671 Yes:505 Yes:620 Mild :224 Yes:686
## Moderate:341
## Severe :121
##
##
##
## CoughIntensity CoughYN2 Myalgia MyalgiaYN RunnyNose AbPain
## None : 47 No : 47 None : 79 No : 79 No :211 No :642
## Mild :156 Yes:688 Mild :214 Yes:656 Yes:524 Yes: 93
## Moderate:360 Moderate:327
## Severe :172 Severe :115
##
##
##
## ChestPain Diarrhea EyePn Insomnia ItchyEye Nausea EarPn
## No :501 No :636 No :622 No :316 No :553 No :477 No :573
## Yes:234 Yes: 99 Yes:113 Yes:419 Yes:182 Yes:258 Yes:162
##
##
##
##
##
## Hearing Pharyngitis Breathless ToothPn Vision Vomit Wheeze
## No :705 No :121 No :438 No :569 No :716 No :656 No :514
## Yes: 30 Yes:614 Yes:297 Yes:166 Yes: 19 Yes: 79 Yes:221
##
##
##
##
##
## BodyTemp
## Min. : 97.20
## 1st Qu.: 98.20
## Median : 98.50
## Mean : 98.94
## 3rd Qu.: 99.30
## Max. :103.10
## NA's :5
#the data set is not quite like the assignment I am 5 over in observations
#noted that BodyTemp has 5 NA's, need to remove
<-rawdata1%>%
rawdata2na.omit() # omit NAs in the data set
#take a look at cleaned data to assignment specs (760 obv. of 32 variables)
glimpse(rawdata2)
## Rows: 730
## Columns: 32
## $ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y~
## $ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, ~
## $ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, ~
## $ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, ~
## $ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes~
## $ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes~
## $ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi~
## $ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve~
## $ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes~
## $ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, ~
## $ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No~
## $ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N~
## $ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, ~
## $ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,~
## $ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye~
## $ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y~
## $ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,~
## $ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y~
## $ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y~
## $ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,~
## $ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, ~
## $ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, ~
## $ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N~
## $ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, ~
## $ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N~
## $ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N~
## $ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, ~
str(rawdata2)
## 'data.frame': 730 obs. of 32 variables:
## $ SwollenLymphNodes: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 2 1 ...
## $ ChestCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## $ ChillsSweats : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 2 2 2 1 ...
## $ NasalCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## $ CoughYN : Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 2 2 2 2 ...
## $ Sneeze : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 2 1 2 1 1 ...
## $ Fatigue : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ SubjectiveFever : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 1 ...
## $ Headache : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 2 2 2 ...
## $ Weakness : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 3 3 2 4 3 3 ...
## $ WeaknessYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ CoughIntensity : Factor w/ 4 levels "None","Mild",..: 4 4 2 3 1 3 4 3 3 3 ...
## $ CoughYN2 : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 2 2 2 2 ...
## $ Myalgia : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 2 3 2 4 3 2 ...
## $ MyalgiaYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ RunnyNose : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 1 2 2 2 2 ...
## $ AbPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ ChestPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 2 1 1 1 ...
## $ Diarrhea : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 1 1 ...
## $ EyePn : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## $ Insomnia : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 2 2 2 ...
## $ ItchyEye : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Nausea : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 1 2 2 ...
## $ EarPn : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 1 1 1 1 1 ...
## $ Hearing : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ Pharyngitis : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 1 1 ...
## $ Breathless : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 1 1 1 2 ...
## $ ToothPn : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 1 ...
## $ Vision : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Vomit : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## $ Wheeze : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 1 1 ...
## $ BodyTemp : num 98.3 100.4 100.8 98.8 100.5 ...
## - attr(*, "na.action")= 'omit' Named int [1:5] 133 243 363 577 585
## ..- attr(*, "names")= chr [1:5] "133" "243" "363" "577" ...
summary(rawdata2)
## SwollenLymphNodes ChestCongestion ChillsSweats NasalCongestion CoughYN
## No :418 No :323 No :130 No :167 No : 75
## Yes:312 Yes:407 Yes:600 Yes:563 Yes:655
##
##
##
##
## Sneeze Fatigue SubjectiveFever Headache Weakness WeaknessYN
## No :339 No : 64 No :230 No :115 None : 49 No : 49
## Yes:391 Yes:666 Yes:500 Yes:615 Mild :223 Yes:681
## Moderate:338
## Severe :120
##
##
## CoughIntensity CoughYN2 Myalgia MyalgiaYN RunnyNose AbPain
## None : 47 No : 47 None : 79 No : 79 No :211 No :639
## Mild :154 Yes:683 Mild :213 Yes:651 Yes:519 Yes: 91
## Moderate:357 Moderate:325
## Severe :172 Severe :113
##
##
## ChestPain Diarrhea EyePn Insomnia ItchyEye Nausea EarPn
## No :497 No :631 No :617 No :315 No :551 No :475 No :568
## Yes:233 Yes: 99 Yes:113 Yes:415 Yes:179 Yes:255 Yes:162
##
##
##
##
## Hearing Pharyngitis Breathless ToothPn Vision Vomit Wheeze
## No :700 No :119 No :436 No :565 No :711 No :652 No :510
## Yes: 30 Yes:611 Yes:294 Yes:165 Yes: 19 Yes: 78 Yes:220
##
##
##
##
## BodyTemp
## Min. : 97.20
## 1st Qu.: 98.20
## Median : 98.50
## Mean : 98.94
## 3rd Qu.: 99.30
## Max. :103.10
# location to save file
<- here::here("data","processed_data","processeddata.rds")
save_data_location
saveRDS(rawdata2, file = save_data_location)
# Data Exploration
First loading all the default settings and preliminary programs.
Path to Processed Data and loading
<- here::here("data","processed_data","processeddata.rds")
data_location <- readRDS(data_location) data
View Summary of new Data, subsetted from the original raw data.
str(data) #check structure of R object
## 'data.frame': 730 obs. of 32 variables:
## $ SwollenLymphNodes: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 2 1 ...
## $ ChestCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## $ ChillsSweats : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 2 2 2 1 ...
## $ NasalCongestion : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 2 2 2 ...
## $ CoughYN : Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 2 2 2 2 ...
## $ Sneeze : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 2 1 2 1 1 ...
## $ Fatigue : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ SubjectiveFever : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 1 ...
## $ Headache : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 2 2 2 ...
## $ Weakness : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 3 3 2 4 3 3 ...
## $ WeaknessYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ CoughIntensity : Factor w/ 4 levels "None","Mild",..: 4 4 2 3 1 3 4 3 3 3 ...
## $ CoughYN2 : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 2 2 2 2 ...
## $ Myalgia : Factor w/ 4 levels "None","Mild",..: 2 4 4 4 2 3 2 4 3 2 ...
## $ MyalgiaYN : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ RunnyNose : Factor w/ 2 levels "No","Yes": 1 1 2 2 1 1 2 2 2 2 ...
## $ AbPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ ChestPain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 2 1 1 1 ...
## $ Diarrhea : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 1 1 ...
## $ EyePn : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## $ Insomnia : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 2 2 2 ...
## $ ItchyEye : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Nausea : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 1 2 2 ...
## $ EarPn : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 1 1 1 1 1 ...
## $ Hearing : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ Pharyngitis : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 1 1 ...
## $ Breathless : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 1 1 1 2 ...
## $ ToothPn : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 1 ...
## $ Vision : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Vomit : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## $ Wheeze : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 1 1 ...
## $ BodyTemp : num 98.3 100.4 100.8 98.8 100.5 ...
## - attr(*, "na.action")= 'omit' Named int [1:5] 133 243 363 577 585
## ..- attr(*, "names")= chr [1:5] "133" "243" "363" "577" ...
glimpse(data) #details of the data up to 4 levels, empty values = ""
## Rows: 730
## Columns: 32
## $ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y~
## $ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, ~
## $ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
## $ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, ~
## $ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, ~
## $ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes~
## $ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes~
## $ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi~
## $ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve~
## $ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes~
## $ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, ~
## $ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
## $ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No~
## $ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N~
## $ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, ~
## $ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,~
## $ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye~
## $ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y~
## $ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,~
## $ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y~
## $ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y~
## $ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,~
## $ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, ~
## $ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, ~
## $ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N~
## $ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, ~
## $ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N~
## $ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N~
## $ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, ~
summary(data)
## SwollenLymphNodes ChestCongestion ChillsSweats NasalCongestion CoughYN
## No :418 No :323 No :130 No :167 No : 75
## Yes:312 Yes:407 Yes:600 Yes:563 Yes:655
##
##
##
##
## Sneeze Fatigue SubjectiveFever Headache Weakness WeaknessYN
## No :339 No : 64 No :230 No :115 None : 49 No : 49
## Yes:391 Yes:666 Yes:500 Yes:615 Mild :223 Yes:681
## Moderate:338
## Severe :120
##
##
## CoughIntensity CoughYN2 Myalgia MyalgiaYN RunnyNose AbPain
## None : 47 No : 47 None : 79 No : 79 No :211 No :639
## Mild :154 Yes:683 Mild :213 Yes:651 Yes:519 Yes: 91
## Moderate:357 Moderate:325
## Severe :172 Severe :113
##
##
## ChestPain Diarrhea EyePn Insomnia ItchyEye Nausea EarPn
## No :497 No :631 No :617 No :315 No :551 No :475 No :568
## Yes:233 Yes: 99 Yes:113 Yes:415 Yes:179 Yes:255 Yes:162
##
##
##
##
## Hearing Pharyngitis Breathless ToothPn Vision Vomit Wheeze
## No :700 No :119 No :436 No :565 No :711 No :652 No :510
## Yes: 30 Yes:611 Yes:294 Yes:165 Yes: 19 Yes: 78 Yes:220
##
##
##
##
## BodyTemp
## Min. : 97.20
## 1st Qu.: 98.20
## Median : 98.50
## Mean : 98.94
## 3rd Qu.: 99.30
## Max. :103.10
Important variables: Outcome= Body temperature and our main categorical outcome is Nausea. We want to see if the other symptoms are correlated with (predict) those outcomes.
Limited selection of other symptoms (Selection of 5, based on vaugely off of the PeptoBismol jingle ):
Reduce data to limited variables and BodyTemp. Preview to confirm
<-data%>% #make into a new data frame
SelectData#select limited variables and BodyTemp
select(Nausea,
CoughYN,
Pharyngitis,
Vomit,
Diarrhea,%>%
BodyTemp)#filter for only positive Nausea data, per instruction of prompt as it being the main categorical outcome
filter(Nausea=="Yes")
summary(SelectData)
## Nausea CoughYN Pharyngitis Vomit Diarrhea BodyTemp
## No : 0 No : 29 No : 39 No :191 No :196 Min. : 97.40
## Yes:255 Yes:226 Yes:216 Yes: 64 Yes: 59 1st Qu.: 98.20
## Median : 98.60
## Mean : 98.99
## 3rd Qu.: 99.30
## Max. :103.00
##Summary Table
%>%
SelectData#grouping by Nausea, the main categorical outcome
group_by(Nausea,
CoughYN,
Pharyngitis,
Vomit, %>%
Diarrhea)#summarise the data, get observances and mean temp, calculate the standard deviation
summarise(
n=n(), #to count the number of observances
mean=mean(BodyTemp),#calculate mean body temp
sd=sd(BodyTemp) #calculate sd (Standard Deviation) of Body temp
%>%
)#arrange by highest number of n
arrange(desc(n)) %>%
print()
## `summarise()` has grouped output by 'Nausea', 'CoughYN', 'Pharyngitis', 'Vomit'. You can override using the `.groups` argument.
## # A tibble: 13 x 8
## # Groups: Nausea, CoughYN, Pharyngitis, Vomit [8]
## Nausea CoughYN Pharyngitis Vomit Diarrhea n mean sd
## <fct> <fct> <fct> <fct> <fct> <int> <dbl> <dbl>
## 1 Yes Yes Yes No No 109 99.1 1.33
## 2 Yes Yes Yes Yes No 37 99.1 1.05
## 3 Yes Yes Yes No Yes 32 98.8 1.24
## 4 Yes Yes No No No 22 98.5 0.615
## 5 Yes No Yes No No 21 98.9 1.18
## 6 Yes Yes Yes Yes Yes 14 99.1 1.45
## 7 Yes Yes No Yes Yes 5 99.1 1.46
## 8 Yes Yes No No Yes 4 98.7 0.492
## 9 Yes No Yes Yes No 3 99.6 1.95
## 10 Yes Yes No Yes No 3 98.4 0.436
## 11 Yes No No No Yes 2 98.7 0
## 12 Yes No No Yes Yes 2 98.2 0.212
## 13 Yes No No No No 1 98.7 NA
Most cases with observed experience Nausea, Cough, Pharyngitis with a mean Body temperature of 99.1. Only once case where it was just Nausea and none of the other selected variables.
Save of temp and positive for nausea
<- here::here("data","processed_data","SelectData.rds")
save_data_location
saveRDS(SelectData, file = save_data_location)
Not satisfied with my version of summary table… so looking at other examples to see how to do this better. Found someone using this table1 program. Let’s see how it goes.
library(table1)# saw this used in someone else's code, trying it out
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ BodyTemp+
+
CoughYN+
Pharyngitis+
Vomit# the | splits it to be factor's col variables-- this case of Nausea-.
| Nausea,
Diarrhea #use prior data since selecteddata only shows the positive nausea hits
data=data,
overall="Total")
No (N=475) |
Yes (N=255) |
Total (N=730) |
|
---|---|---|---|
BodyTemp | |||
Mean (SD) | 98.9 (1.19) | 99.0 (1.21) | 98.9 (1.20) |
Median [Min, Max] | 98.5 [97.2, 103] | 98.6 [97.4, 103] | 98.5 [97.2, 103] |
CoughYN | |||
No | 46 (9.7%) | 29 (11.4%) | 75 (10.3%) |
Yes | 429 (90.3%) | 226 (88.6%) | 655 (89.7%) |
Pharyngitis | |||
No | 80 (16.8%) | 39 (15.3%) | 119 (16.3%) |
Yes | 395 (83.2%) | 216 (84.7%) | 611 (83.7%) |
Vomit | |||
No | 461 (97.1%) | 191 (74.9%) | 652 (89.3%) |
Yes | 14 (2.9%) | 64 (25.1%) | 78 (10.7%) |
Diarrhea | |||
No | 435 (91.6%) | 196 (76.9%) | 631 (86.4%) |
Yes | 40 (8.4%) | 59 (23.1%) | 99 (13.6%) |
This is a very nice table, Need to look into about putting in BodyTemp as the factors in this and not just Nausea
##Histogram/Density plot
Code here is copy pasted and changes in the data source and fill aspects are based on the different comparisons.
By Nausea
%>% #using original data to show Yes and No for Nausea
dataggplot(aes(x=BodyTemp, fill=Nausea)) +
geom_density(adjust=1.5,
alpha=.5)+
labs(title= "Body temperature of people experencing nausea")
By Vomit
%>%
SelectDataggplot(aes(x=BodyTemp, fill=CoughYN)) +
geom_density(adjust=1.5,
alpha=.5)+
labs(title= "Body temperature of people experiencing nausea with a cough")
By Cough
%>%
SelectDataggplot(aes(x=BodyTemp, fill=Pharyngitis)) +
geom_density(adjust=1.5,
alpha=.5)+
labs(title= "Body temperature of people experiencing nausea with pharyngitis")
%>%
SelectDataggplot(aes(x=BodyTemp, fill=Vomit)) +
geom_density(adjust=1.5,
alpha=.5)+
labs(title= "Body temperature of people experiencing nausea and vomitted")
%>%
SelectDataggplot(aes(x=BodyTemp, fill=Diarrhea)) +
geom_density(adjust=1.5,
alpha=.5)+
labs(title= "Body temperature of people experiencing nausea with diarrhea")
##Scatterplots and Boxplots
#Nausea positive
%>% #only positive with Nausea
SelectDataggplot(aes(y=BodyTemp))+
geom_violin(aes(x=Pharyngitis, color="Pharyngitis", alpha=.5))+
geom_violin(aes(x=Vomit, color="Vomit", alpha=.5))+
geom_violin(aes(x=Diarrhea, color="Diarrhea", alpha=.5))+
geom_jitter(aes(x=Pharyngitis, color="Pharyngitis"))+
geom_jitter(aes(x=Vomit, color="Vomit"))+
geom_jitter(aes(x=Diarrhea, color="Diarrhea"))+
labs(title="Body temperature Violin graph of symptoms that occured with Nausea")+
xlab("Symptom experienced with Nausea")
Need to reshape data to get the right output in ggplot I think I need to rotate the table to long form?
#Pivot Select Data
<-SelectData%>%
PSDatagroup_by(CoughYN,
Pharyngitis,
Vomit, %>%
Diarrhea)summarize(
Temp=mean(BodyTemp),#calculate mean body temp
sd=sd(BodyTemp))%>%
#really unsure if this was correct
pivot_longer(
cols = c(CoughYN, Pharyngitis,Vomit,Diarrhea),
names_to = "Symptoms",
values_to = "Presence" )
## `summarise()` has grouped output by 'CoughYN', 'Pharyngitis', 'Vomit'. You can override using the `.groups` argument.
glimpse(PSData)
## Rows: 52
## Columns: 4
## $ Temp <dbl> 98.70000, 98.70000, 98.70000, 98.70000, 98.70000, 98.70000, 9~
## $ sd <dbl> NA, NA, NA, NA, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0~
## $ Symptoms <chr> "CoughYN", "Pharyngitis", "Vomit", "Diarrhea", "CoughYN", "Ph~
## $ Presence <fct> No, No, No, No, No, No, No, Yes, No, No, Yes, Yes, No, Yes, N~
Save Pivoted Data of mean temp and positive for nausea
<- here::here("data","processed_data","PivotSelect.rds")
save_data_location
saveRDS(PSData, file = save_data_location)
%>%
PSDataggplot(aes(x=Symptoms, y=Temp))+
geom_boxplot(aes(color=Presence))+
labs(title="Body temperature boxplot of symptoms that occured with Nausea")+
xlab("Symptom experienced with Nausea")