library(psych)
library(MVN)
library("GPArotation")
library(MVN)
library(devtools)
## Loading required package: usethis
library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.15.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
library(psych)
library(biotools)
## Loading required package: MASS
## ---
## biotools version 4.2
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x dplyr::select()  masks MASS::select()
library(cluster)
library(factoextra)
library(knitr)
# Load the raw data from CSV.
hpind <- read.csv("hpindex2.csv", fill = TRUE)

# Keep columns 7 to 38 only; this removes the columns on demographics.
new_hp2 <- hpind[, 7:38]

# Descriptive names for the 32 retained items, in column order.
names(new_hp2) <- c(
  "happiness_level", "vitamin_intake", "enough_sleep",
  "physical_activities", "physical_appearance", "sleep_quality",
  "degree_prog_satisfaction", "class_anticipation", "class_participation",
  "degree_program_performance", "learnings", "university_satisfaction",
  "leisure_time", "procrastination", "hobbies", "multimedia",
  "pos_effect_social_med", "positive_outlook", "regrets",
  "sense_of_meaning_purpose", "optimism_of_future", "life_control",
  "rewarding_view_of_life", "happiness_choice", "family_relationship",
  "peer_relationship", "company_of_animals", "love_and_affection",
  "social_interaction", "extra_curricular", "campus_safety",
  "safety_going_home"
)

# Show the first rows as a formatted table.
kable(head(new_hp2))
happiness_level vitamin_intake enough_sleep physical_activities physical_appearance sleep_quality degree_prog_satisfaction class_anticipation class_participation degree_program_performance learnings university_satisfaction leisure_time procrastination hobbies multimedia pos_effect_social_med positive_outlook regrets sense_of_meaning_purpose optimism_of_future life_control rewarding_view_of_life happiness_choice family_relationship peer_relationship company_of_animals love_and_affection social_interaction extra_curricular campus_safety safety_going_home
3 1 2 3 3 2 4 3 3 3 3 4 2 3 3 3 3 3 2 3 3 3 3 4 3 4 4 4 3 3 3 3
3 1 3 2 3 3 3 2 2 3 3 4 2 2 2 4 3 2 2 2 2 2 3 3 4 3 3 3 2 2 3 3
3 1 4 3 2 2 4 2 2 3 3 3 2 1 3 3 2 3 1 3 3 3 3 3 3 3 3 3 2 1 3 3
3 1 3 1 3 3 2 2 2 3 2 3 2 2 3 3 3 3 2 3 3 2 3 3 3 3 4 4 2 1 3 2
1 1 2 1 2 2 1 1 2 2 2 2 1 1 4 4 3 2 1 2 2 2 2 3 3 3 3 3 2 2 3 3
3 3 1 2 3 1 3 2 3 2 2 3 2 2 2 4 3 3 2 4 4 2 4 3 4 3 1 2 3 4 2 2
# Descriptive statistics for every item, rendered as a table.
kable(x = describe(x = new_hp2))
vars n mean sd median trimmed mad min max range skew kurtosis se
happiness_level 1 159 2.805031 0.6606287 3 2.806202 0.0000 1 4 3 -0.5560081 0.6706330 0.0523913
vitamin_intake 2 159 2.012579 0.9611927 2 1.922481 1.4826 1 4 3 0.4850720 -0.8896365 0.0762275
enough_sleep 3 159 2.163522 0.8181038 2 2.139535 1.4826 1 4 3 0.2461292 -0.5443294 0.0648799
physical_activities 4 159 2.245283 0.7353375 2 2.271318 1.4826 1 4 3 -0.0350027 -0.5530330 0.0583161
physical_appearance 5 159 2.584906 0.6871505 3 2.620155 0.0000 1 4 3 -0.4211245 -0.0639351 0.0544946
sleep_quality 6 159 2.157233 0.7508189 2 2.131783 0.0000 1 4 3 0.4512488 0.0990615 0.0595438
degree_prog_satisfaction 7 159 2.943396 0.7895302 3 2.992248 0.0000 1 4 3 -0.5143077 -0.0219762 0.0626138
class_anticipation 8 159 2.364780 0.7413756 2 2.387597 1.4826 1 4 3 0.0501120 -0.3536916 0.0587949
class_participation 9 159 2.540881 0.6819753 3 2.558139 1.4826 1 4 3 -0.2061818 -0.2116378 0.0540842
degree_program_performance 10 159 2.477987 0.7699784 3 2.503876 1.4826 1 4 3 -0.1334665 -0.4267370 0.0610633
learnings 11 159 3.150943 0.6674027 3 3.201550 0.0000 1 4 3 -0.4319223 0.1896944 0.0529285
university_satisfaction 12 159 3.081761 0.6747578 3 3.131783 0.0000 1 4 3 -0.5881969 0.8684358 0.0535118
leisure_time 13 159 2.119497 0.8063418 2 2.100775 1.4826 1 4 3 0.2147461 -0.6254300 0.0639471
procrastination 14 159 1.823899 0.8384328 2 1.728682 1.4826 1 4 3 0.7848008 -0.0602042 0.0664920
hobbies 15 159 3.056604 0.6773485 3 3.093023 0.0000 1 4 3 -0.4318484 0.3593880 0.0537172
multimedia 16 159 3.433962 0.5684567 3 3.457364 0.0000 1 4 3 -0.5637049 0.5083698 0.0450816
pos_effect_social_med 17 159 2.830189 0.7134117 3 2.844961 0.0000 1 4 3 -0.4720982 0.3127539 0.0565772
positive_outlook 18 159 2.735849 0.7750282 3 2.728682 1.4826 1 4 3 -0.0791834 -0.5020721 0.0614637
regrets 19 159 1.918239 0.7288673 2 1.868217 0.0000 1 4 3 0.5144907 0.1195500 0.0578029
sense_of_meaning_purpose 20 159 2.597484 0.7885213 3 2.612403 1.4826 1 4 3 -0.2437671 -0.3688615 0.0625338
optimism_of_future 21 159 2.723270 0.8412765 3 2.751938 1.4826 1 4 3 -0.2091003 -0.5669791 0.0667176
life_control 22 159 2.484277 0.7618710 3 2.511628 1.4826 1 4 3 -0.1180194 -0.4000533 0.0604203
rewarding_view_of_life 23 159 2.748428 0.7792284 3 2.759690 0.0000 1 4 3 -0.2533972 -0.3090070 0.0617968
happiness_choice 24 159 2.893082 0.8236319 3 2.945736 0.0000 1 4 3 -0.4775728 -0.2190164 0.0653183
family_relationship 25 159 3.088050 0.8142998 3 3.170543 1.4826 1 4 3 -0.7196450 0.1308747 0.0645782
peer_relationship 26 159 3.194969 0.5566399 3 3.209302 0.0000 1 4 3 -0.1755083 0.8885315 0.0441444
company_of_animals 27 159 3.119497 0.9096166 3 3.240310 1.4826 1 4 3 -0.8367982 -0.1186993 0.0721373
love_and_affection 28 159 2.962264 0.7537820 3 2.984496 0.0000 1 4 3 -0.2911746 -0.3848245 0.0597788
social_interaction 29 159 2.704403 0.7339287 3 2.689923 0.0000 1 4 3 -0.1498792 -0.2681676 0.0582043
extra_curricular 30 159 2.201258 0.7695130 2 2.186046 0.0000 1 4 3 0.3066198 -0.2177226 0.0610264
campus_safety 31 159 2.515723 0.6642341 3 2.573643 0.0000 1 4 3 -0.5078377 -0.2110440 0.0526772
safety_going_home 32 159 2.433962 0.7674411 2 2.465116 1.4826 1 4 3 -0.1550558 -0.4697048 0.0608620

A. Factor Analysis

# Correlation matrix of the items. The object deliberately keeps the name
# `cor` because the residual-matrix computations further down read it by that
# name; calls such as `cor(...)` still resolve to the function.
cor <- cor(new_hp2)
# lowerCor(new_hp2)

# Correlation plot with the coefficients printed in the cells; the items are
# labelled 1 to 32. `TRUE` is spelled out because `T` can be reassigned.
corPlot(new_hp2, numbers = TRUE, MAR = 0.5, labels = 1:32)

The correlation matrix above shows that most of the variables are only weakly correlated with one another. To evaluate the ‘factorability’ of the data, Bartlett’s test of sphericity and the Kaiser-Meyer-Olkin (KMO) measure of sampling adequacy were performed.

A.1. Factorability Tests

Bartlett’s test of Sphericity tests the null hypothesis that the correlation matrix is an identity matrix, which means that the variables are unrelated and not ideal for factor analysis.

# Bartlett's test of sphericity. Raw data are supplied rather than a square
# correlation matrix, so the correlations are derived from the data.
cortest.bartlett(R = new_hp2)
## R was not square, finding R from data
## $chisq
## [1] 1810.035
## 
## $p.value
## [1] 1.167642e-148
## 
## $df
## [1] 496

Since the p-value is less than 0.05, we reject the null hypothesis that the correlation matrix is an identity matrix; that is, the variables are not unrelated.

# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per item).
KMO(r = new_hp2)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = new_hp2)
## Overall MSA =  0.8
## MSA for each item = 
##            happiness_level             vitamin_intake 
##                       0.90                       0.82 
##               enough_sleep        physical_activities 
##                       0.64                       0.77 
##        physical_appearance              sleep_quality 
##                       0.89                       0.75 
##   degree_prog_satisfaction         class_anticipation 
##                       0.75                       0.84 
##        class_participation degree_program_performance 
##                       0.79                       0.77 
##                  learnings    university_satisfaction 
##                       0.70                       0.72 
##               leisure_time            procrastination 
##                       0.85                       0.68 
##                    hobbies                 multimedia 
##                       0.73                       0.70 
##      pos_effect_social_med           positive_outlook 
##                       0.77                       0.92 
##                    regrets   sense_of_meaning_purpose 
##                       0.73                       0.86 
##         optimism_of_future               life_control 
##                       0.89                       0.87 
##     rewarding_view_of_life           happiness_choice 
##                       0.90                       0.88 
##        family_relationship          peer_relationship 
##                       0.78                       0.86 
##         company_of_animals         love_and_affection 
##                       0.51                       0.85 
##         social_interaction           extra_curricular 
##                       0.85                       0.76 
##              campus_safety          safety_going_home 
##                       0.58                       0.57

The overall Measure of Sampling Adequacy (MSA) for the set of variables is 0.8, indicating that the correlations between pairs of variables can be explained by the other variables. Moreover, the individual MSAs are all above 0.5; hence, factor analysis is appropriate for the data.

A.2. Factor Analysis Proper

To aid in the selection of the appropriate method to use for factor extraction, the variables were tested for multivariate normality.

# Multivariate normality check on all rows (no subsetting), with a Q-Q plot
# as the multivariate diagnostic plot.
mvn(data = new_hp2, subset = NULL, multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ p value MVN
## 1 Henze-Zirkler 1.000335       0  NO
## 
## $univariateNormality
##                Test                   Variable Statistic   p value Normality
## 1  Anderson-Darling      happiness_level         18.0345  <0.001      NO    
## 2  Anderson-Darling       vitamin_intake          9.9494  <0.001      NO    
## 3  Anderson-Darling        enough_sleep           9.8890  <0.001      NO    
## 4  Anderson-Darling    physical_activities       12.1844  <0.001      NO    
## 5  Anderson-Darling    physical_appearance       15.6425  <0.001      NO    
## 6  Anderson-Darling       sleep_quality          13.1187  <0.001      NO    
## 7  Anderson-Darling  degree_prog_satisfaction    11.6865  <0.001      NO    
## 8  Anderson-Darling     class_anticipation       12.0248  <0.001      NO    
## 9  Anderson-Darling    class_participation       14.7726  <0.001      NO    
## 10 Anderson-Darling degree_program_performance   11.3196  <0.001      NO    
## 11 Anderson-Darling         learnings            15.4846  <0.001      NO    
## 12 Anderson-Darling  university_satisfaction     16.4595  <0.001      NO    
## 13 Anderson-Darling        leisure_time          10.1749  <0.001      NO    
## 14 Anderson-Darling      procrastination         12.0124  <0.001      NO    
## 15 Anderson-Darling          hobbies             15.4015  <0.001      NO    
## 16 Anderson-Darling         multimedia           22.0239  <0.001      NO    
## 17 Anderson-Darling   pos_effect_social_med      14.8656  <0.001      NO    
## 18 Anderson-Darling      positive_outlook        10.8890  <0.001      NO    
## 19 Anderson-Darling          regrets             13.2174  <0.001      NO    
## 20 Anderson-Darling  sense_of_meaning_purpose    11.1532  <0.001      NO    
## 21 Anderson-Darling     optimism_of_future        9.3379  <0.001      NO    
## 22 Anderson-Darling        life_control          11.5135  <0.001      NO    
## 23 Anderson-Darling   rewarding_view_of_life     11.1536  <0.001      NO    
## 24 Anderson-Darling      happiness_choice        10.6861  <0.001      NO    
## 25 Anderson-Darling    family_relationship       11.7514  <0.001      NO    
## 26 Anderson-Darling     peer_relationship        22.9795  <0.001      NO    
## 27 Anderson-Darling     company_of_animals       11.6386  <0.001      NO    
## 28 Anderson-Darling     love_and_affection       11.7200  <0.001      NO    
## 29 Anderson-Darling     social_interaction       12.4321  <0.001      NO    
## 30 Anderson-Darling      extra_curricular        11.6117  <0.001      NO    
## 31 Anderson-Darling       campus_safety          16.9183  <0.001      NO    
## 32 Anderson-Darling     safety_going_home        11.5048  <0.001      NO    
## 
## $Descriptives
##                              n     Mean   Std.Dev Median Min Max 25th 75th
## happiness_level            159 2.805031 0.6606287      3   1   4  2.0    3
## vitamin_intake             159 2.012579 0.9611927      2   1   4  1.0    3
## enough_sleep               159 2.163522 0.8181038      2   1   4  2.0    3
## physical_activities        159 2.245283 0.7353375      2   1   4  2.0    3
## physical_appearance        159 2.584906 0.6871505      3   1   4  2.0    3
## sleep_quality              159 2.157233 0.7508189      2   1   4  2.0    3
## degree_prog_satisfaction   159 2.943396 0.7895302      3   1   4  3.0    3
## class_anticipation         159 2.364780 0.7413756      2   1   4  2.0    3
## class_participation        159 2.540881 0.6819753      3   1   4  2.0    3
## degree_program_performance 159 2.477987 0.7699784      3   1   4  2.0    3
## learnings                  159 3.150943 0.6674027      3   1   4  3.0    4
## university_satisfaction    159 3.081761 0.6747578      3   1   4  3.0    3
## leisure_time               159 2.119497 0.8063418      2   1   4  2.0    3
## procrastination            159 1.823899 0.8384328      2   1   4  1.0    2
## hobbies                    159 3.056604 0.6773485      3   1   4  3.0    3
## multimedia                 159 3.433962 0.5684567      3   1   4  3.0    4
## pos_effect_social_med      159 2.830189 0.7134117      3   1   4  2.0    3
## positive_outlook           159 2.735849 0.7750282      3   1   4  2.0    3
## regrets                    159 1.918239 0.7288673      2   1   4  1.0    2
## sense_of_meaning_purpose   159 2.597484 0.7885213      3   1   4  2.0    3
## optimism_of_future         159 2.723270 0.8412765      3   1   4  2.0    3
## life_control               159 2.484277 0.7618710      3   1   4  2.0    3
## rewarding_view_of_life     159 2.748428 0.7792284      3   1   4  2.0    3
## happiness_choice           159 2.893082 0.8236319      3   1   4  2.0    3
## family_relationship        159 3.088050 0.8142998      3   1   4  3.0    4
## peer_relationship          159 3.194969 0.5566399      3   1   4  3.0    4
## company_of_animals         159 3.119497 0.9096166      3   1   4  3.0    4
## love_and_affection         159 2.962264 0.7537820      3   1   4  2.5    3
## social_interaction         159 2.704403 0.7339287      3   1   4  2.0    3
## extra_curricular           159 2.201258 0.7695130      2   1   4  2.0    3
## campus_safety              159 2.515723 0.6642341      3   1   4  2.0    3
## safety_going_home          159 2.433962 0.7674411      2   1   4  2.0    3
##                                   Skew    Kurtosis
## happiness_level            -0.55600814  0.67063299
## vitamin_intake              0.48507205 -0.88963653
## enough_sleep                0.24612915 -0.54432940
## physical_activities        -0.03500273 -0.55303301
## physical_appearance        -0.42112447 -0.06393509
## sleep_quality               0.45124884  0.09906149
## degree_prog_satisfaction   -0.51430775 -0.02197619
## class_anticipation          0.05011200 -0.35369165
## class_participation        -0.20618177 -0.21163785
## degree_program_performance -0.13346652 -0.42673697
## learnings                  -0.43192227  0.18969437
## university_satisfaction    -0.58819686  0.86843580
## leisure_time                0.21474614 -0.62543004
## procrastination             0.78480080 -0.06020418
## hobbies                    -0.43184835  0.35938804
## multimedia                 -0.56370494  0.50836981
## pos_effect_social_med      -0.47209818  0.31275387
## positive_outlook           -0.07918336 -0.50207208
## regrets                     0.51449074  0.11954999
## sense_of_meaning_purpose   -0.24376706 -0.36886146
## optimism_of_future         -0.20910034 -0.56697913
## life_control               -0.11801935 -0.40005335
## rewarding_view_of_life     -0.25339725 -0.30900700
## happiness_choice           -0.47757283 -0.21901640
## family_relationship        -0.71964504  0.13087472
## peer_relationship          -0.17550834  0.88853149
## company_of_animals         -0.83679823 -0.11869927
## love_and_affection         -0.29117462 -0.38482445
## social_interaction         -0.14987918 -0.26816762
## extra_curricular            0.30661978 -0.21772263
## campus_safety              -0.50783774 -0.21104397
## safety_going_home          -0.15505581 -0.46970478
# Run the Henze-Zirkler and Royston multivariate normality tests on the same
# items and keep both result objects.
mvn_hz <- mvn(data = new_hp2, mvnTest = "hz")
mvn_royston <- mvn(data = new_hp2, mvnTest = "royston")

# Henze-Zirkler result table.
print(mvn_hz[["multivariateNormality"]])
##            Test       HZ p value MVN
## 1 Henze-Zirkler 1.000335       0  NO
# Royston result table.
print(mvn_royston[["multivariateNormality"]])
##      Test        H       p value MVN
## 1 Royston 1634.643 1.482197e-323  NO

Both tests reject the assumption of multivariate normality. Because the maximum likelihood solution relies on that assumption, it is not applicable here. Thus, the principal components solution will be used to extract the factors.

A.2.a. Principal Components Solution

# Replace the descriptive item names with their positions, "1" to "32"
# (presumably to keep loading tables and diagrams compact).
names(new_hp2) <- as.character(1:32)

# Standardize the data: center each item and scale it to unit variance.
scaled.hp1 <- scale(new_hp2, center = TRUE, scale = TRUE)

# Parallel analysis on the standardized data to determine the number of
# factors; fa = "fa" requests the factor solution only.
fa.parallel(x = scaled.hp1, fa = "fa")

## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA

Based on the parallel analysis scree plots, it is possible to extract 5-6 factors.

The PC solution with no rotation is considered first.

# Unrotated principal-components solution with six components, extracted
# from the standardized data.
pcsolution1 <- principal(r = scaled.hp1, nfactors = 6, rotate = "none")
pcsolution1
## Principal Components Analysis
## Call: principal(r = scaled.hp1, nfactors = 6, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     PC1   PC2   PC3   PC4   PC5   PC6   h2   u2 com
## 1  0.62 -0.08 -0.01 -0.35  0.16  0.21 0.59 0.41 2.0
## 2  0.36  0.28  0.07 -0.09 -0.16 -0.20 0.28 0.72 3.3
## 3  0.28 -0.03  0.67 -0.11  0.22  0.33 0.70 0.30 2.2
## 4  0.33  0.23 -0.04  0.18  0.16  0.13 0.24 0.76 3.4
## 5  0.51  0.09  0.16 -0.18  0.01 -0.06 0.33 0.67 1.6
## 6  0.42  0.07  0.64 -0.12  0.03  0.20 0.64 0.36 2.1
## 7  0.40  0.49  0.17  0.28  0.08 -0.25 0.58 0.42 3.5
## 8  0.55  0.37  0.24  0.18 -0.10  0.12 0.55 0.45 2.6
## 9  0.41  0.40 -0.01  0.25 -0.13 -0.05 0.41 0.59 2.9
## 10 0.46  0.41  0.32  0.28 -0.01 -0.19 0.60 0.40 3.9
## 11 0.40  0.38  0.18  0.30 -0.40 -0.11 0.60 0.40 4.4
## 12 0.48  0.04 -0.17 -0.01  0.04 -0.07 0.27 0.73 1.3
## 13 0.49 -0.40  0.11 -0.06  0.22 -0.06 0.46 0.54 2.5
## 14 0.23  0.51 -0.06 -0.26 -0.06  0.30 0.47 0.53 2.7
## 15 0.35 -0.40  0.29  0.13 -0.05 -0.17 0.42 0.58 3.5
## 16 0.22 -0.56  0.15  0.13 -0.04 -0.39 0.55 0.45 2.4
## 17 0.33 -0.44  0.11  0.05 -0.43 -0.04 0.50 0.50 3.0
## 18 0.63 -0.22  0.10 -0.19  0.08 -0.10 0.51 0.49 1.6
## 19 0.27  0.50 -0.23 -0.28  0.08  0.09 0.47 0.53 2.9
## 20 0.63  0.01 -0.23 -0.16  0.14 -0.34 0.62 0.38 2.1
## 21 0.71  0.06 -0.26 -0.23  0.18 -0.24 0.72 0.28 1.9
## 22 0.70  0.05 -0.16 -0.18  0.19 -0.30 0.68 0.32 1.8
## 23 0.75 -0.11 -0.04 -0.27 -0.02  0.02 0.65 0.35 1.3
## 24 0.55 -0.18  0.01 -0.23 -0.20  0.02 0.42 0.58 1.9
## 25 0.51 -0.29  0.00  0.07 -0.38  0.15 0.52 0.48 2.8
## 26 0.51 -0.24 -0.20  0.22 -0.39  0.24 0.62 0.38 3.8
## 27 0.15  0.00 -0.09  0.53 -0.04 -0.12 0.33 0.67 1.3
## 28 0.71 -0.23 -0.18  0.12 -0.01  0.33 0.72 0.28 1.9
## 29 0.51 -0.04 -0.40  0.18 -0.24  0.29 0.59 0.41 3.3
## 30 0.34  0.10 -0.46  0.13  0.06  0.09 0.36 0.64 2.3
## 31 0.35 -0.19  0.00  0.38  0.58  0.30 0.73 0.27 3.4
## 32 0.28 -0.16 -0.10  0.53  0.49  0.01 0.64 0.36 2.8
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6
## SS loadings           7.33 2.71 1.93 1.85 1.64 1.33
## Proportion Var        0.23 0.08 0.06 0.06 0.05 0.04
## Cumulative Var        0.23 0.31 0.37 0.43 0.48 0.52
## Proportion Explained  0.44 0.16 0.11 0.11 0.10 0.08
## Cumulative Proportion 0.44 0.60 0.71 0.82 0.92 1.00
## 
## Mean item complexity =  2.6
## Test of the hypothesis that 6 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  516.18  with prob <  1.6e-11 
## 
## Fit based upon off diagonal values = 0.94
# Residual matrix of the unrotated solution. It measures how good the model
# is: ideally the residual matrix is close to a null matrix.

# Correlations reproduced by the six retained components (L L').
L <- pcsolution1[["loadings"]]
llT <- L %*% t(L)

# Part of the observed correlations not reproduced by the components; only
# its diagonal is carried over into `m`.
uniqueness <- cor - llT
m <- matrix(0, nrow = 32, ncol = 32)
diag(m) <- diag(uniqueness)

# Observed correlations minus (reproduced correlations + diagonal term).
resmatrix <- cor - (llT + m)

# head(resmatrix)
hist(resmatrix)

# Diagram of the item-to-component assignments, then the full summary.
fa.diagram(fa.results = pcsolution1)

pcsolution1
## Principal Components Analysis
## Call: principal(r = scaled.hp1, nfactors = 6, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     PC1   PC2   PC3   PC4   PC5   PC6   h2   u2 com
## 1  0.62 -0.08 -0.01 -0.35  0.16  0.21 0.59 0.41 2.0
## 2  0.36  0.28  0.07 -0.09 -0.16 -0.20 0.28 0.72 3.3
## 3  0.28 -0.03  0.67 -0.11  0.22  0.33 0.70 0.30 2.2
## 4  0.33  0.23 -0.04  0.18  0.16  0.13 0.24 0.76 3.4
## 5  0.51  0.09  0.16 -0.18  0.01 -0.06 0.33 0.67 1.6
## 6  0.42  0.07  0.64 -0.12  0.03  0.20 0.64 0.36 2.1
## 7  0.40  0.49  0.17  0.28  0.08 -0.25 0.58 0.42 3.5
## 8  0.55  0.37  0.24  0.18 -0.10  0.12 0.55 0.45 2.6
## 9  0.41  0.40 -0.01  0.25 -0.13 -0.05 0.41 0.59 2.9
## 10 0.46  0.41  0.32  0.28 -0.01 -0.19 0.60 0.40 3.9
## 11 0.40  0.38  0.18  0.30 -0.40 -0.11 0.60 0.40 4.4
## 12 0.48  0.04 -0.17 -0.01  0.04 -0.07 0.27 0.73 1.3
## 13 0.49 -0.40  0.11 -0.06  0.22 -0.06 0.46 0.54 2.5
## 14 0.23  0.51 -0.06 -0.26 -0.06  0.30 0.47 0.53 2.7
## 15 0.35 -0.40  0.29  0.13 -0.05 -0.17 0.42 0.58 3.5
## 16 0.22 -0.56  0.15  0.13 -0.04 -0.39 0.55 0.45 2.4
## 17 0.33 -0.44  0.11  0.05 -0.43 -0.04 0.50 0.50 3.0
## 18 0.63 -0.22  0.10 -0.19  0.08 -0.10 0.51 0.49 1.6
## 19 0.27  0.50 -0.23 -0.28  0.08  0.09 0.47 0.53 2.9
## 20 0.63  0.01 -0.23 -0.16  0.14 -0.34 0.62 0.38 2.1
## 21 0.71  0.06 -0.26 -0.23  0.18 -0.24 0.72 0.28 1.9
## 22 0.70  0.05 -0.16 -0.18  0.19 -0.30 0.68 0.32 1.8
## 23 0.75 -0.11 -0.04 -0.27 -0.02  0.02 0.65 0.35 1.3
## 24 0.55 -0.18  0.01 -0.23 -0.20  0.02 0.42 0.58 1.9
## 25 0.51 -0.29  0.00  0.07 -0.38  0.15 0.52 0.48 2.8
## 26 0.51 -0.24 -0.20  0.22 -0.39  0.24 0.62 0.38 3.8
## 27 0.15  0.00 -0.09  0.53 -0.04 -0.12 0.33 0.67 1.3
## 28 0.71 -0.23 -0.18  0.12 -0.01  0.33 0.72 0.28 1.9
## 29 0.51 -0.04 -0.40  0.18 -0.24  0.29 0.59 0.41 3.3
## 30 0.34  0.10 -0.46  0.13  0.06  0.09 0.36 0.64 2.3
## 31 0.35 -0.19  0.00  0.38  0.58  0.30 0.73 0.27 3.4
## 32 0.28 -0.16 -0.10  0.53  0.49  0.01 0.64 0.36 2.8
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6
## SS loadings           7.33 2.71 1.93 1.85 1.64 1.33
## Proportion Var        0.23 0.08 0.06 0.06 0.05 0.04
## Cumulative Var        0.23 0.31 0.37 0.43 0.48 0.52
## Proportion Explained  0.44 0.16 0.11 0.11 0.10 0.08
## Cumulative Proportion 0.44 0.60 0.71 0.82 0.92 1.00
## 
## Mean item complexity =  2.6
## Test of the hypothesis that 6 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  516.18  with prob <  1.6e-11 
## 
## Fit based upon off diagonal values = 0.94
# Loadings of the unrotated solution.
pcsolution1[["loadings"]]
## 
## Loadings:
##    PC1    PC2    PC3    PC4    PC5    PC6   
## 1   0.624               -0.345  0.158  0.210
## 2   0.355  0.278               -0.164 -0.198
## 3   0.284         0.674 -0.113  0.216  0.330
## 4   0.327  0.234         0.176  0.159  0.127
## 5   0.509         0.159 -0.184              
## 6   0.423         0.637 -0.116         0.200
## 7   0.398  0.492  0.167  0.280        -0.251
## 8   0.552  0.369  0.235  0.178         0.116
## 9   0.411  0.405         0.249 -0.135       
## 10  0.464  0.408  0.320  0.279        -0.192
## 11  0.401  0.383  0.182  0.299 -0.397 -0.109
## 12  0.483        -0.166                     
## 13  0.487 -0.400  0.106         0.217       
## 14  0.229  0.507        -0.255         0.295
## 15  0.353 -0.405  0.293  0.131        -0.166
## 16  0.215 -0.561  0.151  0.131        -0.386
## 17  0.331 -0.442  0.105        -0.428       
## 18  0.631 -0.218        -0.195        -0.105
## 19  0.273  0.496 -0.227 -0.280              
## 20  0.634        -0.227 -0.165  0.136 -0.344
## 21  0.715        -0.256 -0.228  0.181 -0.239
## 22  0.704        -0.159 -0.185  0.194 -0.297
## 23  0.751 -0.115        -0.274              
## 24  0.548 -0.182        -0.228 -0.196       
## 25  0.512 -0.289               -0.384  0.148
## 26  0.505 -0.243 -0.197  0.222 -0.393  0.245
## 27  0.150                0.530        -0.119
## 28  0.713 -0.229 -0.176  0.119         0.332
## 29  0.514        -0.396  0.177 -0.238  0.286
## 30  0.342  0.101 -0.456  0.133              
## 31  0.348 -0.186         0.381  0.577  0.304
## 32  0.282 -0.159         0.532  0.491       
## 
##                  PC1   PC2   PC3   PC4   PC5   PC6
## SS loadings    7.329 2.713 1.928 1.846 1.635 1.325
## Proportion Var 0.229 0.085 0.060 0.058 0.051 0.041
## Cumulative Var 0.229 0.314 0.374 0.432 0.483 0.524
# Rotation method recorded in the fitted object.
pcsolution1[["rotation"]]
## [1] "none"
# Communalities: variance of each item explained by the six components.
pcsolution1[["communality"]]
##         1         2         3         4         5         6         7         8 
## 0.5853012 0.2827627 0.7037048 0.2355302 0.3293638 0.6429706 0.5759960 0.5501701 
##         9        10        11        12        13        14        15        16 
## 0.4147633 0.5986296 0.5987999 0.2688509 0.4632723 0.4694280 0.4216330 0.5510083 
##        17        18        19        20        21        22        23        24 
## 0.5033272 0.5112804 0.4650432 0.6177084 0.7222532 0.6836121 0.6542660 0.4242030 
##        25        26        27        28        29        30        31        32 
## 0.5196513 0.6174285 0.3272519 0.7162328 0.5927201 0.3642315 0.7264509 0.6387419
# Uniquenesses: the complement of the communalities (u2 = 1 - h2).
pcsolution1[["uniquenesses"]]
##         1         2         3         4         5         6         7         8 
## 0.4146988 0.7172373 0.2962952 0.7644698 0.6706362 0.3570294 0.4240040 0.4498299 
##         9        10        11        12        13        14        15        16 
## 0.5852367 0.4013704 0.4012001 0.7311491 0.5367277 0.5305720 0.5783670 0.4489917 
##        17        18        19        20        21        22        23        24 
## 0.4966728 0.4887196 0.5349568 0.3822916 0.2777468 0.3163879 0.3457340 0.5757970 
##        25        26        27        28        29        30        31        32 
## 0.4803487 0.3825715 0.6727481 0.2837672 0.4072799 0.6357685 0.2735491 0.3612581
# Eigenvalues of all 32 components.
pcsolution1[["values"]]
##  [1] 7.3289851 2.7125623 1.9284264 1.8458333 1.6353971 1.3253830 1.2044231
##  [8] 1.1421697 1.0077672 0.9156160 0.9004088 0.8817464 0.8015789 0.7597415
## [15] 0.7056648 0.6834944 0.6359300 0.5966320 0.5505538 0.5426246 0.4668077
## [22] 0.4484879 0.4304351 0.3644519 0.3567825 0.3380279 0.3155461 0.2822585
## [29] 0.2572122 0.2376245 0.2246154 0.1728118
# Share of total variance per component: each eigenvalue divided by the
# number of eigenvalues.
pcsolution1[["values"]] / length(pcsolution1[["values"]])
##  [1] 0.229030785 0.084767571 0.060263327 0.057682291 0.051106160 0.041418218
##  [7] 0.037638222 0.035692803 0.031492724 0.028613001 0.028137776 0.027554576
## [13] 0.025049342 0.023741923 0.022052025 0.021359199 0.019872812 0.018644749
## [19] 0.017204805 0.016957019 0.014587740 0.014015248 0.013451098 0.011389122
## [25] 0.011149452 0.010563372 0.009860815 0.008820579 0.008037882 0.007425766
## [31] 0.007019233 0.005400368

Components Analysis (No Rotation)

In the components analysis of the PC solution with no rotation, no variable is assigned to PC6. Different rotation methods are therefore explored.

# Six-component solution with QUARTIMAX rotation, and its diagram.
pcsolution3 <- principal(r = scaled.hp1, nfactors = 6, rotate = "quartimax")
fa.diagram(fa.results = pcsolution3)

# Six-component solution with EQUAMAX rotation, and its diagram.
pcsolution4 <- principal(r = scaled.hp1, nfactors = 6, rotate = "equamax")
fa.diagram(fa.results = pcsolution4)

Components Analysis (EQUAMAX and QUARTIMAX)

Variables 2 and 31 do not have corresponding factor assignments using EQUAMAX and QUARTIMAX rotations. This indicates that these rotations may not be appropriate for the data.

# VARIMAX - maximizes the variability of loadings within a factor

# Six-component solution with VARIMAX rotation.
pcsolution1v <- principal(r = new_hp2, nfactors = 6, rotate = "varimax")

# The diagram must show the varimax-rotated object `pcsolution1v`; passing
# the unrotated `pcsolution1` would draw the wrong solution under this title.
fa.diagram(pcsolution1v, main = "Components Analysis - Varimax")

Components Analysis (VARIMAX)

Compared to the other rotations, VARIMAX spreads the variables most evenly across the components. Moreover, only one variable has no factor assignment. Because of this, VARIMAX will be used.

# Loadings of the varimax-rotated solution.
pcsolution1v[["loadings"]]
## 
## Loadings:
##    RC1    RC4    RC6    RC2    RC3    RC5   
## 1   0.593         0.264 -0.163  0.348  0.109
## 2   0.292  0.392                      -0.189
## 3                               0.823  0.130
## 4   0.141  0.276        -0.193         0.300
## 5   0.437  0.234                0.267       
## 6   0.151  0.267                0.736       
## 7   0.190  0.701 -0.138                0.156
## 8   0.169  0.592  0.221 -0.155  0.294  0.109
## 9   0.132  0.582  0.163 -0.155              
## 10  0.164  0.722                0.192  0.104
## 11         0.721  0.251               -0.117
## 12  0.420  0.184  0.196                0.130
## 13  0.462         0.154  0.324  0.244  0.232
## 14  0.148  0.202        -0.600  0.158 -0.112
## 15  0.197         0.190  0.529  0.232       
## 16  0.221                0.700              
## 17  0.122         0.509  0.434        -0.176
## 18  0.600         0.199  0.192  0.259       
## 19  0.337  0.182        -0.561              
## 20  0.743  0.193               -0.123       
## 21  0.810  0.178  0.109                0.114
## 22  0.784  0.227                       0.117
## 23  0.681         0.362         0.226       
## 24  0.459         0.392         0.182 -0.130
## 25  0.205  0.107  0.640  0.196  0.127       
## 26  0.130  0.127  0.752                     
## 27         0.332  0.151  0.212 -0.226  0.297
## 28  0.394         0.634         0.152  0.364
## 29  0.215  0.132  0.663 -0.166 -0.154  0.197
## 30  0.260         0.277 -0.230 -0.280  0.280
## 31  0.124                       0.214  0.809
## 32  0.106  0.117         0.202         0.754
## 
##                  RC1   RC4   RC6   RC2   RC3   RC5
## SS loadings    4.634 3.032 2.898 2.130 2.109 1.972
## Proportion Var 0.145 0.095 0.091 0.067 0.066 0.062
## Cumulative Var 0.145 0.240 0.330 0.397 0.463 0.524
# Rotation method recorded in the fitted object.
pcsolution1v[["rotation"]]
## [1] "varimax"
# Communalities of the varimax-rotated solution.
pcsolution1v[["communality"]]
##         1         2         3         4         5         6         7         8 
## 0.5853012 0.2827627 0.7037048 0.2355302 0.3293638 0.6429706 0.5759960 0.5501701 
##         9        10        11        12        13        14        15        16 
## 0.4147633 0.5986296 0.5987999 0.2688509 0.4632723 0.4694280 0.4216330 0.5510083 
##        17        18        19        20        21        22        23        24 
## 0.5033272 0.5112804 0.4650432 0.6177084 0.7222532 0.6836121 0.6542660 0.4242030 
##        25        26        27        28        29        30        31        32 
## 0.5196513 0.6174285 0.3272519 0.7162328 0.5927201 0.3642315 0.7264509 0.6387419
# Uniquenesses of the varimax-rotated solution.
pcsolution1v[["uniquenesses"]]
##         1         2         3         4         5         6         7         8 
## 0.4146988 0.7172373 0.2962952 0.7644698 0.6706362 0.3570294 0.4240040 0.4498299 
##         9        10        11        12        13        14        15        16 
## 0.5852367 0.4013704 0.4012001 0.7311491 0.5367277 0.5305720 0.5783670 0.4489917 
##        17        18        19        20        21        22        23        24 
## 0.4966728 0.4887196 0.5349568 0.3822916 0.2777468 0.3163879 0.3457340 0.5757970 
##        25        26        27        28        29        30        31        32 
## 0.4803487 0.3825715 0.6727481 0.2837672 0.4072799 0.6357685 0.2735491 0.3612581
# Eigenvalues stored on the fitted solution
pcsolution1v[["values"]]
##  [1] 7.3289851 2.7125623 1.9284264 1.8458333 1.6353971 1.3253830 1.2044231
##  [8] 1.1421697 1.0077672 0.9156160 0.9004088 0.8817464 0.8015789 0.7597415
## [15] 0.7056648 0.6834944 0.6359300 0.5966320 0.5505538 0.5426246 0.4668077
## [22] 0.4484879 0.4304351 0.3644519 0.3567825 0.3380279 0.3155461 0.2822585
## [29] 0.2572122 0.2376245 0.2246154 0.1728118
# Proportion of variance per component: each eigenvalue divided by the number
# of eigenvalues. Both operands now come from the same fitted object; the
# previous version took the count from a different object (`pcsolution1`).
pcsolution1v$values / length(pcsolution1v$values)
##  [1] 0.229030785 0.084767571 0.060263327 0.057682291 0.051106160 0.041418218
##  [7] 0.037638222 0.035692803 0.031492724 0.028613001 0.028137776 0.027554576
## [13] 0.025049342 0.023741923 0.022052025 0.021359199 0.019872812 0.018644749
## [19] 0.017204805 0.016957019 0.014587740 0.014015248 0.013451098 0.011389122
## [25] 0.011149452 0.010563372 0.009860815 0.008820579 0.008037882 0.007425766
## [31] 0.007019233 0.005400368
# Residual correlations of the varimax solution.
# NOTE(review): `cor` is assumed to be the observed correlation matrix created
# earlier in the file (not the stats::cor function) -- confirm.
L <- pcsolution1v$loadings
llT <- L %*% t(L)          # correlations reproduced by the loadings
uniqueness <- cor - llT    # only its diagonal is used below
# Size the diagonal matrix from the loadings instead of hard-coding 32.
m <- matrix(0, nrow(L), nrow(L))
diag(m) <- diag(uniqueness)
resmatrix <- cor - (llT + m)

# Plot every off-diagonal residual exactly once. Passing the whole matrix
# would count each residual twice (the matrix is symmetric) and add the
# diagonal entries, which are zero by construction and inflate the bar at 0.
hist(resmatrix[lower.tri(resmatrix)],
     main = "Histogram of residual correlations",
     xlab = "Residual")

The residuals from the VARIMAX rotation are centered around zero, indicating that the current factor model is adequate.

# Full summary of the varimax-rotated solution
print(pcsolution1v)
## Principal Components Analysis
## Call: principal(r = new_hp2, nfactors = 6, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      RC1   RC4   RC6   RC2   RC3   RC5   h2   u2 com
## 1   0.59 -0.07  0.26 -0.16  0.35  0.11 0.59 0.41 2.4
## 2   0.29  0.39  0.04 -0.07  0.04 -0.19 0.28 0.72 2.5
## 3   0.05  0.08 -0.02  0.03  0.82  0.13 0.70 0.30 1.1
## 4   0.14  0.28  0.08 -0.19  0.07  0.30 0.24 0.76 3.5
## 5   0.44  0.23  0.10 -0.02  0.27 -0.05 0.33 0.67 2.4
## 6   0.15  0.27  0.07  0.04  0.74 -0.01 0.64 0.36 1.4
## 7   0.19  0.70 -0.14 -0.06  0.03  0.16 0.58 0.42 1.4
## 8   0.17  0.59  0.22 -0.16  0.29  0.11 0.55 0.45 2.3
## 9   0.13  0.58  0.16 -0.15 -0.04  0.08 0.41 0.59 1.5
## 10  0.16  0.72 -0.03  0.03  0.19  0.10 0.60 0.40 1.3
## 11  0.01  0.72  0.25  0.00  0.04 -0.12 0.60 0.40 1.3
## 12  0.42  0.18  0.20 -0.04 -0.05  0.13 0.27 0.73 2.1
## 13  0.46 -0.09  0.15  0.32  0.24  0.23 0.46 0.54 3.4
## 14  0.15  0.20  0.10 -0.60  0.16 -0.11 0.47 0.53 1.7
## 15  0.20  0.09  0.19  0.53  0.23  0.07 0.42 0.58 2.1
## 16  0.22 -0.04  0.10  0.70  0.00  0.03 0.55 0.45 1.2
## 17  0.12  0.03  0.51  0.43  0.10 -0.18 0.50 0.50 2.4
## 18  0.60  0.06  0.20  0.19  0.26  0.06 0.51 0.49 1.9
## 19  0.34  0.18 -0.02 -0.56 -0.03 -0.05 0.47 0.53 1.9
## 20  0.74  0.19  0.07  0.05 -0.12  0.08 0.62 0.38 1.2
## 21  0.81  0.18  0.11 -0.07 -0.06  0.11 0.72 0.28 1.2
## 22  0.78  0.23  0.06  0.01 -0.02  0.12 0.68 0.32 1.2
## 23  0.68  0.09  0.36 -0.01  0.23  0.01 0.65 0.35 1.8
## 24  0.46  0.05  0.39  0.09  0.18 -0.13 0.42 0.58 2.6
## 25  0.21  0.11  0.64  0.20  0.13 -0.04 0.52 0.48 1.6
## 26  0.13  0.13  0.75  0.10 -0.04  0.09 0.62 0.38 1.2
## 27 -0.10  0.33  0.15  0.21 -0.23  0.30 0.33 0.67 4.2
## 28  0.39  0.06  0.63 -0.02  0.15  0.36 0.72 0.28 2.5
## 29  0.22  0.13  0.66 -0.17 -0.15  0.20 0.59 0.41 1.8
## 30  0.26  0.10  0.28 -0.23 -0.28  0.28 0.36 0.64 5.2
## 31  0.12 -0.03  0.09  0.04  0.21  0.81 0.73 0.27 1.2
## 32  0.11  0.12  0.01  0.20 -0.06  0.75 0.64 0.36 1.3
## 
##                        RC1  RC4  RC6  RC2  RC3  RC5
## SS loadings           4.63 3.03 2.90 2.13 2.11 1.97
## Proportion Var        0.14 0.09 0.09 0.07 0.07 0.06
## Cumulative Var        0.14 0.24 0.33 0.40 0.46 0.52
## Proportion Explained  0.28 0.18 0.17 0.13 0.13 0.12
## Cumulative Proportion 0.28 0.46 0.63 0.76 0.88 1.00
## 
## Mean item complexity =  2
## Test of the hypothesis that 6 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  516.18  with prob <  1.6e-11 
## 
## Fit based upon off diagonal values = 0.94

The six retained components together explain 52% of the total variance. The table below summarizes the 6 factors obtained from the analysis and the corresponding variables categorized in each factor.

Summary of Factors

B. Cluster Analysis

# Bartlett factor scores computed from the varimax-rotated solution
pcscores <- factor.scores(x = new_hp2, f = pcsolution1v, method = "Bartlett")
scoress <- pcscores[["scores"]]

# Ward agglomerative clustering on all six factor-score columns
res.agnes_fa <- agnes(x = scoress, method = "ward")
pltree(res.agnes_fa, main = "Dendrogram", hang = -1, cex = 0.6)

# Cut the tree into four groups and plot them
grp_fa <- cutree(tree = as.hclust(res.agnes_fa), k = 4)
fviz_cluster(object = list(data = scoress, cluster = grp_fa))

When all six factors are used to form the clusters, the first two dimensions of the cluster plot (PC1 and PC2) explain only 33.55% of the total variation. Since this is low and the clusters overlap excessively, removing some of the factors was considered.

# Multivariate normality check on the factor scores, with a Q-Q plot
mvn(data = scoress, subset = NULL, multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.130275 2.392726e-05  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    RC1       0.4178    0.3257    YES   
## 2 Anderson-Darling    RC4       0.3609    0.4424    YES   
## 3 Anderson-Darling    RC6       0.6891    0.0706    YES   
## 4 Anderson-Darling    RC2       0.5213    0.1822    YES   
## 5 Anderson-Darling    RC3       0.9887    0.0128    NO    
## 6 Anderson-Darling    RC5       1.2690    0.0026    NO    
## 
## $Descriptives
##       n          Mean  Std.Dev       Median       Min      Max       25th
## RC1 159 -1.361431e-16 1.006107  0.022508334 -3.325116 2.394211 -0.6477771
## RC4 159  4.558491e-17 1.004085  0.003587867 -2.618578 2.891897 -0.6486343
## RC6 159  4.474211e-17 1.004408 -0.100346198 -3.061909 2.693130 -0.6114148
## RC2 159 -1.879381e-16 1.004047 -0.082708023 -5.061796 1.990922 -0.6555958
## RC3 159 -4.681571e-17 1.008429 -0.117191375 -2.234044 2.636265 -0.6836079
## RC5 159  1.593095e-17 1.015208  0.216338192 -3.110614 3.044366 -0.6829234
##          75th        Skew    Kurtosis
## RC1 0.6178239 -0.10104996  0.04435192
## RC4 0.6262030  0.04268047 -0.34623433
## RC6 0.6952380 -0.06060268  0.25546508
## RC2 0.7274664 -0.71101252  2.71681371
## RC3 0.7172917  0.30655986 -0.48175851
## RC5 0.7372391 -0.28023719  0.30390426
# Store the Henze-Zirkler and Royston runs, then tabulate the univariate
# normality results of the Henze-Zirkler run. Assignment uses `<-`, matching
# the rest of the file.
hz_test <- mvn(scoress, mvnTest = "hz")
royston_test <- mvn(scoress, mvnTest = "royston")
kable(hz_test$univariateNormality)
Test Variable Statistic p value Normality
Anderson-Darling RC1 0.4178 0.3257 YES
Anderson-Darling RC4 0.3609 0.4424 YES
Anderson-Darling RC6 0.6891 0.0706 YES
Anderson-Darling RC2 0.5213 0.1822 YES
Anderson-Darling RC3 0.9887 0.0128 NO
Anderson-Darling RC5 1.2690 0.0026 NO
# Keep the factor scores RC1, RC4 and RC6 for clustering. The columns are
# selected by name rather than by dropping positions 4:6, so the selection
# does not depend on the column order of `scoress`.
# NOTE(review): the excluded columns are RC2, RC3 and RC5. Only RC3 and RC5
# failed the Anderson-Darling test above; RC2 passed it (p = 0.1822) but is
# excluded as well -- confirm this is intended.
pc_fin <- scoress[, c("RC1", "RC4", "RC6")]
mvn(pc_fin, subset = NULL, multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ    p value MVN
## 1 Henze-Zirkler 1.007624 0.05420563 YES
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    RC1       0.4178    0.3257    YES   
## 2 Anderson-Darling    RC4       0.3609    0.4424    YES   
## 3 Anderson-Darling    RC6       0.6891    0.0706    YES   
## 
## $Descriptives
##       n          Mean  Std.Dev       Median       Min      Max       25th
## RC1 159 -1.361431e-16 1.006107  0.022508334 -3.325116 2.394211 -0.6477771
## RC4 159  4.558491e-17 1.004085  0.003587867 -2.618578 2.891897 -0.6486343
## RC6 159  4.474211e-17 1.004408 -0.100346198 -3.061909 2.693130 -0.6114148
##          75th        Skew    Kurtosis
## RC1 0.6178239 -0.10104996  0.04435192
## RC4 0.6262030  0.04268047 -0.34623433
## RC6 0.6952380 -0.06060268  0.25546508

After removing RC3 and RC5 (the non-normal factors) together with RC2, the remaining factor scores are consistent with multivariate normality (Henze-Zirkler p-value = 0.054).

B.1. Hierarchical Clustering

# Ward (D2) hierarchical clustering on Euclidean distances between the
# retained factor scores, followed by its dendrogram
d <- dist(x = pc_fin, method = "euclidean")
res.hc <- hclust(d = d, method = "ward.D2")
plot(res.hc, hang = -1, cex = 0.6)

B.1.a. Agglomerative

# Agglomerative nesting with Ward linkage on the retained factor scores
res.agnes <- agnes(x = pc_fin, method = "ward")
# summary(res.agnes)
# Agglomerative coefficient of the fit
res.agnes[["ac"]]
## [1] 0.9588164
# Vertical dendrogram of the agglomerative fit
pltree(res.agnes, main = "Dendrogram of agnes", hang = -1, cex = 0.6)

# Same tree drawn horizontally
plot(as.dendrogram(res.agnes), horiz = TRUE, cex = 0.6)

# Cut into three groups and count the members of each
grp <- cutree(tree = as.hclust(res.agnes), k = 3)
table(grp)
## grp
##  1  2  3 
## 59 57 43

The coefficient obtained from the agglomerative method is 0.9588. Since this is close to 1, it indicates a strong clustering structure, so we can conclude that the clustering produced by this method is sufficient. Based on the dendrogram, we can group the observations into three clusters. The first cluster contains 59 observations, the second cluster contains 57 observations, and the third cluster contains 43 observations. These clusters are plotted below.

# Cluster plot of the three agglomerative groups
fviz_cluster(
  object = list(data = pc_fin, cluster = grp),
  main = "Cluster Plot using Agglomerative Method"
)

From the cluster plot, PC1 and PC2 explain 66.8% of the total variability in the data. Compared to the previous cluster plot, where all factors were utilized, the cluster plot using only 3 factors reduced the problem of excessive overlapping.

B.1.b. Divisive

# Divisive hierarchical clustering on the retained factor scores
res.diana <- diana(x = pc_fin)
pltree(res.diana, main = "Dendrogram of diana", hang = -1, cex = 0.6)

# Divisive coefficient of the fit
res.diana[["dc"]]
## [1] 0.9129347
# Three-group cut of the divisive tree
grp_div <- cutree(tree = as.hclust(res.diana), k = 3)
# Cluster sizes
table(grp_div)
## grp_div
##   1   2   3 
## 102  42  15
# Cluster plot of the three divisive groups
fviz_cluster(object = list(data = pc_fin, cluster = grp_div))

The divisive coefficient is 91.29%. This is less than the obtained agglomerative coefficient of 95.88%, indicating that the agglomerative method produced better clustering.

B.2. Non-hierarchical Clustering

B.2.a. K-means clustering

# Drop incomplete rows from the retained factor scores, then standardize
# each column; show the first rows of the result
df <- scale(na.omit(pc_fin))
head(df)
##             RC1          RC4         RC6
## [1,]  0.2372729  0.647263867  0.94567040
## [2,] -0.6052084 -0.221401560 -0.07735138
## [3,]  0.2975854 -0.003204691 -1.36302807
## [4,]  0.2222225 -1.264993806 -0.09990577
## [5,] -1.1033870 -1.558145714 -0.16875069
## [6,]  1.7617537 -0.973842322 -0.24187815
# Pairwise distances between the standardized observations, drawn with a
# three-colour gradient
distance <- get_dist(x = df)
fviz_dist(
  dist.obj = distance,
  gradient = list(low = "#00AFBB", mid = "white", high = "red") # "#FC4E07"
)

# Share of the total sum of squares explained by a k-means partition.
#
# k      : number of clusters.
# data   : numeric matrix or data frame to cluster; defaults to the global
#          `df`, so existing calls of the form func1(k) keep working.
# nstart : number of random starts passed to kmeans(); defaults to 10.
#
# Returns betweenss / (tot.withinss + betweenss) taken from ONE kmeans() fit.
# The previous version called kmeans() three times per evaluation, so the
# numerator and the denominator could come from different random solutions
# (and the work was tripled).
func1 <- function(k, data = df, nstart = 10) {
  fit <- kmeans(data, centers = k, nstart = nstart)
  fit$betweenss / (fit$tot.withinss + fit$betweenss)
}
# Evaluate the metric for k = 1, ..., 10 and plot it against k
k.values <- seq_len(10)
values <- map_dbl(k.values, func1)
plot(
  x = k.values, y = values,
  type = "b", pch = 19, frame = FALSE,
  xlab = "Number of clusters K",
  ylab = "Metric"
)

B.3. Optimal Number of Clusters

# Within-cluster sum of squares criterion
fviz_nbclust(x = df, FUNcluster = kmeans, method = "wss")

# Average silhouette criterion
fviz_nbclust(x = df, FUNcluster = kmeans, method = "silhouette")

# Gap statistic criterion
fviz_nbclust(x = df, FUNcluster = kmeans, method = "gap_stat")

B.4. Final Clusters and Insights

# Final 3-cluster k-means solution on the standardized factor scores.
# kmeans() starts from random centres, so without a fixed seed the cluster
# numbers (1/2/3) can be permuted from one run to the next. Fix the seed so
# the numbering stays stable when the report is re-run.
# NOTE(review): the seed value is arbitrary; after re-knitting, check that the
# cluster numbers still match the discussion that follows.
set.seed(123)
final <- kmeans(df, centers = 3, nstart = 25)
print(final)
## K-means clustering with 3 clusters of sizes 71, 35, 53
## 
## Cluster means:
##          RC1         RC4        RC6
## 1  0.7932786 -0.04598114 -0.3660569
## 2 -0.9731223  0.77628948 -0.6077800
## 3 -0.4200660 -0.45104662  0.8917423
## 
## Clustering vector:
##   [1] 3 3 1 1 3 1 1 2 3 1 1 1 1 3 3 2 2 1 3 3 1 1 2 3 3 3 1 2 3 1 1 2 1 1 3 2 3
##  [38] 2 3 2 1 2 1 2 1 3 2 1 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 3 2 1 1 1 1 3 3 1 1 1
##  [75] 3 3 1 1 3 2 2 2 3 1 1 2 1 2 3 1 3 1 3 3 3 3 3 3 2 3 1 2 3 1 3 2 3 1 3 2 1
## [112] 2 3 3 1 3 3 3 1 1 3 1 1 2 1 1 1 2 1 3 2 2 2 1 3 3 2 3 1 1 1 3 1 1 1 3 3 3
## [149] 2 3 1 1 3 1 1 3 2 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 130.88235  68.00464  91.32434
##  (between_SS / total_SS =  38.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Cluster plot of the final k-means solution
fviz_cluster(object = final, data = df)

B.5. Discussion of the Clusters

Cluster Demographics

Cluster 1: Social Relationships Cluster

Mean of the Variables in Cluster 1

This cluster is mainly composed of males, ages 19-21 years old. The majority of them are affiliated with organizations inside the campus. The top three colleges that comprise this cluster are School of Statistics, College of Engineering, and the College of Arts and Letters. Within this cluster, 76.27% have claimed that they are happy.

From the table, most of the variables with high means in Cluster 1 belong to the Social Relationship Factor. These variables are positive effect of social media, family relationships, peer relationships, love and affection, and social interaction (outside comfort zone). From the Degree Program Factor, the variables learnings and company of animals have high means. From the Perspective Factor, university satisfaction; and from the Time Management Factor, Hobbies and Multimedia.

With this, we can conclude that the happiness index of Cluster 1 is heavily influenced by the Social Relationship Factor and slightly influenced by Degree Program, Perspective, and Time Management Factors.

Cluster 2: Perspective Cluster

Mean of the Variables in Cluster 2

This cluster is mainly composed of females, ages 19-21 years old. The majority of them are affiliated with organizations inside the campus. The top three colleges that comprise this cluster are School of Statistics, College of Engineering, and the College of Science. Within this cluster, 88% have claimed that they are happy.

From the table, most of the variables with high means in Cluster 2 belong to the Perspective Factor. These variables are current level of happiness, university satisfaction, positive outlook, optimism about the future, rewarding view on life, and happiness is a choice. From the Social Relationships Factor, the variables family relationships and peer relationships have high means; and from the Time Management Factor, Hobbies and Multimedia. With this, we can conclude that the happiness index of Cluster 2 is heavily influenced by the Perspective Factor and slightly influenced by the Social Relationships and Time Management Factors.

Cluster 3: Degree Program Cluster

Mean of the Variables in Cluster 3

This cluster is mainly composed of females, ages 19-21 years old. The majority of them are affiliated with organizations inside the campus. The top three colleges that comprise this cluster are School of Statistics, College of Engineering, and the College of Social Sciences and Philosophy. Within this cluster, 63% have claimed that they are happy.

From the table, most of the variables with high means in Cluster 3 belong to the Degree Program Factor. These variables are degree program satisfaction, class participation, performance in degree program, learnings, and company of animals. From the Time Management Factor, the variables hobbies and multimedia have high means; from the Perspective Factor, university satisfaction; from the Environmental Setting Factor, safety going home at night, and from the Social Relationship Factor, peer relationship.

With this, we can conclude that the happiness index of Cluster 3 is heavily influenced by the Degree Program Factor and slightly influenced by the Time Management, Perspective, Environmental Setting, and Social Relationship Factors.

Based on the characteristics of the clusters discussed above, it can be concluded that Cluster 2 is the happiest cluster since it has the highest proportion of respondents who claimed that they are happy. It is followed by Cluster 1 and then by Cluster 3. It can also be deduced that students from the College of Science are the happiest, followed by those from the College of Arts and Letters, and then those from the College of Social Sciences and Philosophy. The School of Statistics and the College of Engineering were omitted from the interpretation of the cluster characteristics because they dominated all three clusters. Their presence among the top three colleges in every cluster may be attributed to the fact that most of the respondents came from these two colleges, which in turn may be a consequence of the convenience sampling procedure that was used.

Moreover, the Happiness Index of students from the College of Arts and Letters is mostly influenced by the Social Relationships Factor. As for the students in the College of Science, their Happiness Index is mostly influenced by the Perspective Factor, while for the students in the College of Social Sciences and Philosophy, the Happiness Index is most affected by the Degree Program Factor. It is also important to note that the Time Management Factor and the Social Relationships Factor appear to influence all three clusters.