Assessment of the Happiness Index of UP Diliman Undergraduate Students using Multivariate Analysis

library(psych)
library(MVN)
library("GPArotation")
library(MVN)
library(devtools)

## Loading required package: usethis

library(factoextra)

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(cluster)
library(dendextend)

## 
## ---------------------
## Welcome to dendextend version 1.15.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------

## 
## Attaching package: 'dendextend'

## The following object is masked from 'package:stats':
## 
##     cutree

library(psych)
library(biotools)

## Loading required package: MASS

## ---
## biotools version 4.2

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## v purrr   0.3.4

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x dplyr::select()  masks MASS::select()

library(cluster)
library(factoextra)
library(knitr)

# Read the raw responses from the working directory.
hpind <- read.csv("hpindex2.csv", fill = TRUE)

# Keep only columns 7 to 38 (32 item columns); this drops the demographic
# columns.
new_hp2 <- hpind[, 7:38]

# Attach a descriptive name to each of the 32 retained columns, in order.
names(new_hp2) <- c(
  "happiness_level", "vitamin_intake", "enough_sleep",
  "physical_activities", "physical_appearance", "sleep_quality",
  "degree_prog_satisfaction", "class_anticipation", "class_participation",
  "degree_program_performance", "learnings", "university_satisfaction",
  "leisure_time", "procrastination", "hobbies", "multimedia",
  "pos_effect_social_med", "positive_outlook", "regrets",
  "sense_of_meaning_purpose", "optimism_of_future", "life_control",
  "rewarding_view_of_life", "happiness_choice", "family_relationship",
  "peer_relationship", "company_of_animals", "love_and_affection",
  "social_interaction", "extra_curricular", "campus_safety",
  "safety_going_home"
)

# Preview the first rows as a formatted table.
kable(head(new_hp2))

happiness_level	vitamin_intake	enough_sleep	physical_activities	physical_appearance	sleep_quality	degree_prog_satisfaction	class_anticipation	class_participation	degree_program_performance	learnings	university_satisfaction	leisure_time	procrastination	hobbies	multimedia	pos_effect_social_med	positive_outlook	regrets	sense_of_meaning_purpose	optimism_of_future	life_control	rewarding_view_of_life	happiness_choice	family_relationship	peer_relationship	company_of_animals	love_and_affection	social_interaction	extra_curricular	campus_safety	safety_going_home
3	1	2	3	3	2	4	3	3	3	3	4	2	3	3	3	3	3	2	3	3	3	3	4	3	4	4	4	3	3	3	3
3	1	3	2	3	3	3	2	2	3	3	4	2	2	2	4	3	2	2	2	2	2	3	3	4	3	3	3	2	2	3	3
3	1	4	3	2	2	4	2	2	3	3	3	2	1	3	3	2	3	1	3	3	3	3	3	3	3	3	3	2	1	3	3
3	1	3	1	3	3	2	2	2	3	2	3	2	2	3	3	3	3	2	3	3	2	3	3	3	3	4	4	2	1	3	2
1	1	2	1	2	2	1	1	2	2	2	2	1	1	4	4	3	2	1	2	2	2	2	3	3	3	3	3	2	2	3	3
3	3	1	2	3	1	3	2	3	2	2	3	2	2	2	4	3	3	2	4	4	2	4	3	4	3	1	2	3	4	2	2

kable(describe(new_hp2))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
happiness_level	1	159	2.805031	0.6606287	3	2.806202	0.0000	1	4	3	-0.5560081	0.6706330	0.0523913
vitamin_intake	2	159	2.012579	0.9611927	2	1.922481	1.4826	1	4	3	0.4850720	-0.8896365	0.0762275
enough_sleep	3	159	2.163522	0.8181038	2	2.139535	1.4826	1	4	3	0.2461292	-0.5443294	0.0648799
physical_activities	4	159	2.245283	0.7353375	2	2.271318	1.4826	1	4	3	-0.0350027	-0.5530330	0.0583161
physical_appearance	5	159	2.584906	0.6871505	3	2.620155	0.0000	1	4	3	-0.4211245	-0.0639351	0.0544946
sleep_quality	6	159	2.157233	0.7508189	2	2.131783	0.0000	1	4	3	0.4512488	0.0990615	0.0595438
degree_prog_satisfaction	7	159	2.943396	0.7895302	3	2.992248	0.0000	1	4	3	-0.5143077	-0.0219762	0.0626138
class_anticipation	8	159	2.364780	0.7413756	2	2.387597	1.4826	1	4	3	0.0501120	-0.3536916	0.0587949
class_participation	9	159	2.540881	0.6819753	3	2.558139	1.4826	1	4	3	-0.2061818	-0.2116378	0.0540842
degree_program_performance	10	159	2.477987	0.7699784	3	2.503876	1.4826	1	4	3	-0.1334665	-0.4267370	0.0610633
learnings	11	159	3.150943	0.6674027	3	3.201550	0.0000	1	4	3	-0.4319223	0.1896944	0.0529285
university_satisfaction	12	159	3.081761	0.6747578	3	3.131783	0.0000	1	4	3	-0.5881969	0.8684358	0.0535118
leisure_time	13	159	2.119497	0.8063418	2	2.100775	1.4826	1	4	3	0.2147461	-0.6254300	0.0639471
procrastination	14	159	1.823899	0.8384328	2	1.728682	1.4826	1	4	3	0.7848008	-0.0602042	0.0664920
hobbies	15	159	3.056604	0.6773485	3	3.093023	0.0000	1	4	3	-0.4318484	0.3593880	0.0537172
multimedia	16	159	3.433962	0.5684567	3	3.457364	0.0000	1	4	3	-0.5637049	0.5083698	0.0450816
pos_effect_social_med	17	159	2.830189	0.7134117	3	2.844961	0.0000	1	4	3	-0.4720982	0.3127539	0.0565772
positive_outlook	18	159	2.735849	0.7750282	3	2.728682	1.4826	1	4	3	-0.0791834	-0.5020721	0.0614637
regrets	19	159	1.918239	0.7288673	2	1.868217	0.0000	1	4	3	0.5144907	0.1195500	0.0578029
sense_of_meaning_purpose	20	159	2.597484	0.7885213	3	2.612403	1.4826	1	4	3	-0.2437671	-0.3688615	0.0625338
optimism_of_future	21	159	2.723270	0.8412765	3	2.751938	1.4826	1	4	3	-0.2091003	-0.5669791	0.0667176
life_control	22	159	2.484277	0.7618710	3	2.511628	1.4826	1	4	3	-0.1180194	-0.4000533	0.0604203
rewarding_view_of_life	23	159	2.748428	0.7792284	3	2.759690	0.0000	1	4	3	-0.2533972	-0.3090070	0.0617968
happiness_choice	24	159	2.893082	0.8236319	3	2.945736	0.0000	1	4	3	-0.4775728	-0.2190164	0.0653183
family_relationship	25	159	3.088050	0.8142998	3	3.170543	1.4826	1	4	3	-0.7196450	0.1308747	0.0645782
peer_relationship	26	159	3.194969	0.5566399	3	3.209302	0.0000	1	4	3	-0.1755083	0.8885315	0.0441444
company_of_animals	27	159	3.119497	0.9096166	3	3.240310	1.4826	1	4	3	-0.8367982	-0.1186993	0.0721373
love_and_affection	28	159	2.962264	0.7537820	3	2.984496	0.0000	1	4	3	-0.2911746	-0.3848245	0.0597788
social_interaction	29	159	2.704403	0.7339287	3	2.689923	0.0000	1	4	3	-0.1498792	-0.2681676	0.0582043
extra_curricular	30	159	2.201258	0.7695130	2	2.186046	0.0000	1	4	3	0.3066198	-0.2177226	0.0610264
campus_safety	31	159	2.515723	0.6642341	3	2.573643	0.0000	1	4	3	-0.5078377	-0.2110440	0.0526772
safety_going_home	32	159	2.433962	0.7674411	2	2.465116	1.4826	1	4	3	-0.1550558	-0.4697048	0.0608620

A. Factor Analysis

# Correlation matrix of the 32 items (cor() defaults to Pearson). The object
# is deliberately still called `cor` because later chunks read it when
# building the residual matrices; calls such as cor(x) keep resolving to the
# function, since R skips non-function objects when looking up a call.
cor <- cor(new_hp2)
# lowerCor(new_hp2)

# Correlation plot with the coefficients printed in each cell; the items are
# labelled 1-32 instead of by name. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
corPlot(new_hp2, numbers = TRUE, MAR = 0.5, labels = 1:32)

The correlation matrix above shows that most pairs of variables are only weakly correlated with one another. To evaluate the ‘factorability’ of the data, Bartlett’s test of sphericity and the Kaiser-Meyer-Olkin (KMO) measure of sampling adequacy were performed.

A.1. Factorability Tests

Bartlett’s test of sphericity tests the null hypothesis that the correlation matrix is an identity matrix, which would mean that the variables are unrelated and therefore unsuitable for factor analysis.

cortest.bartlett(new_hp2)

## R was not square, finding R from data

## $chisq
## [1] 1810.035
## 
## $p.value
## [1] 1.167642e-148
## 
## $df
## [1] 496

Since the p-value is less than 0.05, we reject the null hypothesis that the variables are unrelated.

KMO(new_hp2)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = new_hp2)
## Overall MSA =  0.8
## MSA for each item = 
##            happiness_level             vitamin_intake 
##                       0.90                       0.82 
##               enough_sleep        physical_activities 
##                       0.64                       0.77 
##        physical_appearance              sleep_quality 
##                       0.89                       0.75 
##   degree_prog_satisfaction         class_anticipation 
##                       0.75                       0.84 
##        class_participation degree_program_performance 
##                       0.79                       0.77 
##                  learnings    university_satisfaction 
##                       0.70                       0.72 
##               leisure_time            procrastination 
##                       0.85                       0.68 
##                    hobbies                 multimedia 
##                       0.73                       0.70 
##      pos_effect_social_med           positive_outlook 
##                       0.77                       0.92 
##                    regrets   sense_of_meaning_purpose 
##                       0.73                       0.86 
##         optimism_of_future               life_control 
##                       0.89                       0.87 
##     rewarding_view_of_life           happiness_choice 
##                       0.90                       0.88 
##        family_relationship          peer_relationship 
##                       0.78                       0.86 
##         company_of_animals         love_and_affection 
##                       0.51                       0.85 
##         social_interaction           extra_curricular 
##                       0.85                       0.76 
##              campus_safety          safety_going_home 
##                       0.58                       0.57

The overall Measure of Sampling Adequacy (MSA) for the set of variables is 0.8, indicating that correlations between pairs of variables can be explained by the other variables. Moreover, the individual MSAs are all above 0.5; hence, factor analysis is appropriate for the data.

A.2. Factor Analysis Proper

To aid in the selection of the appropriate method to use for factor extraction, the variables were tested for multivariate normality.

mvn(new_hp2,subset=NULL,multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ p value MVN
## 1 Henze-Zirkler 1.000335       0  NO
## 
## $univariateNormality
##                Test                   Variable Statistic   p value Normality
## 1  Anderson-Darling      happiness_level         18.0345  <0.001      NO    
## 2  Anderson-Darling       vitamin_intake          9.9494  <0.001      NO    
## 3  Anderson-Darling        enough_sleep           9.8890  <0.001      NO    
## 4  Anderson-Darling    physical_activities       12.1844  <0.001      NO    
## 5  Anderson-Darling    physical_appearance       15.6425  <0.001      NO    
## 6  Anderson-Darling       sleep_quality          13.1187  <0.001      NO    
## 7  Anderson-Darling  degree_prog_satisfaction    11.6865  <0.001      NO    
## 8  Anderson-Darling     class_anticipation       12.0248  <0.001      NO    
## 9  Anderson-Darling    class_participation       14.7726  <0.001      NO    
## 10 Anderson-Darling degree_program_performance   11.3196  <0.001      NO    
## 11 Anderson-Darling         learnings            15.4846  <0.001      NO    
## 12 Anderson-Darling  university_satisfaction     16.4595  <0.001      NO    
## 13 Anderson-Darling        leisure_time          10.1749  <0.001      NO    
## 14 Anderson-Darling      procrastination         12.0124  <0.001      NO    
## 15 Anderson-Darling          hobbies             15.4015  <0.001      NO    
## 16 Anderson-Darling         multimedia           22.0239  <0.001      NO    
## 17 Anderson-Darling   pos_effect_social_med      14.8656  <0.001      NO    
## 18 Anderson-Darling      positive_outlook        10.8890  <0.001      NO    
## 19 Anderson-Darling          regrets             13.2174  <0.001      NO    
## 20 Anderson-Darling  sense_of_meaning_purpose    11.1532  <0.001      NO    
## 21 Anderson-Darling     optimism_of_future        9.3379  <0.001      NO    
## 22 Anderson-Darling        life_control          11.5135  <0.001      NO    
## 23 Anderson-Darling   rewarding_view_of_life     11.1536  <0.001      NO    
## 24 Anderson-Darling      happiness_choice        10.6861  <0.001      NO    
## 25 Anderson-Darling    family_relationship       11.7514  <0.001      NO    
## 26 Anderson-Darling     peer_relationship        22.9795  <0.001      NO    
## 27 Anderson-Darling     company_of_animals       11.6386  <0.001      NO    
## 28 Anderson-Darling     love_and_affection       11.7200  <0.001      NO    
## 29 Anderson-Darling     social_interaction       12.4321  <0.001      NO    
## 30 Anderson-Darling      extra_curricular        11.6117  <0.001      NO    
## 31 Anderson-Darling       campus_safety          16.9183  <0.001      NO    
## 32 Anderson-Darling     safety_going_home        11.5048  <0.001      NO    
## 
## $Descriptives
##                              n     Mean   Std.Dev Median Min Max 25th 75th
## happiness_level            159 2.805031 0.6606287      3   1   4  2.0    3
## vitamin_intake             159 2.012579 0.9611927      2   1   4  1.0    3
## enough_sleep               159 2.163522 0.8181038      2   1   4  2.0    3
## physical_activities        159 2.245283 0.7353375      2   1   4  2.0    3
## physical_appearance        159 2.584906 0.6871505      3   1   4  2.0    3
## sleep_quality              159 2.157233 0.7508189      2   1   4  2.0    3
## degree_prog_satisfaction   159 2.943396 0.7895302      3   1   4  3.0    3
## class_anticipation         159 2.364780 0.7413756      2   1   4  2.0    3
## class_participation        159 2.540881 0.6819753      3   1   4  2.0    3
## degree_program_performance 159 2.477987 0.7699784      3   1   4  2.0    3
## learnings                  159 3.150943 0.6674027      3   1   4  3.0    4
## university_satisfaction    159 3.081761 0.6747578      3   1   4  3.0    3
## leisure_time               159 2.119497 0.8063418      2   1   4  2.0    3
## procrastination            159 1.823899 0.8384328      2   1   4  1.0    2
## hobbies                    159 3.056604 0.6773485      3   1   4  3.0    3
## multimedia                 159 3.433962 0.5684567      3   1   4  3.0    4
## pos_effect_social_med      159 2.830189 0.7134117      3   1   4  2.0    3
## positive_outlook           159 2.735849 0.7750282      3   1   4  2.0    3
## regrets                    159 1.918239 0.7288673      2   1   4  1.0    2
## sense_of_meaning_purpose   159 2.597484 0.7885213      3   1   4  2.0    3
## optimism_of_future         159 2.723270 0.8412765      3   1   4  2.0    3
## life_control               159 2.484277 0.7618710      3   1   4  2.0    3
## rewarding_view_of_life     159 2.748428 0.7792284      3   1   4  2.0    3
## happiness_choice           159 2.893082 0.8236319      3   1   4  2.0    3
## family_relationship        159 3.088050 0.8142998      3   1   4  3.0    4
## peer_relationship          159 3.194969 0.5566399      3   1   4  3.0    4
## company_of_animals         159 3.119497 0.9096166      3   1   4  3.0    4
## love_and_affection         159 2.962264 0.7537820      3   1   4  2.5    3
## social_interaction         159 2.704403 0.7339287      3   1   4  2.0    3
## extra_curricular           159 2.201258 0.7695130      2   1   4  2.0    3
## campus_safety              159 2.515723 0.6642341      3   1   4  2.0    3
## safety_going_home          159 2.433962 0.7674411      2   1   4  2.0    3
##                                   Skew    Kurtosis
## happiness_level            -0.55600814  0.67063299
## vitamin_intake              0.48507205 -0.88963653
## enough_sleep                0.24612915 -0.54432940
## physical_activities        -0.03500273 -0.55303301
## physical_appearance        -0.42112447 -0.06393509
## sleep_quality               0.45124884  0.09906149
## degree_prog_satisfaction   -0.51430775 -0.02197619
## class_anticipation          0.05011200 -0.35369165
## class_participation        -0.20618177 -0.21163785
## degree_program_performance -0.13346652 -0.42673697
## learnings                  -0.43192227  0.18969437
## university_satisfaction    -0.58819686  0.86843580
## leisure_time                0.21474614 -0.62543004
## procrastination             0.78480080 -0.06020418
## hobbies                    -0.43184835  0.35938804
## multimedia                 -0.56370494  0.50836981
## pos_effect_social_med      -0.47209818  0.31275387
## positive_outlook           -0.07918336 -0.50207208
## regrets                     0.51449074  0.11954999
## sense_of_meaning_purpose   -0.24376706 -0.36886146
## optimism_of_future         -0.20910034 -0.56697913
## life_control               -0.11801935 -0.40005335
## rewarding_view_of_life     -0.25339725 -0.30900700
## happiness_choice           -0.47757283 -0.21901640
## family_relationship        -0.71964504  0.13087472
## peer_relationship          -0.17550834  0.88853149
## company_of_animals         -0.83679823 -0.11869927
## love_and_affection         -0.29117462 -0.38482445
## social_interaction         -0.14987918 -0.26816762
## extra_curricular            0.30661978 -0.21772263
## campus_safety              -0.50783774 -0.21104397
## safety_going_home          -0.15505581 -0.46970478

# Run two multivariate normality tests on the item responses:
# Henze-Zirkler ("hz") and Royston ("royston").
mvn_hz <- mvn(new_hp2, mvnTest = "hz")
mvn_royston <- mvn(new_hp2, mvnTest = "royston")

# Show only the multivariate result of the Henze-Zirkler test.
print(mvn_hz[["multivariateNormality"]])

##            Test       HZ p value MVN
## 1 Henze-Zirkler 1.000335       0  NO

print(mvn_royston$multivariateNormality)

##      Test        H       p value MVN
## 1 Royston 1634.643 1.482197e-323  NO

The above tests rejected the assumption of multivariate normality. Because of this, the Maximum Likelihood Solution is not applicable. Thus, the Principal Components Solution will be used to estimate the factor scores.

A.2.a. Principal Components Solution

# Swap the descriptive column names for their positions ("1" to "32") so the
# loading tables and diagrams below stay compact.
names(new_hp2) <- as.character(1:32)

# Standardize every column (centre and scale to unit standard deviation).
scaled.hp1 <- scale(new_hp2)

# Parallel analysis on the standardized data to suggest how many factors to
# keep. fa = "fa" requests the factor solution only, which is why the
# suggested number of components is reported as NA.
fa.parallel(scaled.hp1, fa = "fa")

## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA

Based on the parallel analysis scree plots, it is possible to extract 5-6 factors.

The PC solution with no rotation is considered first.

# Fit a six-component principal components solution to the standardized data
# without rotation, then print its loadings and fit summary.
pcsolution1 <- principal(r = scaled.hp1, nfactors = 6, rotate = "none")
print(pcsolution1)

## Principal Components Analysis
## Call: principal(r = scaled.hp1, nfactors = 6, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     PC1   PC2   PC3   PC4   PC5   PC6   h2   u2 com
## 1  0.62 -0.08 -0.01 -0.35  0.16  0.21 0.59 0.41 2.0
## 2  0.36  0.28  0.07 -0.09 -0.16 -0.20 0.28 0.72 3.3
## 3  0.28 -0.03  0.67 -0.11  0.22  0.33 0.70 0.30 2.2
## 4  0.33  0.23 -0.04  0.18  0.16  0.13 0.24 0.76 3.4
## 5  0.51  0.09  0.16 -0.18  0.01 -0.06 0.33 0.67 1.6
## 6  0.42  0.07  0.64 -0.12  0.03  0.20 0.64 0.36 2.1
## 7  0.40  0.49  0.17  0.28  0.08 -0.25 0.58 0.42 3.5
## 8  0.55  0.37  0.24  0.18 -0.10  0.12 0.55 0.45 2.6
## 9  0.41  0.40 -0.01  0.25 -0.13 -0.05 0.41 0.59 2.9
## 10 0.46  0.41  0.32  0.28 -0.01 -0.19 0.60 0.40 3.9
## 11 0.40  0.38  0.18  0.30 -0.40 -0.11 0.60 0.40 4.4
## 12 0.48  0.04 -0.17 -0.01  0.04 -0.07 0.27 0.73 1.3
## 13 0.49 -0.40  0.11 -0.06  0.22 -0.06 0.46 0.54 2.5
## 14 0.23  0.51 -0.06 -0.26 -0.06  0.30 0.47 0.53 2.7
## 15 0.35 -0.40  0.29  0.13 -0.05 -0.17 0.42 0.58 3.5
## 16 0.22 -0.56  0.15  0.13 -0.04 -0.39 0.55 0.45 2.4
## 17 0.33 -0.44  0.11  0.05 -0.43 -0.04 0.50 0.50 3.0
## 18 0.63 -0.22  0.10 -0.19  0.08 -0.10 0.51 0.49 1.6
## 19 0.27  0.50 -0.23 -0.28  0.08  0.09 0.47 0.53 2.9
## 20 0.63  0.01 -0.23 -0.16  0.14 -0.34 0.62 0.38 2.1
## 21 0.71  0.06 -0.26 -0.23  0.18 -0.24 0.72 0.28 1.9
## 22 0.70  0.05 -0.16 -0.18  0.19 -0.30 0.68 0.32 1.8
## 23 0.75 -0.11 -0.04 -0.27 -0.02  0.02 0.65 0.35 1.3
## 24 0.55 -0.18  0.01 -0.23 -0.20  0.02 0.42 0.58 1.9
## 25 0.51 -0.29  0.00  0.07 -0.38  0.15 0.52 0.48 2.8
## 26 0.51 -0.24 -0.20  0.22 -0.39  0.24 0.62 0.38 3.8
## 27 0.15  0.00 -0.09  0.53 -0.04 -0.12 0.33 0.67 1.3
## 28 0.71 -0.23 -0.18  0.12 -0.01  0.33 0.72 0.28 1.9
## 29 0.51 -0.04 -0.40  0.18 -0.24  0.29 0.59 0.41 3.3
## 30 0.34  0.10 -0.46  0.13  0.06  0.09 0.36 0.64 2.3
## 31 0.35 -0.19  0.00  0.38  0.58  0.30 0.73 0.27 3.4
## 32 0.28 -0.16 -0.10  0.53  0.49  0.01 0.64 0.36 2.8
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6
## SS loadings           7.33 2.71 1.93 1.85 1.64 1.33
## Proportion Var        0.23 0.08 0.06 0.06 0.05 0.04
## Cumulative Var        0.23 0.31 0.37 0.43 0.48 0.52
## Proportion Explained  0.44 0.16 0.11 0.11 0.10 0.08
## Cumulative Proportion 0.44 0.60 0.71 0.82 0.92 1.00
## 
## Mean item complexity =  2.6
## Test of the hypothesis that 6 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  516.18  with prob <  1.6e-11 
## 
## Fit based upon off diagonal values = 0.94

# Residual matrix of the unrotated solution: the observed correlations minus
# the correlations reproduced by the loadings (L L'), with the diagonal
# removed. It measures how well the model fits; ideally every entry is close
# to zero.
L <- pcsolution1$loadings
llT <- L %*% t(L)

# Off the diagonal this difference is the residual; on the diagonal it is the
# part of each item's unit variance that the six components do not reproduce.
uniqueness <- cor - llT

# Diagonal matrix that carries only those diagonal entries.
m <- matrix(0, nrow = 32, ncol = 32)
diag(m) <- diag(uniqueness)

# Subtracting the diagonal part as well leaves a (numerically) zero diagonal,
# so only the off-diagonal residuals remain.
resmatrix <- cor - (llT + m)

# head(resmatrix)
hist(resmatrix)

# Diagram of which items are assigned to which component.
fa.diagram(pcsolution1)

print(pcsolution1)

## Principal Components Analysis
## Call: principal(r = scaled.hp1, nfactors = 6, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     PC1   PC2   PC3   PC4   PC5   PC6   h2   u2 com
## 1  0.62 -0.08 -0.01 -0.35  0.16  0.21 0.59 0.41 2.0
## 2  0.36  0.28  0.07 -0.09 -0.16 -0.20 0.28 0.72 3.3
## 3  0.28 -0.03  0.67 -0.11  0.22  0.33 0.70 0.30 2.2
## 4  0.33  0.23 -0.04  0.18  0.16  0.13 0.24 0.76 3.4
## 5  0.51  0.09  0.16 -0.18  0.01 -0.06 0.33 0.67 1.6
## 6  0.42  0.07  0.64 -0.12  0.03  0.20 0.64 0.36 2.1
## 7  0.40  0.49  0.17  0.28  0.08 -0.25 0.58 0.42 3.5
## 8  0.55  0.37  0.24  0.18 -0.10  0.12 0.55 0.45 2.6
## 9  0.41  0.40 -0.01  0.25 -0.13 -0.05 0.41 0.59 2.9
## 10 0.46  0.41  0.32  0.28 -0.01 -0.19 0.60 0.40 3.9
## 11 0.40  0.38  0.18  0.30 -0.40 -0.11 0.60 0.40 4.4
## 12 0.48  0.04 -0.17 -0.01  0.04 -0.07 0.27 0.73 1.3
## 13 0.49 -0.40  0.11 -0.06  0.22 -0.06 0.46 0.54 2.5
## 14 0.23  0.51 -0.06 -0.26 -0.06  0.30 0.47 0.53 2.7
## 15 0.35 -0.40  0.29  0.13 -0.05 -0.17 0.42 0.58 3.5
## 16 0.22 -0.56  0.15  0.13 -0.04 -0.39 0.55 0.45 2.4
## 17 0.33 -0.44  0.11  0.05 -0.43 -0.04 0.50 0.50 3.0
## 18 0.63 -0.22  0.10 -0.19  0.08 -0.10 0.51 0.49 1.6
## 19 0.27  0.50 -0.23 -0.28  0.08  0.09 0.47 0.53 2.9
## 20 0.63  0.01 -0.23 -0.16  0.14 -0.34 0.62 0.38 2.1
## 21 0.71  0.06 -0.26 -0.23  0.18 -0.24 0.72 0.28 1.9
## 22 0.70  0.05 -0.16 -0.18  0.19 -0.30 0.68 0.32 1.8
## 23 0.75 -0.11 -0.04 -0.27 -0.02  0.02 0.65 0.35 1.3
## 24 0.55 -0.18  0.01 -0.23 -0.20  0.02 0.42 0.58 1.9
## 25 0.51 -0.29  0.00  0.07 -0.38  0.15 0.52 0.48 2.8
## 26 0.51 -0.24 -0.20  0.22 -0.39  0.24 0.62 0.38 3.8
## 27 0.15  0.00 -0.09  0.53 -0.04 -0.12 0.33 0.67 1.3
## 28 0.71 -0.23 -0.18  0.12 -0.01  0.33 0.72 0.28 1.9
## 29 0.51 -0.04 -0.40  0.18 -0.24  0.29 0.59 0.41 3.3
## 30 0.34  0.10 -0.46  0.13  0.06  0.09 0.36 0.64 2.3
## 31 0.35 -0.19  0.00  0.38  0.58  0.30 0.73 0.27 3.4
## 32 0.28 -0.16 -0.10  0.53  0.49  0.01 0.64 0.36 2.8
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6
## SS loadings           7.33 2.71 1.93 1.85 1.64 1.33
## Proportion Var        0.23 0.08 0.06 0.06 0.05 0.04
## Cumulative Var        0.23 0.31 0.37 0.43 0.48 0.52
## Proportion Explained  0.44 0.16 0.11 0.11 0.10 0.08
## Cumulative Proportion 0.44 0.60 0.71 0.82 0.92 1.00
## 
## Mean item complexity =  2.6
## Test of the hypothesis that 6 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  516.18  with prob <  1.6e-11 
## 
## Fit based upon off diagonal values = 0.94

pcsolution1$loadings

## 
## Loadings:
##    PC1    PC2    PC3    PC4    PC5    PC6   
## 1   0.624               -0.345  0.158  0.210
## 2   0.355  0.278               -0.164 -0.198
## 3   0.284         0.674 -0.113  0.216  0.330
## 4   0.327  0.234         0.176  0.159  0.127
## 5   0.509         0.159 -0.184              
## 6   0.423         0.637 -0.116         0.200
## 7   0.398  0.492  0.167  0.280        -0.251
## 8   0.552  0.369  0.235  0.178         0.116
## 9   0.411  0.405         0.249 -0.135       
## 10  0.464  0.408  0.320  0.279        -0.192
## 11  0.401  0.383  0.182  0.299 -0.397 -0.109
## 12  0.483        -0.166                     
## 13  0.487 -0.400  0.106         0.217       
## 14  0.229  0.507        -0.255         0.295
## 15  0.353 -0.405  0.293  0.131        -0.166
## 16  0.215 -0.561  0.151  0.131        -0.386
## 17  0.331 -0.442  0.105        -0.428       
## 18  0.631 -0.218        -0.195        -0.105
## 19  0.273  0.496 -0.227 -0.280              
## 20  0.634        -0.227 -0.165  0.136 -0.344
## 21  0.715        -0.256 -0.228  0.181 -0.239
## 22  0.704        -0.159 -0.185  0.194 -0.297
## 23  0.751 -0.115        -0.274              
## 24  0.548 -0.182        -0.228 -0.196       
## 25  0.512 -0.289               -0.384  0.148
## 26  0.505 -0.243 -0.197  0.222 -0.393  0.245
## 27  0.150                0.530        -0.119
## 28  0.713 -0.229 -0.176  0.119         0.332
## 29  0.514        -0.396  0.177 -0.238  0.286
## 30  0.342  0.101 -0.456  0.133              
## 31  0.348 -0.186         0.381  0.577  0.304
## 32  0.282 -0.159         0.532  0.491       
## 
##                  PC1   PC2   PC3   PC4   PC5   PC6
## SS loadings    7.329 2.713 1.928 1.846 1.635 1.325
## Proportion Var 0.229 0.085 0.060 0.058 0.051 0.041
## Cumulative Var 0.229 0.314 0.374 0.432 0.483 0.524

pcsolution1$rotation

## [1] "none"

pcsolution1$communality #variance explained

##         1         2         3         4         5         6         7         8 
## 0.5853012 0.2827627 0.7037048 0.2355302 0.3293638 0.6429706 0.5759960 0.5501701 
##         9        10        11        12        13        14        15        16 
## 0.4147633 0.5986296 0.5987999 0.2688509 0.4632723 0.4694280 0.4216330 0.5510083 
##        17        18        19        20        21        22        23        24 
## 0.5033272 0.5112804 0.4650432 0.6177084 0.7222532 0.6836121 0.6542660 0.4242030 
##        25        26        27        28        29        30        31        32 
## 0.5196513 0.6174285 0.3272519 0.7162328 0.5927201 0.3642315 0.7264509 0.6387419

pcsolution1$uniquenesses

##         1         2         3         4         5         6         7         8 
## 0.4146988 0.7172373 0.2962952 0.7644698 0.6706362 0.3570294 0.4240040 0.4498299 
##         9        10        11        12        13        14        15        16 
## 0.5852367 0.4013704 0.4012001 0.7311491 0.5367277 0.5305720 0.5783670 0.4489917 
##        17        18        19        20        21        22        23        24 
## 0.4966728 0.4887196 0.5349568 0.3822916 0.2777468 0.3163879 0.3457340 0.5757970 
##        25        26        27        28        29        30        31        32 
## 0.4803487 0.3825715 0.6727481 0.2837672 0.4072799 0.6357685 0.2735491 0.3612581

pcsolution1$values

##  [1] 7.3289851 2.7125623 1.9284264 1.8458333 1.6353971 1.3253830 1.2044231
##  [8] 1.1421697 1.0077672 0.9156160 0.9004088 0.8817464 0.8015789 0.7597415
## [15] 0.7056648 0.6834944 0.6359300 0.5966320 0.5505538 0.5426246 0.4668077
## [22] 0.4484879 0.4304351 0.3644519 0.3567825 0.3380279 0.3155461 0.2822585
## [29] 0.2572122 0.2376245 0.2246154 0.1728118

pcsolution1$values/length(pcsolution1$values)

##  [1] 0.229030785 0.084767571 0.060263327 0.057682291 0.051106160 0.041418218
##  [7] 0.037638222 0.035692803 0.031492724 0.028613001 0.028137776 0.027554576
## [13] 0.025049342 0.023741923 0.022052025 0.021359199 0.019872812 0.018644749
## [19] 0.017204805 0.016957019 0.014587740 0.014015248 0.013451098 0.011389122
## [25] 0.011149452 0.010563372 0.009860815 0.008820579 0.008037882 0.007425766
## [31] 0.007019233 0.005400368

Components Analysis (No Rotation)

In the components diagram of the unrotated PC solution, no variable is assigned to PC6. Different rotation methods are therefore explored.

# Refit the six-component solution under two alternative rotations,
# quartimax and equamax, and draw the assignment diagram of each for
# comparison.
pcsolution3 <- principal(r = scaled.hp1, nfactors = 6, rotate = "quartimax")
pcsolution4 <- principal(r = scaled.hp1, nfactors = 6, rotate = "equamax")

fa.diagram(pcsolution3)
fa.diagram(pcsolution4)

Components Analysis (EQUAMAX and QUARTIMAX)

Variables 2 and 31 do not have corresponding factor assignments using EQUAMAX and QUARTIMAX rotations. This indicates that these rotations may not be appropriate for the data.

# VARIMAX - rotation that maximizes the variance of the squared loadings
# within each component.
# principal() works from the correlation matrix, so passing the unscaled
# new_hp2 gives the same solution as the standardized data would.
pcsolution1v <- principal(new_hp2, nfactors = 6, rotate = "varimax")

# Plot the rotated fit. The previous call passed pcsolution1 (the unrotated
# solution), so the figure titled "Varimax" showed the wrong model.
fa.diagram(pcsolution1v, main = "Components Analysis - Varimax")

Components Analysis (VARIMAX)

Compared to the other rotations, VARIMAX has the most dispersed set of variables. Moreover, there is only one variable that has no factor assignment. Because of this, VARIMAX will be used.

pcsolution1v$loadings

## 
## Loadings:
##    RC1    RC4    RC6    RC2    RC3    RC5   
## 1   0.593         0.264 -0.163  0.348  0.109
## 2   0.292  0.392                      -0.189
## 3                               0.823  0.130
## 4   0.141  0.276        -0.193         0.300
## 5   0.437  0.234                0.267       
## 6   0.151  0.267                0.736       
## 7   0.190  0.701 -0.138                0.156
## 8   0.169  0.592  0.221 -0.155  0.294  0.109
## 9   0.132  0.582  0.163 -0.155              
## 10  0.164  0.722                0.192  0.104
## 11         0.721  0.251               -0.117
## 12  0.420  0.184  0.196                0.130
## 13  0.462         0.154  0.324  0.244  0.232
## 14  0.148  0.202        -0.600  0.158 -0.112
## 15  0.197         0.190  0.529  0.232       
## 16  0.221                0.700              
## 17  0.122         0.509  0.434        -0.176
## 18  0.600         0.199  0.192  0.259       
## 19  0.337  0.182        -0.561              
## 20  0.743  0.193               -0.123       
## 21  0.810  0.178  0.109                0.114
## 22  0.784  0.227                       0.117
## 23  0.681         0.362         0.226       
## 24  0.459         0.392         0.182 -0.130
## 25  0.205  0.107  0.640  0.196  0.127       
## 26  0.130  0.127  0.752                     
## 27         0.332  0.151  0.212 -0.226  0.297
## 28  0.394         0.634         0.152  0.364
## 29  0.215  0.132  0.663 -0.166 -0.154  0.197
## 30  0.260         0.277 -0.230 -0.280  0.280
## 31  0.124                       0.214  0.809
## 32  0.106  0.117         0.202         0.754
## 
##                  RC1   RC4   RC6   RC2   RC3   RC5
## SS loadings    4.634 3.032 2.898 2.130 2.109 1.972
## Proportion Var 0.145 0.095 0.091 0.067 0.066 0.062
## Cumulative Var 0.145 0.240 0.330 0.397 0.463 0.524

pcsolution1v$rotation

## [1] "varimax"

pcsolution1v$communality

##         1         2         3         4         5         6         7         8 
## 0.5853012 0.2827627 0.7037048 0.2355302 0.3293638 0.6429706 0.5759960 0.5501701 
##         9        10        11        12        13        14        15        16 
## 0.4147633 0.5986296 0.5987999 0.2688509 0.4632723 0.4694280 0.4216330 0.5510083 
##        17        18        19        20        21        22        23        24 
## 0.5033272 0.5112804 0.4650432 0.6177084 0.7222532 0.6836121 0.6542660 0.4242030 
##        25        26        27        28        29        30        31        32 
## 0.5196513 0.6174285 0.3272519 0.7162328 0.5927201 0.3642315 0.7264509 0.6387419

pcsolution1v$uniquenesses

##         1         2         3         4         5         6         7         8 
## 0.4146988 0.7172373 0.2962952 0.7644698 0.6706362 0.3570294 0.4240040 0.4498299 
##         9        10        11        12        13        14        15        16 
## 0.5852367 0.4013704 0.4012001 0.7311491 0.5367277 0.5305720 0.5783670 0.4489917 
##        17        18        19        20        21        22        23        24 
## 0.4966728 0.4887196 0.5349568 0.3822916 0.2777468 0.3163879 0.3457340 0.5757970 
##        25        26        27        28        29        30        31        32 
## 0.4803487 0.3825715 0.6727481 0.2837672 0.4072799 0.6357685 0.2735491 0.3612581

# Eigenvalues stored in the fitted solution, largest first.
pcsolution1v[["values"]]

##  [1] 7.3289851 2.7125623 1.9284264 1.8458333 1.6353971 1.3253830 1.2044231
##  [8] 1.1421697 1.0077672 0.9156160 0.9004088 0.8817464 0.8015789 0.7597415
## [15] 0.7056648 0.6834944 0.6359300 0.5966320 0.5505538 0.5426246 0.4668077
## [22] 0.4484879 0.4304351 0.3644519 0.3567825 0.3380279 0.3155461 0.2822585
## [29] 0.2572122 0.2376245 0.2246154 0.1728118

# Each eigenvalue divided by the number of eigenvalues, i.e. the share of the
# total variance attributed to each component (the solution is based on the
# correlation matrix). The count is taken from the same object as the
# eigenvalues instead of from the separate `pcsolution1` fit.
pcsolution1v$values / length(pcsolution1v$values)

##  [1] 0.229030785 0.084767571 0.060263327 0.057682291 0.051106160 0.041418218
##  [7] 0.037638222 0.035692803 0.031492724 0.028613001 0.028137776 0.027554576
## [13] 0.025049342 0.023741923 0.022052025 0.021359199 0.019872812 0.018644749
## [19] 0.017204805 0.016957019 0.014587740 0.014015248 0.013451098 0.011389122
## [25] 0.011149452 0.010563372 0.009860815 0.008820579 0.008037882 0.007425766
## [31] 0.007019233 0.005400368

# Residual correlation matrix for the varimax solution: the observed
# correlations minus the correlations reproduced by the loadings (L L'),
# with the diagonal part of that difference removed so that only the
# off-diagonal residuals remain.
# NOTE(review): `cor` is expected to be the correlation matrix object created
# earlier in the script (it shadows stats::cor) -- confirm.
L <- pcsolution1v$loadings
llT <- L %*% t(L)
uniqueness <- cor - llT
# Size the diagonal matrix from the loadings instead of hard-coding 32.
m <- matrix(0, nrow(L), nrow(L))
diag(m) <- diag(uniqueness)
resmatrix <- cor - (llT + m)

# Distribution of the residuals (diagonal entries are zero by construction).
hist(resmatrix)

The residuals from the VARIMAX rotation are centered around zero, indicating that the current factor model is adequate.

# Full summary of the varimax-rotated six-component solution.
print(pcsolution1v)

## Principal Components Analysis
## Call: principal(r = new_hp2, nfactors = 6, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      RC1   RC4   RC6   RC2   RC3   RC5   h2   u2 com
## 1   0.59 -0.07  0.26 -0.16  0.35  0.11 0.59 0.41 2.4
## 2   0.29  0.39  0.04 -0.07  0.04 -0.19 0.28 0.72 2.5
## 3   0.05  0.08 -0.02  0.03  0.82  0.13 0.70 0.30 1.1
## 4   0.14  0.28  0.08 -0.19  0.07  0.30 0.24 0.76 3.5
## 5   0.44  0.23  0.10 -0.02  0.27 -0.05 0.33 0.67 2.4
## 6   0.15  0.27  0.07  0.04  0.74 -0.01 0.64 0.36 1.4
## 7   0.19  0.70 -0.14 -0.06  0.03  0.16 0.58 0.42 1.4
## 8   0.17  0.59  0.22 -0.16  0.29  0.11 0.55 0.45 2.3
## 9   0.13  0.58  0.16 -0.15 -0.04  0.08 0.41 0.59 1.5
## 10  0.16  0.72 -0.03  0.03  0.19  0.10 0.60 0.40 1.3
## 11  0.01  0.72  0.25  0.00  0.04 -0.12 0.60 0.40 1.3
## 12  0.42  0.18  0.20 -0.04 -0.05  0.13 0.27 0.73 2.1
## 13  0.46 -0.09  0.15  0.32  0.24  0.23 0.46 0.54 3.4
## 14  0.15  0.20  0.10 -0.60  0.16 -0.11 0.47 0.53 1.7
## 15  0.20  0.09  0.19  0.53  0.23  0.07 0.42 0.58 2.1
## 16  0.22 -0.04  0.10  0.70  0.00  0.03 0.55 0.45 1.2
## 17  0.12  0.03  0.51  0.43  0.10 -0.18 0.50 0.50 2.4
## 18  0.60  0.06  0.20  0.19  0.26  0.06 0.51 0.49 1.9
## 19  0.34  0.18 -0.02 -0.56 -0.03 -0.05 0.47 0.53 1.9
## 20  0.74  0.19  0.07  0.05 -0.12  0.08 0.62 0.38 1.2
## 21  0.81  0.18  0.11 -0.07 -0.06  0.11 0.72 0.28 1.2
## 22  0.78  0.23  0.06  0.01 -0.02  0.12 0.68 0.32 1.2
## 23  0.68  0.09  0.36 -0.01  0.23  0.01 0.65 0.35 1.8
## 24  0.46  0.05  0.39  0.09  0.18 -0.13 0.42 0.58 2.6
## 25  0.21  0.11  0.64  0.20  0.13 -0.04 0.52 0.48 1.6
## 26  0.13  0.13  0.75  0.10 -0.04  0.09 0.62 0.38 1.2
## 27 -0.10  0.33  0.15  0.21 -0.23  0.30 0.33 0.67 4.2
## 28  0.39  0.06  0.63 -0.02  0.15  0.36 0.72 0.28 2.5
## 29  0.22  0.13  0.66 -0.17 -0.15  0.20 0.59 0.41 1.8
## 30  0.26  0.10  0.28 -0.23 -0.28  0.28 0.36 0.64 5.2
## 31  0.12 -0.03  0.09  0.04  0.21  0.81 0.73 0.27 1.2
## 32  0.11  0.12  0.01  0.20 -0.06  0.75 0.64 0.36 1.3
## 
##                        RC1  RC4  RC6  RC2  RC3  RC5
## SS loadings           4.63 3.03 2.90 2.13 2.11 1.97
## Proportion Var        0.14 0.09 0.09 0.07 0.07 0.06
## Cumulative Var        0.14 0.24 0.33 0.40 0.46 0.52
## Proportion Explained  0.28 0.18 0.17 0.13 0.13 0.12
## Cumulative Proportion 0.28 0.46 0.63 0.76 0.88 1.00
## 
## Mean item complexity =  2
## Test of the hypothesis that 6 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  516.18  with prob <  1.6e-11 
## 
## Fit based upon off diagonal values = 0.94

The cumulative variance explained by the principal components is 52%. The table below summarizes the 6 factors obtained from the analysis and the corresponding variables categorized in each factor.

Summary of Factors

B. Cluster Analysis

# Bartlett factor scores for every respondent from the varimax solution
pcscores <- factor.scores(x = new_hp2, f = pcsolution1v, method = "Bartlett")

# Keep only the score matrix (one column per rotated component)
scoress <- pcscores[["scores"]]

# Ward agglomerative clustering on all six factor scores
res.agnes_fa <- agnes(x = scoress, method = "ward")

pltree(
  res.agnes_fa,
  cex = 0.6,
  hang = -1,
  main = "Dendrogram"
)

# Cut the tree into four groups and draw the cluster plot
grp_fa <- cutree(as.hclust(res.agnes_fa), k = 4)
fviz_cluster(list(data = scoress, cluster = grp_fa))

When all 6 factors are used to form clusters, the cumulative variation explained by PC1 and PC2 is only 33.55%. Since this is low and the clusters overlap excessively, removing some of the factors was considered.

# Multivariate normality check on the six factor scores, with a Q-Q plot
mvn(data = scoress, subset = NULL, multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.130275 2.392726e-05  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    RC1       0.4178    0.3257    YES   
## 2 Anderson-Darling    RC4       0.3609    0.4424    YES   
## 3 Anderson-Darling    RC6       0.6891    0.0706    YES   
## 4 Anderson-Darling    RC2       0.5213    0.1822    YES   
## 5 Anderson-Darling    RC3       0.9887    0.0128    NO    
## 6 Anderson-Darling    RC5       1.2690    0.0026    NO    
## 
## $Descriptives
##       n          Mean  Std.Dev       Median       Min      Max       25th
## RC1 159 -1.361431e-16 1.006107  0.022508334 -3.325116 2.394211 -0.6477771
## RC4 159  4.558491e-17 1.004085  0.003587867 -2.618578 2.891897 -0.6486343
## RC6 159  4.474211e-17 1.004408 -0.100346198 -3.061909 2.693130 -0.6114148
## RC2 159 -1.879381e-16 1.004047 -0.082708023 -5.061796 1.990922 -0.6555958
## RC3 159 -4.681571e-17 1.008429 -0.117191375 -2.234044 2.636265 -0.6836079
## RC5 159  1.593095e-17 1.015208  0.216338192 -3.110614 3.044366 -0.6829234
##          75th        Skew    Kurtosis
## RC1 0.6178239 -0.10104996  0.04435192
## RC4 0.6262030  0.04268047 -0.34623433
## RC6 0.6952380 -0.06060268  0.25546508
## RC2 0.7274664 -0.71101252  2.71681371
## RC3 0.7172917  0.30655986 -0.48175851
## RC5 0.7372391 -0.28023719  0.30390426

# Henze-Zirkler and Royston multivariate normality tests on the factor scores
hz_test <- mvn(data = scoress, mvnTest = "hz")
royston_test <- mvn(data = scoress, mvnTest = "royston")

# Tabulate the per-factor univariate normality results
kable(hz_test[["univariateNormality"]])

Test	Variable	Statistic	p value	Normality
Anderson-Darling	RC1	0.4178	0.3257	YES
Anderson-Darling	RC4	0.3609	0.4424	YES
Anderson-Darling	RC6	0.6891	0.0706	YES
Anderson-Darling	RC2	0.5213	0.1822	YES
Anderson-Darling	RC3	0.9887	0.0128	NO
Anderson-Darling	RC5	1.2690	0.0026	NO

# Keep RC1, RC4 and RC6 for clustering. RC3 and RC5 are dropped because they
# failed the univariate Anderson-Darling test; RC2 is dropped as well even
# though it passed that test.
# NOTE(review): confirm the reason for excluding RC2.
# Columns are selected by name so the choice is explicit and does not depend
# on the column order of the score matrix (previously `-(4:6)`).
pc_fin <- scoress[, c("RC1", "RC4", "RC6")]
mvn(pc_fin, subset = NULL, multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ    p value MVN
## 1 Henze-Zirkler 1.007624 0.05420563 YES
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    RC1       0.4178    0.3257    YES   
## 2 Anderson-Darling    RC4       0.3609    0.4424    YES   
## 3 Anderson-Darling    RC6       0.6891    0.0706    YES   
## 
## $Descriptives
##       n          Mean  Std.Dev       Median       Min      Max       25th
## RC1 159 -1.361431e-16 1.006107  0.022508334 -3.325116 2.394211 -0.6477771
## RC4 159  4.558491e-17 1.004085  0.003587867 -2.618578 2.891897 -0.6486343
## RC6 159  4.474211e-17 1.004408 -0.100346198 -3.061909 2.693130 -0.6114148
##          75th        Skew    Kurtosis
## RC1 0.6178239 -0.10104996  0.04435192
## RC4 0.6262030  0.04268047 -0.34623433
## RC6 0.6952380 -0.06060268  0.25546508

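After removing three factors (RC2, RC3, and RC5; of these, RC3 and RC5 failed the univariate normality test), the distribution of the remaining factor scores is closer to multivariate normal: the Henze-Zirkler test no longer rejects multivariate normality (p = 0.054).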

B.1. Hierarchical Clustering

# Ward (ward.D2) hierarchical clustering on Euclidean distances between the
# retained factor scores, followed by its dendrogram
d <- dist(x = pc_fin, method = "euclidean")
res.hc <- hclust(d = d, method = "ward.D2")
plot(res.hc, cex = 0.6, hang = -1)

B.1.a. Agglomerative

# Agglomerative nesting (Ward) on the retained factor scores
res.agnes <- agnes(x = pc_fin, method = "ward")
# summary(res.agnes)

# Agglomerative coefficient of the fit
res.agnes[["ac"]]

## [1] 0.9588164

# Dendrogram of the agglomerative solution
pltree(
  res.agnes,
  cex = 0.6,
  hang = -1,
  main = "Dendrogram of agnes"
)

# Same tree drawn horizontally
plot(
  as.dendrogram(res.agnes),
  cex = 0.6,
  horiz = TRUE
)

# Cut the tree into three clusters
grp <- cutree(as.hclust(res.agnes), k = 3)

# Number of members in each cluster
table(grp)

## grp
##  1  2  3 
## 59 57 43

The coefficient obtained from the agglomerative method is 0.9588. Since this is close to 1, we can conclude that the clustering produced by this method is sufficient. Based on the dendrogram, we can group the observations into three clusters. The first cluster contains 59 observations, the second cluster contains 57 observations, and the third cluster contains 43 observations. These clusters are plotted below.

# Cluster plot of the three agglomerative groups
fviz_cluster(
  list(data = pc_fin, cluster = grp),
  main = "Cluster Plot using Agglomerative Method"
)

From the cluster plot, PC1 and PC2 explain 66.8% of the total variability in the data. Compared to the previous cluster plot, where all factors were used, the cluster plot using only 3 factors shows less overlap between clusters.

B.1.b. Divisive

# Divisive hierarchical clustering (diana) on the retained factor scores
res.diana <- diana(x = pc_fin)
pltree(
  res.diana,
  cex = 0.6,
  hang = -1,
  main = "Dendrogram of diana"
)

# Divisive coefficient of the fit
res.diana[["dc"]]

## [1] 0.9129347

# Cut the divisive tree into three clusters
grp_div <- cutree(as.hclust(res.diana), k = 3)

# Cluster sizes
table(grp_div)

## grp_div
##   1   2   3 
## 102  42  15

# Cluster plot of the three divisive groups
fviz_cluster(
  list(data = pc_fin, cluster = grp_div)
)

The divisive coefficient is 91.29%. This is less than the obtained agglomerative coefficient of 95.88%, indicating that the agglomerative method produced better clustering.

B.2. Non-hierarchical Clustering

B.2.a. K-means clustering

# Input for k-means: retained factor scores with incomplete rows removed,
# then centred and scaled column-wise
df <- scale(na.omit(pc_fin))
head(df)

##             RC1          RC4         RC6
## [1,]  0.2372729  0.647263867  0.94567040
## [2,] -0.6052084 -0.221401560 -0.07735138
## [3,]  0.2975854 -0.003204691 -1.36302807
## [4,]  0.2222225 -1.264993806 -0.09990577
## [5,] -1.1033870 -1.558145714 -0.16875069
## [6,]  1.7617537 -0.973842322 -0.24187815

# Pairwise distances between respondents, displayed as a heat map
distance <- get_dist(df)
fviz_dist(
  distance,
  gradient = list(low = "#00AFBB", mid = "white", high = "red") # "#FC4E07"
)

# Share of the total sum of squares explained by a k-means partition,
# computed as betweenss / (tot.withinss + betweenss).
#
# k    : number of clusters passed to kmeans().
# data : numeric matrix or data frame to cluster; defaults to the script's
#        scaled score matrix `df`.
#
# Returns a single number.
#
# The model is fitted once so that the numerator and the denominator come from
# the same partition; the earlier version mixed three independent
# random-start fits.
func1 <- function(k, data = df) {
  fit <- kmeans(data, centers = k, nstart = 10)
  fit$betweenss / (fit$tot.withinss + fit$betweenss)
}

# Evaluate the metric for k = 1, ..., 10 and plot it against k
k.values <- seq_len(10)
values <- map_dbl(k.values, func1)
plot(
  x = k.values,
  y = values,
  type = "b",
  pch = 19,
  frame = FALSE,
  xlab = "Number of clusters K",
  ylab = "Metric"
)

B.3. Optimal Number of Clusters

# Within-cluster sum of squares criterion
fviz_nbclust(x = df, FUNcluster = kmeans, method = "wss")

# Silhouette criterion
fviz_nbclust(x = df, FUNcluster = kmeans, method = "silhouette")

# Gap statistic criterion
fviz_nbclust(x = df, FUNcluster = kmeans, method = "gap_stat")

B.4. Final Clusters and Insights

# Final three-cluster k-means solution using 25 random starts.
# NOTE(review): no set.seed() is visible near this call, so cluster labels
# may differ between runs -- confirm whether a seed is set earlier.
final <- kmeans(x = df, centers = 3, nstart = 25)
print(final)

## K-means clustering with 3 clusters of sizes 71, 35, 53
## 
## Cluster means:
##          RC1         RC4        RC6
## 1  0.7932786 -0.04598114 -0.3660569
## 2 -0.9731223  0.77628948 -0.6077800
## 3 -0.4200660 -0.45104662  0.8917423
## 
## Clustering vector:
##   [1] 3 3 1 1 3 1 1 2 3 1 1 1 1 3 3 2 2 1 3 3 1 1 2 3 3 3 1 2 3 1 1 2 1 1 3 2 3
##  [38] 2 3 2 1 2 1 2 1 3 2 1 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 3 2 1 1 1 1 3 3 1 1 1
##  [75] 3 3 1 1 3 2 2 2 3 1 1 2 1 2 3 1 3 1 3 3 3 3 3 3 2 3 1 2 3 1 3 2 3 1 3 2 1
## [112] 2 3 3 1 3 3 3 1 1 3 1 1 2 1 1 1 2 1 3 2 2 2 1 3 3 2 3 1 1 1 3 1 1 1 3 3 3
## [149] 2 3 1 1 3 1 1 3 2 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 130.88235  68.00464  91.32434
##  (between_SS / total_SS =  38.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# Cluster plot of the final k-means solution
fviz_cluster(object = final, data = df)

B.5. Discussion of the Clusters

Cluster Demographics

Cluster 1: Social Relationships Cluster

Mean of the Variables in Cluster 1

This cluster is mainly composed of males, ages 19-21 years old. Majority of them are affiliated with organizations inside the campus. The top three colleges that comprise this cluster are School of Statistics, College of Engineering, and the College of Arts and Letters. Within this cluster, 76.27% have claimed that they are happy.

From the table, most of the variables with high means in Cluster 1 belong to the Social Relationship Factor. These variables are positive effect of social media, family relationships, peer relationships, love and affection, and social interaction (outside comfort zone). From the Degree Program Factor, the variables learnings and company of animals have high means. From the Perspective Factor, university satisfaction; and from the Time Management Factor, Hobbies and Multimedia.

With this, we can conclude that the happiness index of Cluster 1 is heavily influenced by the Social Relationship Factor and slightly influenced by Degree Program, Perspective, and Time Management Factors.

Cluster 2: Perspective Cluster

Mean of the Variables in Cluster 2

This cluster is mainly composed of females, ages 19-21 years old. Majority of them are affiliated with organizations inside the campus. The top three colleges that comprise this cluster are School of Statistics, College of Engineering, and the College of Science. Within this cluster, 88% have claimed that they are happy.

From the table, most of the variables with high means in Cluster 2 belong to the Perspective Factor. These variables are current level of happiness, university satisfaction, positive outlook, optimism about the future, rewarding view on life, and happiness is a choice. From the Social Relationships Factor, the variables family relationships and peer relationships have high means; and from the Time Management Factor, Hobbies and Multimedia. With this, we can conclude that the happiness index of Cluster 2 is heavily influenced by the Perspective Factor and slightly influenced by the Social Relationships and Time Management Factors.

Cluster 3: Degree Program Cluster

Mean of the Variables in Cluster 3

This cluster is mainly composed of females, ages 19-21 years old. Majority of them are affiliated with organizations inside the campus. The top three colleges that comprise this cluster are School of Statistics, College of Engineering, and the College of Social Sciences and Philosophy. Within this cluster, 63% have claimed that they are happy.

From the table, most of the variables with high means in Cluster 3 belong to the Degree Program Factor. These variables are degree program satisfaction, class participation, performance in degree program, learnings, and company of animals. From the Time Management Factor, the variables hobbies and multimedia have high means; from the Perspective Factor, university satisfaction; from the Environmental Setting Factor, safety going home at night, and from the Social Relationship Factor, peer relationship.

With this, we can conclude that the happiness index of Cluster 3 is heavily influenced by the Degree Program Factor and slightly influenced by the Time Management, Perspective, Environmental Setting, and Social Relationship Factors.

Based on the characteristics of the clusters discussed above, it can be concluded that cluster 2 is the happiest cluster since it has the highest proportion of respondents who claimed that they are happy. It is followed by cluster 1 and then by cluster 3. It can also be deduced that students from the College of Science are the happiest, followed by those from the College of Arts and Letters, and the College of Social Sciences and Philosophy. The results that came from the School of Statistics and College of Engineering were not primarily considered since they dominated all the clusters. Also, the reason that they were included in the top three colleges per cluster may be attributed to the fact that most of the respondents came from the two colleges respectively, which is why they were omitted from the cluster characteristic interpretation. This may also be due to the fact that the convenience sampling procedure was used.

Moreover, the Happiness Index of students from the College of Arts and Letters is mostly influenced by the Social Relationships Factor. As for the students in the College of Science, their Happiness Index is mostly influenced by the Perspective Factor, while for the students in the College of Social Sciences and Philosophy, the Happiness Index is most affected by the Degree Program Factor. It is also important to note that the Time Management Factor and the Social Relationships Factor appear to influence all three clusters.

happiness_level	vitamin_intake	enough_sleep	physical_activities	physical_appearance	sleep_quality	degree_prog_satisfaction	class_anticipation	class_participation	degree_program_performance	learnings	university_satisfaction	leisure_time	procrastination	hobbies	multimedia	pos_effect_social_med	positive_outlook	regrets	sense_of_meaning_purpose	optimism_of_future	life_control	rewarding_view_of_life	happiness_choice	family_relationship	peer_relationship	company_of_animals	love_and_affection	social_interaction	extra_curricular	campus_safety	safety_going_home
3	1	2	3	3	2	4	3	3	3	3	4	2	3	3	3	3	3	2	3	3	3	3	4	3	4	4	4	3	3	3	3
3	1	3	2	3	3	3	2	2	3	3	4	2	2	2	4	3	2	2	2	2	2	3	3	4	3	3	3	2	2	3	3
3	1	4	3	2	2	4	2	2	3	3	3	2	1	3	3	2	3	1	3	3	3	3	3	3	3	3	3	2	1	3	3
3	1	3	1	3	3	2	2	2	3	2	3	2	2	3	3	3	3	2	3	3	2	3	3	3	3	4	4	2	1	3	2
1	1	2	1	2	2	1	1	2	2	2	2	1	1	4	4	3	2	1	2	2	2	2	3	3	3	3	3	2	2	3	3
3	3	1	2	3	1	3	2	3	2	2	3	2	2	2	4	3	3	2	4	4	2	4	3	4	3	1	2	3	4	2	2

happiness_level	vitamin_intake	enough_sleep	physical_activities	physical_appearance	sleep_quality	degree_prog_satisfaction	class_anticipation	class_participation	degree_program_performance	learnings	university_satisfaction	leisure_time	procrastination	hobbies	multimedia	pos_effect_social_med	positive_outlook	regrets	sense_of_meaning_purpose	optimism_of_future	life_control	rewarding_view_of_life	happiness_choice	family_relationship	peer_relationship	company_of_animals	love_and_affection	social_interaction	extra_curricular	campus_safety	safety_going_home
3	1	2	3	3	2	4	3	3	3	3	4	2	3	3	3	3	3	2	3	3	3	3	4	3	4	4	4	3	3	3	3
3	1	3	2	3	3	3	2	2	3	3	4	2	2	2	4	3	2	2	2	2	2	3	3	4	3	3	3	2	2	3	3
3	1	4	3	2	2	4	2	2	3	3	3	2	1	3	3	2	3	1	3	3	3	3	3	3	3	3	3	2	1	3	3
3	1	3	1	3	3	2	2	2	3	2	3	2	2	3	3	3	3	2	3	3	2	3	3	3	3	4	4	2	1	3	2
1	1	2	1	2	2	1	1	2	2	2	2	1	1	4	4	3	2	1	2	2	2	2	3	3	3	3	3	2	2	3	3
3	3	1	2	3	1	3	2	3	2	2	3	2	2	2	4	3	3	2	4	4	2	4	3	4	3	1	2	3	4	2	2