#' ---
#' title: "Health Care Utilization: lavaan code"
#' author: "Michael Friendly"
#' date: "20 May 2019"
#' ---

#+ include=FALSE
knitr::opts_chunk$set(message=FALSE, warning=FALSE, width=90)

#' This example reproduces some of the analyses in the lecture notes for the SEM model of
#' health care utilization.
#' 
#' ## Load packages
library(lavaan)      # model fitting
library(semPlot)     # path diagrams
library(dplyr)       # data manipulation

#' ## Read the data
healthdat <- read.table("http://euclid.psych.yorku.ca/datavis/courses/factor/data/healthutil.txt")
names(healthdat)


#' ## Specify the model
#' `=~` specifies measurement equations for the latent variables; `~` for regressions; `~~` for correlations or covariances
#' 
health.mod1 <- '
# latent variables
	Self =~ esteem + marriage + control
	Ill  =~ physical + mental
	Util =~ druguse + drvisits
# regressions
	Ill  ~ age + stress + Self
	Util ~ age + stress + Ill
# correlations (phi)
	Self ~~ age + stress
	age  ~~ stress
'

#' ## fit this model using `lavaan::sem``
health.sem1 <- lavaan::sem(health.mod1, 
                           data=healthdat, 
                           estimator="ML", 
                           fixed.x=FALSE)

#' Quick look at fit measures.  This model doesn't fit well by any criteria
fitMeasures(health.sem1, c("chisq", "df", "pvalue", "cfi", "rmsea"))

#' For more detailed output, use `summary()`
summary(health.sem1, standardized=TRUE, fit.measures=TRUE)

#' ### Examine residual correlations. 
#' Look for the largest ones in absolute value
resid(health.sem1, type='cor')$cov

#' ### check modification indices

#' Just look for the largest ones and only for residual correlations
modindices(health.sem1, minimum.value=20, op="~~")

#' ## Draw path diagrams
#' `semPlot::semPaths` draws reasonable path diagrams, with many, many options.
#' I always have to play around with these to get something relatively pleasing.
#' 
semPaths(health.sem1, 
         what="std",
         style="lisrel", 
         color=list(lat="pink", man="lightblue"), 
         nCharNodes=8, 
         sizeMan=8, sizeLat=10)

semPaths(health.sem1, 
         what="std",
         color=list(lat="pink", man="lightblue"), 
         nCharNodes=0, 
         sizeMan=10, sizeLat=8, 
         edge.label.cex=1,
         rotation=2, 
         residuals=FALSE, 
         layout="tree2", label.norm="OOOOO")


#' ## New model: Add covariance between physical and drvisits
health.mod2 <- '
# latent variables
	Self =~ esteem + marriage + control
	Ill  =~ physical + mental
	Util =~ druguse + drvisits
# regressions
	Ill  ~ age + stress + Self
	Util ~ age + stress + Ill
	Self ~~ age + stress
# correlations (phi)
	age ~~ stress
	physical ~~ drvisits
'

# or, more simply: just add this covariance to Model 1
health.mod2 <- paste(health.mod1, 
  'physical ~~ drvisits
')
cat(health.mod2)


#' ## Fit model 2
health.sem2 <- lavaan::sem(health.mod2, 
                           data=healthdat, 
                           estimator="ML", 
                           fixed.x=FALSE)

#' With one more parameter, this model fits better, even if there
#' is still a lack of fit measured by the $\chi^2$ test
#' 
fitMeasures(health.sem2, c("chisq", "df", "pvalue", "cfi", "rmsea"))
summary(health.sem2, standardized=TRUE, fit.measures=FALSE)

#' Print the standardized solution, but just for the latent variables and regressions
standardizedSolution(health.sem2) %>% 
  filter(op != "~~") %>% 
  print(digits=3)

#' ## Compare models
#' These models are nested, so the $\chi^2$ test for their difference indicates whether model 2
#' is a significant improvement.
anova(health.sem1, health.sem2)