This page displays results from the latent class growth analysis (LCGA), which we used as a preliminary analysis step before conducting growth mixture modelling (GMM). LCGA constrains the variance within each class to zero, meaning that the estimated classes have to account for more of the variance in the data - thus more classes are often identified.
We selected the four class LCGA model as the best fitting model.
#For class models, read in all output files within the folder that holds all your class models
#recursive = TRUE means output files in sub-folders are read too
si.classes.all <- readModels(paste0(mplus_LCGA_clustered_full_output_data_path),
                             recursive = TRUE)

#extract the summary variables from the mplus output files. Assuming there are multiple files in the above directory, model summaries can be retained as one data.frame as follows:
#each element of si.classes.all contributes its "summaries" entry; rbind.fill stacks them,
#filling columns that are missing from a given model with NA
si_summaries <- do.call("rbind.fill",
                        sapply(si.classes.all,
                               "[",
                               "summaries"))
#social isolation summaries: empty 5 x 8 data frame, one row per class model
#change nrow when adding more models
si.class.summaries <- data.frame(matrix(nrow = 5,
                                        ncol = 8))

#create column names for the variables you will be adding to the empty matrix of si.class.summaries
colnames(si.class.summaries) <- c("Model",
                                  "Parameters",
                                  "AIC",
                                  "BIC",
                                  "aBIC",
                                  "Entropy",
                                  "VLMR LRT p-value",
                                  "Class Proportions") #or whichever indices you want to compare; do si_summaries$ to see what's in there

#check colnames
#si.class.summaries

#create "Model" variable; the number of labels must equal nrow of the data frame above
si.class.summaries <- si.class.summaries %>%
  mutate(
    Model = as.factor(c("Two Class",
                        "Three Class",
                        "Four Class",
                        "Five Class",
                        "Six Class")))
#add summary information into data frame
#each column is copied from the matching column of the original summaries data set (si_summaries)
#NOTE(review): this assumes the rows of si_summaries are in the same order as the Model labels
#(two class first, six class last) - confirm against the order the output files were read in
si.class.summaries <- si.class.summaries %>%
  mutate(
    Parameters = si_summaries$Parameters,
    AIC = si_summaries$AIC,
    BIC = si_summaries$BIC,
    aBIC = si_summaries$aBIC,
    Entropy = si_summaries$Entropy,
    `VLMR LRT p-value` = si_summaries$T11_VLMR_PValue)

#check
# si.class.summaries
#for each class model we add in the model estimated class proportions, converted to whole-number percentages
#each table cell holds a list element containing one formatted percentage per class

#two classes
si.class.summaries$`Class Proportions`[si.class.summaries$Model == "Two Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_2traj_full_sample_clustered.out$class_counts$modelEstimated$proportion*100)))

#three classes
si.class.summaries$`Class Proportions`[si.class.summaries$Model == "Three Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_3traj_full_sample_clustered.out$class_counts$modelEstimated$proportion*100)))

#four classes
si.class.summaries$`Class Proportions`[si.class.summaries$Model == "Four Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_4traj_full_sample_clustered.out$class_counts$modelEstimated$proportion*100)))

#five classes
si.class.summaries$`Class Proportions`[si.class.summaries$Model == "Five Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_5traj_full_sample_clustered.out$class_counts$modelEstimated$proportion*100)))

#six classes
si.class.summaries$`Class Proportions`[si.class.summaries$Model == "Six Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_6traj_full_sample_clustered.out$class_counts$modelEstimated$proportion*100)))
#for each class model we add the class counts (N per class) from the posterior probabilities
#"Class N" is a new column (it is not one of the eight created above), stored as a list column

#two classes
si.class.summaries$`Class N`[si.class.summaries$Model == "Two Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_2traj_full_sample_clustered.out$class_counts$posteriorProb$count)))

#three classes
si.class.summaries$`Class N`[si.class.summaries$Model == "Three Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_3traj_full_sample_clustered.out$class_counts$posteriorProb$count)))

#four classes
si.class.summaries$`Class N`[si.class.summaries$Model == "Four Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_4traj_full_sample_clustered.out$class_counts$posteriorProb$count)))

#five classes
si.class.summaries$`Class N`[si.class.summaries$Model == "Five Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_5traj_full_sample_clustered.out$class_counts$posteriorProb$count)))

#six classes
si.class.summaries$`Class N`[si.class.summaries$Model == "Six Class"] <- list(c(sprintf("%.0f", si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_6traj_full_sample_clustered.out$class_counts$posteriorProb$count)))

#Look at summary table
kable(si.class.summaries)
Model | Parameters | AIC | BIC | aBIC | Entropy | VLMR LRT p-value | Class Proportions | Class N |
---|---|---|---|---|---|---|---|---|
Two Class | 9 | 26053.35 | 26104.75 | 26076.15 | 0.945 | 0.0008 | 10, 90 | 218 , 2014 |
Three Class | 12 | 25279.97 | 25348.50 | 25310.37 | 0.920 | 0.0608 | 15, 2 , 82 | 344 , 50 , 1838 |
Four Class | 15 | 24726.10 | 24811.76 | 24764.11 | 0.932 | 0.8002 | 81, 5 , 11, 2 | 1816, 121 , 249 , 47 |
Five Class | 18 | 24440.05 | 24542.84 | 24485.65 | 0.934 | 0.0728 | 1 , 2 , 80, 10, 7 | 26 , 36 , 1786, 231 , 154 |
Six Class | 21 | 24242.80 | 24362.72 | 24296.01 | 0.924 | 0.2323 | 3 , 1 , 5 , 2 , 76, 14 | 74 , 15 , 100 , 36 , 1699, 308 |
Reminder that LCGA constrains the variance in each class to zero.
# extract the classification probabilities for the most likely class memberships (column) by latent class (row) from the four class model.
# This shows the uncertainty rate of the probabilities. If classification was perfect, the diagonal would be 1;
# a diagonal value of e.g. 0.98 means individuals assigned to that class have an average probability of 98% of belonging to it.
class.probs <- data.frame(si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_4traj_full_sample_clustered.out$class_counts$classificationProbs.mostLikely)

# add column names
colnames(class.probs) <- c("Low stable","Decreasing","Low increasing", "High increasing")

# label the rows with the same class names so rows and columns can be matched up
class.probs$Class <- c("Low stable","Decreasing","Low increasing", "High increasing")

#as.tibble(class.probs)

# long format: one row per (row class, column class) pair, 16 rows in total
class.probs <- class.probs %>%
  tidyr::pivot_longer(-Class,
                      names_to = "class",
                      values_to = "Classification probabilities")

# keep rows 1, 6, 11 and 16 (the diagonal of the 4 x 4 matrix, where row class == column class)
# and drop column 2 (the now redundant "class" column)
class.probs <- class.probs[-c(2:5,7:10,12:15),-c(2)]

kable(class.probs)
Class | Classification probabilities |
---|---|
Low stable | 0.987 |
Decreasing | 0.860 |
Low increasing | 0.836 |
High increasing | 0.956 |
#create two class data frame with the estimated means from the Mplus gh5 plot data
two_class_si <- as.data.frame(
  si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_2traj_full_sample_clustered.out$gh5$means_and_variances_data$y_estimated_means)

#name the variables (one column per class)
names(two_class_si) <- c("Class 1","Class 2")

#create timepoint column (age in years at each of the four assessments)
two_class_si$timepoint <- c(5, 7, 10, 12)

#convert data to long format: one row per class per time point
two_class_si <- two_class_si %>%
  pivot_longer(-timepoint,
               names_to = "Class",
               values_to = "social.isolation_score")

#check
#two_class_si

#plot the estimated mean trajectory of each class
two_traj_plot_clustered_full_sample <- ggplot(two_class_si,
                                              aes(x = timepoint,
                                                  y = social.isolation_score,
                                                  colour = Class)) +
  geom_line(size = 1.5) +
  geom_point(aes(#shape = class,
                 size = 1)) +
  scale_fill_manual(values = palette2) +
  scale_color_manual(values = palette2) +
  labs(x = "Age (in years)",
       title = "Two class LCGA model of social isolation in E-risk",
       color = "Class",
       y = "Social isolation score") +
  scale_y_continuous(expand = c(0.1,0.1),
                     limits = c(0,12),
                     breaks = seq(0, 12, 2)) +
  scale_x_continuous(expand = c(0.1,0.1),
                     limits = c(5,12),
                     breaks = c(5, 7, 10, 12)) +
  theme(panel.grid.major.y = element_line(size = 0.5,
                                          linetype = 'dashed',
                                          colour = "gray"),
        axis.text = element_text(colour = "black",
                                 size = 12),
        axis.title = element_text(colour = "black",
                                  size = 12),
        panel.background = element_blank(),
        plot.title = element_text(size = 16, hjust = 0.5)) +
  #hide the shape and size legends ("none" replaces the deprecated FALSE)
  guides(shape = "none", size = "none")

two_traj_plot_clustered_full_sample
#create three class data frame with the estimated means from the Mplus gh5 plot data
three_class_si <- as.data.frame(
  si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_3traj_full_sample_clustered.out$gh5$means_and_variances_data$y_estimated_means)

#name the variables (one column per class)
names(three_class_si) <- c("Class 1","Class 2","Class 3")

#create time point column (age in years at each of the four assessments)
three_class_si$timepoint <- c(5, 7, 10, 12)

#convert data to long format: one row per class per time point
three_class_si <- three_class_si %>%
  pivot_longer(-timepoint,
               names_to = "Class",
               values_to = "social.isolation_score")

#check
#three_class_si

#plot the estimated mean trajectory of each class
three_traj_plot_clustered_full_sample <- ggplot(three_class_si,
                                                aes(x = timepoint,
                                                    y = social.isolation_score,
                                                    colour = Class)) +
  geom_line(size = 1.5) +
  geom_point(aes(size = 1)) +
  scale_fill_manual(values = palette3) +
  scale_color_manual(values = palette3) +
  labs(x = "Age (in years)",
       title = "Three class LCGA model of social isolation in E-risk",
       color = "Class",
       y = "Social isolation score") +
  scale_y_continuous(expand = c(0.1,0.1),
                     limits = c(0,12),
                     breaks = seq(0, 12, 2)) +
  scale_x_continuous(expand = c(0.1,0.1),
                     limits = c(5,12),
                     breaks = c(5, 7, 10, 12)) +
  theme(panel.grid.major.y = element_line(size = 0.5,
                                          linetype = 'dashed',
                                          colour = "gray"),
        axis.text = element_text(colour = "black",
                                 size = 12),
        axis.title = element_text(colour = "black",
                                  size = 12),
        panel.background = element_blank(),
        plot.title = element_text(size = 16, hjust = 0.5)) +
  #hide the shape and size legends ("none" replaces the deprecated FALSE)
  guides(shape = "none", size = "none")

three_traj_plot_clustered_full_sample
#create four class data frame with the estimated means from the Mplus gh5 plot data
four_class_si <- as.data.frame(
  si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_4traj_full_sample_clustered.out$gh5$means_and_variances_data$y_estimated_means)

#name the variables (one column per class)
names(four_class_si) <- c("Class 1","Class 2","Class 3","Class 4")

#create timepoint column (age in years at each of the four assessments)
four_class_si$timepoint <- c(5, 7, 10, 12)

#convert data to long format: one row per class per time point
four_class_si <- four_class_si %>%
  pivot_longer(-timepoint,
               names_to = "Class",
               values_to = "social.isolation_score")

#check
#four_class_si

#plot the estimated mean trajectory of each class
four_traj_plot_clustered_full_sample <- ggplot(four_class_si,
                                               aes(x = timepoint,
                                                   y = social.isolation_score,
                                                   colour = Class)) +
  geom_line(size = 1.5) +
  geom_point(aes(size = 1)) +
  scale_fill_manual(values = palette4) +
  scale_color_manual(values = palette4) +
  labs(x = "Age (in years)",
       title = "Four class LCGA model of social isolation in E-risk",
       color = "Class",
       y = "Social isolation score") +
  scale_y_continuous(expand = c(0.1,0.1),
                     limits = c(0,12),
                     breaks = seq(0, 12, 2)) +
  scale_x_continuous(expand = c(0.1,0.1),
                     limits = c(5,12),
                     breaks = c(5, 7, 10, 12)) +
  theme(panel.grid.major.y = element_line(size = 0.5,
                                          linetype = 'dashed',
                                          colour = "gray"),
        axis.text = element_text(colour = "black",
                                 size = 12),
        axis.title = element_text(colour = "black",
                                  size = 12),
        panel.background = element_blank(),
        plot.title = element_text(size = 16, hjust = 0.5)) +
  #hide the shape and size legends ("none" replaces the deprecated FALSE)
  guides(shape = "none", size = "none")

four_traj_plot_clustered_full_sample
#create five class data frame with the estimated means from the Mplus gh5 plot data
five_class_si <- as.data.frame(
  si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_5traj_full_sample_clustered.out$gh5$means_and_variances_data$y_estimated_means)

#name the variables (one column per class)
names(five_class_si) <- c("Class 1","Class 2","Class 3","Class 4","Class 5")

#create timepoint column (age in years at each of the four assessments)
five_class_si$timepoint <- c(5, 7, 10, 12)

#convert data to long format: one row per class per time point
five_class_si <- five_class_si %>%
  pivot_longer(-timepoint,
               names_to = "Class",
               values_to = "social.isolation_score")

#check
#five_class_si

#plot the estimated mean trajectory of each class
five_traj_plot_clustered_full_sample <- ggplot(five_class_si,
                                               aes(x = timepoint,
                                                   y = social.isolation_score,
                                                   colour = Class)) +
  geom_line(size = 1.5) +
  geom_point(aes(size = 1)) +
  scale_fill_manual(values = palette5) +
  scale_color_manual(values = palette5) +
  labs(x = "Age (in years)",
       title = "Five class LCGA model of social isolation in E-risk",
       color = "Class",
       y = "Social isolation score") +
  scale_y_continuous(expand = c(0.1,0.1),
                     limits = c(0,12),
                     breaks = seq(0, 12, 2)) +
  scale_x_continuous(expand = c(0.1,0.1),
                     limits = c(5,12),
                     breaks = c(5, 7, 10, 12)) +
  theme(panel.grid.major.y = element_line(size = 0.5,
                                          linetype = 'dashed',
                                          colour = "gray"),
        axis.text = element_text(colour = "black",
                                 size = 12),
        axis.title = element_text(colour = "black",
                                  size = 12),
        panel.background = element_blank(),
        plot.title = element_text(size = 16, hjust = 0.5)) +
  #hide the shape and size legends ("none" replaces the deprecated FALSE)
  guides(shape = "none", size = "none")

five_traj_plot_clustered_full_sample
#create six class data frame with the estimated means from the Mplus gh5 plot data
six_class_si <- as.data.frame(
  si.classes.all$X.Users.katiethompson.Documents.PhD.LISS.DTP_Louise_and_Tim.Social.isolation.trajectories_Paper.1.data_analysis.mplus.LCGA.clustered.full_sample.output..LCGA_isolation_6traj_full_sample_clustered.out$gh5$means_and_variances_data$y_estimated_means)

#name the variables (one column per class)
names(six_class_si) <- c("Class 1","Class 2","Class 3","Class 4","Class 5","Class 6")

#create timepoint column (age in years at each of the four assessments)
six_class_si$timepoint <- c(5, 7, 10, 12)

#convert data to long format: one row per class per time point
six_class_si <- six_class_si %>%
  pivot_longer(-timepoint,
               names_to = "Class",
               values_to = "social.isolation_score")

#check
#six_class_si

#plot the estimated mean trajectory of each class
six_traj_plot_clustered_full_sample <- ggplot(six_class_si,
                                              aes(x = timepoint,
                                                  y = social.isolation_score,
                                                  colour = Class)) +
  geom_line(size = 1.5) +
  geom_point(aes(size = 1)) +
  scale_fill_manual(values = palette6) +
  scale_color_manual(values = palette6) +
  labs(x = "Age (in years)",
       title = "Six class LCGA model of social isolation in E-risk",
       color = "Class",
       y = "Social isolation score") +
  scale_y_continuous(expand = c(0.1,0.1),
                     limits = c(0,12),
                     breaks = seq(0, 12, 2)) +
  scale_x_continuous(expand = c(0.1,0.1),
                     limits = c(5,12),
                     breaks = c(5, 7, 10, 12)) +
  theme(panel.grid.major.y = element_line(size = 0.5,
                                          linetype = 'dashed',
                                          colour = "gray"),
        axis.text = element_text(colour = "black",
                                 size = 12),
        axis.title = element_text(colour = "black",
                                  size = 12),
        panel.background = element_blank(),
        plot.title = element_text(size = 16, hjust = 0.5)) +
  #hide the shape and size legends ("none" replaces the deprecated FALSE)
  guides(shape = "none", size = "none")

six_traj_plot_clustered_full_sample
#combine all plots into one figure (2 columns x 3 rows) with a shared legend position
library(ggpubr) # Needed for combining plots
library(gridExtra)

#NOTE(review): widths/heights each list four values for a 2 x 3 grid and labels lists six
#letters for five plots - values kept as in the original; confirm the extras are intended
combined_trajectories_plot_clustered_full_sample <- ggarrange(
  two_traj_plot_clustered_full_sample,
  three_traj_plot_clustered_full_sample,
  four_traj_plot_clustered_full_sample,
  five_traj_plot_clustered_full_sample,
  six_traj_plot_clustered_full_sample,
  ncol = 2, nrow = 3,
  widths = c(2.7,2.7,2.7,2.7),
  heights = c(3,3,3,3),
  labels = c("A", "B", "C", "D", "E", "F"),
  legend = "bottom")

#uncomment to save the combined figure to disk
# ggsave(
#   "LCGA_combined_trajectories_plot_clustered_full_sample.jpeg",
#   plot = combined_trajectories_plot_clustered_full_sample,
#   device = "jpeg",
#   path = paste0(graph_save_data_path, "LCGA/"),
#   width = 9,
#   height = 10
# )

combined_trajectories_plot_clustered_full_sample
Work by Katherine N Thompson
katherine.n.thompson@kcl.ac.uk