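The data were provided as an SPSS (.sav) file. Here it is read into an R data frame. SPSS value labels are not converted on import (use.value.labels = FALSE); the categorical variables are recoded into labelled factors in the sections below.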
# Read the full SPSS .sav file into a data frame called `dat`.
dat <- read.spss(
  file = paste0(data.raw_path, "Katie_18Dec20.sav"),
  use.value.labels = FALSE, # keep the raw numeric codes; factors are built explicitly below
  to.data.frame = TRUE # return a data frame rather than a list
)
# colnames(dat) # check colnames
Here I will only be using “elder” twin variables as each twin is included in the data set as an individual. Without doing twin analyses, the elder and younger variable IDs show the same information. I have transformed “atwinid” to just “id”.
# When doing non-twin analyses the atwin and btwin ids give the same
# information, so the elder-twin id is copied into a plain `id` column.
# `randomised_order` is a copy of rorderp5 under a readable name.
dat <- dat %>%
  mutate(
    id = atwinid,
    randomised_order = rorderp5
  )
# check
# colnames(dat)
Twin order, sample group, cohort, sex, zygosity, SES, and ethnicity variables below have been recoded into a factor for use in R and renamed to something familiar. Only the categorical variables need recoding to establish the levels of the variable. To see the code for this, click the “code” button on the right hand side.
Twin order
# twin order: recode the numeric torder codes into a labelled factor
dat <- dat %>%
  mutate(
    twin_order = recode_factor(
      torder,
      "1" = "Elder",
      "2" = "Younger"
    )
  )
Sample group (Low/high risk)
# sample groups: recode the numeric risks codes into a labelled factor
# (original note: this represents mothers who had their first child under
# 20 years old)
dat <- dat %>%
  mutate(
    risk = recode_factor(
      risks,
      "0" = "Low risk",
      "1" = "High risk"
    )
  )
Cohort
# cohort: recode the two-digit birth-year codes into a labelled factor
dat <- dat %>%
  mutate(
    cohort_binary = recode_factor(
      cohort,
      "94" = "Born in 1994",
      "95" = "Born in 1995"
    )
  )
Sex
# sex: recode the numeric sampsex codes into a labelled factor
dat <- dat %>%
  mutate(
    sex = recode_factor(
      sampsex,
      "1" = "Male",
      "2" = "Female"
    )
  )
Zygosity
# zygosity: recode the numeric zygosity codes into a labelled factor
dat <- dat %>%
  mutate(
    zygosity_binary = recode_factor(
      zygosity,
      "1" = "MZ",
      "2" = "DZ"
    )
  )
SES
# SES: recode the numeric seswq35 codes into a labelled factor
dat <- dat %>%
  mutate(
    SES = recode_factor(
      seswq35,
      "1" = "Low",
      # the label for 2 was missing in the original SPSS file, but 2s are
      # present in the data set
      "2" = "Middle",
      "3" = "High"
    )
  )
Ethnicity
# ethnicity: recode the numeric sethnic codes into a labelled factor
dat <- dat %>%
  mutate(
    ethnicity = recode_factor(
      sethnic,
      "1" = "White",
      "2" = "Asian",
      "3" = "Black",
      "4" = "Mixed race",
      "5" = "Other"
    )
  )
A new variable was created for SES. SES_ordered has ordered SES to show that there are order differences between the labels “low”, “middle”, and “high”.
# SES_ordered: ordered-factor copy of SES with Low < Middle < High
dat <- dat %>%
  mutate(
    SES_ordered = ordered(
      SES,
      levels = c("Low", "Middle", "High")
    )
  )
Mother not lived with biological father since birth, experience of any domestic violence, child harm, and maternal warmth were recoded as factors.
Mother not lived with biological father since birth
# mum_notlived_biodad_sincebirth_05: recode nobiodl5 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    mum_notlived_biodad_sincebirth_05 = recode_factor(
      nobiodl5,
      "0" = "No",
      "1" = "Yes"
    )
  )
Domestic violence
# any_domestic_violence_05: recode anyviom5 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    any_domestic_violence_05 = recode_factor(
      anyviom5,
      "0" = "No",
      "1" = "Yes"
    )
  )
Child harm
# child_harm_05: recode harm3em5 (0/1/2) into a three-level factor
dat <- dat %>%
  mutate(
    child_harm_05 = recode_factor(
      harm3em5,
      "0" = "No harm",
      "1" = "Possible harm",
      "2" = "Definite harm"
    )
  )
Maternal warmth
# maternal_warmth_05: recode warme5 (0-5) into a six-level factor
dat <- dat %>%
  mutate(
    maternal_warmth_05 = recode_factor(
      warme5,
      "0" = "No warmth",
      "1" = "Very little warmth",
      "2" = "Some warmth",
      "3" = "Moderate warmth",
      "4" = "Moderately high warmth",
      "5" = "High warmth"
    )
  )
Maternal depression was recoded as a factor.
# maternal_depression_lifetime_05: recode fdepmm5 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    maternal_depression_lifetime_05 = recode_factor(
      fdepmm5,
      "0" = "No",
      "1" = "Yes"
    )
  )
Depression, anxiety, ADHD, conduct disorder, alcohol dependence, alcohol abuse, cannabis dependence, PTSD lifetime, PTSD current, psychosis binary, psychosis categorical, suicide attempt, and service use were all coded as factors.
Depression
# depression_diagnosis_12mo_18: recode dxmdee18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    depression_diagnosis_12mo_18 = recode_factor(
      dxmdee18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Anxiety
# anxiety_diagnosis_18: recode dxgade18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    anxiety_diagnosis_18 = recode_factor(
      dxgade18,
      "0" = "No",
      "1" = "Yes"
    )
  )
ADHD
# ADHD_diagnosis_18: recode dxadhd5_18e (0/1) into No/Yes
dat <- dat %>%
  mutate(
    ADHD_diagnosis_18 = recode_factor(
      dxadhd5_18e,
      "0" = "No",
      "1" = "Yes"
    )
  )
Conduct disorder
# conduct_disorder_moderate_18: recode cdmode18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    conduct_disorder_moderate_18 = recode_factor(
      cdmode18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Alcohol dependence
# alcohol_dependence_18: recode dxalcdepe18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    alcohol_dependence_18 = recode_factor(
      dxalcdepe18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Alcohol abuse
# alcohol_abuse_18: recode dxalcabue18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    alcohol_abuse_18 = recode_factor(
      dxalcabue18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Cannabis dependence
# cannabis_dependence_18: recode dxmarje18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    cannabis_dependence_18 = recode_factor(
      dxmarje18,
      "0" = "No",
      "1" = "Yes"
    )
  )
PTSD lifetime
# PTSD_diagnosis_lifetime_18: recode dxptsd5lfe18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    PTSD_diagnosis_lifetime_18 = recode_factor(
      dxptsd5lfe18,
      "0" = "No",
      "1" = "Yes"
    )
  )
PTSD current
# PTSD_diagnosis_current_18: recode dxptsd5cue18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    PTSD_diagnosis_current_18 = recode_factor(
      dxptsd5cue18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Psychosis binary
# cat_psychosis_symptom_count_18: recode psysymp01e18 (0/1) into a binary
# "no symptoms" vs "one or more symptoms" factor
dat <- dat %>%
  mutate(
    cat_psychosis_symptom_count_18 = recode_factor(
      psysymp01e18,
      "0" = "No symptoms",
      "1" = "One or more symptoms"
    )
  )
Psychosis categorical
# cat_psychosis_experiences_scale_18: recode psyexpce18 (0-3) into a
# four-level symptom-count factor
dat <- dat %>%
  mutate(
    cat_psychosis_experiences_scale_18 = recode_factor(
      psyexpce18,
      "0" = "No symptoms",
      "1" = "One to two symptoms",
      "2" = "Three to five symptoms",
      "3" = "Six or more symptoms"
    )
  )
Suicide attempt or self-harm
# suicide_attempt_selfharm_18: recode sharmsuice18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    suicide_attempt_selfharm_18 = recode_factor(
      sharmsuice18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Service use
# service_use_18: recode srvusemhe18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    service_use_18 = recode_factor(
      srvusemhe18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Not in employment or education (NEET) and highest level of education were coded as factors.
NEET
# not_in_employment_education_18: recode neete18 (0/1) into No/Yes
dat <- dat %>%
  mutate(
    not_in_employment_education_18 = recode_factor(
      neete18,
      "0" = "No",
      "1" = "Yes"
    )
  )
Highest education
# highest_education_18: recode educachve18 (0-3) into a four-level factor
dat <- dat %>%
  mutate(
    highest_education_18 = recode_factor(
      educachve18,
      "0" = "No qualification",
      "1" = "Level 1 (GCSE at grades D-G)",
      "2" = "Level 2 (GCSE at grades A*-C)",
      "3" = "Level 3 (A Level)"
    )
  )
New variables were created based on edits from the original variables. These decisions were based on power and logical use.
Moderate Means and Hard Pressed were combined into “Deprived”. Wealthy Achievers, Urban Prosperity, and Comfortably Off were combined into “Relatively affluent”. A continuous version of the original variable was also created, where scores ranged from 1-5.
# acorn_recoded_05: binary version of acorn_05.
# Moderate Means and Hard Pressed are combined into "Deprived"; every other
# non-missing category (Wealthy Achievers, Urban Prosperity, Comfortably Off)
# becomes "Relatively affluent". Missing acorn_05 stays NA.
# "Relatively affluent" is listed first so that it is the reference level.
# The first/collapse/xlevels arguments previously passed to relevel() are not
# arguments of stats::relevel() and had no effect, so they are dropped.
dat <- dat %>%
  mutate(
    acorn_recoded_05 = factor(
      if_else(
        acorn_05 == "Moderate Means" | acorn_05 == "Hard Pressed",
        "Deprived",
        "Relatively affluent"
      ),
      levels = c("Relatively affluent", "Deprived")
    )
  )
table(dat$acorn_recoded_05)
Relatively affluent Deprived 1238 950
# acorn_continuous_05: numeric version of acorn_05.
# as.numeric() on a factor returns the integer level codes (1, 2, ...).
dat <- dat %>%
  mutate(
    acorn_continuous_05 = as.numeric(acorn_05)
  )
We created a binary option for SES, whereby middle and high SES were combined.
# SES_binary_releveled: Middle and High SES are collapsed into
# "Middle to high", which is then made the reference (first) level.
# fct_collapse() already returns a factor, so no as.factor() wrapper is needed.
# The first/collapse/xlevels arguments previously passed to relevel() are not
# arguments of stats::relevel() and had no effect, so they are dropped.
dat <- dat %>%
  mutate(
    SES_binary_releveled = fct_collapse(
      SES,
      "Middle to high" = c("Middle", "High"),
      "Low" = c("Low")
    ) %>%
      relevel(ref = "Middle to high")
  )
table(dat$SES_binary_releveled)
Middle to high Low 1490 742
Possible harm and Definite harm were combined to give a binary variable of “Harm” VS “No harm”.
# child_harm_recoded_05: binary version of child_harm_05.
# Possible harm and Definite harm are combined into "Harm"; every other
# non-missing value becomes "No harm". Missing child_harm_05 stays NA.
# "No harm" is listed first so that it is the reference level.
# The first/collapse/xlevels arguments previously passed to relevel() are not
# arguments of stats::relevel() and had no effect, so they are dropped.
dat <- dat %>%
  mutate(
    child_harm_recoded_05 = factor(
      if_else(
        child_harm_05 == "Possible harm" | child_harm_05 == "Definite harm",
        "Harm",
        "No harm"
      ),
      levels = c("No harm", "Harm")
    )
  )
table(dat$child_harm_recoded_05)
No harm Harm 1923 307
Maternal warmth was recoded to be continuous.
# maternal_warmth_continuous_05: numeric version of maternal_warmth_05.
# as.numeric() on a factor returns the integer level codes, which start at 1.
# NOTE(review): the result is therefore shifted relative to the 0-5 coding of
# warme5 - confirm this is intended.
dat <- dat %>%
  mutate(
    maternal_warmth_continuous_05 = as.numeric(maternal_warmth_05)
  )
GCSE grades were collapsed: No qualification and Level 1 (GCSE at grades D-G) were combined. Level 2 (GCSE at grades A*-C) and Level 3 (A Level) were combined.
# highest_education_recode_18: two-level version of highest_education_18.
# fct_collapse() already returns a factor, so no as.factor() wrapper is needed.
dat <- dat %>%
  mutate(
    highest_education_recode_18 = fct_collapse(
      highest_education_18,
      "Level 1 or below (GCSE grades D-G or below)" = c(
        "No qualification",
        "Level 1 (GCSE at grades D-G)"
      ),
      "Level 2 or 3 (GCSE grades A*-C or A-level)" = c(
        "Level 2 (GCSE at grades A*-C)",
        "Level 3 (A Level)"
      )
    )
  )
table(dat$highest_education_recode_18)
Level 1 or below (GCSE grades D-G or below) 451 Level 2 or 3 (GCSE grades A*-C or A-level) 1610
We created numeric versions of all categorical antecedent and outcome variables. This is done purely to compute the correlation matrix to calculate multicollinearity between variables.
Antecedent variables
# Numeric copies of the categorical antecedent variables.
# as.numeric() on a factor returns the integer level codes (1, 2, ...).
dat <- dat %>%
  mutate(
    SES_binary_releveled_numeric = as.numeric(SES_binary_releveled),
    child_harm_recoded_numeric_05 = as.numeric(child_harm_recoded_05),
    mum_notlived_biodad_sincebirth_numeric_05 =
      as.numeric(mum_notlived_biodad_sincebirth_05),
    any_domestic_violence_numeric_05 = as.numeric(any_domestic_violence_05),
    maternal_depression_lifetime_numeric_05 =
      as.numeric(maternal_depression_lifetime_05)
  )
Outcome variables
# Numeric copies of the categorical outcome variables.
# as.numeric() on a factor returns the integer level codes (1, 2, ...).
dat <- dat %>%
  mutate(
    depression_diagnosis_12mo_numeric_18 =
      as.numeric(depression_diagnosis_12mo_18),
    anxiety_diagnosis_numeric_18 = as.numeric(anxiety_diagnosis_18),
    ADHD_diagnosis_numeric_18 = as.numeric(ADHD_diagnosis_18),
    conduct_disorder_moderate_numeric_18 =
      as.numeric(conduct_disorder_moderate_18),
    alcohol_dependence_numeric_18 = as.numeric(alcohol_dependence_18),
    alcohol_abuse_numeric_18 = as.numeric(alcohol_abuse_18),
    cannabis_dependence_numeric_18 = as.numeric(cannabis_dependence_18),
    PTSD_diagnosis_lifetime_numeric_18 =
      as.numeric(PTSD_diagnosis_lifetime_18),
    PTSD_diagnosis_current_numeric_18 = as.numeric(PTSD_diagnosis_current_18),
    cat_psychosis_experiences_scale_numeric_18 =
      as.numeric(cat_psychosis_experiences_scale_18),
    suicide_attempt_selfharm_numeric_18 =
      as.numeric(suicide_attempt_selfharm_18),
    service_use_numeric_18 = as.numeric(service_use_18),
    not_in_employment_education_numeric_18 =
      as.numeric(not_in_employment_education_18)
  )
Make sure that any values coded as -9, -999, -8, -7 or -1 are coded as NA.
# Recode missing-value sentinel codes to NA in every numeric column.
# All codes are handled in a single pass over the numeric columns:
#   -9, -999 : missing
#   -1       : no question
#   -8, -7   : missing specific to the question
# NOTE(review): this step runs after the factor recodes above and only touches
# numeric columns. Any sentinel codes in the raw categorical columns would
# already have been carried into the derived factors as extra levels - confirm
# the raw categorical columns contain none, or move this step earlier.
dat <- dat %>%
  mutate_if(
    is.numeric,
    ~ replace(., . %in% c(-9, -999, -1, -8, -7), NA)
  )
# Keep only participants with fewer than 3 of the combined isolation
# variables missing (na.per.person.si counts the missing ones per person).
dat.3missing <- dat %>%
  filter(na.per.person.si < 3)
# check the new number of rows in the data set
# (an earlier note here said 2079; the rendered output reports 2214)
nrow(dat.3missing)
[1] 2214
Here I will export the full data set using dat (full sample). But you also have the option to export the data set excluding those with three time points missing (dat.3missing).
Not using the following variables in analysis:
This code is long, so has been hidden, to view the code, select the “code” button to the right.
# Select and rename the analysis variables from the reduced sample
# (dat.3missing), split by blocks.
# Entries with an "=" are simply renamed with no other coding needed; all
# coding is done above. Variables not used in analyses are commented out.
dat.renamed.3missing <- dat.3missing %>%
  select(
    # Demographics
    id,
    familyid,
    sex,
    random_order = rorderp5,
    zygosity_binary,
    ethnicity,
    risk,
    cohort_binary,
    twin_order,
    # Social factors
    acorn_05,
    acorn_recoded_05,
    acorn_continuous_05,
    vandalism_05 = vndngdm5, # original data frame only reported 0 = low and 6 = high
    problems_neighbours_05 = socprbm5, # original data frame only reported 0 = low and 6 = high
    SES,
    SES_ordered,
    SES_binary_releveled,
    SES_binary_releveled_numeric,
    number_children_school_05 = nchildren00e5,
    number_children_school_free_meals_05 = schmeals00e5,
    class_size_average_05 = classsize00e5,
    # Home environment
    resident_moves_05 = nmovel5,
    mum_notlived_biodad_sincebirth_05,
    mum_notlived_biodad_sincebirth_numeric_05,
    total_siblings_05 = tsibl5,
    any_domestic_violence_05,
    any_domestic_violence_numeric_05,
    child_harm_05,
    child_harm_recoded_05,
    child_harm_recoded_numeric_05,
    total_social_support_05 = tssupm5,
    total_activities_with_mum_05 = actvm5,
    maternal_warmth_05,
    maternal_warmth_continuous_05,
    prosocial_behaviours_combined_05 = totproe5,
    # Parent characteristics
    maternal_depression_lifetime_05,
    maternal_depression_lifetime_numeric_05,
    maternal_personality_openness_05 = bfiom5,
    maternal_personality_conscientiousness_05 = bficm5,
    maternal_personality_extroversion_05 = bfiem5,
    maternal_personality_agreeableness_05 = bfiam5,
    maternal_personality_neuroticism_05 = bfinm5,
    antisocial_behaviour_mum_05 = asbmm5,
    antisocial_behaviour_dad_05 = asbfm5,
    antisocial_behaviour_parent_05,
    alcoholism_mum_05 = alcmm5,
    alcoholism_dad_05 = alcfm5,
    alcoholism_parent_05,
    # Child neurodevelopment
    IQ_05 = iqe5,
    executive_function_05 = exfunce5,
    theory_of_mind_05 = tomtote5,
    # Child emotional and behavioural development
    externalising_combined_05 = totexte5,
    internalising_combined_excl_sis_05 = intisoe5,
    ADHD_combined_05 = totadde5,
    temp_negative_affect_05 = irre5,
    temp_impulsivity_05 = impe5,
    temp_approach_05 = appe5,
    temp_sluggishness_05 = slue5,
    temp_wariness_05 = ware5,
    temp_undercontrolled_05 = unce5,
    temp_inhibited_05 = inhe5,
    temp_shy_05 = shye5,
    ## Age 18
    # Mental health and service use
    depression_diagnosis_12mo_18,
    depression_diagnosis_12mo_numeric_18,
    depression_current_scale_18 = mdesxe18,
    anxiety_diagnosis_18,
    anxiety_diagnosis_numeric_18,
    anxiety_current_scale_18 = gadsxe18,
    ADHD_diagnosis_18,
    ADHD_diagnosis_numeric_18,
    inattentive_hyperactive_symptoms_total_18 = SR_symtot18e,
    conduct_disorder_moderate_18,
    conduct_disorder_moderate_numeric_18,
    conduct_disorder_symptoms_18 = cdsxe18,
    alcohol_dependence_18,
    alcohol_dependence_numeric_18,
    alcohol_abuse_18,
    alcohol_abuse_numeric_18,
    alcohol_symptom_scale_18 = alcsxe18,
    cannabis_dependence_18,
    cannabis_dependence_numeric_18,
    cannabis_symptom_scale_18 = marjsxe18,
    PTSD_diagnosis_lifetime_18,
    PTSD_diagnosis_lifetime_numeric_18,
    PTSD_diagnosis_current_18,
    PTSD_diagnosis_current_numeric_18,
    # psychosis_symptom_count_18 = psysympe18,
    # cat_psychosis_symptom_count_18,
    # psychosis_experiences_scale_18 = psyexpe18,
    # only including one psychosis variable for now
    cat_psychosis_experiences_scale_18,
    cat_psychosis_experiences_scale_numeric_18,
    suicide_attempt_selfharm_18,
    suicide_attempt_selfharm_numeric_18,
    service_use_18,
    service_use_numeric_18,
    # Physical health and health risks
    BMI_18 = bmie18,
    CRP_log_18 = lnCRP_E18_4SD,
    physical_activity_18 = phyacte18,
    smoking_current_number_18 = smkcnume18,
    # Coping and functioning
    loneliness_18 = lonelye18,
    life_satisfaction_18 = lifsate18,
    technology_use_18 = teche18,
    coping_with_stress_18 = copstrse18,
    PSQI_global_score_18 = psqie18,
    # Employment prospects
    not_in_employment_education_18,
    not_in_employment_education_numeric_18,
    highest_education_18,
    highest_education_recode_18,
    job_preparedness_skills_18 = jprepse18,
    job_preparedness_attributes_18 = jprepae18,
    optimism_18 = optime18,
    job_search_activities_count_18 = jbschacte18,
    ## Social isolation
    # only ELDER variables included here - need to add a chunk to select
    # younger variables if decide to do twin analyses.
    isolation_combined_05 = sisoe5,
    isolation_combined_07 = sisoe7,
    isolation_combined_10 = sisoe10,
    isolation_combined_12 = sisoe12,
    isolation_mother_05 = sisoem5,
    isolation_mother_07 = sisoem7,
    isolation_mother_10 = sisoem10,
    isolation_mother_12 = sisoem12,
    isolation_teacher_05 = sisoet5,
    isolation_teacher_07 = sisoet7,
    isolation_teacher_10 = sisoet10,
    isolation_teacher_12 = sisoet12,
    log_isolation_combined_05 = log_sisoe5, # log values needed for trajectory analyses
    log_isolation_combined_07 = log_sisoe7,
    log_isolation_combined_10 = log_sisoe10,
    log_isolation_combined_12 = log_sisoe12,
    isolation.categorical.5,
    isolation.categorical.7,
    isolation.categorical.10,
    isolation.categorical.12,
    na.per.person.si
  )
# check
# colnames(dat.renamed.3missing)
This code is long, so has been hidden, to view the code, select the “code” button to the right. Final column names have been printed below.
# Select and rename the analysis variables from the full sample (dat), in the
# same order as the variable list for the concept paper.
# Entries with an "=" are simply renamed with no other coding needed; all
# coding is done above.
dat.renamed.full <- dat %>%
  select(
    # Demographics
    id,
    familyid,
    sex,
    random_order = rorderp5,
    zygosity_binary,
    ethnicity,
    risk,
    cohort_binary,
    twin_order,
    # Social factors
    acorn_05,
    acorn_recoded_05,
    acorn_continuous_05,
    vandalism_05 = vndngdm5, # original data frame only reported 0 = low and 6 = high
    problems_neighbours_05 = socprbm5, # original data frame only reported 0 = low and 6 = high
    SES,
    SES_ordered,
    SES_binary_releveled,
    SES_binary_releveled_numeric,
    number_children_school_05 = nchildren00e5,
    number_children_school_free_meals_05 = schmeals00e5,
    class_size_average_05 = classsize00e5,
    # Home environment
    resident_moves_05 = nmovel5,
    mum_notlived_biodad_sincebirth_05,
    mum_notlived_biodad_sincebirth_numeric_05,
    total_siblings_05 = tsibl5,
    any_domestic_violence_05,
    any_domestic_violence_numeric_05,
    child_harm_05,
    child_harm_recoded_05,
    child_harm_recoded_numeric_05,
    total_social_support_05 = tssupm5,
    total_activities_with_mum_05 = actvm5,
    maternal_warmth_05,
    maternal_warmth_continuous_05,
    prosocial_behaviours_combined_05 = totproe5,
    # Parent characteristics
    maternal_depression_lifetime_05,
    maternal_depression_lifetime_numeric_05,
    maternal_personality_openness_05 = bfiom5,
    maternal_personality_conscientiousness_05 = bficm5,
    maternal_personality_extroversion_05 = bfiem5,
    maternal_personality_agreeableness_05 = bfiam5,
    maternal_personality_neuroticism_05 = bfinm5,
    antisocial_behaviour_mum_05 = asbmm5,
    antisocial_behaviour_dad_05 = asbfm5,
    antisocial_behaviour_parent_05,
    alcoholism_mum_05 = alcmm5,
    alcoholism_dad_05 = alcfm5,
    alcoholism_parent_05,
    # Child neurodevelopment
    IQ_05 = iqe5,
    executive_function_05 = exfunce5,
    theory_of_mind_05 = tomtote5,
    # Child emotional and behavioural development
    externalising_combined_05 = totexte5,
    internalising_combined_excl_sis_05 = intisoe5,
    ADHD_combined_05 = totadde5,
    temp_negative_affect_05 = irre5,
    temp_impulsivity_05 = impe5,
    temp_approach_05 = appe5,
    temp_sluggishness_05 = slue5,
    temp_wariness_05 = ware5,
    temp_undercontrolled_05 = unce5,
    temp_inhibited_05 = inhe5,
    temp_shy_05 = shye5,
    ## Age 18
    # Mental health and service use
    depression_diagnosis_12mo_18,
    depression_diagnosis_12mo_numeric_18,
    depression_current_scale_18 = mdesxe18,
    anxiety_diagnosis_18,
    anxiety_diagnosis_numeric_18,
    anxiety_current_scale_18 = gadsxe18,
    ADHD_diagnosis_18,
    ADHD_diagnosis_numeric_18,
    inattentive_hyperactive_symptoms_total_18 = SR_symtot18e,
    conduct_disorder_moderate_18,
    conduct_disorder_moderate_numeric_18,
    conduct_disorder_symptoms_18 = cdsxe18,
    alcohol_dependence_18,
    alcohol_dependence_numeric_18,
    alcohol_abuse_18,
    alcohol_abuse_numeric_18,
    alcohol_symptom_scale_18 = alcsxe18,
    cannabis_dependence_18,
    cannabis_dependence_numeric_18,
    cannabis_symptom_scale_18 = marjsxe18,
    PTSD_diagnosis_lifetime_18,
    PTSD_diagnosis_lifetime_numeric_18,
    PTSD_diagnosis_current_18,
    PTSD_diagnosis_current_numeric_18,
    # psychosis_symptom_count_18 = psysympe18,
    # cat_psychosis_symptom_count_18,
    # psychosis_experiences_scale_18 = psyexpe18,
    # only including one psychosis variable for now
    cat_psychosis_experiences_scale_18,
    cat_psychosis_experiences_scale_numeric_18,
    suicide_attempt_selfharm_18,
    suicide_attempt_selfharm_numeric_18,
    service_use_18,
    service_use_numeric_18,
    # Physical health and health risks
    BMI_18 = bmie18,
    CRP_log_18 = lnCRP_E18_4SD,
    physical_activity_18 = phyacte18,
    smoking_current_number_18 = smkcnume18,
    # Coping and functioning
    loneliness_18 = lonelye18,
    life_satisfaction_18 = lifsate18,
    technology_use_18 = teche18,
    coping_with_stress_18 = copstrse18,
    PSQI_global_score_18 = psqie18,
    # Employment prospects
    not_in_employment_education_18,
    not_in_employment_education_numeric_18,
    highest_education_18,
    highest_education_recode_18,
    job_preparedness_skills_18 = jprepse18,
    job_preparedness_attributes_18 = jprepae18,
    optimism_18 = optime18,
    job_search_activities_count_18 = jbschacte18,
    ## Social isolation
    # only ELDER variables included here - need to add a chunk to select
    # younger variables if decide to do twin analyses.
    isolation_combined_05 = sisoe5,
    isolation_combined_07 = sisoe7,
    isolation_combined_10 = sisoe10,
    isolation_combined_12 = sisoe12,
    isolation_mother_05 = sisoem5,
    isolation_mother_07 = sisoem7,
    isolation_mother_10 = sisoem10,
    isolation_mother_12 = sisoem12,
    isolation_teacher_05 = sisoet5,
    isolation_teacher_07 = sisoet7,
    isolation_teacher_10 = sisoet10,
    isolation_teacher_12 = sisoet12,
    log_isolation_combined_05 = log_sisoe5, # log values needed for trajectory analyses
    log_isolation_combined_07 = log_sisoe7,
    log_isolation_combined_10 = log_sisoe10,
    log_isolation_combined_12 = log_sisoe12,
    isolation.categorical.5,
    isolation.categorical.7,
    isolation.categorical.10,
    isolation.categorical.12,
    na.per.person.si
  )
# check
colnames(dat.renamed.full)
[1] “id”
[2] “familyid”
[3] “sex”
[4] “random_order”
[5] “zygosity_binary”
[6] “ethnicity”
[7] “risk”
[8] “cohort_binary”
[9] “twin_order”
[10] “acorn_05”
[11] “acorn_recoded_05”
[12] “acorn_continuous_05”
[13] “vandalism_05”
[14] “problems_neighbours_05”
[15] “SES”
[16] “SES_ordered”
[17] “SES_binary_releveled”
[18] “SES_binary_releveled_numeric”
[19] “number_children_school_05”
[20] “number_children_school_free_meals_05”
[21] “class_size_average_05”
[22] “resident_moves_05”
[23] “mum_notlived_biodad_sincebirth_05”
[24] “mum_notlived_biodad_sincebirth_numeric_05” [25] “total_siblings_05”
[26] “any_domestic_violence_05”
[27] “any_domestic_violence_numeric_05”
[28] “child_harm_05”
[29] “child_harm_recoded_05”
[30] “child_harm_recoded_numeric_05”
[31] “total_social_support_05”
[32] “total_activities_with_mum_05”
[33] “maternal_warmth_05”
[34] “maternal_warmth_continuous_05”
[35] “prosocial_behaviours_combined_05”
[36] “maternal_depression_lifetime_05”
[37] “maternal_depression_lifetime_numeric_05”
[38] “maternal_personality_openness_05”
[39] “maternal_personality_conscientiousness_05” [40] “maternal_personality_extroversion_05”
[41] “maternal_personality_agreeableness_05”
[42] “maternal_personality_neuroticism_05”
[43] “antisocial_behaviour_mum_05”
[44] “antisocial_behaviour_dad_05”
[45] “antisocial_behaviour_parent_05”
[46] “alcoholism_mum_05”
[47] “alcoholism_dad_05”
[48] “alcoholism_parent_05”
[49] “IQ_05”
[50] “executive_function_05”
[51] “theory_of_mind_05”
[52] “externalising_combined_05”
[53] “internalising_combined_excl_sis_05”
[54] “ADHD_combined_05”
[55] “temp_negative_affect_05”
[56] “temp_impulsivity_05”
[57] “temp_approach_05”
[58] “temp_sluggishness_05”
[59] “temp_wariness_05”
[60] “temp_undercontrolled_05”
[61] “temp_inhibited_05”
[62] “temp_shy_05”
[63] “depression_diagnosis_12mo_18”
[64] “depression_diagnosis_12mo_numeric_18”
[65] “depression_current_scale_18”
[66] “anxiety_diagnosis_18”
[67] “anxiety_diagnosis_numeric_18”
[68] “anxiety_current_scale_18”
[69] “ADHD_diagnosis_18”
[70] “ADHD_diagnosis_numeric_18”
[71] “inattentive_hyperactive_symptoms_total_18” [72] “conduct_disorder_moderate_18”
[73] “conduct_disorder_moderate_numeric_18”
[74] “conduct_disorder_symptoms_18”
[75] “alcohol_dependence_18”
[76] “alcohol_dependence_numeric_18”
[77] “alcohol_abuse_18”
[78] “alcohol_abuse_numeric_18”
[79] “alcohol_symptom_scale_18”
[80] “cannabis_dependence_18”
[81] “cannabis_dependence_numeric_18”
[82] “cannabis_symptom_scale_18”
[83] “PTSD_diagnosis_lifetime_18”
[84] “PTSD_diagnosis_lifetime_numeric_18”
[85] “PTSD_diagnosis_current_18”
[86] “PTSD_diagnosis_current_numeric_18”
[87] “cat_psychosis_experiences_scale_18”
[88] “cat_psychosis_experiences_scale_numeric_18” [89] “suicide_attempt_selfharm_18”
[90] “suicide_attempt_selfharm_numeric_18”
[91] “service_use_18”
[92] “service_use_numeric_18”
[93] “BMI_18”
[94] “CRP_log_18”
[95] “physical_activity_18”
[96] “smoking_current_number_18”
[97] “loneliness_18”
[98] “life_satisfaction_18”
[99] “technology_use_18”
[100] “coping_with_stress_18”
[101] “PSQI_global_score_18”
[102] “not_in_employment_education_18”
[103] “not_in_employment_education_numeric_18”
[104] “highest_education_18”
[105] “highest_education_recode_18”
[106] “job_preparedness_skills_18”
[107] “job_preparedness_attributes_18”
[108] “optimism_18”
[109] “job_search_activities_count_18”
[110] “isolation_combined_05”
[111] “isolation_combined_07”
[112] “isolation_combined_10”
[113] “isolation_combined_12”
[114] “isolation_mother_05”
[115] “isolation_mother_07”
[116] “isolation_mother_10”
[117] “isolation_mother_12”
[118] “isolation_teacher_05”
[119] “isolation_teacher_07”
[120] “isolation_teacher_10”
[121] “isolation_teacher_12”
[122] “log_isolation_combined_05”
[123] “log_isolation_combined_07”
[124] “log_isolation_combined_10”
[125] “log_isolation_combined_12”
[126] “isolation.categorical.5”
[127] “isolation.categorical.7”
[128] “isolation.categorical.10”
[129] “isolation.categorical.12”
[130] “na.per.person.si”
Have then selected the variables for Mplus datasets and R datasets for future analyses.
# select variables for rds file: the renamed data frames are exported as-is
dat_rds.3missing <- dat.renamed.3missing
dat_rds.full <- dat.renamed.full
# select variables for csv file (for Mplus), reduced sample - uses numeric
# values rather than factors
dat_csv.3missing <- dat.3missing %>%
  select(
    id,
    familyid,
    rorderp5,
    torder,
    risks,
    cohort,
    sampsex,
    zygosity,
    seswq35,
    sisoe5,
    sisoe7,
    sisoe10,
    sisoe12,
    log_5 = log_sisoe5,
    log_7 = log_sisoe7,
    log_10 = log_sisoe10,
    log_12 = log_sisoe12
  )
# select variables for csv file (for Mplus), full sample - uses numeric
# values rather than factors
dat_csv.full <- dat %>%
  select(
    id,
    familyid,
    rorderp5,
    torder,
    risks,
    cohort,
    sampsex,
    zygosity,
    seswq35,
    sisoe5,
    sisoe7,
    sisoe10,
    sisoe12,
    log_5 = log_sisoe5,
    log_7 = log_sisoe7,
    log_10 = log_sisoe10,
    log_12 = log_sisoe12
  )
# variables for categorical LCGA: the combined isolation scores plus their
# categorical versions, full sample
dat_csv.full.cat <- dat %>%
  select(
    id,
    familyid,
    rorderp5,
    torder,
    risks,
    cohort,
    sampsex,
    zygosity,
    seswq35,
    sisoe5,
    sisoe7,
    sisoe10,
    sisoe12,
    sisoe5_cat = isolation.categorical.5,
    sisoe7_cat = isolation.categorical.7,
    sisoe10_cat = isolation.categorical.10,
    sisoe12_cat = isolation.categorical.12
  )
# mother report social isolation only, full sample
dat_csv.full.mother <- dat %>%
  select(
    id,
    familyid,
    rorderp5,
    torder,
    risks,
    cohort,
    sampsex,
    zygosity,
    seswq35,
    sisoem5,
    sisoem7,
    sisoem10,
    sisoem12
  )
# teacher report social isolation only, full sample
dat_csv.full.teacher <- dat %>%
  select(
    id,
    familyid,
    rorderp5,
    torder,
    risks,
    cohort,
    sampsex,
    zygosity,
    seswq35,
    sisoet5,
    sisoet7,
    sisoet10,
    sisoet12
  )
Export data
I have hashed out the trajectory csv file - each time you run this line, you then need to manually change the NAs to be . and remove the header so that Mplus can read the file. I have been using only one export and not rerunning this line.
# Write the two R data files (reduced sample and full sample) under data_path.
saveRDS(
  dat_rds.3missing,
  file = paste0(data_path, "preprocessed_isolation_trajectories_Jan2021_3missing.rds")
)
saveRDS(
  dat_rds.full,
  file = paste0(data_path, "preprocessed_isolation_trajectories_Jan2021_full_sample.rds")
)
# The csv exports for Mplus are deliberately commented out. Each csv is saved
# in two places.
# write_csv(x = dat_csv.3missing, path = paste0(data_path, "Mplus_back_up/FOR_MPLUS_preprocessed_isolation_trajectories_Jan2021_3missing.csv"))
# write_csv(x = dat_csv.3missing, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_3missing.csv")
# write_csv(x = dat_csv.full, path = paste0(data_path, "Mplus_back_up/FOR_MPLUS_preprocessed_isolation_trajectories_Jan2021_full_sample.csv"))
# write_csv(x = dat_csv.full, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample.csv")
# categorical LCGA
# write_csv(x = dat_csv.full.cat, path = paste0(data_path, "Mplus_back_up/FOR_MPLUS_preprocessed_isolation_trajectories_Jan2021_full_sample_CAT.csv"))
# write_csv(x = dat_csv.full.cat, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample_CAT.csv")
# GMM for mother separately
# write_csv(x = dat_csv.full.mother, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample_MOTHER.csv")
# GMM for teacher separately
# write_csv(x = dat_csv.full.teacher, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample_TEACHER.csv")
# Remember to delete the headings. There will be no NAs as they have been
# removed for the isolation variables, but in future Mplus files NA needs to
# be written as a full stop (.)
Work by Katherine N Thompson
katherine.n.thompson@kcl.ac.uk
Social factors at age 5
ACORN variable was recoded into a factor.