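The data were provided as an SPSS (.sav) file. Here, I read it into an R data frame, keeping the numeric SPSS codes on import (use.value.labels = FALSE); the categorical variables are converted to labelled factors in the sections below.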

# Read the full SPSS .sav file into a data frame.
# use.value.labels = FALSE keeps the numeric SPSS codes instead of turning the
# value labels into factors; the categorical variables are labelled explicitly
# in the sections below.
dat <- read.spss(file = paste0(data.raw_path, "Katie_18Dec20.sav"),
                 use.value.labels = FALSE, # keep numeric codes, no automatic factors
                 to.data.frame = TRUE) # return a data frame

# Convert the numeric missing-data codes to NA straight after import so that
# every variable derived further down (parent totals, log-transformed and
# categorical isolation scores) is computed from NA rather than from a
# sentinel value such as -9. The same recode is repeated in the
# "Missing values" section, where it then acts as a safety check.
# Assumes dplyr is attached before this chunk, as for the rest of the file.
dat <- dat %>%
  mutate_if(is.numeric, ~na_if(., -9)) %>% # missing
  mutate_if(is.numeric, ~na_if(., -999)) %>% # missing
  mutate_if(is.numeric, ~na_if(., -1)) %>% # no question
  mutate_if(is.numeric, ~na_if(., -8)) %>% # missing specific to the question
  mutate_if(is.numeric, ~na_if(., -7)) # missing specific to the question

#colnames(dat) # check colnames

Formatting existing variables

ID

Here I will only be using “elder” twin variables as each twin is included in the data set as an individual. Without doing twin analyses, the elder and younger variable IDs show the same information. I have transformed “atwinid” to just “id”.

# Add two renamed copies of raw columns in one step:
#   id               <- atwinid
#   randomised_order <- rorderp5
# Author's note: for non-twin analyses the atwin and btwin ids carry the same
# information, so only the "a" twin id is kept.
dat <- mutate(
  dat,
  id = atwinid,
  randomised_order = rorderp5
)
# check
# colnames(dat)

Demographic variables

Twin order, sample group, cohort, sex, zygosity, SES, and ethnicity variables below have been recoded into a factor for use in R and renamed to something familiar. Only the categorical variables need recoding to establish the levels of the variable. To see the code for this, click the “code” button on the right hand side.

Twin order

# twin_order: labelled factor built from torder (1 = Elder, 2 = Younger).
dat <- mutate(
  dat,
  twin_order = recode_factor(torder, `1` = "Elder", `2` = "Younger")
)

Sample group (Low/high risk)

# risk: labelled factor built from risks (0 = Low risk, 1 = High risk).
# Author's note: this represents mothers who had their first child under
# 20 years old.
dat <- mutate(
  dat,
  risk = recode_factor(risks, `0` = "Low risk", `1` = "High risk")
)

Cohort

# cohort_binary: labelled factor built from cohort (94 / 95 = birth year).
dat <- mutate(
  dat,
  cohort_binary = recode_factor(
    cohort,
    `94` = "Born in 1994",
    `95` = "Born in 1995"
  )
)

Sex

# sex: labelled factor built from sampsex (1 = Male, 2 = Female).
dat <- mutate(
  dat,
  sex = recode_factor(sampsex, `1` = "Male", `2` = "Female")
)

Zygosity

# zygosity_binary: labelled factor built from zygosity (1 = MZ, 2 = DZ).
dat <- mutate(
  dat,
  zygosity_binary = recode_factor(zygosity, `1` = "MZ", `2` = "DZ")
)

SES

# SES: labelled factor built from seswq35 (1 = Low, 2 = Middle, 3 = High).
# Author's note: the label for code 2 was missing in the original SPSS file,
# but the data set still contains 2s, so "Middle" is supplied here.
dat <- mutate(
  dat,
  SES = recode_factor(
    seswq35,
    `1` = "Low",
    `2` = "Middle",
    `3` = "High"
  )
)

Ethnicity

# ethnicity: labelled factor built from sethnic (codes 1-5).
dat <- mutate(
  dat,
  ethnicity = recode_factor(
    sethnic,
    `1` = "White",
    `2` = "Asian",
    `3` = "Black",
    `4` = "Mixed race",
    `5` = "Other"
  )
)

A new variable was created for SES. SES_ordered has ordered SES to show that there are order differences between the labels “low”, “middle”, and “high”.

# SES_ordered: the SES factor re-declared as an ordered factor with
# Low < Middle < High.
dat <- mutate(
  dat,
  SES_ordered = factor(SES, levels = c("Low", "Middle", "High"), ordered = TRUE)
)

Social factors at age 5

ACORN variable was recoded into a factor.

# acorn_05: labelled factor built from P5CACORNCategory (codes 1-5), with the
# levels in the order listed.
dat <- mutate(
  dat,
  acorn_05 = recode_factor(
    P5CACORNCategory,
    `1` = "Wealthy Achievers",
    `2` = "Urban Prosperity",
    `3` = "Comfortably Off",
    `4` = "Moderate Means",
    `5` = "Hard Pressed"
  )
)

Home environment at age 5

Mother not lived with biological father since birth, experience of any domestic violence, child harm, and maternal warmth were recoded as factors.

Mother not lived with biological father since birth

# mum_notlived_biodad_sincebirth_05: No/Yes factor built from nobiodl5 (0/1).
dat <- mutate(
  dat,
  mum_notlived_biodad_sincebirth_05 =
    recode_factor(nobiodl5, `0` = "No", `1` = "Yes")
)

Domestic violence

# any_domestic_violence_05: No/Yes factor built from anyviom5 (0/1).
dat <- mutate(
  dat,
  any_domestic_violence_05 = recode_factor(anyviom5, `0` = "No", `1` = "Yes")
)

Child harm

# child_harm_05: three-level factor built from harm3em5 (0/1/2).
dat <- mutate(
  dat,
  child_harm_05 = recode_factor(
    harm3em5,
    `0` = "No harm",
    `1` = "Possible harm",
    `2` = "Definite harm"
  )
)

Maternal warmth

# maternal_warmth_05: six-level factor built from warme5 (codes 0-5), with the
# levels running from "No warmth" up to "High warmth".
dat <- mutate(
  dat,
  maternal_warmth_05 = recode_factor(
    warme5,
    `0` = "No warmth",
    `1` = "Very little warmth",
    `2` = "Some warmth",
    `3` = "Moderate warmth",
    `4` = "Moderately high warmth",
    `5` = "High warmth"
  )
)

Parent characteristics at age 5

Maternal depression was recoded as a factor.

# maternal_depression_lifetime_05: No/Yes factor built from fdepmm5 (0/1).
dat <- mutate(
  dat,
  maternal_depression_lifetime_05 =
    recode_factor(fdepmm5, `0` = "No", `1` = "Yes")
)

Mental health and service use at age 18

Depression, anxiety, ADHD, conduct disorder, alcohol dependence, alcohol abuse, cannabis dependence, PTSD lifetime, PTSD current, psychosis binary, psychosis categorical, suicide attempt, and service use were all coded as factors.

Depression

# depression_diagnosis_12mo_18: No/Yes factor built from dxmdee18 (0/1).
dat <- mutate(
  dat,
  depression_diagnosis_12mo_18 = recode_factor(dxmdee18, `0` = "No", `1` = "Yes")
)

Anxiety

# anxiety_diagnosis_18: No/Yes factor built from dxgade18 (0/1).
dat <- mutate(
  dat,
  anxiety_diagnosis_18 = recode_factor(dxgade18, `0` = "No", `1` = "Yes")
)

ADHD

# ADHD_diagnosis_18: No/Yes factor built from dxadhd5_18e (0/1).
dat <- mutate(
  dat,
  ADHD_diagnosis_18 = recode_factor(dxadhd5_18e, `0` = "No", `1` = "Yes")
)

Conduct disorder

# conduct_disorder_moderate_18: No/Yes factor built from cdmode18 (0/1).
dat <- mutate(
  dat,
  conduct_disorder_moderate_18 = recode_factor(cdmode18, `0` = "No", `1` = "Yes")
)

Alcohol dependence

# alcohol_dependence_18: No/Yes factor built from dxalcdepe18 (0/1).
dat <- mutate(
  dat,
  alcohol_dependence_18 = recode_factor(dxalcdepe18, `0` = "No", `1` = "Yes")
)

Alcohol abuse

# alcohol_abuse_18: No/Yes factor built from dxalcabue18 (0/1).
dat <- mutate(
  dat,
  alcohol_abuse_18 = recode_factor(dxalcabue18, `0` = "No", `1` = "Yes")
)

Cannabis dependence

# cannabis_dependence_18: No/Yes factor built from dxmarje18 (0/1).
dat <- mutate(
  dat,
  cannabis_dependence_18 = recode_factor(dxmarje18, `0` = "No", `1` = "Yes")
)

PTSD lifetime

# PTSD_diagnosis_lifetime_18: No/Yes factor built from dxptsd5lfe18 (0/1).
dat <- mutate(
  dat,
  PTSD_diagnosis_lifetime_18 =
    recode_factor(dxptsd5lfe18, `0` = "No", `1` = "Yes")
)

PTSD current

# PTSD_diagnosis_current_18: No/Yes factor built from dxptsd5cue18 (0/1).
dat <- mutate(
  dat,
  PTSD_diagnosis_current_18 =
    recode_factor(dxptsd5cue18, `0` = "No", `1` = "Yes")
)

Psychosis binary

# cat_psychosis_symptom_count_18: two-level factor built from psysymp01e18
# (0 = No symptoms, 1 = One or more symptoms).
dat <- mutate(
  dat,
  cat_psychosis_symptom_count_18 = recode_factor(
    psysymp01e18,
    `0` = "No symptoms",
    `1` = "One or more symptoms"
  )
)

Psychosis categorical

# cat_psychosis_experiences_scale_18: four-level factor built from psyexpce18
# (codes 0-3), with the levels in increasing symptom-count order.
dat <- mutate(
  dat,
  cat_psychosis_experiences_scale_18 = recode_factor(
    psyexpce18,
    `0` = "No symptoms",
    `1` = "One to two symptoms",
    `2` = "Three to five symptoms",
    `3` = "Six or more symptoms"
  )
)

Suicide attempt or self-harm

# suicide_attempt_selfharm_18: No/Yes factor built from sharmsuice18 (0/1).
dat <- mutate(
  dat,
  suicide_attempt_selfharm_18 =
    recode_factor(sharmsuice18, `0` = "No", `1` = "Yes")
)

Service use

# service_use_18: No/Yes factor built from srvusemhe18 (0/1).
dat <- mutate(
  dat,
  service_use_18 = recode_factor(srvusemhe18, `0` = "No", `1` = "Yes")
)

Employment prospects

Not in employment or education (NEET) and highest level of education were coded as factors.

NEET

# not_in_employment_education_18 (NEET): No/Yes factor built from neete18 (0/1).
dat <- mutate(
  dat,
  not_in_employment_education_18 = recode_factor(neete18, `0` = "No", `1` = "Yes")
)

Highest education

# highest_education_18: four-level factor built from educachve18 (codes 0-3),
# with the levels running from "No qualification" up to "Level 3 (A Level)".
dat <- mutate(
  dat,
  highest_education_18 = recode_factor(
    educachve18,
    `0` = "No qualification",
    `1` = "Level 1 (GCSE at grades D-G)",
    `2` = "Level 2 (GCSE at grades A*-C)",
    `3` = "Level 3 (A Level)"
  )
)

New variables recoded based on analysis decisions

New variables were created based on edits to the original variables. These decisions were based on power and logical use.

ACORN

Moderate Means and Hard Pressed were combined into “Deprived”. Wealthy Achievers, Urban Prosperity, and Comfortably Off were combined into “Relatively affluent”. A continuous version of the original variable was also created, where scores range from 1 to 5.

# acorn_recoded_05: two-level version of acorn_05.
#   "Deprived"            = Moderate Means or Hard Pressed
#   "Relatively affluent" = any other non-missing category
# The == comparisons give NA when acorn_05 is NA, so missing values stay
# missing instead of falling into "Relatively affluent".
# NOTE(review): first, collapse and xlevels are not named arguments of
# stats::relevel(x, ref, ...); they look like Epi::Relevel arguments. Confirm
# which relevel is in scope - with stats::relevel they are absorbed by `...`.
dat <- mutate(
  dat,
  acorn_recoded_05 = relevel(
    recode_factor(
      if_else(acorn_05 == "Moderate Means" | acorn_05 == "Hard Pressed", 1, 0),
      "1" = "Deprived",
      "0" = "Relatively affluent"
    ),
    ref = "Relatively affluent", # make this the reference (first) level
    first = TRUE, # levels in ref come first
    collapse = "+", # string used when constructing names for combined levels
    xlevels = TRUE # levels maintained even if not actually occurring
  )
)
# Print the counts per level as a check.
table(dat$acorn_recoded_05)

Relatively affluent Deprived 1238 950

# acorn_continuous_05: numeric version of the acorn_05 factor. as.numeric() on
# a factor returns the position of each value's level, so the result follows
# the level order of acorn_05.
dat <- mutate(dat, acorn_continuous_05 = as.numeric(acorn_05))

SES

We created a binary option for SES, whereby middle and high SES were combined.

# SES_binary_releveled: two-level version of SES in which "Middle" and "High"
# are merged into "Middle to high" and "Low" is kept; "Middle to high" is then
# made the reference (first) level.
# NOTE(review): first, collapse and xlevels are not named arguments of
# stats::relevel(x, ref, ...); they look like Epi::Relevel arguments. Confirm
# which relevel is in scope - with stats::relevel they are absorbed by `...`.
dat <- mutate(
  dat,
  SES_binary_releveled = relevel(
    as.factor(
      fct_collapse(
        SES,
        "Middle to high" = c("Middle", "High"),
        "Low" = c("Low")
      )
    ),
    ref = "Middle to high",
    first = TRUE, # levels in ref come first
    collapse = "+", # string used when constructing names for combined levels
    xlevels = TRUE # levels maintained even if not actually occurring
  )
)
# Print the counts per level as a check.
table(dat$SES_binary_releveled)

Middle to high Low 1490 742

Child harm

Possible harm and Definite harm were combined to give a binary variable of “Harm” VS “No harm”.

# child_harm_recoded_05: two-level version of child_harm_05.
#   "Harm"    = Possible harm or Definite harm
#   "No harm" = any other non-missing value
# The == comparisons give NA when child_harm_05 is NA, so missing values stay
# missing. "No harm" is made the reference (first) level.
# NOTE(review): first, collapse and xlevels are not named arguments of
# stats::relevel(x, ref, ...); they look like Epi::Relevel arguments. Confirm
# which relevel is in scope - with stats::relevel they are absorbed by `...`.
dat <- mutate(
  dat,
  child_harm_recoded_05 = relevel(
    recode_factor(
      if_else(
        child_harm_05 == "Possible harm" | child_harm_05 == "Definite harm",
        1,
        0
      ),
      "1" = "Harm",
      "0" = "No harm"
    ),
    ref = "No harm",
    first = TRUE, # levels in ref come first
    collapse = "+", # string used when constructing names for combined levels
    xlevels = TRUE
  )
)
# Print the counts per level as a check.
table(dat$child_harm_recoded_05)

No harm Harm 1923 307

Maternal warmth

Maternal warmth was recoded to be continuous.

# maternal_warmth_continuous_05: numeric version of the maternal_warmth_05
# factor. as.numeric() on a factor returns level positions, so a six-level
# factor yields 1-6.
# NOTE(review): where maternal_warmth_05 is built from codes 0-5, these values
# are one higher than the source codes - confirm that offset is acceptable.
dat <- mutate(dat, maternal_warmth_continuous_05 = as.numeric(maternal_warmth_05))

Highest education

GCSE grades were collapsed: No qualification and Level 1 (GCSE at grades D-G) were combined. Level 2 (GCSE at grades A*-C) and Level 3 (A Level) were combined.

# highest_education_recode_18: two-level version of highest_education_18.
# The two lower levels are merged into "Level 1 or below ..." and the two
# upper levels into "Level 2 or 3 ...".
dat <- mutate(
  dat,
  highest_education_recode_18 = as.factor(
    fct_collapse(
      highest_education_18,
      "Level 1 or below (GCSE grades D-G or below)" =
        c("No qualification", "Level 1 (GCSE at grades D-G)"),
      "Level 2 or 3 (GCSE grades A*-C or A-level)" =
        c("Level 2 (GCSE at grades A*-C)", "Level 3 (A Level)")
    )
  )
)
# Print the counts per level as a check.
table(dat$highest_education_recode_18)

Level 1 or below (GCSE grades D-G or below) 451 Level 2 or 3 (GCSE grades A*-C or A-level) 1610

Parental antisocial behaviour and parental alcoholism

We combined maternal and paternal reports to form parental report for antisocial behaviour and alcoholism.

# Parent-level totals: add the mum and dad scores for antisocial behaviour
# (asbmm5 + asbfm5) and for alcoholism (alcmm5 + alcfm5). Because `+`
# propagates NA, a total is NA whenever either parent's score is NA.
dat <- mutate(
  dat,
  antisocial_behaviour_parent_05 = asbmm5 + asbfm5, # mum plus dad scores
  alcoholism_parent_05 = alcmm5 + alcfm5 # mum plus dad scores
)

Numeric versions for categorical variables

We created numeric versions of all categorical antecedent and outcome variables. This is done purely to compute the correlation matrix to calculate multicollinearity between variables.

Antecedent variables

# Numeric copies of the categorical antecedent variables, used for the
# correlation matrix. as.numeric() on a factor returns level positions, so a
# two-level factor becomes 1/2 in its level order.
dat <- mutate(
  dat,
  SES_binary_releveled_numeric = as.numeric(SES_binary_releveled),
  child_harm_recoded_numeric_05 = as.numeric(child_harm_recoded_05),
  mum_notlived_biodad_sincebirth_numeric_05 =
    as.numeric(mum_notlived_biodad_sincebirth_05),
  any_domestic_violence_numeric_05 = as.numeric(any_domestic_violence_05),
  maternal_depression_lifetime_numeric_05 =
    as.numeric(maternal_depression_lifetime_05)
)

Outcome variables

# Numeric copies of the categorical outcome variables, used for the
# correlation matrix. as.numeric() on a factor returns level positions, so a
# No/Yes factor becomes 1/2 in its level order.
dat <- mutate(
  dat,
  depression_diagnosis_12mo_numeric_18 = as.numeric(depression_diagnosis_12mo_18),
  anxiety_diagnosis_numeric_18 = as.numeric(anxiety_diagnosis_18),
  ADHD_diagnosis_numeric_18 = as.numeric(ADHD_diagnosis_18),
  conduct_disorder_moderate_numeric_18 = as.numeric(conduct_disorder_moderate_18),
  alcohol_dependence_numeric_18 = as.numeric(alcohol_dependence_18),
  alcohol_abuse_numeric_18 = as.numeric(alcohol_abuse_18),
  cannabis_dependence_numeric_18 = as.numeric(cannabis_dependence_18),
  PTSD_diagnosis_lifetime_numeric_18 = as.numeric(PTSD_diagnosis_lifetime_18),
  PTSD_diagnosis_current_numeric_18 = as.numeric(PTSD_diagnosis_current_18),
  cat_psychosis_experiences_scale_numeric_18 =
    as.numeric(cat_psychosis_experiences_scale_18),
  suicide_attempt_selfharm_numeric_18 = as.numeric(suicide_attempt_selfharm_18),
  service_use_numeric_18 = as.numeric(service_use_18),
  not_in_employment_education_numeric_18 =
    as.numeric(not_in_employment_education_18)
)

Social isolation

We needed to create several forms of the social isolation variables.

Log transformed social isolation

#colnames(dat)
# Log-transformed combined social isolation at ages 5, 7, 10 and 12.
# log1p(x) computes log(1 + x), so a score of 0 maps to 0.
dat <- mutate(
  dat,
  log_sisoe5 = log1p(sisoe5),
  log_sisoe7 = log1p(sisoe7),
  log_sisoe10 = log1p(sisoe10),
  log_sisoe12 = log1p(sisoe12)
)

#check
#colnames(dat)

Categorical social isolation

Using the same categorical group cut-offs as previously used in E-risk:

  • social isolation <= 1 is coded 0
  • social isolation > 1 and <= 2 is coded 1
  • social isolation > 2 is coded 2

# Three-category social isolation at ages 5, 7, 10 and 12:
#   score <= 1      -> 0
#   1 < score <= 2  -> 1
#   score > 2       -> 2
# case_when() takes the first matching condition, so the second branch only
# sees scores above 1. A missing score matches no branch and stays NA.
dat <- mutate(
  dat,
  isolation.categorical.5 = case_when(
    sisoe5 <= 1 ~ 0,
    sisoe5 <= 2 ~ 1,
    sisoe5 > 2 ~ 2
  ),
  isolation.categorical.7 = case_when(
    sisoe7 <= 1 ~ 0,
    sisoe7 <= 2 ~ 1,
    sisoe7 > 2 ~ 2
  ),
  isolation.categorical.10 = case_when(
    sisoe10 <= 1 ~ 0,
    sisoe10 <= 2 ~ 1,
    sisoe10 > 2 ~ 2
  ),
  isolation.categorical.12 = case_when(
    sisoe12 <= 1 ~ 0,
    sisoe12 <= 2 ~ 1,
    sisoe12 > 2 ~ 2
  )
)

# check
# table(dat$isolation.categorical.5)
# table(dat$isolation.categorical.7)
# table(dat$isolation.categorical.10)
# table(dat$isolation.categorical.12)

Missing values

Check coding

Make sure that any values coded as -9, -999, -8, -7 or -1 are coded as NA.

# Replace the missing-data codes with NA in every numeric column, in one pass.
#   -9, -999 : missing
#   -1       : no question
#   -8, -7   : missing specific to the question
# Values that are already NA stay NA (%in% returns FALSE for them).
# NOTE(review): this step runs after the derived variables above have been
# computed; any code still present at that point would already have fed into
# them - confirm the codes are removed before derivation.
dat <- mutate_if(
  dat,
  is.numeric,
  ~ replace(., . %in% c(-9, -999, -1, -8, -7), NA)
)

Create missingness for social isolation variables per person

# na.per.person.si: for each row, the number of NA values across the columns
# whose names appear in combined_social_isolation.
# NOTE(review): combined_social_isolation is not defined in this section;
# presumably a character vector of the combined isolation column names set up
# earlier - confirm.
dat <- mutate(
  dat,
  na.per.person.si = rowSums(
    is.na(dat[, colnames(dat) %in% combined_social_isolation])
  )
)
# Print how many people have 0, 1, 2, ... missing isolation values.
table(dat$na.per.person.si)

0 1 2 3 2079 89 46 18

Create data set excluding those with 3 isolation data points missing

# Keep only people with fewer than 3 of the combined isolation variables
# missing.
dat.3missing <- filter(dat, na.per.person.si < 3)

# Check the number of rows left in the data set: 2214 in the printed output
# (2079 + 89 + 46 from the missingness table).
nrow(dat.3missing)

[1] 2214

Select all variables to be used in analyses

Here I will export the full data set using dat (full sample). But you also have the option to export the data set excluding those with three time points missing (dat.3missing).

Not using the following variables in analysis:

  • acorn_05,
  • child_harm_05,
  • maternal_warmth_05,
  • highest_education_18,
  • psychosis_symptom_count_18,
  • cat_psychosis_symptom_count_18,
  • psychosis_experiences_scale_18,
  • isolation_mother_05,
  • isolation_mother_07,
  • isolation_mother_10,
  • isolation_mother_12,
  • isolation_teacher_05,
  • isolation_teacher_07,
  • isolation_teacher_10,
  • isolation_teacher_12

Data with those missing 3 time points excluded

This code is long, so has been hidden, to view the code, select the “code” button to the right.

# Build the analysis data set for the sample that excludes people with three
# isolation time points missing (dat.3missing).
# Columns are listed in blocks; the order here is the column order of the result.
# Entries of the form new_name = old_name are plain renames of raw columns;
# entries without "=" were created earlier in this file and are kept as they are.
# Lines starting with # inside the select are variables deliberately left out.
# NOTE(review): the text lists acorn_05, child_harm_05, maternal_warmth_05,
# highest_education_18 and the mother/teacher isolation reports as not used in
# analysis, yet they are still selected below - confirm this is intended.

dat.renamed.3missing <- dat.3missing %>%
  select(
    # Demographics
    id,
    familyid,
    sex,
    random_order = rorderp5,
    zygosity_binary,
    ethnicity,
    risk,
    cohort_binary,
    twin_order,
    # Social factors
    acorn_05,
    acorn_recoded_05,
    acorn_continuous_05,
    vandalism_05 = vndngdm5, # original data frame only reported 0 = low and 6 = high
    problems_neighbours_05 = socprbm5, # original data frame only reported 0 = low and 6 = high
    SES,
    SES_ordered,
    SES_binary_releveled,
    SES_binary_releveled_numeric,
    number_children_school_05 = nchildren00e5,
    number_children_school_free_meals_05 = schmeals00e5,
    class_size_average_05 = classsize00e5,
    # Home environment
    resident_moves_05 = nmovel5,
    mum_notlived_biodad_sincebirth_05,
    mum_notlived_biodad_sincebirth_numeric_05,
    total_siblings_05 = tsibl5,
    any_domestic_violence_05,
    any_domestic_violence_numeric_05,
    child_harm_05,
    child_harm_recoded_05,
    child_harm_recoded_numeric_05,
    total_social_support_05 = tssupm5,
    total_activities_with_mum_05 = actvm5,
    maternal_warmth_05,
    maternal_warmth_continuous_05,
    prosocial_behaviours_combined_05 = totproe5,
    # Parent characteristics
    maternal_depression_lifetime_05,
    maternal_depression_lifetime_numeric_05,
    maternal_personality_openness_05 = bfiom5,
    maternal_personality_conscientiousness_05 = bficm5,
    maternal_personality_extroversion_05 = bfiem5,
    maternal_personality_agreeableness_05 = bfiam5,
    maternal_personality_neuroticism_05 = bfinm5,
    antisocial_behaviour_mum_05 = asbmm5,
    antisocial_behaviour_dad_05 = asbfm5,
    antisocial_behaviour_parent_05,
    alcoholism_mum_05 = alcmm5,
    alcoholism_dad_05 = alcfm5,
    alcoholism_parent_05,
    # Child neurodevelopment
    IQ_05 = iqe5,
    executive_function_05 = exfunce5,
    theory_of_mind_05 = tomtote5,
    # Child emotional and behavioural development
    externalising_combined_05 = totexte5,
    internalising_combined_excl_sis_05 = intisoe5,
    ADHD_combined_05 = totadde5,
    temp_negative_affect_05 = irre5,
    temp_impulsivity_05 = impe5,
    temp_approach_05 = appe5,
    temp_sluggishness_05 = slue5,
    temp_wariness_05 = ware5,
    temp_undercontrolled_05 = unce5,
    temp_inhibited_05 = inhe5,
    temp_shy_05 = shye5,
    ## Age 18
    # Mental health and service use
    depression_diagnosis_12mo_18,
    depression_diagnosis_12mo_numeric_18,
    depression_current_scale_18 = mdesxe18, 
    anxiety_diagnosis_18,
    anxiety_diagnosis_numeric_18,
    anxiety_current_scale_18 = gadsxe18, 
    ADHD_diagnosis_18,
    ADHD_diagnosis_numeric_18,
    inattentive_hyperactive_symptoms_total_18 = SR_symtot18e,
    conduct_disorder_moderate_18, 
    conduct_disorder_moderate_numeric_18,
    conduct_disorder_symptoms_18 = cdsxe18,
    alcohol_dependence_18,
    alcohol_dependence_numeric_18,
    alcohol_abuse_18,
    alcohol_abuse_numeric_18,
    alcohol_symptom_scale_18 = alcsxe18,
    cannabis_dependence_18,
    cannabis_dependence_numeric_18,
    cannabis_symptom_scale_18 = marjsxe18,
    PTSD_diagnosis_lifetime_18,
    PTSD_diagnosis_lifetime_numeric_18,
    PTSD_diagnosis_current_18,
    PTSD_diagnosis_current_numeric_18,
  # Psychosis variables deliberately excluded for now:
  #  psychosis_symptom_count_18 = psysympe18,
  #  cat_psychosis_symptom_count_18,
  #  psychosis_experiences_scale_18 = psyexpe18,
    cat_psychosis_experiences_scale_18, # only including one psychosis variable for now 
    cat_psychosis_experiences_scale_numeric_18,
    suicide_attempt_selfharm_18,
    suicide_attempt_selfharm_numeric_18,
    service_use_18,
    service_use_numeric_18,
    # Physical health and health risks
    BMI_18 = bmie18,
    CRP_log_18 = lnCRP_E18_4SD,
    physical_activity_18 = phyacte18,
    smoking_current_number_18 = smkcnume18,
    # Coping and functioning
    loneliness_18 = lonelye18,
    life_satisfaction_18 = lifsate18,
    technology_use_18 = teche18,
    coping_with_stress_18 = copstrse18,
    PSQI_global_score_18 = psqie18,
    # Employment prospects
    not_in_employment_education_18,
    not_in_employment_education_numeric_18,
    highest_education_18,
    highest_education_recode_18,
    job_preparedness_skills_18 = jprepse18,
    job_preparedness_attributes_18 = jprepae18,
    optimism_18 = optime18,
    job_search_activities_count_18 = jbschacte18,
    ## Social isolation
    isolation_combined_05 = sisoe5, # only ELDER variables included here - need to add a chunk to select younger variables if decide to do twin analyses. 
    isolation_combined_07 = sisoe7,
    isolation_combined_10 = sisoe10,
    isolation_combined_12 = sisoe12,
    isolation_mother_05 = sisoem5,
    isolation_mother_07 = sisoem7,
    isolation_mother_10 = sisoem10,
    isolation_mother_12 = sisoem12,
    isolation_teacher_05 = sisoet5,
    isolation_teacher_07 = sisoet7,
    isolation_teacher_10 = sisoet10,
    isolation_teacher_12 = sisoet12,
    log_isolation_combined_05 = log_sisoe5, # log values needed for trajectory analyses
    log_isolation_combined_07 = log_sisoe7,
    log_isolation_combined_10 = log_sisoe10,
    log_isolation_combined_12 = log_sisoe12,
    isolation.categorical.5,
    isolation.categorical.7,
    isolation.categorical.10,
    isolation.categorical.12,
    na.per.person.si
  )
# check
# colnames(dat.renamed.3missing)

Full data set

This code is long, so has been hidden, to view the code, select the “code” button to the right. Final column names have been printed below.

# Build the analysis data set for the full sample (dat).
# Author's note: same order as the variable list for the concept paper.
# Entries of the form new_name = old_name are plain renames of raw columns;
# entries without "=" were created earlier in this file and are kept as they are.
# Lines starting with # inside the select are variables deliberately left out.
# NOTE(review): the text lists acorn_05, child_harm_05, maternal_warmth_05,
# highest_education_18 and the mother/teacher isolation reports as not used in
# analysis, yet they are still selected below - confirm this is intended.
dat.renamed.full <- dat %>%
  select(
    # Demographics
    id,
    familyid,
    sex,
    random_order = rorderp5,
    zygosity_binary,
    ethnicity,
    risk,
    cohort_binary,
    twin_order,
    # Social factors
    acorn_05,
    acorn_recoded_05,
    acorn_continuous_05,
    vandalism_05 = vndngdm5, # original data frame only reported 0 = low and 6 = high
    problems_neighbours_05 = socprbm5, # original data frame only reported 0 = low and 6 = high
    SES,
    SES_ordered,
    SES_binary_releveled,
    SES_binary_releveled_numeric,
    number_children_school_05 = nchildren00e5,
    number_children_school_free_meals_05 = schmeals00e5,
    class_size_average_05 = classsize00e5,
    # Home environment
    resident_moves_05 = nmovel5,
    mum_notlived_biodad_sincebirth_05,
    mum_notlived_biodad_sincebirth_numeric_05,
    total_siblings_05 = tsibl5,
    any_domestic_violence_05,
    any_domestic_violence_numeric_05,
    child_harm_05,
    child_harm_recoded_05,
    child_harm_recoded_numeric_05,
    total_social_support_05 = tssupm5,
    total_activities_with_mum_05 = actvm5,
    maternal_warmth_05,
    maternal_warmth_continuous_05,
    prosocial_behaviours_combined_05 = totproe5,
    # Parent characteristics
    maternal_depression_lifetime_05,
    maternal_depression_lifetime_numeric_05,
    maternal_personality_openness_05 = bfiom5,
    maternal_personality_conscientiousness_05 = bficm5,
    maternal_personality_extroversion_05 = bfiem5,
    maternal_personality_agreeableness_05 = bfiam5,
    maternal_personality_neuroticism_05 = bfinm5,
    antisocial_behaviour_mum_05 = asbmm5,
    antisocial_behaviour_dad_05 = asbfm5,
    antisocial_behaviour_parent_05,
    alcoholism_mum_05 = alcmm5,
    alcoholism_dad_05 = alcfm5,
    alcoholism_parent_05,
    # Child neurodevelopment
    IQ_05 = iqe5,
    executive_function_05 = exfunce5,
    theory_of_mind_05 = tomtote5,
    # Child emotional and behavioural development
    externalising_combined_05 = totexte5,
    internalising_combined_excl_sis_05 = intisoe5,
    ADHD_combined_05 = totadde5,
    temp_negative_affect_05 = irre5,
    temp_impulsivity_05 = impe5,
    temp_approach_05 = appe5,
    temp_sluggishness_05 = slue5,
    temp_wariness_05 = ware5,
    temp_undercontrolled_05 = unce5,
    temp_inhibited_05 = inhe5,
    temp_shy_05 = shye5,
    ## Age 18
    # Mental health and service use
    depression_diagnosis_12mo_18,
    depression_diagnosis_12mo_numeric_18,
    depression_current_scale_18 = mdesxe18, 
    anxiety_diagnosis_18,
    anxiety_diagnosis_numeric_18,
    anxiety_current_scale_18 = gadsxe18, 
    ADHD_diagnosis_18,
    ADHD_diagnosis_numeric_18,
    inattentive_hyperactive_symptoms_total_18 = SR_symtot18e,
    conduct_disorder_moderate_18, 
    conduct_disorder_moderate_numeric_18,
    conduct_disorder_symptoms_18 = cdsxe18,
    alcohol_dependence_18,
    alcohol_dependence_numeric_18,
    alcohol_abuse_18,
    alcohol_abuse_numeric_18,
    alcohol_symptom_scale_18 = alcsxe18,
    cannabis_dependence_18,
    cannabis_dependence_numeric_18,
    cannabis_symptom_scale_18 = marjsxe18,
    PTSD_diagnosis_lifetime_18,
    PTSD_diagnosis_lifetime_numeric_18,
    PTSD_diagnosis_current_18,
    PTSD_diagnosis_current_numeric_18,
  # Psychosis variables deliberately excluded for now:
  #  psychosis_symptom_count_18 = psysympe18,
  #  cat_psychosis_symptom_count_18,
  #  psychosis_experiences_scale_18 = psyexpe18,
    cat_psychosis_experiences_scale_18, # only including one psychosis variable for now 
    cat_psychosis_experiences_scale_numeric_18,
    suicide_attempt_selfharm_18,
    suicide_attempt_selfharm_numeric_18,
    service_use_18,
    service_use_numeric_18,
    # Physical health and health risks
    BMI_18 = bmie18,
    CRP_log_18 = lnCRP_E18_4SD,
    physical_activity_18 = phyacte18,
    smoking_current_number_18 = smkcnume18,
    # Coping and functioning
    loneliness_18 = lonelye18,
    life_satisfaction_18 = lifsate18,
    technology_use_18 = teche18,
    coping_with_stress_18 = copstrse18,
    PSQI_global_score_18 = psqie18,
    # Employment prospects
    not_in_employment_education_18,
    not_in_employment_education_numeric_18,
    highest_education_18,
    highest_education_recode_18,
    job_preparedness_skills_18 = jprepse18,
    job_preparedness_attributes_18 = jprepae18,
    optimism_18 = optime18,
    job_search_activities_count_18 = jbschacte18,
    ## Social isolation
    isolation_combined_05 = sisoe5, # only ELDER variables included here - need to add a chunk to select younger variables if decide to do twin analyses. 
    isolation_combined_07 = sisoe7,
    isolation_combined_10 = sisoe10,
    isolation_combined_12 = sisoe12,
    isolation_mother_05 = sisoem5,
    isolation_mother_07 = sisoem7,
    isolation_mother_10 = sisoem10,
    isolation_mother_12 = sisoem12,
    isolation_teacher_05 = sisoet5,
    isolation_teacher_07 = sisoet7,
    isolation_teacher_10 = sisoet10,
    isolation_teacher_12 = sisoet12,
    log_isolation_combined_05 = log_sisoe5, # log values needed for trajectory analyses
    log_isolation_combined_07 = log_sisoe7,
    log_isolation_combined_10 = log_sisoe10,
    log_isolation_combined_12 = log_sisoe12,
    isolation.categorical.5,
    isolation.categorical.7,
    isolation.categorical.10,
    isolation.categorical.12,
    na.per.person.si
  )
# Print the final column names as a check.
colnames(dat.renamed.full)

[1] “id”
[2] “familyid”
[3] “sex”
[4] “random_order”
[5] “zygosity_binary”
[6] “ethnicity”
[7] “risk”
[8] “cohort_binary”
[9] “twin_order”
[10] “acorn_05”
[11] “acorn_recoded_05”
[12] “acorn_continuous_05”
[13] “vandalism_05”
[14] “problems_neighbours_05”
[15] “SES”
[16] “SES_ordered”
[17] “SES_binary_releveled”
[18] “SES_binary_releveled_numeric”
[19] “number_children_school_05”
[20] “number_children_school_free_meals_05”
[21] “class_size_average_05”
[22] “resident_moves_05”
[23] “mum_notlived_biodad_sincebirth_05”
[24] “mum_notlived_biodad_sincebirth_numeric_05” [25] “total_siblings_05”
[26] “any_domestic_violence_05”
[27] “any_domestic_violence_numeric_05”
[28] “child_harm_05”
[29] “child_harm_recoded_05”
[30] “child_harm_recoded_numeric_05”
[31] “total_social_support_05”
[32] “total_activities_with_mum_05”
[33] “maternal_warmth_05”
[34] “maternal_warmth_continuous_05”
[35] “prosocial_behaviours_combined_05”
[36] “maternal_depression_lifetime_05”
[37] “maternal_depression_lifetime_numeric_05”
[38] “maternal_personality_openness_05”
[39] “maternal_personality_conscientiousness_05” [40] “maternal_personality_extroversion_05”
[41] “maternal_personality_agreeableness_05”
[42] “maternal_personality_neuroticism_05”
[43] “antisocial_behaviour_mum_05”
[44] “antisocial_behaviour_dad_05”
[45] “antisocial_behaviour_parent_05”
[46] “alcoholism_mum_05”
[47] “alcoholism_dad_05”
[48] “alcoholism_parent_05”
[49] “IQ_05”
[50] “executive_function_05”
[51] “theory_of_mind_05”
[52] “externalising_combined_05”
[53] “internalising_combined_excl_sis_05”
[54] “ADHD_combined_05”
[55] “temp_negative_affect_05”
[56] “temp_impulsivity_05”
[57] “temp_approach_05”
[58] “temp_sluggishness_05”
[59] “temp_wariness_05”
[60] “temp_undercontrolled_05”
[61] “temp_inhibited_05”
[62] “temp_shy_05”
[63] “depression_diagnosis_12mo_18”
[64] “depression_diagnosis_12mo_numeric_18”
[65] “depression_current_scale_18”
[66] “anxiety_diagnosis_18”
[67] “anxiety_diagnosis_numeric_18”
[68] “anxiety_current_scale_18”
[69] “ADHD_diagnosis_18”
[70] “ADHD_diagnosis_numeric_18”
[71] “inattentive_hyperactive_symptoms_total_18” [72] “conduct_disorder_moderate_18”
[73] “conduct_disorder_moderate_numeric_18”
[74] “conduct_disorder_symptoms_18”
[75] “alcohol_dependence_18”
[76] “alcohol_dependence_numeric_18”
[77] “alcohol_abuse_18”
[78] “alcohol_abuse_numeric_18”
[79] “alcohol_symptom_scale_18”
[80] “cannabis_dependence_18”
[81] “cannabis_dependence_numeric_18”
[82] “cannabis_symptom_scale_18”
[83] “PTSD_diagnosis_lifetime_18”
[84] “PTSD_diagnosis_lifetime_numeric_18”
[85] “PTSD_diagnosis_current_18”
[86] “PTSD_diagnosis_current_numeric_18”
[87] “cat_psychosis_experiences_scale_18”
[88] “cat_psychosis_experiences_scale_numeric_18” [89] “suicide_attempt_selfharm_18”
[90] “suicide_attempt_selfharm_numeric_18”
[91] “service_use_18”
[92] “service_use_numeric_18”
[93] “BMI_18”
[94] “CRP_log_18”
[95] “physical_activity_18”
[96] “smoking_current_number_18”
[97] “loneliness_18”
[98] “life_satisfaction_18”
[99] “technology_use_18”
[100] “coping_with_stress_18”
[101] “PSQI_global_score_18”
[102] “not_in_employment_education_18”
[103] “not_in_employment_education_numeric_18”
[104] “highest_education_18”
[105] “highest_education_recode_18”
[106] “job_preparedness_skills_18”
[107] “job_preparedness_attributes_18”
[108] “optimism_18”
[109] “job_search_activities_count_18”
[110] “isolation_combined_05”
[111] “isolation_combined_07”
[112] “isolation_combined_10”
[113] “isolation_combined_12”
[114] “isolation_mother_05”
[115] “isolation_mother_07”
[116] “isolation_mother_10”
[117] “isolation_mother_12”
[118] “isolation_teacher_05”
[119] “isolation_teacher_07”
[120] “isolation_teacher_10”
[121] “isolation_teacher_12”
[122] “log_isolation_combined_05”
[123] “log_isolation_combined_07”
[124] “log_isolation_combined_10”
[125] “log_isolation_combined_12”
[126] “isolation.categorical.5”
[127] “isolation.categorical.7”
[128] “isolation.categorical.10”
[129] “isolation.categorical.12”
[130] “na.per.person.si”

Have then selected the variables for Mplus datasets and R datasets for future analyses.

# Datasets destined for the rds files: the renamed data frames are carried
# over unchanged (every column is kept, no selection is applied here).
dat_rds.full <- dat.renamed.full
dat_rds.3missing <- dat.renamed.3missing

# Mplus csv dataset built from `dat.3missing`.
# Columns are taken under their original names (numeric values rather than
# the recoded factors); only the log-transformed scores are given shorter names.
dat_csv.3missing <- select(
  dat.3missing,
  # identifiers and twin/ordering variables
  id, familyid, rorderp5, torder,
  # demographic variables in their original coding
  risks, cohort, sampsex, zygosity, seswq35,
  # isolation scores at ages 5, 7, 10 and 12
  sisoe5, sisoe7, sisoe10, sisoe12,
  # log-transformed isolation scores, renamed log_<age>
  log_5 = log_sisoe5,
  log_7 = log_sisoe7,
  log_10 = log_sisoe10,
  log_12 = log_sisoe12
)

# Mplus csv dataset for the full sample, built from `dat`.
# Same column set as the 3missing Mplus dataset: original numeric variables
# (not the recoded factors) plus the log-transformed scores under short names.
dat_csv.full <- select(
  dat,
  # identifiers and twin/ordering variables
  id, familyid, rorderp5, torder,
  # demographic variables in their original coding
  risks, cohort, sampsex, zygosity, seswq35,
  # isolation scores at ages 5, 7, 10 and 12
  sisoe5, sisoe7, sisoe10, sisoe12,
  # log-transformed isolation scores, renamed log_<age>
  log_5 = log_sisoe5,
  log_7 = log_sisoe7,
  log_10 = log_sisoe10,
  log_12 = log_sisoe12
)

# Mplus csv dataset for the categorical LCGA, built from the full sample
# (`dat`). Keeps the isolation scores and adds the categorical isolation
# variables, renamed from isolation.categorical.<age> to sisoe<age>_cat.
dat_csv.full.cat <- select(
  dat,
  # identifiers and twin/ordering variables
  id, familyid, rorderp5, torder,
  # demographic variables in their original coding
  risks, cohort, sampsex, zygosity, seswq35,
  # isolation scores at ages 5, 7, 10 and 12
  sisoe5, sisoe7, sisoe10, sisoe12,
  # categorical isolation variables under Mplus-friendly names
  sisoe5_cat = isolation.categorical.5,
  sisoe7_cat = isolation.categorical.7,
  sisoe10_cat = isolation.categorical.10,
  sisoe12_cat = isolation.categorical.12
)

# Mplus csv dataset restricted to mother-reported social isolation,
# built from the full sample (`dat`).
dat_csv.full.mother <- select(
  dat,
  # identifiers and twin/ordering variables
  id, familyid, rorderp5, torder,
  # demographic variables in their original coding
  risks, cohort, sampsex, zygosity, seswq35,
  # mother-report isolation at ages 5, 7, 10 and 12
  sisoem5, sisoem7, sisoem10, sisoem12
)

# Mplus csv dataset restricted to teacher-reported social isolation,
# built from the full sample (`dat`).
dat_csv.full.teacher <- select(
  dat,
  # identifiers and twin/ordering variables
  id, familyid, rorderp5, torder,
  # demographic variables in their original coding
  risks, cohort, sampsex, zygosity, seswq35,
  # teacher-report isolation at ages 5, 7, 10 and 12
  sisoet5, sisoet7, sisoet10, sisoet12
)

Export data

I have commented out the lines that write the trajectory csv files: each time you run one of these lines, you then need to manually change the NAs to "." and remove the header so that Mplus can read the file. I have been using a single export and have not rerun these lines.

# Write the two R datasets (3missing and full sample) to rds files in data_path.
saveRDS(
  dat_rds.3missing,
  file = paste0(data_path, "preprocessed_isolation_trajectories_Jan2021_3missing.rds")
)
saveRDS(
  dat_rds.full,
  file = paste0(data_path, "preprocessed_isolation_trajectories_Jan2021_full_sample.rds")
)

# Mplus csv exports -- deliberately left commented out so that the existing,
# manually edited exports are not overwritten.
# The continuous and categorical exports go to two places (the Mplus_back_up
# folder under data_path and the Desktop); the mother/teacher exports have a
# Desktop copy only.
# NOTE(review): readr::write_csv() accepts `na = "."` and `col_names = FALSE`,
# which would presumably remove the manual editing steps -- confirm before
# re-exporting. Also, `path =` is deprecated in readr >= 1.4.0 in favour of `file =`.

# continuous trajectories, 3missing and full sample
# write_csv(x = dat_csv.3missing, path = paste0(data_path, "Mplus_back_up/FOR_MPLUS_preprocessed_isolation_trajectories_Jan2021_3missing.csv"))
# write_csv(x = dat_csv.3missing, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_3missing.csv")
# write_csv(x = dat_csv.full, path = paste0(data_path, "Mplus_back_up/FOR_MPLUS_preprocessed_isolation_trajectories_Jan2021_full_sample.csv"))
# write_csv(x = dat_csv.full, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample.csv") 

# categorical LCGA
# write_csv(x = dat_csv.full.cat, path = paste0(data_path, "Mplus_back_up/FOR_MPLUS_preprocessed_isolation_trajectories_Jan2021_full_sample_CAT.csv"))
# write_csv(x = dat_csv.full.cat, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample_CAT.csv") 

# GMM for mother report separately
# write_csv(x = dat_csv.full.mother, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample_MOTHER.csv") 

# GMM for teacher report separately
# write_csv(x = dat_csv.full.teacher, path = "/Users/katiethompson/Desktop/mplusisolationtrajectoriesJan2021_full_sample_TEACHER.csv") 

# Remember to delete the header row from each exported csv. Author's note:
# there will be no NAs, as they have been removed for the isolation variables;
# in future Mplus files, NA needs to be written as a full stop (.).
 

Work by Katherine N Thompson

katherine.n.thompson@kcl.ac.uk