Passive multiple imputation

This document demonstrates how to perform passive multiple imputation in a fictive simulated dataset that contains five multi-item questionnaires each containing 10 items. In this document the following packages are used: mice, mitools and dmo.

library(mice)
library(mitools)
library(dmo)

Generate simulated data

Data are generated for 100 subjects. Each subjects has observations for 5 questionnaires and 3 covariates.

#generate questionnaire data
k <- c(10,10,10,10,10)
nq <- 5
x <- dmo::gen_qdata(n=100, nq=nq, k= k)

#generate covariate data
cov <- MASS::mvrnorm(n=100, mu=c(5,5,5), Sigma=matrix(c(10,1,1,1,10,1,1,1,10),3,3))
colnames(cov) <- c("cov1", "cov2", "cov3")

#combine in data.frame
x <- data.frame(x,cov)

Generate missing values

Missing values are generated with the MAR mechanism for 25% of the subjects with a random pattern. Missings are only generated in the item scores (not in the covariates).

alpha <- 0.25

#regerate 2 random patterns for missing item data
pattern <- matrix(c(sample(c(0,1), size=2*sum(k),replace = TRUE)),nrow=2)
pattern <- cbind(pattern, matrix(c(1,1,1,1,1,1),nrow=2))
pattern
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,]    1    1    0    1    1    1    0    0    1     1     0     1     1     1
## [2,]    1    0    1    1    0    0    0    1    1     0     0     0     0     1
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## [1,]     1     1     1     1     1     1     1     0     0     1     0     0
## [2,]     1     0     0     0     0     0     0     1     0     1     0     1
##      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
## [1,]     1     0     1     0     0     1     1     1     0     0     1     0
## [2,]     0     1     1     0     0     1     0     0     1     1     1     0
##      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## [1,]     0     1     0     1     1     1     1     1     1     1     0     0
## [2,]     1     1     1     0     1     1     1     1     0     1     1     1
##      [,51] [,52] [,53]
## [1,]     1     1     1
## [2,]     1     1     1
#apply each pattern with equal frequency and equal odds
f <- c(0.5,0.5)
g <- c(4,4)

#generate missings in the data
x <- MAR(x,alpha,pattern,f,g)
colnames(x) <- c(paste0("I",1:sum(k)),c("cov1", "cov2", "cov3") )

head(x, 10)
##               I1        I2         I3         I4         I5          I6
##  [1,]  3.1462571  3.290874  0.9271871 -0.1623506 -0.9723083 -0.20171063
##  [2,]  0.3206451  2.185726 -0.2358173  0.3962231 -4.6650046 -1.23809781
##  [3,]  5.4970278        NA  0.9737249  3.4260825         NA          NA
##  [4,] -1.4323386 -1.950099  1.3413695 -3.4178319 -0.4615336  2.51685349
##  [5,] -2.3279599  4.022269  0.4458738  2.9228096  0.7745054  4.19938683
##  [6,] -2.4775660 -1.086027 -5.3737934 -3.7437083  2.0223833  0.08847822
##  [7,]  0.7981966        NA -1.2130014 -0.6597590         NA          NA
##  [8,]  1.3379731  2.097213  1.3732085  0.2546638  2.9682327 -1.39424397
##  [9,] -0.3535078  1.266254 -0.4057994  0.9987529 -1.2239257  1.01466888
## [10,]  1.1401232 -2.659367 -1.8798302  1.4925476  1.3266725  0.81223821
##                I7         I8         I9         I10        I11        I12
##  [1,]  1.91242033 -0.4214043  0.8531649 -0.31676229 -1.4641939 -0.1944635
##  [2,]  1.13533113 -0.6558489  0.9597521 -2.83934805 -1.6417241  0.6425810
##  [3,]          NA  3.3982068  6.7412951          NA         NA         NA
##  [4,] -0.44820483  1.1771828  0.6065390  1.85769410  1.8958822 -1.6180730
##  [5,]  0.11671016 -0.6701897 -0.5760394  2.98003929 -1.0833901 -1.5486132
##  [6,] -3.65579267 -3.0440517 -0.5048799 -3.86968039  0.7505467  0.3466757
##  [7,]          NA  3.0353575 -1.5503917          NA         NA         NA
##  [8,]  0.42910344  1.1053790  0.3032241  0.06621296  1.2030372 -0.8662546
##  [9,] -0.29104712  0.9386357  0.9977302 -1.06049780  2.1000508  2.7749781
## [10,] -0.09180187 -2.2873612  1.2437869 -0.91849727 -0.5553989  2.0756262
##              I13         I14        I15         I16       I17         I18
##  [1,]  1.0516945 -2.08933231  1.4895941  2.48879543  4.574463  1.59116011
##  [2,] -0.7059599 -0.03042417 -2.3419336  0.03958363 -2.246077 -1.49121577
##  [3,]         NA  2.61550582  3.7293420          NA        NA          NA
##  [4,] -4.1263187 -1.50909505  3.9685962  2.32732961 -1.619470 -1.85144688
##  [5,] -2.4251987 -3.32859038  2.2203112 -0.53218287 -3.060350 -2.11820628
##  [6,] -2.4711318 -3.32982479 -1.7527977 -2.77677102 -2.260556  4.37608450
##  [7,]         NA  2.31260367  1.5225224          NA        NA          NA
##  [8,]  5.2970722  0.98692783 -2.8685322  2.87951687  1.338754  3.15231013
##  [9,]  0.4902580  3.93698825  0.4105184  0.10130121  4.673039  1.84981814
## [10,] -1.3822248 -1.31816500 -4.7984910 -2.35949842 -3.525000  0.05957548
##              I19        I20        I21        I22         I23        I24
##  [1,] -1.6455733  0.8931313  3.9714299 -0.1187158  1.26139224  1.5535752
##  [2,]  2.6577845 -0.9671690  0.4050583 -1.7110965  0.24549076 -1.1286559
##  [3,]         NA         NA         NA  2.7488349          NA  5.4675710
##  [4,]  0.9674345 -0.3072870 -2.6237590 -0.2135094  2.53877213  2.7233193
##  [5,]  2.4294124  0.4403561 -1.4299403  1.4758068 -0.03192309 -2.2403115
##  [6,]  0.9273198 -4.5896489 -3.0703182 -1.2456986 -1.37045290 -0.4035621
##  [7,]         NA         NA         NA  2.2701966          NA -1.0509964
##  [8,]  2.1080828 -2.5539738  1.1666120  2.8867427  1.03597995 -0.5434300
##  [9,]  2.1248267  1.5972604  0.9883217 -1.4727305  2.12279258  1.7451336
## [10,] -3.6433660  2.4765570  3.6371061 -2.1347142  2.51775518 -0.6281260
##              I25        I26        I27         I28        I29        I30
##  [1,]  0.7704265  3.8755931  1.8020999  1.76621657  0.8360398 -1.2050314
##  [2,]  3.4522077  5.4422388 -1.4256174  0.72919081 -1.4408158 -0.9919587
##  [3,]         NA  4.3745714         NA  2.55965087  4.2332066         NA
##  [4,]  1.1604451  0.5835082 -2.4417903 -0.99857805 -1.0809394 -3.7517402
##  [5,] -0.9857972  1.1361026 -2.1776113 -1.68638803 -0.7143457  2.0323854
##  [6,] -4.0401374  3.0587105  0.1240616 -4.14793407  1.1152427 -2.5235164
##  [7,]         NA -3.1515464         NA  1.47733482 -1.2195260         NA
##  [8,] -1.1561234  4.5546136  0.5888149  0.18453953  2.8319360  2.6405063
##  [9,]  0.5480152  2.5191525  0.4544982  5.41221722 -0.3494604 -2.0975031
## [10,] -0.5783818  1.2119088  0.3540449 -0.03712002  0.9817221 -1.2558169
##              I31         I32        I33        I34        I35        I36
##  [1,] -1.9548588 -0.05721517 -0.3731930  1.8642296 -1.0240435 -0.5481295
##  [2,] -0.6096615  0.06052720 -1.4564173 -1.6542634 -1.1047906 -2.1710295
##  [3,]         NA  1.99913458         NA         NA  1.0164187  2.9334018
##  [4,] -1.7695761 -1.36291909 -0.4838004 -1.3468421 -1.2960929 -0.3769162
##  [5,]  0.1825161 -0.14003437 -0.8306971 -0.8501535  1.0792466  2.5394601
##  [6,]  0.7574686 -5.37038006  0.1179559 -1.5141561  0.5031438 -4.7129659
##  [7,]         NA -2.24779256         NA         NA  3.0586853  1.7216170
##  [8,]  2.6895837  0.57996985 -1.5840223 -0.1110633 -0.7468497  1.8702187
##  [9,] -0.9340286  3.39327556  2.1816202  3.6146197 -4.9769361  0.6075776
## [10,] -3.0057588  0.07163250  1.3205857 -0.9748417  0.4972020 -2.9517644
##               I37        I38        I39        I40        I41         I42
##  [1,]  1.75178796  2.3396084 -0.8845005 -0.3103354  1.6001412  0.00412063
##  [2,]  1.90801872  2.9662148 -0.9006689 -2.7749076  1.3577734 -2.72522681
##  [3,]  2.73338608         NA  0.6512588  0.6967238  0.8571315          NA
##  [4,] -2.25379567 -3.0340218  0.3159595  1.5764867  1.3119079 -4.22776850
##  [5,]  3.65988249 -0.8980934  3.5464490 -0.1472055 -0.6431294  5.92274473
##  [6,] -2.79832433  1.5271580 -1.6566391 -3.9884511  1.2635122 -1.07678520
##  [7,]  2.08763044         NA  1.0069012 -0.4982645  0.9421989          NA
##  [8,]  1.33665158  1.7242982  1.0181684  1.6017844  1.1825839  3.22054529
##  [9,]  3.94822498  4.6305676  0.0589690  1.2996921  0.4425480 -0.55003071
## [10,] -0.09309567 -0.1900496  0.6270381 -1.3882912 -5.0992871  0.91569308
##               I43        I44        I45        I46        I47        I48
##  [1,] -1.64504755 -1.5539907 -2.3755925 -0.6629368  2.6822507  2.5692895
##  [2,]  0.93039105 -3.1427754  0.7161378  0.5123397 -1.0773249  2.2211133
##  [3,]  3.30749926  3.0471240  3.9541757  1.8196311         NA  3.0007889
##  [4,]  0.09557335 -1.4739375  1.7237485  0.6354027  0.0455887  1.3867325
##  [5,]  1.06145287  0.4417150  2.3248235 -3.3453841  3.6234707  0.2868466
##  [6,] -1.83907771 -0.5126952 -1.2365011  0.7102948 -1.6183849  1.1592319
##  [7,]  1.11952338  1.4878434 -0.5512017  0.2444509         NA  0.1025022
##  [8,]  2.62906900  0.2801525  0.2235919 -1.3719588  0.1498720 -0.5178039
##  [9,]  0.77216512  0.2300077  3.6007212 -1.7620004 -0.5495756 -2.1530355
## [10,] -0.49640383  1.5505961 -3.4939294  2.1717706 -2.7818298  0.3712232
##              I49         I50      cov1     cov2     cov3
##  [1,] -3.5180188 -1.29563562 3.2293581 6.722453 3.230878
##  [2,]  0.4770418 -0.24129103 0.8642458 3.669056 8.480992
##  [3,] -0.9775305 -0.44106954 5.8498243 9.224568 7.484889
##  [4,]  0.9673087 -1.89232073 6.7438856 7.483104 3.840889
##  [5,]  1.3530680  2.59897550 4.8368150 4.485825 6.273385
##  [6,] -2.8623163  1.71620979 7.2916938 8.918616 9.248886
##  [7,] -6.0402877  1.42797833 7.8067284 7.451845 1.378459
##  [8,] -0.1739741 -1.29275409 0.6637370 9.236151 3.554878
##  [9,] -2.8767690 -1.07091219 0.5411036 6.473771 7.995718
## [10,]  1.6419668 -0.02090255 1.4131486 6.366724 9.949744

Calculate total scores for questionnaires

Calculate the total scores (sum scores) for each questionnaire, only when all items are observed.

ts <- dmo::calculate_ts(x, nq=5,k= c(10,10,10,10,10))
data <- data.frame(x,ts)

Passive multiple imputation

In passive multiple imputation, the item scores are imputed and then during the imputation, the total score is calculated based on the imputed item scores. These total scores can then be used as predictors for imputations of other variables.

There are 2 imputation models that are used in this procedure.

  1. The total score of the questionnaires are imputed directly. This model only uses the total scores of the questionnaires an the three covariates. These imputed total scores are used for the subjects that have too many item scores missing, i.e. > 75%.
  2. The item scores of the questionnaires are imputed, and the total scores are passively imputed, by re-calculating these after each imputation iteration. The re-calculated total scores are used as predictors to impute items from other questionnaires.

Initialize imputation model for total score imputation

For this model, we can use the default settings used in mice.

tsdata <- data.frame(ts, cov)
ini <- mice(tsdata, max=0, print=FALSE)
ini$meth
##   TS1   TS2   TS3   TS4   TS5  cov1  cov2  cov3 
## "pmm" "pmm" "pmm" "pmm" "pmm"    ""    ""    ""
ini$predictorMatrix
##      TS1 TS2 TS3 TS4 TS5 cov1 cov2 cov3
## TS1    0   1   1   1   1    1    1    1
## TS2    1   0   1   1   1    1    1    1
## TS3    1   1   0   1   1    1    1    1
## TS4    1   1   1   0   1    1    1    1
## TS5    1   1   1   1   0    1    1    1
## cov1   1   1   1   1   1    0    1    1
## cov2   1   1   1   1   1    1    0    1
## cov3   1   1   1   1   1    1    1    0

Set up imputation model for passive imputation

For the passive imputation model, we need to adapt the methods for the total scores, and the predictor matrix for the items.

Methods

First the imputation method is adapted for the questionnaire total scores.

ini <- mice(data, max=0, print=FALSE)
meth <- ini$meth

#for each questionnaire adapt imputation method (meth): make function to calculate TS between iterations. Example TS1:
#meth["TS1"] <- "~I(I1+I2+I3+I4+I5+I6+I7+I8+I9+I10)" - loop below automates for simulation data
nq <- 5
k <- c(10,10,10,10,10)
for(q in seq_along(1:nq)){
  meth[paste0("TS",q)] <- paste0("~I(", paste(paste0("I",1:k[q]), collapse="+"), ")")
}
meth[c("TS1", "TS2", "TS3", "TS4", "TS5")]
##                                  TS1                                  TS2 
## "~I(I1+I2+I3+I4+I5+I6+I7+I8+I9+I10)" "~I(I1+I2+I3+I4+I5+I6+I7+I8+I9+I10)" 
##                                  TS3                                  TS4 
## "~I(I1+I2+I3+I4+I5+I6+I7+I8+I9+I10)" "~I(I1+I2+I3+I4+I5+I6+I7+I8+I9+I10)" 
##                                  TS5 
## "~I(I1+I2+I3+I4+I5+I6+I7+I8+I9+I10)"

Predictor matrix

The the predictor matrix is adapted such that items are imputed by items from their own questionnaire and other total scores. In predictor matrix: target variable is in row and the predictor variables are in the column. Below the predictor matrix for imputing the first 10 items is shown.

pred <- ini$predictorMatrix
pred[paste0("I", 1:10),] <- 0
pred[paste0("I", 1:10),paste0("I", 1:10)] <- 1
pred[paste0("I", 1:10),c("TS2","TS3","TS4","TS5")] <- 1
pred[paste0("I", 11:20),] <- 0
pred[paste0("I", 11:20),paste0("I", 11:20)] <- 1
pred[paste0("I", 11:20),c("TS1","TS3","TS4","TS5")] <- 1
pred[paste0("I", 21:30),] <- 0
pred[paste0("I", 21:30),paste0("I", 21:30)] <- 1
pred[paste0("I", 21:30),c("TS1","TS2","TS4","TS5")] <- 1
pred[paste0("I", 31:40),] <- 0
pred[paste0("I", 31:40),paste0("I", 31:40)] <- 1
pred[paste0("I", 31:40),c("TS1","TS2","TS3","TS5")] <- 1
pred[paste0("I", 41:50),] <- 0
pred[paste0("I", 41:50),paste0("I", 41:50)] <- 1
pred[paste0("I", 41:50),c("TS1","TS2","TS3","TS4")] <- 1
pred[,colnames(cov)] <- 1 #covariates as predictors for all items
pred <- pred*ini$predictorMatrix
head(pred, 10)
##     I1 I2 I3 I4 I5 I6 I7 I8 I9 I10 I11 I12 I13 I14 I15 I16 I17 I18 I19 I20 I21
## I1   0  1  1  1  1  1  1  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I2   1  0  1  1  1  1  1  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I3   1  1  0  1  1  1  1  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I4   1  1  1  0  1  1  1  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I5   1  1  1  1  0  1  1  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I6   1  1  1  1  1  0  1  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I7   1  1  1  1  1  1  0  1  1   1   0   0   0   0   0   0   0   0   0   0   0
## I8   1  1  1  1  1  1  1  0  1   1   0   0   0   0   0   0   0   0   0   0   0
## I9   1  1  1  1  1  1  1  1  0   1   0   0   0   0   0   0   0   0   0   0   0
## I10  1  1  1  1  1  1  1  1  1   0   0   0   0   0   0   0   0   0   0   0   0
##     I22 I23 I24 I25 I26 I27 I28 I29 I30 I31 I32 I33 I34 I35 I36 I37 I38 I39 I40
## I1    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I2    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I3    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I4    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I5    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I6    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I7    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I8    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I9    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## I10   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##     I41 I42 I43 I44 I45 I46 I47 I48 I49 I50 cov1 cov2 cov3 TS1 TS2 TS3 TS4 TS5
## I1    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I2    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I3    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I4    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I5    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I6    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I7    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I8    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I9    0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1
## I10   0   0   0   0   0   0   0   0   0   0    1    1    1   0   1   1   1   1

Apply final imputations

For both imputation procedures we use 15 imputations and 10 iterations. The object imp1 contains the imputations for the total scores only from other total scores and the thee covariates. The object imp2 for items and total scores with passive imputation.

imp1 <- mice(tsdata, 15,maxit=10,seed=61085,print=FALSE)
imp2 <- mice(data, m=15, meth=meth,pred=pred, maxit=10, seed=12354, print=FALSE)

Check iteration plots

The the imputations for the Total scores in the total score imputation (imp1) passive imputation procedure (imp2).

plot(imp1, paste0("TS",1:5))

plot(imp2, paste0("TS",1:5))

Combine imputations

Combine imputations and select (per questionnaire) the ts from imp1 for persons with <75% of items missing; and the ts from imp2 for persons with >75% of items missing (in a questionnaire).

First make indicators for each q if missing <75%:

calculate_i <- function(x,nq,k){
  ind <- matrix(0,nrow=nrow(x), ncol=(nq))

  for (q in 1:(nq)){
    ind[,q] <- apply(x[,(((q*k[q])-k[q])+1):(q*k[q])],1,function(x) {sum(is.na(x))/length(x)})
  }
  colnames(ind) <- paste("TS",1:nq,sep="")
  ind <- ifelse(ind <0.75,1,0)
  ind
}
indicator <- calculate_i(x=data, nq=5, k=c(10,10,10,10,10))

Then select correct TS from each imputation.

implist <- lapply(1:15, function(x){
  x1 <- complete(imp1, x)
  x2 <- complete(imp2, x)
  tsimp <- lapply(1:nq, function(x){
    ifelse(indicator[,x]==1, x2[,paste0("TS",x)], x1[,paste0("TS",x)] )
  })
  tsimp <- data.frame(tsimp)
  colnames(tsimp) <- paste0("impTS",1:nq)
  data.frame(x1,tsimp)
})

Analyze and pool

Save data in an imputation list for analysis and test relation between TS1 and TS3

impdata <- imputationList(implist)
fit <- with (impdata, lm(impTS1~impTS3 )) 
summary(pool (fit))
##          term  estimate  std.error  statistic       df p.value
## 1 (Intercept) 0.5428534 0.74398580  0.7296555 94.79241  0.4674
## 2      impTS3 0.7283042 0.06281695 11.5940708 91.18819  0.0000
Iris Eekhout, PhD
Iris Eekhout, PhD
Statistician

Iris works on a variety of projects as methodologist and statistical analyst related to child health, e.g. measuring child development (D-score) and adaptive screenings for psycho-social problems (psycat).

Related