RStanで状態空間モデル:欠損値の推定

Rでデータサイエンス

欠損値の推定

パッケージ読み込み等

library(rstan)
library(dplyr)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
packageVersion("rstan")
[1] '2.32.6'

サンプルデータ

  • 実質実効為替レート指数:2020年=100(出所:日本銀行)
sample_df %>%
    ggplot(mapping = aes(x = Date, y = value, color = key)) + geom_line() + geom_point() + theme(legend.position = "top", legend.title = element_blank(), axis.title = element_blank()) + scale_color_manual(values = "blue")
Figure 1
# 欠損
Nmiss <- 5
missing_df <- sample_df
missing_row <- sample(x = seq(nrow(missing_df)), Nmiss)
missing_df[c(missing_row), 3] <- NA
missing_df$key <- missing_df$key %>%
    unique() %>%
    paste0(":欠損あり")
missing_df[missing_row, ]
         Date                                        key value
23 2020-12-01 実質実効為替レート指数:2020年=100:欠損あり    NA
51 2023-04-01 実質実効為替レート指数:2020年=100:欠損あり    NA
21 2020-10-01 実質実効為替レート指数:2020年=100:欠損あり    NA
1  2019-02-01 実質実効為替レート指数:2020年=100:欠損あり    NA
5  2019-06-01 実質実効為替レート指数:2020年=100:欠損あり    NA
sample_df[missing_row, ]
         Date                               key value
23 2020-12-01 実質実効為替レート指数:2020年=100 97.44
51 2023-04-01 実質実効為替レート指数:2020年=100 77.54
21 2020-10-01 実質実効為替レート指数:2020年=100 99.35
1  2019-02-01 実質実効為替レート指数:2020年=100 96.92
5  2019-06-01 実質実効為替レート指数:2020年=100 99.79
(g0 <- missing_df %>%
    ggplot(mapping = aes(x = Date, y = value, color = key)) + geom_path() + geom_point() + theme(legend.position = "top", legend.title = element_blank(), axis.title = element_blank()) + geom_point(mapping = aes(x = Date, y = value, color = key), data = sample_df[missing_row, ]) + scale_color_manual(values = c("blue", "red"), guide = guide_legend(override.aes = list(linetype = c("blank", "solid"), shape = c(16, NA)))))
Figure 2

Stanコード

# stan code
model_code <- "
data {
  int<lower=0> n; // サンプルサイズ
  vector[n] Y; // 為替レート指数
  int<lower=0> Nmiss; // 欠損値の数
}
parameters {
  real mu0; // 事前分布の平均
  vector[n] mu; // 状態の確率的レベル
  vector<lower=0>[Nmiss] Miss_Y; // 補間した欠測値
  real<lower=0> sdV; // 観測誤差の大きさ
  real<lower=0> sdW; // 過程誤差の大きさ
}
model {
  // 状態方程式
  // 先頭のデータを推定
  mu[1] ~ normal(mu0, sdW);  

  // 観測方程式
  {
    int nmiss;
    nmiss = 0;
    for(i in 1:n) {
      if(Y[i]!=9999){ // 欠損でない場合
        Y[i] ~ normal(mu[i], sdV); 
      }else{ // 欠損の場合
        nmiss = nmiss + 1;
        Miss_Y[nmiss] ~ normal(mu[i],sdV);
      }
    }
  }

  // 状態の遷移
  for(i in 2:n){
    mu[i] ~ normal(mu[i-1], sdW);
  }
  //観測誤差および過程誤差は事前分布を指定せず、Stan のデフォルトである無情報事前分布とする。
}"

MCMC

missing_df[missing_row, 3] <- 9999
data_list <- list(n = nrow(missing_df), Y = missing_df$value, Nmiss = Nmiss)
# data_list
stan_model_compile <- stan_model(model_code = model_code)
setwd(stan_output)
fit_sample <- sampling(object = stan_model_compile, data = data_list, iter = 30000, chain = 3, warmup = 1000, refresh = 0, sample_file = "sample_file_stan_code_09.csv", diagnostic_file = "diagnostic_file_stan_code_09.txt")

結果の確認

setwd(stan_output)
csvfiles <- dir(pattern = "sample_file_stan_code_09")
fit_sample <- read_stan_csv(csvfiles = csvfiles)
(result_summary <- fit_sample %>%
    summary() %>%
    .$summary %>%
    data.frame(check.names = F) %>%
    tibble::add_column(pars = row.names(.), .before = 1) %>%
    {
        row.names(.) <- NULL
        .
    })
        pars       mean      se_mean         sd         2.5%        25%
1        mu0  96.255443 0.0281163586  2.3720880  91.59659750  94.666500
2      mu[1]  96.216228 0.0250642662  1.7016802  92.89249500  95.079950
3      mu[2]  96.211397 0.0011592007  0.3450089  95.48669750  96.043900
4      mu[3]  96.110799 0.0026643491  0.3488032  95.49279750  95.917200
5      mu[4]  98.430539 0.0019650955  0.3451785  97.62139750  98.274600
6      mu[5]  98.931305 0.0147462874  1.1966698  96.56119750  98.140000
7      mu[6]  99.445349 0.0030206683  0.3551792  98.81629750  99.248000
8      mu[7] 102.676170 0.0047132858  0.3869786 101.69000000 102.516000
9      mu[8] 101.626130 0.0012517897  0.3328054 100.87900000 101.468000
10     mu[9] 100.468411 0.0010230662  0.3307243  99.76109750 100.305000
11    mu[10]  99.214322 0.0011975728  0.3346728  98.53140000  99.045200
12    mu[11]  98.538929 0.0009934377  0.3296214  97.83410000  98.376400
13    mu[12]  97.546323 0.0018686910  0.3396773  96.89368750  97.366200
14    mu[13]  97.529816 0.0046315193  0.3830698  96.93239500  97.300175
15    mu[14] 101.491689 0.0027845136  0.3499887 100.65300000 101.338000
16    mu[15] 102.928509 0.0018655825  0.3428343 102.12500000 102.778000
17    mu[16] 103.288572 0.0030900652  0.3542689 102.42000000 103.137000
18    mu[17] 101.025325 0.0023301384  0.3422083 100.39100000 100.837000
19    mu[18] 100.750786 0.0010392265  0.3340933 100.03000000 100.589000
20    mu[19] 100.232169 0.0011272481  0.3324691  99.53020000 100.068000
21    mu[20]  99.747891 0.0011238718  0.3393684  99.02210000  99.582000
22    mu[21]  99.207695 0.0199933384  1.2196169  96.82409500  98.412600
23    mu[22]  98.674172 0.0011495248  0.3449398  97.91879750  98.510975
24    mu[23]  97.860926 0.0133363605  1.1890597  95.51969750  97.074575
25    mu[24]  96.996419 0.0017393479  0.3424073  96.20729500  96.841900
26    mu[25]  95.023435 0.0010098558  0.3317630  94.31530000  94.862800
27    mu[26]  93.140421 0.0012767818  0.3348998  92.45960000  92.968800
28    mu[27]  91.853682 0.0011797112  0.3324496  91.17010000  91.685400
29    mu[28]  91.047943 0.0010943246  0.3319681  90.35969750  90.883500
30    mu[29]  90.319892 0.0018675032  0.3394504  89.67120000  90.139000
31    mu[30]  91.089858 0.0010546161  0.3361767  90.35499750  90.927600
32    mu[31]  91.404028 0.0015194312  0.3352245  90.64729250  91.246100
33    mu[32]  91.046537 0.0028844327  0.3510617  90.20228750  90.891900
34    mu[33]  88.337356 0.0024973813  0.3403410  87.70870000  88.150000
35    mu[34]  87.626716 0.0013248361  0.3361694  86.96019500  87.453200
36    mu[35]  87.701009 0.0015990815  0.3369765  86.93469750  87.544200
37    mu[36]  86.663441 0.0010148797  0.3336812  85.94990000  86.501400
38    mu[37]  85.832281 0.0016786373  0.3398465  85.04410000  85.677700
39    mu[38]  83.865214 0.0027231105  0.3498450  83.02300000  83.711900
40    mu[39]  79.483613 0.0044626708  0.3777599  78.89269750  79.257900
41    mu[40]  79.315605 0.0028438684  0.3497009  78.47329750  79.160500
42    mu[41]  76.100346 0.0033393804  0.3557581  75.49189500  75.898200
43    mu[42]  75.767610 0.0018831225  0.3391015  75.12349500  75.586775
44    mu[43]  76.865054 0.0038095381  0.3610105  75.97219750  76.707800
45    mu[44]  74.615018 0.0017888312  0.3379863  73.96869750  74.435100
46    mu[45]  73.816579 0.0029913231  0.3539578  73.19619750  73.620400
47    mu[46]  75.245909 0.0014175853  0.3375167  74.58189500  75.072575
48    mu[47]  77.611887 0.0017476569  0.3377250  76.83149750  77.458700
49    mu[48]  78.812324 0.0032275114  0.3551305  77.93289000  78.661800
50    mu[49]  77.233786 0.0018455471  0.3389344  76.58080000  77.054200
51    mu[50]  77.449811 0.0013912164  0.3396905  76.68689500  77.291500
52    mu[51]  76.723015 0.0109175383  1.1839728  74.39310000  75.962300
53    mu[52]  76.004129 0.0013429739  0.3417676  75.23919750  75.843200
54    mu[53]  74.408164 0.0018497519  0.3366330  73.75660000  74.225900
55    mu[54]  74.323111 0.0014093058  0.3368584  73.56459750  74.165500
56    mu[55]  73.101150 0.0012510164  0.3340922  72.40959750  72.931775
57    mu[56]  72.376926 0.0013102396  0.3345987  71.70620000  72.205200
58    mu[57]  72.469169 0.0011275701  0.3317908  71.72810000  72.311300
59    mu[58]  71.565181 0.0029948114  0.3544774  70.94509750  71.367500
60    mu[59]  73.314357 0.0023360492  0.3423743  72.49529500  73.161900
61    mu[60]  72.886344 0.0010945604  0.3466226  72.15739750  72.720500
62 Miss_Y[1]  96.218062 0.0248652076  1.7412875  92.81017250  95.055500
63 Miss_Y[2]  98.931097 0.0147071194  1.2541844  96.44360000  98.100700
64 Miss_Y[3]  99.208710 0.0200538831  1.2711763  96.71589750  98.374900
65 Miss_Y[4]  97.863270 0.0133817109  1.2440101  95.39158500  97.046000
66 Miss_Y[5]  76.723327 0.0109123558  1.2385603  74.27580000  75.932400
67       sdV   0.315600 0.0048541969  0.1797545   0.08952183   0.172326
68       sdW   1.659269 0.0018618100  0.1750063   1.34277950   1.536478
69      lp__  11.959432 1.1533569601 33.2102081 -45.13402250 -13.896850
          50%       75%      97.5%       n_eff      Rhat
1   96.257600  97.84990 100.915050   7117.7615 1.0006863
2   96.243000  97.33972  99.503500   4609.4162 1.0008985
3   96.210900  96.37923  96.951605  88581.5634 1.0000245
4   96.068700  96.26000  96.961000  17138.7338 1.0000680
5   98.461300  98.61470  99.081400  30854.6169 1.0000585
6   98.943200  99.74240 101.245025   6585.4166 1.0009426
7   99.398100  99.60060 100.311000  13825.7579 1.0001142
8  102.753000 102.90600 103.273000   6741.0310 1.0002703
9  101.638000 101.79400 102.309000  70683.4775 1.0000248
10 100.469000 100.63300 101.163025 104502.0219 1.0000039
11  99.203700  99.37330  99.951503  78097.4854 0.9999838
12  98.542400  98.70430  99.231200 110090.4260 0.9999795
13  97.522500  97.69960  98.340300  33041.3619 1.0000362
14  97.455600  97.68980  98.498700   6840.8323 1.0001823
15 101.534000 101.68800 102.114000  15798.2674 1.0000311
16 102.954000 103.10900 103.582000  33770.6680 0.9999940
17 103.337000 103.48900 103.905000  13144.0985 1.0000730
18 100.990000 101.18300 101.833025  21568.3914 1.0000158
19 100.752000 100.91500 101.461025 103351.1303 1.0000009
20 100.232000 100.39700 100.938000  86988.8625 1.0000475
21  99.751700  99.91600 100.465000  91181.9975 1.0000012
22  99.197800 100.01200 101.650000   3721.1417 1.0002531
23  98.681500  98.84400  99.391710  90042.9690 1.0000217
24  97.857400  98.64480 100.191000   7949.3687 1.0005182
25  97.019200  97.17480  97.661803  38753.6860 0.9999805
26  95.023800  95.18020  95.734200 107928.7682 0.9999843
27  93.131800  93.29790  93.885810  68801.2412 1.0000224
28  91.839400  92.01280  92.589905  79414.5786 1.0000109
29  91.042800  91.21020  91.763000  92023.8352 0.9999887
30  90.292800  90.47270  91.111002  33039.2416 1.0000305
31  91.099700  91.25930  91.784610 101612.3211 0.9999793
32  91.422300  91.57890  92.066500  48675.3927 1.0000094
33  91.088600  91.24140  91.672802  14813.1072 1.0000899
34  88.303800  88.49010  89.144000  18572.0101 1.0000212
35  87.609700  87.78550  88.380202  64386.0912 1.0000373
36  87.723200  87.87800  88.363100  44407.6648 1.0000192
37  86.661600  86.82730  87.370300 108102.1189 0.9999790
38  85.856000  86.01120  86.491712  40987.5474 1.0000222
39  83.906400  84.06210  84.489100  16505.2060 1.0000847
40  79.410300  79.64510  80.440902   7165.4246 1.0001163
41  79.362200  79.51300  79.930507  15120.7877 1.0000709
42  76.045500  76.25410  76.972105  11349.5278 1.0000790
43  75.739900  75.91910  76.559900  32426.6622 1.0000069
44  76.922600  77.07730  77.468702   8980.3876 1.0000507
45  74.589200  74.76863  75.393905  35699.2903 1.0000520
46  73.769100  73.96890  74.674105  14001.5588 1.0000403
47  75.228900  75.39940  76.014000  56688.1382 1.0000341
48  77.635800  77.79250  78.258300  37343.4201 1.0000042
49  78.864400  79.01410  79.419505  12107.1093 1.0000053
50  77.206600  77.39110  78.015505  33727.2634 1.0000006
51  77.467300  77.62260  78.127500  59618.0103 1.0000071
52  76.710300  77.49990  79.083425  11760.7220 1.0002975
53  76.017600  76.17790  76.688800  64762.9657 1.0000427
54  74.383900  74.56460  75.182603  33119.6939 1.0000969
55  74.341700  74.49500  74.990502  57132.6369 1.0000314
56  73.091500  73.26070  73.835700  71319.2219 0.9999703
57  72.362350  72.53390  73.122402  65214.9388 1.0000242
58  72.483300  72.63720  73.144703  86584.7927 0.9999921
59  71.519400  71.71960  72.424300  14010.0022 1.0001707
60  73.351500  73.50150  73.939808  21480.1981 1.0000909
61  72.879700  73.04950  73.637003 100284.6273 1.0000641
62  96.245000  97.36963  99.588100   4904.0713 1.0008164
63  98.941050  99.77390 101.374000   7272.2287 1.0008484
64  99.202900 100.04800 101.765000   4018.0433 1.0002532
65  97.864000  98.68223 100.280000   8642.2043 1.0004736
66  76.714300  77.53700  79.179410  12882.4149 1.0002230
67   0.276544   0.42142   0.746338   1371.2758 1.0010017
68   1.651525   1.76982   2.029151   8835.6171 1.0000777
69   9.804860  37.12620  74.280900    829.1168 1.0026714

トレースプロット

bayesplot::mcmc_combo(x = rstan::extract(fit_sample, permuted = F), pars = c("Miss_Y[1]", "Miss_Y[2]", "Miss_Y[3]"))
Figure 3
bayesplot::mcmc_combo(x = rstan::extract(fit_sample, permuted = F), pars = c("Miss_Y[4]", "Miss_Y[5]"))
Figure 4

時系列チャート

missing_est <- result_summary[62:66, c("pars", "mean")]
missing_df[missing_row, 3] <- missing_est[, "mean"]
missing_df[missing_row, 2] <- "補間値"
missing_df[missing_row, ]
         Date    key    value
23 2020-12-01 補間値 96.21806
51 2023-04-01 補間値 98.93110
21 2020-10-01 補間値 99.20871
1  2019-02-01 補間値 97.86327
5  2019-06-01 補間値 76.72333
sample_df[missing_row, ]
         Date                               key value
23 2020-12-01 実質実効為替レート指数:2020年=100 97.44
51 2023-04-01 実質実効為替レート指数:2020年=100 77.54
21 2020-10-01 実質実効為替レート指数:2020年=100 99.35
1  2019-02-01 実質実効為替レート指数:2020年=100 96.92
5  2019-06-01 実質実効為替レート指数:2020年=100 99.79
g0 + geom_point(mapping = aes(x = Date, y = value, color = key), data = missing_df[missing_row, ], shape = 6) + scale_color_manual(values = c("blue", "red", "saddlebrown"), guide = guide_legend(override.aes = list(linetype = c("blank", "solid", "blank"), shape = c(16, NA, 6))))
Figure 5

参考引用資料

最終更新

Sys.time()
[1] "2024-03-19 07:05:19 JST"

R、Quarto、Package

R.Version()$version.string
[1] "R version 4.3.2 Patched (2023-12-27 r85754 ucrt)"
quarto::quarto_version()
[1] '1.4.542'
packageVersion(pkg = "tidyverse")
[1] '2.0.0'

著者