RStanで一般化線形モデル:誤差構造=ガンマ分布,リンク関数=対数関数

Rでデータサイエンス

一般化線形モデル:誤差構造=ガンマ分布,リンク関数=対数関数

パッケージ読み込み等

library(rstan)
library(dplyr)
library(ggplot2)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
packageVersion("rstan")
[1] '2.32.6'

サンプルデータ

# 誤差構造をガンマ分布、リンク関数を対数関数としたサンプルデータ
n <- 40
# 説明変数
x <- runif(n = n, min = -1, max = 1)
# 偏回帰係数
b <- 3.5
a <- -0.5
# 目的変数
y0 <- exp(x * a + b)
shape <- 2
y <- rgamma(n = n, shape = shape, rate = shape/y0)
result_glm <- glm(formula = y ~ x, family = Gamma(link = "log"))
result_glm %>%
    summary()

Call:
glm(formula = y ~ x, family = Gamma(link = "log"))

Coefficients:
            Estimate Std. Error t value             Pr(>|t|)    
(Intercept)  3.37613    0.09629  35.062 < 0.0000000000000002 ***
x           -0.90460    0.16094  -5.621           0.00000189 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 0.3698624)

    Null deviance: 25.070  on 39  degrees of freedom
Residual deviance: 13.973  on 38  degrees of freedom
AIC: 338.15

Number of Fisher Scoring iterations: 5
fit <- {
    result_glm$coefficients[1] + result_glm$coefficients[2] * x
} %>%
    exp()
(g <- ggplot(mapping = aes(x = x)) + geom_point(mapping = aes(y = y, col = "obs")) + geom_line(mapping = aes(y = fit, col = "fit_by_glm()")) + theme(legend.title = element_blank()))
Figure 1

Stanコード

\[y\sim\textrm{Gamma}(\alpha,\,\beta)\quad\alpha=\phi^{-1},\beta=\dfrac{\phi^{-1}}{\mu}\]

model_code <- "
data {
  int N; //サンプルサイズ
  int K; //切片項を含んだ説明変数の数
  real y[N]; //応答変数
  matrix[N,K] X; //説明変数
}

parameters {
  vector[K] betas; //回帰係数
  real<lower=0> inverse_phi; //分散の逆数
}

transformed parameters {
  vector[N] mu; // 線形予測子 log y = X * betas
  vector[N] beta; // ガンマ分布のパラメータ rate
  
  mu = exp(X*betas); // log y = X * betas → y = exp(X * betas)
  beta = rep_vector(inverse_phi, N) ./ mu; // 要素ごとの除算 
  // rep_vector() https://mc-stan.org/docs/2_28/functions-reference/matrix-broadcast.html
}

model {  
  betas[1] ~ cauchy(0,10); // 切片項の事前分布
  betas[2:] ~ cauchy(0,2.5);// 傾きの事前分布
  
  inverse_phi ~ exponential(1); // 分散の逆数の事前分布
  y ~ gamma(inverse_phi,beta);
}

generated quantities {
  vector[N] y_rep;
  for(n in 1:N){
    y_rep[n] = gamma_rng(inverse_phi,beta[n]);
 }
}
"

MCMC

data_list <- list(N = n, K = 2, y = y, X = cbind(rep(1, n), x))
# data_list
stan_model_compile <- stan_model(model_code = model_code)
setwd(stan_output)
fit_sample <- sampling(object = stan_model_compile, data = data_list, refresh = 0, sample_file = "sample_file_stan_code_11.csv", diagnostic_file = "diagnostic_file_stan_code_11.txt")

結果の確認

setwd(stan_output)
csvfiles <- dir(pattern = "sample_file_stan_code_11")
fit_sample <- read_stan_csv(csvfiles = csvfiles)
result_summary <- fit_sample %>%
    summary() %>%
    .$summary %>%
    data.frame(check.names = F) %>%
    tibble::add_column(pars = row.names(.), .before = 1) %>%
    {
        row.names(.) <- NULL
        .
    }
result_summary[, c("pars", "mean", "2.5%", "50%", "97.5%", "Rhat")]
           pars          mean          2.5%           50%         97.5%
1      betas[1]    3.38625527    3.19417400    3.38291500    3.58679250
2      betas[2]   -0.89577921   -1.22086050   -0.89645400   -0.56111320
3   inverse_phi    2.65486634    1.70733050    2.61402500    3.90144000
4         mu[1]   22.82095472   18.23829250   22.56470000   28.63233750
5         mu[2]   16.78580667   12.42690500   16.51060000   22.54560000
6         mu[3]   41.32435110   32.85446750   40.92145000   51.62781750
7         mu[4]   20.09081598   15.62463750   19.83535000   25.81627250
8         mu[5]   13.15099193    9.05868550   12.83910000   18.87655000
9         mu[6]   65.27416080   45.94840000   64.07065000   91.43630000
10        mu[7]   38.11751582   30.75739750   37.70015000   47.05433750
11        mu[8]   38.82416682   31.21768500   38.41540000   48.08884250
12        mu[9]   13.09945593    9.00371525   12.79130000   18.82485500
13       mu[10]   69.01672883   47.89463500   67.57765000   97.99688750
14       mu[11]   17.30154805   12.90762500   17.02385000   23.04003250
15       mu[12]   29.42031775   24.15751500   29.18460000   35.82815750
16       mu[13]   61.52376022   44.10748500   60.49030000   84.91019250
17       mu[14]   55.20181405   41.07748250   54.44770000   73.88967250
18       mu[15]   55.01241168   40.97987250   54.27395000   73.55768750
19       mu[16]   15.01399386   10.76238750   14.75010000   20.75575250
20       mu[17]   17.19811688   12.81160500   16.92370000   22.94107000
21       mu[18]   33.54069665   27.53092750   33.21140000   40.84830500
22       mu[19]   48.39136078   37.28805750   47.82740000   62.35309250
23       mu[20]   21.83210000   17.29274000   21.57630000   27.68231750
24       mu[21]   56.32915653   41.64480750   55.49300000   75.82468000
25       mu[22]   66.79512870   46.67098500   65.50840000   94.13091750
26       mu[23]   33.09677862   27.18568000   32.78430000   40.27921750
27       mu[24]   13.06907528    8.97531000   12.76200000   18.79430000
28       mu[25]   53.20288728   40.08485750   52.51655000   70.40837500
29       mu[26]   14.88556208   10.64278750   14.61800000   20.62501750
30       mu[27]   34.81994830   28.42473500   34.45400000   42.59291500
31       mu[28]   34.39094180   28.12099500   34.02815000   42.00613500
32       mu[29]   18.80172913   14.31700750   18.53925000   24.52736000
33       mu[30]   71.97895458   49.16912500   70.40790000  103.37047500
34       mu[31]   56.66056165   41.77439000   55.82150000   76.40721000
35       mu[32]   19.27560672   14.79624500   19.01695000   25.02753000
36       mu[33]   50.59073182   38.55134250   49.93530000   66.08435000
37       mu[34]   28.49301075   23.38693250   28.26665000   34.67806500
38       mu[35]   18.00984645   13.56727750   17.73100000   23.71086250
39       mu[36]   44.51204405   34.92683500   44.04885000   56.38762250
40       mu[37]   16.37144130   12.04998250   16.09165000   22.10916750
41       mu[38]   24.48142625   19.82890000   24.24875000   30.30815500
42       mu[39]   33.91469993   27.77664250   33.57630000   41.35939000
43       mu[40]   26.96300595   22.02476000   26.77190000   32.91963000
44      beta[1]    0.11789967    0.06900829    0.11600550    0.17912665
45      beta[2]    0.16189303    0.08965705    0.15863700    0.25296168
46      beta[3]    0.06506858    0.03868913    0.06395915    0.09783175
47      beta[4]    0.13436529    0.07650905    0.13200100    0.20614155
48      beta[5]    0.20930004    0.11031312    0.20451750    0.33775613
49      beta[6]    0.04183650    0.02333382    0.04072910    0.06580802
50      beta[7]    0.07044999    0.04193185    0.06922950    0.10592450
51      beta[8]    0.06918579    0.04127652    0.06793175    0.10431700
52      beta[9]    0.21017434    0.11064150    0.20540250    0.33915063
53     beta[10]    0.03967882    0.02190429    0.03860110    0.06297587
54     beta[11]    0.15686592    0.08728017    0.15360950    0.24453905
55     beta[12]    0.09116016    0.05477090    0.09007790    0.13800357
56     beta[13]    0.04426452    0.02502524    0.04316830    0.06903456
57     beta[14]    0.04911352    0.02818955    0.04801165    0.07546238
58     beta[15]    0.04927622    0.02828012    0.04816955    0.07568635
59     beta[16]    0.18196072    0.09834306    0.17807400    0.28688400
60     beta[17]    0.15784858    0.08769732    0.15464600    0.24607653
61     beta[18]    0.07996768    0.04806880    0.07874160    0.12079533
62     beta[19]    0.05577842    0.03248718    0.05461555    0.08470480
63     beta[20]    0.12336567    0.07177489    0.12139200    0.18806985
64     beta[21]    0.04816804    0.02763849    0.04707455    0.07419805
65     beta[22]    0.04093022    0.02275118    0.03981880    0.06459347
66     beta[23]    0.08103530    0.04866473    0.07981410    0.12237273
67     beta[24]    0.21069318    0.11083573    0.20593250    0.33997773
68     beta[25]    0.05088999    0.02937301    0.04971750    0.07787884
69     beta[26]    0.18361260    0.09897713    0.17964750    0.29006618
70     beta[27]    0.07704793    0.04620380    0.07585725    0.11607418
71     beta[28]    0.07800214    0.04683866    0.07682225    0.11756738
72     beta[29]    0.14389113    0.08100588    0.14103200    0.22177868
73     beta[30]    0.03813121    0.02085922    0.03706315    0.06092483
74     beta[31]    0.04789734    0.02747503    0.04678040    0.07381850
75     beta[32]    0.14023363    0.07925201    0.13771550    0.21557928
76     beta[33]    0.05342652    0.03098484    0.05223205    0.08157549
77     beta[34]    0.09414194    0.05646521    0.09298375    0.14254112
78     beta[35]    0.15045609    0.08472125    0.14728050    0.23366593
79     beta[36]    0.06050500    0.03567303    0.05937840    0.09155796
80     beta[37]    0.16617482    0.09167573    0.16276350    0.26083052
81     beta[38]    0.10975628    0.06487550    0.10803000    0.16591550
82     beta[39]    0.07909057    0.04750026    0.07790195    0.11937895
83     beta[40]    0.09952769    0.05957882    0.09816270    0.15024195
84     y_rep[1]   22.84195163    3.78144800   19.75310000   60.53885750
85     y_rep[2]   16.91863700    2.80346450   14.70910000   45.34152500
86     y_rep[3]   41.70999288    7.05461975   36.23320000  105.03647500
87     y_rep[4]   19.80967855    3.43530350   16.98980000   50.36078000
88     y_rep[5]   13.33556823    2.05724500   11.16875000   35.08078250
89     y_rep[6]   63.20778713    9.75688750   53.31595000  168.62077500
90     y_rep[7]   38.31478735    5.84048700   33.18120000  102.36122500
91     y_rep[8]   39.36393037    6.30155025   33.79655000  104.64347500
92     y_rep[9]   12.85570311    2.17202750   11.04630000   34.45479750
93    y_rep[10]   70.03950297   10.81153500   60.27390000  185.25390000
94    y_rep[11]   17.29155346    2.93185900   14.75635000   45.12925250
95    y_rep[12]   29.66719168    5.05244050   26.14310000   75.95944750
96    y_rep[13]   61.79940737   10.02107300   52.69010000  163.37865000
97    y_rep[14]   54.33743075    8.59635425   46.91050000  138.73617500
98    y_rep[15]   54.26738867    8.53651375   46.63150000  141.19407500
99    y_rep[16]   14.81024523    2.29924825   12.79385000   39.84070750
100   y_rep[17]   17.02725695    2.85494600   14.80740000   43.13818750
101   y_rep[18]   33.03095655    5.49245950   28.98220000   84.54121250
102   y_rep[19]   48.56390088    7.34239550   42.36615000  126.50510000
103   y_rep[20]   21.61208772    3.62584425   18.63170000   56.33570250
104   y_rep[21]   56.60516722    9.23949050   49.09565000  148.95222500
105   y_rep[22]   66.34558110   10.47869250   56.41060000  181.61780000
106   y_rep[23]   32.80760356    5.62387075   28.43630000   83.27533500
107   y_rep[24]   13.10763843    2.19501450   11.24300000   34.84627750
108   y_rep[25]   52.72587810    8.33539700   45.46410000  133.30132500
109   y_rep[26]   15.12993523    2.40293225   13.09285000   39.59496250
110   y_rep[27]   34.48915461    5.79840850   30.40520000   88.98528250
111   y_rep[28]   34.44428391    5.68540300   29.52875000   90.42251750
112   y_rep[29]   18.69292397    3.08303725   15.99750000   48.53335250
113   y_rep[30]   71.62591986   10.58759500   61.31125000  198.46690000
114   y_rep[31]   55.52640849    9.19251500   48.29500000  144.38905000
115   y_rep[32]   19.32765038    3.26080775   16.74055000   50.90074500
116   y_rep[33]   50.80469510    8.31560275   43.27890000  133.79687500
117   y_rep[34]   29.18080757    4.83497400   25.65480000   74.23398750
118   y_rep[35]   18.08646030    2.82569000   15.83045000   46.34995750
119   y_rep[36]   44.15392853    6.61228750   38.21360000  118.84600000
120   y_rep[37]   16.29952423    2.47938725   13.89040000   43.22246500
121   y_rep[38]   24.50622428    3.89472875   21.34730000   62.36674000
122   y_rep[39]   33.84022434    5.23621050   29.09375000   89.19685750
123   y_rep[40]   27.21116091    4.68548575   23.65810000   69.95461250
124        lp__ -169.65590850 -172.98200000 -169.31600000 -168.22097500
         Rhat
1   0.9998194
2   1.0007128
3   1.0003844
4   1.0005040
5   1.0009964
6   0.9996762
7   1.0007547
8   1.0011826
9   1.0000594
10  0.9996456
11  0.9996492
12  1.0011847
13  1.0000978
14  1.0009636
15  0.9998988
16  1.0000149
17  0.9999240
18  0.9999209
19  1.0010962
20  1.0009703
21  0.9996948
22  0.9998030
23  1.0005989
24  0.9999417
25  1.0000757
26  0.9997085
27  1.0011860
28  0.9998907
29  1.0011027
30  0.9996655
31  0.9996737
32  1.0008578
33  1.0001246
34  0.9999468
35  1.0008212
36  0.9998442
37  0.9999685
38  1.0009156
39  0.9997293
40  1.0010215
41  1.0003392
42  0.9996848
43  1.0000995
44  1.0002974
45  1.0002507
46  1.0003408
47  1.0002795
48  1.0002087
49  1.0003493
50  1.0003384
51  1.0003390
52  1.0002080
53  1.0003509
54  1.0002558
55  1.0003242
56  1.0003479
57  1.0003458
58  1.0003458
59  1.0002317
60  1.0002548
61  1.0003329
62  1.0003438
63  1.0002915
64  1.0003462
65  1.0003499
66  1.0003321
67  1.0002076
68  1.0003453
69  1.0002302
70  1.0003348
71  1.0003342
72  1.0002693
73  1.0003522
74  1.0003463
75  1.0002732
76  1.0003445
77  1.0003215
78  1.0002624
79  1.0003424
80  1.0002465
81  1.0003061
82  1.0003335
83  1.0003165
84  0.9995145
85  1.0003221
86  0.9992932
87  0.9999021
88  1.0003769
89  0.9995735
90  1.0000030
91  0.9997780
92  1.0007865
93  0.9998747
94  0.9993808
95  1.0003681
96  0.9998269
97  0.9992034
98  1.0001404
99  0.9992883
100 0.9996109
101 0.9995437
102 0.9999148
103 0.9995823
104 1.0001929
105 1.0007785
106 1.0001093
107 1.0000658
108 0.9996809
109 0.9997301
110 0.9995627
111 0.9998226
112 0.9999588
113 0.9990910
114 0.9998589
115 0.9998899
116 0.9995466
117 0.9994546
118 0.9999487
119 0.9995177
120 0.9998875
121 1.0005042
122 1.0004661
123 0.9992258
124 1.0003252

\(\hat{\textrm{R}}\)の確認

rhat <- bayesplot::rhat(fit_sample)
bayesplot::mcmc_rhat(rhat = rhat) + bayesplot::yaxis_text(hjust = 1) + coord_flip() + theme(axis.text.x = element_text(angle = 90))
Figure 2

トレースプロット

bayesplot::mcmc_combo(x = rstan::extract(fit_sample, permuted = F), pars = c("betas[1]", "betas[2]", "inverse_phi"))
Figure 3

時系列チャート

mcmc_sample <- rstan::extract(fit_sample)
result_df <- data.frame(t(apply(X = mcmc_sample[["mu"]], MARGIN = 2, quantile, probs = c(0.025, 0.5, 0.975))))
colnames(result_df) <- c("lwr", "fit_median", "upr")
result_df$fit_mean <- apply(X = mcmc_sample[["mu"]], MARGIN = 2, mean)
result_df$y_rep <- apply(X = mcmc_sample[["y_rep"]], MARGIN = 2, mean)
result_df
         lwr fit_median       upr fit_mean    y_rep
1  18.238293   22.56470  28.63234 22.82095 22.84195
2  12.426905   16.51060  22.54560 16.78581 16.91864
3  32.854468   40.92145  51.62782 41.32435 41.70999
4  15.624638   19.83535  25.81627 20.09082 19.80968
5   9.058686   12.83910  18.87655 13.15099 13.33557
6  45.948400   64.07065  91.43630 65.27416 63.20779
7  30.757397   37.70015  47.05434 38.11752 38.31479
8  31.217685   38.41540  48.08884 38.82417 39.36393
9   9.003715   12.79130  18.82486 13.09946 12.85570
10 47.894635   67.57765  97.99689 69.01673 70.03950
11 12.907625   17.02385  23.04003 17.30155 17.29155
12 24.157515   29.18460  35.82816 29.42032 29.66719
13 44.107485   60.49030  84.91019 61.52376 61.79941
14 41.077483   54.44770  73.88967 55.20181 54.33743
15 40.979872   54.27395  73.55769 55.01241 54.26739
16 10.762387   14.75010  20.75575 15.01399 14.81025
17 12.811605   16.92370  22.94107 17.19812 17.02726
18 27.530928   33.21140  40.84831 33.54070 33.03096
19 37.288058   47.82740  62.35309 48.39136 48.56390
20 17.292740   21.57630  27.68232 21.83210 21.61209
21 41.644808   55.49300  75.82468 56.32916 56.60517
22 46.670985   65.50840  94.13092 66.79513 66.34558
23 27.185680   32.78430  40.27922 33.09678 32.80760
24  8.975310   12.76200  18.79430 13.06908 13.10764
25 40.084857   52.51655  70.40838 53.20289 52.72588
26 10.642788   14.61800  20.62502 14.88556 15.12994
27 28.424735   34.45400  42.59292 34.81995 34.48915
28 28.120995   34.02815  42.00614 34.39094 34.44428
29 14.317007   18.53925  24.52736 18.80173 18.69292
30 49.169125   70.40790 103.37048 71.97895 71.62592
31 41.774390   55.82150  76.40721 56.66056 55.52641
32 14.796245   19.01695  25.02753 19.27561 19.32765
33 38.551342   49.93530  66.08435 50.59073 50.80470
34 23.386932   28.26665  34.67807 28.49301 29.18081
35 13.567277   17.73100  23.71086 18.00985 18.08646
36 34.926835   44.04885  56.38762 44.51204 44.15393
37 12.049982   16.09165  22.10917 16.37144 16.29952
38 19.828900   24.24875  30.30815 24.48143 24.50622
39 27.776643   33.57630  41.35939 33.91470 33.84022
40 22.024760   26.77190  32.91963 26.96301 27.21116

平均値

g + geom_point(mapping = aes(y = result_df$fit_mean, col = "fit_by_mcmc_mean")) + geom_ribbon(mapping = aes(ymin = result_df$lwr, ymax = result_df$upr), alpha = 0.1)
Figure 4

中央値

g + geom_point(mapping = aes(y = result_df$fit_median, col = "fit_by_mcmc_median")) + geom_ribbon(mapping = aes(ymin = result_df$lwr, ymax = result_df$upr), alpha = 0.1)
Figure 5

参考引用資料

最終更新

Sys.time()
[1] "2024-03-21 05:12:09 JST"

R、Quarto、Package

R.Version()$version.string
[1] "R version 4.3.2 Patched (2023-12-27 r85754 ucrt)"
quarto::quarto_version()
[1] '1.4.542'
packageVersion(pkg = "tidyverse")
[1] '2.0.0'

著者