Rで統計的仮説検定:Chow test

Rでデータサイエンス

Chow test

  • 有意水準は全て5%とする。

切片のみ同一

サンプルデータの作成

seed <- 20230816
set.seed(seed = seed)
n <- 50
point <- 35
t1 <- seq(point)
t2 <- (point + 1):n
a1 <- 2
a2 <- 2
b1 <- 4
b2 <- 3.7
y1 <- a1 + t1 * b1 + rnorm(length(t1), mean = 0, sd = 3)
y2 <- a2 + t2 * b2 + rnorm(length(t2), mean = 0, sd = 3)
(df <- data.frame(t = c(t1, t2), y = c(y1, y2), term = c(rep("Term1", length(t1)), rep("Term2", length(t2)))))
    t          y  term
1   1   3.863241 Term1
2   2  11.722096 Term1
3   3  12.944833 Term1
4   4  17.492437 Term1
5   5  16.168074 Term1
6   6  22.872704 Term1
7   7  32.985584 Term1
8   8  36.442453 Term1
9   9  34.628942 Term1
10 10  45.549038 Term1
11 11  44.670762 Term1
12 12  51.005359 Term1
13 13  55.545485 Term1
14 14  54.708978 Term1
15 15  62.299010 Term1
16 16  64.674359 Term1
17 17  72.368692 Term1
18 18  75.321882 Term1
19 19  75.122143 Term1
20 20  86.075641 Term1
21 21  87.191592 Term1
22 22  88.437253 Term1
23 23  94.403623 Term1
24 24  98.113283 Term1
25 25 100.219371 Term1
26 26 107.085904 Term1
27 27 112.227931 Term1
28 28 113.468189 Term1
29 29 122.418015 Term1
30 30 120.641862 Term1
31 31 121.383866 Term1
32 32 130.961203 Term1
33 33 137.622751 Term1
34 34 137.690520 Term1
35 35 142.895719 Term1
36 36 136.037506 Term2
37 37 132.313567 Term2
38 38 139.830293 Term2
39 39 146.168075 Term2
40 40 154.956023 Term2
41 41 155.357537 Term2
42 42 160.212748 Term2
43 43 161.350665 Term2
44 44 168.653233 Term2
45 45 169.812002 Term2
46 46 167.765342 Term2
47 47 177.200945 Term2
48 48 181.472261 Term2
49 49 182.689621 Term2
50 50 189.772597 Term2
library(ggplot2)
library(dplyr)
df %>%
    ggplot(mapping = aes(x = t, y = y, col = term)) + geom_point() + geom_smooth(method = "lm", se = F) + geom_smooth(mapping = aes(x = t, y = y), data = df, method = "lm", inherit.aes = F, linetype = "dashed", se = F) + theme_minimal()
Figure 1

残差平方和の算出

# 全体
rss <- sum(lm(y ~ t, data = df)$resi^2)
# 期間1
rss1 <- sum(lm(y ~ t, data = head(df, point))$resi^2)
# 期間2
rss2 <- sum(lm(y ~ t, data = tail(df, n - point))$resi^2)
list(rss = rss, rss1 = rss1, rss2 = rss2)
$rss
[1] 1024.505

$rss1
[1] 205.2629

$rss2
[1] 124.0788

F値の算出

k <- 2  # 全パラメータ数(本例の場合は説明変数×1 + 切片項×1)
# 制約ありの場合の自由度(分子)
dof_n <- k
# 制約なしの場合の自由度(分子)
dof_d <- (length(t1) - k) + (length(t2) - k)
# F値
(F_value <- ((rss - (rss1 + rss2))/dof_n)/((rss1 + rss2)/dof_d))
[1] 48.5476

p値の算出

  • H0:a1=a2,b1=b2(構造変化なし)
pf(q = F_value, df1 = dof_n, df2 = dof_d, lower.tail = F)
[1] 0.000000000004613741
  • 帰無仮説は棄却される。

(参考)関数 sctest {strucchange}による算出

library(strucchange)
sctest(df$y ~ df$t, type = "Chow", point = point)

    Chow test

data:  df$y ~ df$t
F = 48.548, p-value = 0.000000000004614

切片、傾きとも同一

サンプルデータの作成

set.seed(seed = seed)
n <- 50
point <- 35
t1 <- seq(point)
t2 <- (point + 1):n
a1 <- 2
a2 <- 2
b1 <- 4
b2 <- 4
y1 <- a1 + t1 * b1 + rnorm(length(t1), mean = 0, sd = 3)
y2 <- a2 + t2 * b2 + rnorm(length(t2), mean = 0, sd = 3)
df <- data.frame(t = c(t1, t2), y = c(y1, y2), term = c(rep("Term1", length(t1)), rep("Term2", length(t2))))
df %>%
    ggplot(mapping = aes(x = t, y = y, col = term)) + geom_point() + geom_smooth(method = "lm", se = F) + geom_smooth(mapping = aes(x = t, y = y), data = df, method = "lm", inherit.aes = F, linetype = "dashed", se = F) + theme_minimal()
Figure 2
fun_chow_test <- function(df, point, k) {
    rss <- sum(lm(y ~ t, data = df)$resi^2)
    rss1 <- sum(lm(y ~ t, data = head(df, point))$resi^2)
    rss2 <- sum(lm(y ~ t, data = tail(df, n - point))$resi^2)
    dof_n <- k
    dof_d <- (length(t1) - k) + (length(t2) - k)
    F_value <- ((rss - (rss1 + rss2))/dof_n)/((rss1 + rss2)/dof_d)
    p_value <- pf(q = F_value, df1 = dof_n, df2 = dof_d, lower.tail = F)
    return(list(F_value = F_value, p_value = p_value))
}

F値とp値の算出

fun_chow_test(df = df, point = point, k = k)
$F_value
[1] 0.5761839

$p_value
[1] 0.5660431
  • 帰無仮説は棄却されない。

(参考)関数 sctest {strucchange}による算出

sctest(df$y ~ df$t, type = "Chow", point = point)

    Chow test

data:  df$y ~ df$t
F = 0.57618, p-value = 0.566

傾きのみ同一

サンプルデータの作成

set.seed(seed = seed)
n <- 50
point <- 35
t1 <- seq(point)
t2 <- (point + 1):n
a1 <- 6
a2 <- 2
b1 <- 4
b2 <- 4
y1 <- a1 + t1 * b1 + rnorm(length(t1), mean = 0, sd = 3)
y2 <- a2 + t2 * b2 + rnorm(length(t2), mean = 0, sd = 3)
df <- data.frame(t = c(t1, t2), y = c(y1, y2), term = c(rep("Term1", length(t1)), rep("Term2", length(t2))))
df %>%
    ggplot(mapping = aes(x = t, y = y, col = term)) + geom_point() + geom_smooth(method = "lm", se = F) + geom_smooth(mapping = aes(x = t, y = y), data = df, method = "lm", inherit.aes = F, linetype = "dashed", se = F) + theme_minimal()
Figure 3

F値とp値の算出

fun_chow_test(df = df, point = point, k = k)
$F_value
[1] 6.992074

$p_value
[1] 0.002231469
  • 帰無仮説は棄却される。

(参考)関数 sctest {strucchange}による算出

sctest(df$y ~ df$t, type = "Chow", point = point)

    Chow test

data:  df$y ~ df$t
F = 6.9921, p-value = 0.002231

参考引用資料

最終更新

Sys.time()
[1] "2024-04-09 16:47:41 JST"

R、Quarto、Package

R.Version()$version.string
[1] "R version 4.3.3 (2024-02-29 ucrt)"
quarto::quarto_version()
[1] '1.4.542'
packageVersion(pkg = "tidyverse")
[1] '2.0.0'

著者

東京:05:04:13
ロンドン:21:04:13
ニューヨーク:16:04:13