<- function(n_x, n_y, mean_x, mean_y, sd_x_sample, sd_y_sample, iter, conf.level, seed) {
fun_sim set.seed(seed = seed)
<- t.test.p.value.by.actual <- vector()
t.test.p.value.by.ftest for (iii in seq(iter)) {
<- sample(x = sd_x_sample, size = 1)
sd_x <- sample(x = sd_y_sample, size = 1)
sd_y <- rnorm(n = n_x, mean = mean_x, sd = sd_x)
x <- rnorm(n = n_y, mean = mean_y, sd = sd_y)
y # F検定
<- var.test(x = x, y = y, ratio = 1, alternative = "two.sided", conf.level = conf.level)$p.value
var.test.p.value <- (var.test.p.value > (1 - conf.level))
var.equal # F検定の結果(p値)による
<- t.test(x = x, y = y, alternative = "two.sided", paired = F, var.equal = var.equal, conf.level = conf.level)$p.value
t.test.p.value.by.ftest[iii] # 実際の分散による
<- t.test(x = x, y = y, alternative = "two.sided", paired = F, var.equal = (sd_x == sd_y), conf.level = conf.level)$p.value
t.test.p.value.by.actual[iii]
}return(list(t.test.p.value.by.ftest = t.test.p.value.by.ftest, t.test.p.value.by.actual = t.test.p.value.by.actual))
}
Rで統計的仮説検定:F検定からt検定への2段階検定のシミュレーション
Rでデータサイエンス
F検定からt検定への2段階検定のシミュレーション
- F検定対立仮説 : alternative hypothesis: true ratio of variances is not equal to 1
- t検定対立仮説 : alternative hypothesis: true difference in means is not equal to 0
シミュレーション
平均値が同一の例
<- 20230507
seed <- 10000
iter <- 30
n_x <- 30
n_y <- c(2, 5)
sd_x_sample <- c(2, 5)
sd_y_sample <- 0.95
conf.level <- 10
mean_x <- 10
mean_y <- fun_sim(n_x = n_x, n_y = n_y, mean_x = mean_x, mean_y = mean_y, sd_x_sample = sd_x_sample, sd_y_sample = sd_y_sample, iter = iter, conf.level = conf.level, seed = seed)
tmp <- tmp$t.test.p.value.by.ftest
t.test.p.value.by.ftest <- tmp$t.test.p.value.by.actual t.test.p.value.by.actual
F検定の結果(p値)で分散均一・不均一を判断した場合のt検定
1 - sum(t.test.p.value.by.ftest > (1 - conf.level))/iter
[1] 0.0485
実際の分散によるt検定
1 - sum(t.test.p.value.by.actual > (1 - conf.level))/iter
[1] 0.0485
平均値が異なる例
<- 10
mean_x <- 15
mean_y <- fun_sim(n_x = n_x, n_y = n_y, mean_x = mean_x, mean_y = mean_y, sd_x_sample = sd_x_sample, sd_y_sample = sd_y_sample, iter = iter, conf.level = conf.level, seed = seed)
tmp <- tmp$t.test.p.value.by.ftest
t.test.p.value.by.ftest <- tmp$t.test.p.value.by.actual t.test.p.value.by.actual
F検定の結果(p値)で分散均一・不均一を判断した場合のt検定
1 - sum(t.test.p.value.by.ftest < (1 - conf.level))/iter
[1] 0.008
実際の分散によるt検定
1 - sum(t.test.p.value.by.actual < (1 - conf.level))/iter
[1] 0.0079
t.test {stats}のコード
getS3method(f = "t.test", class = "default")
function (x, y = NULL, alternative = c("two.sided", "less", "greater"),
mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95,
...)
{
alternative <- match.arg(alternative)
if (!missing(mu) && (length(mu) != 1 || is.na(mu)))
stop("'mu' must be a single number")
if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) ||
conf.level < 0 || conf.level > 1))
stop("'conf.level' must be a single number between 0 and 1")
if (!is.null(y)) {
dname <- paste(deparse1(substitute(x)), "and", deparse1(substitute(y)))
if (paired)
xok <- yok <- complete.cases(x, y)
else {
yok <- !is.na(y)
xok <- !is.na(x)
}
y <- y[yok]
}
else {
dname <- deparse1(substitute(x))
if (paired)
stop("'y' is missing for paired test")
xok <- !is.na(x)
yok <- NULL
}
x <- x[xok]
if (paired) {
x <- x - y
y <- NULL
}
nx <- length(x)
mx <- mean(x)
vx <- var(x)
if (is.null(y)) {
if (nx < 2)
stop("not enough 'x' observations")
df <- nx - 1
stderr <- sqrt(vx/nx)
if (stderr < 10 * .Machine$double.eps * abs(mx))
stop("data are essentially constant")
tstat <- (mx - mu)/stderr
method <- if (paired)
"Paired t-test"
else "One Sample t-test"
estimate <- setNames(mx, if (paired)
"mean difference"
else "mean of x")
}
else {
ny <- length(y)
if (nx < 1 || (!var.equal && nx < 2))
stop("not enough 'x' observations")
if (ny < 1 || (!var.equal && ny < 2))
stop("not enough 'y' observations")
if (var.equal && nx + ny < 3)
stop("not enough observations")
my <- mean(y)
vy <- var(y)
method <- paste(if (!var.equal)
"Welch", "Two Sample t-test")
estimate <- c(mx, my)
names(estimate) <- c("mean of x", "mean of y")
if (var.equal) {
df <- nx + ny - 2
v <- 0
if (nx > 1)
v <- v + (nx - 1) * vx
if (ny > 1)
v <- v + (ny - 1) * vy
v <- v/df
stderr <- sqrt(v * (1/nx + 1/ny))
}
else {
stderrx <- sqrt(vx/nx)
stderry <- sqrt(vy/ny)
stderr <- sqrt(stderrx^2 + stderry^2)
df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny -
1))
}
if (stderr < 10 * .Machine$double.eps * max(abs(mx),
abs(my)))
stop("data are essentially constant")
tstat <- (mx - my - mu)/stderr
}
if (alternative == "less") {
pval <- pt(tstat, df)
cint <- c(-Inf, tstat + qt(conf.level, df))
}
else if (alternative == "greater") {
pval <- pt(tstat, df, lower.tail = FALSE)
cint <- c(tstat - qt(conf.level, df), Inf)
}
else {
pval <- 2 * pt(-abs(tstat), df)
alpha <- 1 - conf.level
cint <- qt(1 - alpha/2, df)
cint <- tstat + c(-cint, cint)
}
cint <- mu + cint * stderr
names(tstat) <- "t"
names(df) <- "df"
names(mu) <- if (paired)
"mean difference"
else if (!is.null(y))
"difference in means"
else "mean"
attr(cint, "conf.level") <- conf.level
rval <- list(statistic = tstat, parameter = df, p.value = pval,
conf.int = cint, estimate = estimate, null.value = mu,
stderr = stderr, alternative = alternative, method = method,
data.name = dname)
class(rval) <- "htest"
rval
}
<bytecode: 0x0000023df5c3a438>
<environment: namespace:stats>
最終更新
Sys.time()
[1] "2024-04-07 04:40:52 JST"
R、Quarto、Package
R.Version()$version.string
[1] "R version 4.3.3 (2024-02-29 ucrt)"
::quarto_version() quarto
[1] '1.4.542'
packageVersion(pkg = "tidyverse")
[1] '2.0.0'