python | numpy and pandas | R | R vs Stata | R graphic | LaTeX | asymptote | Blog |
5 %% 3
cumsum(1:5)
choose(5,3)
library(combinat) combn(5,3)
library(combinat) permn(5)
N <- 10 runif(N) # 一様分布 rnorm(N) # 正規分布 rlnorm(N) # 対数正規分布 rexp(N) # 指数分布 rgamma(N, 1) # Gamma分布 rbinom(N, 50, .4) # 二項分布 rpois(N, 1.5) # Poisson分布 rnegbin(N, 1, 1.2) # 負の二項分布
dunif(.2) dnorm(.4)
punif(.8) pnorm(1.96)
qunif(.7) qnorm(.05)
x <- runif(50) mean(x) # mean sd(x) # standard deviation var(x) # variance median(x) # median
x = rlnorm(200) quantile(x) quantile(x, c(.1, .3, .5, .7, .9))
# mock data sex <- c("F", "M", "F", "F", "M", "F", "M", "F", "M") age <- c("20-29", "10-19", "10-19", "20-29", "20-29", "10-19", "20-29", "20-29", "10-19") nation <- c("Japan", "USA", "USA", "USA", "Japan", "USA", "JAPAN", "JAPAN", "USA") height <- c(1.5, 1.7, 1.65, 1.45, 1.8, 1.5, 1.6, 1.6, 1.65) dat <- data.frame(sex, age, nation, height) xtabs(~ sex + age, data=dat) xtabs(~ sex + age + nation, data=dat) data.frame(xtabs(~ sex + age, data=dat)) # データフレームに変換
tbl <- xtabs(~ sex + age, data=dat) addmargins(tbl) addmargins(tbl, 1)
aggregate(height ~ sex + age, data=dat, FUN=mean) # 戻り値はデータフレーム
# mock data N <- 1000 x1 <- runif(N) x2 <- rnorm(N) eps <- rnorm(N) y <- x1 - x2 + eps dat <- data.frame(x1, x2, y, eps) lm.obj <- lm(y ~ x1 + x2, data=dat) # display the result summary(lm.obj)
library(sandwich) V.homo <- vcov(lm.obj) # valid under homogeneity assumption V.rob <- sandwich(lm.obj) # heterogeneity robust V.rob_adj <- sandwich(lm.obj, adjust=T) # heterogeneity robust + small sample adjustment
library(AER) coeftest(lm.obj) coeftest(lm.obj, vcov = V.rob) coeftest(lm.obj, vcov = V.rob_adj)
library(AER) linearHypothesis(lm.obj, "x1 + x2 = 0") linearHypothesis(lm.obj, c("x1 = 1", "x2 = -1"))
library(AER) # mock data N <- 1000 eps <- rnorm(N) z <- rexp(N) x1 <- rnorm(N) + .3*eps - .3*z # x1 is endogenous x2 <- rnorm(N) y <- x1 - x2 + eps dat <- data.frame(x1, x2, z, y, eps) iv.obj <- ivreg(y ~ x1 + x2 | x2 + z, data=dat) summary(iv.obj)
# mock data N <- 1000 x1 <- runif(N) x2 <- rnorm(N) size <- ceiling(rlnorm(N)) prob <- exp(x1 - x2) mu <- size * prob fun <- function(mu) { return(rpois(1, mu)) } y <- sapply(mu, fun) dat <- data.frame(x1, x2, size, y) poi.obj <- glm(y ~ x1 + x2 + offset(log(size)), family="poisson", data=dat) summary(poi.obj)
# mock data N <- 1000 x1 <- runif(N) x2 <- runif(N) u <- rnorm(N) y <- as.integer(x1 - x2 >= u) dat <- data.frame(x1, x2, u, y) log.obj <- glm(y ~ x1 + x2, family=binomial(link="logit"), data=dat) summary(log.obj) prob.obj <- glm(y ~ x1 + x2, family=binomial(link="probit"), data=dat) summary(prob.obj)
library(survival) # mock data N <- 1000 x1 <- runif(N) x2 <- runif(N) eps <- rnorm(N) y.star <- x1 - x2 + eps y <- y.star y[y <= 0] <- 0 dat <- data.frame(x1, x2, y.star, y, eps) tob.obj <- survreg(Surv(y, y>0, type="left") ~ x1 + x2, data=dat, dist="gaussian") summary(tob.obj)
V.Hess <- vcov(poi.obj) # estimated by hessian V.OP <- solve( meat(poi.obj) * length(probit.obj$y) ) # estimated by score V.rob <- sandwich(poi.obj) # robust v-cov matrix. used for quasi maximum likelihood
library(sampleSelection) # mock data N <- 1000 x1 <- runif(N) x2 <- rnorm(N) z <- rnorm(N) e1 <- rnorm(N) e2 <- .6 * e1 + .4 * rnorm(N) v <- 1.5 * x1 - z + e2 y.star <- x1 - x2 + e1 y <- y.star y[v <= 0] <- NA d <- as.integer(v > 0) dat <- data.frame(x1, x2, y, z, v, d, y.star, e2, e1) heck.obj <- heckit(d ~ x1 + z, y ~ x1 + x2, data=dat) summary(heck.obj)
obj <- function(x) { return(- x[1]^2 - x[2]^2 + x[1]*x[2] + x[1] + x[2]) } optim(c(0, 0), obj, method="BFGS", control=list(fnscale=-1, trace=1)) # default is minimization. "fnscale=-1" switches to maximization # 改訂版 (?) library(optimx) optimx(c(10, 10), obj, method="BFGS", control=list(fnscale=-1, trace=1))
lhs <- function(x) { v1 <- x[2] - exp(x[1]) v2 <- x[2] - 1/x[1] return(c(v1, v2)) } # many solvers... library(nleqslv) nleqslv(c(1, 1), lhs) library(rootSolve) multiroot(lhs, c(1, 1)) library(BB) BBsolve(c(1,1), lhs)
# mock data x = rnorm(7) y = rnorm(7) fun <- function(x, y) { return(x) } X <- outer(x, x, fun) Y <- t(outer(y, y, fun)) Z <- sin(X) + cos(Y) + rnorm(49) X <- as.vector(X) Y <- as.vector(Y) Z <- as.vector(Z) library(scatterplot3d) scatterplot3d(X, Y, Z, pch=4, type="h", highlight.3d=T)
# mock data fun <- function(x, y) { return(- x^2 - y^2 + x*y + x + y) } X <- seq(-2, 4, by=.5) Y <- X Z <- outer(X, Y, fun) persp(X, Y, Z, theta = 30, phi = 40, expand = 0.7, col = "lightblue", ticktype="detailed") # add title ttl = expression(z == - x^2 - y^2 + x*y + x + y) title(main = ttl)
library(chron) leap.year(2012) leap.year(2013)
library(chron) day.of.week(month=9, day=10, year=2012) day.of.week(month=2, day=30, year=2012) # !!!
toupper("aBcDE") tolower("aBcDE")
dir.create("folder") dir.create("folder2/subfolder", recursive=T) # 再帰的に
file.exists("folder/file.txt") file.exists("folder")
file.path("folder/subfolder", "file.txt")
sessionInfo()