Linear Models
OLS
Data:
ols.csv
# R script
library(AER)
dat <- read.csv("ols.csv", as.is=T)
obj <- lm(y ~ x1 + x2, data=dat)
# classical (homoskedasticity-based) standard errors
coeftest(obj)
# heteroskedasticity-robust (sandwich) standard errors
coeftest(obj, vcov=sandwich(obj, adjust=T))
# output ----- #
> coeftest(obj)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.101201 0.174460 -0.5801 0.5632
x1 1.232367 0.300225 4.1048 8.439e-05 ***
x2 -0.855581 0.094823 -9.0229 1.726e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> coeftest(obj, vcov=sandwich(obj, adjust=T))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.101201 0.161529 -0.6265 0.5324
x1 1.232367 0.297047 4.1487 7.175e-05 ***
x2 -0.855581 0.083069 -10.2997 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Octave script
dat = dlmread("ols.csv", ",", 1, 0);
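# columns of ols.csv: x1, x2, y (the header row is skipped by dlmread)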
N = size(dat, 1);
X = [ones(N,1), dat(:,1:2)];
y = dat(:,3);
beta = (X'*X)^(-1)*X'*y;
e_hat = y - X*beta;
# classical variance: s^2 * (X'X)^(-1) with s^2 = e'e/(N-k), k = 3 regressors
V = e_hat'*e_hat/(N-3) * (X'*X)^(-1);
se = diag(V).^(1/2);
# heteroskedasticity-robust (HC1) variance
B = diag(e_hat);
V2 = N/(N-3) * (X'*X)^(-1) * X'*B*B*X * (X'*X)^(-1);
se2 = diag(V2).^(1/2);
[beta, se, se2]
# output ----- #
[beta, se, se2]
ans =
-0.101201 0.174460 0.161529
1.232367 0.300225 0.297047
-0.855581 0.094823 0.083069
IV regression
Data:
iv.csv
# R script
library(AER)
dat <- read.csv("iv.csv", as.is=T)
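# ivreg formula: y ~ regressors | instruments; the exogenous regressor x2 appears on both sides, z is the excluded instrument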
obj <- ivreg(y ~ x1 + x2 | x2 + z, data=dat)
coeftest(obj)
# output ----- #
> coeftest(obj)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.10775 0.18290 -0.5892 0.557127
x1 1.12543 0.39695 2.8352 0.005574 **
x2 -0.91774 0.10561 -8.6899 8.992e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Octave script
dat = dlmread("iv.csv", ",", 1, 0);
N = size(dat, 1);
X = [ones(N,1), dat(:,1:2)];
Z = [ones(N,1), dat(:,2:3)];
y = dat(:,4);
beta = (Z'*X)\(Z'*y);                 # just-identified IV estimator
e_hat = y - X*beta;
V = e_hat'*e_hat/(N-3) * inv(Z'*X)*(Z'*Z)*inv(X'*Z);
se = diag(V).^(1/2);
[beta, se]
# output ----- #
Maximum Likelihood
Poisson regression
Data:
poi.csv
# R script
library(AER)
dat <- read.csv("poi.csv", as.is=T)
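# offset(log(size)) enters with its coefficient fixed at 1, i.e. the model is for the rate y/size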
obj <- glm(y ~ x1 + x2 + offset(log(size)), family="poisson", data=dat)
coeftest(obj)
# quasi-ML: robust (sandwich) standard errors
coeftest(obj, vcov=sandwich)
# output ----- #
> coeftest(obj)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.093782 0.100247 0.9355 0.3495
x1 1.015126 0.137667 7.3738 1.659e-13 ***
x2 -0.922526 0.048780 -18.9120 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> coeftest(obj, vcov=sandwich)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.093782 0.110811 0.8463 0.3974
x1 1.015126 0.165541 6.1322 8.67e-10 ***
x2 -0.922526 0.067473 -13.6724 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Logit, Probit
Data:
lpit.csv
# R script
library(AER)
dat <- read.csv("lpit.csv", as.is=T)
obj1 <- glm(y ~ x1 + x2, family=binomial(link="logit"), data=dat)
obj2 <- glm(y ~ x1 + x2, family=binomial(link="probit"), data=dat)
coeftest(obj1)
coeftest(obj2)
# output ----- #
> coeftest(obj1)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.22005 0.53631 0.4103 0.68158
x1 2.13568 0.82923 2.5755 0.01001 *
x2 -1.92279 0.77210 -2.4903 0.01276 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> coeftest(obj2)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.12307 0.32966 0.3733 0.708897
x1 1.32454 0.49697 2.6652 0.007694 **
x2 -1.17468 0.46437 -2.5296 0.011418 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tobit
Data:
tobit.csv
# R script
library(AER)
dat <- read.csv("tobit.csv", as.is=T)
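# left-censoring at 0: Surv(y, y > 0, type="left") flags y > 0 as uncensored (essentially what AER's tobit() does internally)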
obj <- survreg(Surv(y, y>0, type="left") ~ x1 + x2, data=dat, dist="gaussian")
coeftest(obj)
# output ----- #
> coeftest(obj)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.25846 0.23189 1.1146 0.265039
x1 0.60177 0.33165 1.8145 0.069607 .
x2 -0.99512 0.31500 -3.1591 0.001582 **
Log(scale) -0.15737 0.10877 -1.4468 0.147955
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Heckit
Data:
heckit.csv
# R script
library(AER)
library(sampleSelection)
dat <- read.csv("heckit.csv", as.is=T)
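# heckit(selection, outcome): the first formula is the probit selection equation, the second is the outcome equation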
obj <- heckit(d ~ x1 + z, y ~ x1 + x2, data=dat)
summary(obj)
# note: coeftest() does not work properly
# output ----- #
> summary(obj)
--------------------------------------------
Tobit 2 model (sample selection model)
2-step Heckman / heckit estimation
100 observations (28 censored and 72 observed)
9 free parameters (df = 92)
Probit selection equation:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1162 0.3046 0.382 0.70357
x1 1.8334 0.6027 3.042 0.00306 **
z -1.1082 0.2223 -4.986 2.89e-06 ***
Outcome equation:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.2392 0.2589 -0.924 0.357910
x1 1.2318 0.3488 3.531 0.000649 ***
x2 -1.0503 0.0908 -11.567 < 2e-16 ***
Multiple R-Squared:0.7397, Adjusted R-Squared:0.7282
Error terms:
Estimate Std. Error t value Pr(>|t|)
invMillsRatio 0.8990 0.2493 3.607 0.000503 ***
sigma 0.8671 NA NA NA
rho 1.0369 NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
--------------------------------------------
Estimation using an optimization solver
- Define the objective as the average log-likelihood over the observations.
- Estimate the Hessian of this objective (call it H). Some optimizers return the Hessian as a by-product. Alternatively, compute the score of each observation and form the crossproduct s*s'/N, which equals -H in the limit (information equality).
- The variance estimate is inv(-H)/N.
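To make the recipe concrete for the Poisson example below: dropping the log(y!) term (which does not involve beta), the average log-likelihood is mean(y*mu - exp(mu)) with mu = beta1 + beta2*x, the score of observation i is (y_i - exp(mu_i))*(1, x_i)', and crossprod(score)/N therefore estimates -H.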
# R script
library(AER)
N <- 30
x <- runif(N)
mu_true <- 1 + x*2
y <- numeric(0)
for (i in 1:N) {
y <- c(y, rpois(1, exp(mu_true[i])))
}
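# average Poisson log-likelihood (the additive log(y!) term is dropped)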
obj <- function(beta, x, y) {
mu <- beta[1] + beta[2]*x
out <- mean(y*mu - exp(mu))
return(out)
}
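# fnscale=-1 makes optim maximize; hessian=T returns the numerically evaluated Hessian at the optimum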
res <- optim(c(1,1), obj, x=x, y=y, method="BFGS", control=list(fnscale=-1, trace=1), hessian=T)
V <- solve(-res$hessian)/N
cat("estimate", res$par, "se", diag(V)^0.5, "\n")
# should be the same as
coeftest(glm(y~x, family="poisson"))
# Alternatively, use the outer product of the scores
mu <- res$par[1] + res$par[2]*x
score1 <- y-exp(mu)
score2 <- x*y-x*exp(mu)
score <- matrix(c(score1, score2), ncol=2, byrow=F)
H <- crossprod(score, score)/N   # estimates -H via the information equality
V <- solve(H*N)                  # = inv(-H)/N
cat("estimate", res$par, "se", diag(V)^0.5, "\n")
# this is identical to
obj <- glm(y~x, family="poisson")
V.OP <- solve(meat(obj)*N)
coeftest(obj, vcov=V.OP)
# output --- #
estimate 1.154196 1.81395 se 0.1672227 0.2405329
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.15421 0.16722 6.9023 5.117e-12 ***
x 1.81393 0.24053 7.5413 4.652e-14 ***
---
estimate 1.154196 1.81395 se 0.1491571 0.2116985
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.15421 0.14916 7.7383 1.007e-14 ***
x 1.81393 0.21170 8.5685 < 2.2e-16 ***
---
Hypothesis Testing
Linear test
Data:
ols.csv
# R script
library(AER)
dat <- read.csv("ols.csv", as.is=T)
obj <- lm(y ~ x1 + x2, data=dat)
linearHypothesis(obj, "x1 = 1")
# the "= 0" may be omitted from the hypothesis string
linearHypothesis(obj, "x1 + x2")
# multiple restrictions (joint test)
linearHypothesis(obj, c("x1 = 1", "x2 = -1"))
# supply a robust variance estimate
linearHypothesis(obj, "x1 = 0", vcov=sandwich(obj, adjust=T))
# chi-square test
linearHypothesis(obj, "x1 = 1", test="Chisq")
# output ----- #
> linearHypothesis(obj, "x1 = 1")
Linear hypothesis test
Hypothesis:
x1 = 1
Model 1: restricted model
Model 2: y ~ x1 + x2
Res.Df RSS Df Sum of Sq F Pr(>F)
1 98 75.345
2 97 74.882 1 0.46245 0.599 0.4408
> linearHypothesis(obj, "x1 + x2")
Linear hypothesis test
Hypothesis:
x1 + x2 = 0
Model 1: restricted model
Model 2: y ~ x1 + x2
Res.Df RSS Df Sum of Sq F Pr(>F)
1 98 75.892
2 97 74.882 1 1.0098 1.3081 0.2556
> linearHypothesis(obj, c("x1 = 1", "x2 = -1"))
Linear hypothesis test
Hypothesis:
x1 = 1
x2 = - 1
Model 1: restricted model
Model 2: y ~ x1 + x2
Res.Df RSS Df Sum of Sq F Pr(>F)
1 99 76.889
2 97 74.882 2 2.0073 1.3001 0.2772
> linearHypothesis(obj, "x1 = 0", vcov=sandwich(obj, adjust=T))
Linear hypothesis test
Hypothesis:
x1 = 0
Model 1: restricted model
Model 2: y ~ x1 + x2
Note: Coefficient covariance matrix supplied.
Res.Df Df F Pr(>F)
1 98
2 97 1 17.212 7.175e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> linearHypothesis(obj, "x1 = 1", test="Chisq")
Linear hypothesis test
Hypothesis:
x1 = 1
Model 1: restricted model
Model 2: y ~ x1 + x2
Res.Df RSS Df Sum of Sq Chisq Pr(>Chisq)
1 98 75.345
2 97 74.882 1 0.46245 0.599 0.4389