library("glmx")
## artificial data from geometric regression
set.seed(1)
d <- data.frame(x = runif(200, -1, 1))
d$y <- rnbinom(200, mu = exp(0 + 3 * d$x), size = 1)
### negative binomial regression ###
## negative binomial regression via glmx
if(require("MASS")) {
m_nb1 <- glmx(y ~ x, data = d,
family = negative.binomial, xlink = "log", xstart = 0)
summary(m_nb1)
## negative binomial regression via MASS::glm.nb
m_nb2 <- glm.nb(y ~ x, data = d)
summary(m_nb2)
## comparison
if(require("lmtest")) {
logLik(m_nb1)
logLik(m_nb2)
coeftest(m_nb1)
coeftest(m_nb2)
exp(coef(m_nb1, model = "extra"))
m_nb2$theta
exp(coef(m_nb1, model = "extra")) * sqrt(vcov(m_nb1, model = "extra"))
m_nb2$SE.theta
}}[1] 0.251833
## if the score (or gradient) contribution of the extra parameters
## is supplied, then estimation can be speeded up:
negbin <- function(theta) {
fam <- negative.binomial(theta)
fam$loglik.extra <- function(y, mu, theta) digamma(y + theta) - digamma(theta) +
log(theta) + 1 - log(mu + theta) - (y + theta)/(mu + theta)
fam
}
m_nb3 <- glmx(y ~ x, data = d,
family = negbin, xlink = "log", xstart = 0, profile = FALSE)
all.equal(coef(m_nb1), coef(m_nb3), tolerance = 1e-7)[1] TRUE
Call:
glmx(formula = factor(y > 0) ~ x, data = d, family = nbbin, xlink = "log",
xstart = 0)
Deviance residuals:
Min 1Q Median 3Q Max
0.0254 0.2368 0.5597 1.3687 7.2594
Coefficients (binomial model with nblogit link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.1827 0.3417 -0.535 0.59275
x 2.2709 0.7165 3.170 0.00153 **
Extra parameters (with log link):
Estimate Std. Error z value Pr(>|z|)
log(theta) 1.412 2.345 0.602 0.547
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Log-likelihood: -95.02 on 3 Df
Dispersion: 1
Number of iterations in BFGS optimization: 9 (profile) 1 (full)
Call:
hurdle(formula = y ~ x, data = d, dist = "negbin", zero.dist = "negbin")
Pearson residuals:
Min 1Q Median 3Q Max
-1.0850 -0.6256 -0.3790 0.4535 4.4274
Count model coefficients (truncated negbin with log link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.1178 0.2188 -0.538 0.590
x 3.2573 0.3271 9.959 <2e-16 ***
Log(theta) 0.2066 0.2970 0.696 0.487
Zero hurdle model coefficients (censored negbin with log link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.1827 0.3416 -0.535 0.59274
x 2.2709 0.7165 3.170 0.00153 **
Log(theta) 1.4115 2.3451 0.602 0.54725
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Theta: count = 1.2295, zero = 4.1021
Number of iterations in BFGS optimization: 29
Log-likelihood: -342.8 on 6 Df