% MINI MINI MANI MO
%
% Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
%
\name{ore.glm}
\alias{ore.glm}
\alias{ore.glm.control}
\alias{summary.ore.glm}
\alias{print.summary.ore.glm}
\alias{predict.ore.glm}
\alias{residuals.ore.glm}
\alias{vcov.ore.glm}
\concept{regression}
\title{Oracle R Enterprise Generalized Linear Models}
\description{
Functions for fitting and using generalized linear models on
\code{ore.frame} data.
}
\usage{
### Fitting function
ore.glm(formula, data, weights, family = gaussian(), start = NULL,
control = list(...), contrasts = NULL, xlev = NULL,
ylev = NULL, yprob = NULL, ...)
### Fit control function
ore.glm.control(devlre = 8, maxit = 25, linesearch = FALSE,
trace = getOption("ore.trace", FALSE), ...)
### Specific methods for ore.glm objects
\S3method{summary}{ore.glm}(object, dispersion = NULL, correlation = FALSE,
symbolic.cor = FALSE, ...)
\S3method{vcov}{ore.glm}(object, ...)
\S3method{predict}{ore.glm}(object, newdata = NULL, type = c("link", "response"),
se.fit = FALSE, dispersion = NULL, na.action = na.pass, supplemental.cols = NULL,
...)
\S3method{residuals}{ore.glm}(object, type = c("deviance", "pearson", "working", "response", "partial"),
...)
### Inherited methods for ore.glm objects
#coef(object, ...)
#coefficients(object, ...)
#deviance(object, ...)
#effects(object, ...)
#extractAIC(fit, scale, k = 2, ...)
#family(object, ...)
#fitted(object, ...)
#fitted.values(object, ...)
#formula(x, ...)
#logLik(object, ...)
#model.frame(formula, ...)
#nobs(object, ...)
#weights(object, ...)
}
\arguments{
\item{formula}{A \code{\link[stats]{formula}} object representing the
model to be fit.}
\item{data}{An \code{ore.frame} object specifying the data for the
model.}
\item{weights}{An optional \code{ore.number} object specifying the
analytic weights in the model.}
\item{supplemental.cols}{Additional columns to include in the prediction
result from the \code{newdata} data set.}
\item{family}{A \code{\link[stats]{family}} object specifying the
generalized linear model family details. This is the same type of
family object that is used by the \code{\link[stats]{glm}} function
in the \pkg{stats} package, which includes the
\code{\link[MASS]{negative.binomial}} function from the \pkg{MASS}
package as well as the \code{\link[statmod]{tweedie}} function from
the \pkg{statmod} package.}
\item{start}{An optional \code{\link[base]{numeric}} vector specifying
the initial coefficient estimates in the linear predictor.}
\item{control}{An optional \code{\link[base]{list}} object containing
a list of fit control parameters to be interpreted by the
\code{ore.glm.control} function.}
\item{contrasts}{An optional named \code{\link[base]{list}} to be
supplied to the \code{contrasts.arg} argument of
\code{\link[stats]{model.matrix}}.}
\item{xlev}{An optional named \code{\link[base]{list}} of
\code{\link[base]{character}} vectors specifying the
\code{\link[base]{levels}} for each
\code{\link[OREbase:ore.factor-class]{ore.factor}} variable.}
\item{ylev}{An optional \code{\link[base]{character}} vector to
specify the response variable levels in
\code{\link[stats]{binomial}} generalized linear models.}
\item{yprob}{An optional numeric value between 0 and 1 specifying the
overall probability of \code{y != ylev[1]} in
\code{\link[stats]{binomial}} generalized linear models.}
\item{devlre}{A positive number specifying the minimum log relative
error of the residual deviance convergence criterion,
\eqn{-\log_{10}(|dev - dev_{old}|/|dev|) \ge devlre}{-log10(|dev - dev_old|/|dev|) >= devlre}.}
\item{maxit}{A positive integer specifying the maximum number of
Fisher scoring iterations.}
\item{linesearch}{A logical value indicating whether a line search
should be used after each Fisher scoring iteration. If \code{FALSE},
then a line search will be used after the first two iterations and any
subsequent iteration that results in an increase in the residual
deviance.}
\item{trace}{A logical or numeric value controlling the output produced
at each Fisher scoring iteration:
a value of \code{FALSE} or \code{0} produces no output,
a value of \code{TRUE} or \code{1} prints the
residual deviance for each iteration, and
a value of \code{2} prints the residual deviance
and a runtime breakdown for each iteration. The default value is
regulated by the global option \code{ore.trace}.}
\item{object}{An \code{ore.glm} object.}
\item{newdata}{An \code{ore.frame} object containing the data to use
for prediction.}
\item{dispersion, correlation, symbolic.cor}{Arguments not implemented.}
\item{type}{A character string specifying the type of predictions or
residuals to produce.}
\item{se.fit}{A logical value indicating whether to return the
standard errors for the predictions.}
\item{na.action}{The manner in which \code{NA} values are handled,
either \code{na.omit} or \code{na.pass}.}
\item{\dots}{Additional arguments.}
}
\details{
The \code{ore.glm} function fits generalized linear models using a
Fisher scoring iteratively re-weighted least squares (IRLS)
algorithm. Instead of the traditional step halving to prevent the
selection of less optimal coefficient estimates, a line search is used
to select new coefficient estimates at each iteration starting from
the current coefficient estimates and moving through the Fisher
scoring suggested estimates using the formula
\eqn{(1 - \alpha) * old + \alpha * suggested} where \eqn{\alpha} in
\eqn{[0, 2]}.
Each iteration consists of up to three embedded R map/reduce
operations: an IRLS operation and up to two line search operations.
The IRLS map/reduce operations are performed on the matrix cross-products
derived from \code{model.matrix} or \code{sparse.model.matrix} function
calls, depending on the underlying sparsity of the model matrix. After
the algorithm has either converged or reached the maximum number of
iterations, a final embedded R map/reduce operation is used to
generate the complete set of model-level statistics.
The \code{\link[OREbase:ore.options]{"ore.parallel"}} global option is
used by \code{ore.glm} to determine the preferred degree of
parallelism to use within the Oracle R Enterprise server.
}
\value{
For \code{ore.glm}, returns an \code{ore.glm} object.
For \code{summary.ore.glm}, returns a \code{summary.ore.glm} object.
Note: the training data referenced by argument \code{data} is needed to
produce meta information about the \code{ore.glm} object.
}
\references{
\href{http://www.oracle.com/technetwork/database/database-technologies/r/r-enterprise/documentation/index.html}{Oracle R Enterprise}
}
\author{
Oracle \email{oracle-r-enterprise@oracle.com}
}
\seealso{
\code{\link[OREstats]{model.matrix,formula-method}} (\pkg{OREstats} package),
\code{\link{ore.lm}},
\code{\link[stats]{glm}},
\code{\link[stats]{family}},
\code{\link[OREbase:ore.options]{ore.parallel}}
}
\examples{
\dontshow{
if (!interactive())
ore.connect(user = Sys.getenv("ORE_USERNAME", "rquser"),
sid = Sys.getenv("ORACLE_SID"),
host = Sys.getenv("HOST"),
password = Sys.getenv("ORE_PASSWORD", "rquser"),
port = if (.Platform$OS.type == "windows")
Sys.getenv("ORACLE_PORT")
else
Sys.getenv("TCPPORT"),
all = TRUE)
}
# Load libraries for examples
library(OREstats)
library(rpart) # kyphosis and solder data sets
# Logistic regression
KYPHOSIS <- ore.push(kyphosis)
kyphFit1 <- ore.glm(Kyphosis ~ ., data = KYPHOSIS, family = binomial())
kyphFit2 <- glm(Kyphosis ~ ., data = kyphosis, family = binomial())
summary(kyphFit1)
summary(kyphFit2)
# Poisson regression
SOLDER <- ore.push(solder)
solFit1 <- ore.glm(skips ~ ., data = SOLDER, family = poisson())
solFit2 <- glm(skips ~ ., data = solder, family = poisson())
summary(solFit1)
summary(solFit2)
# Negative binomial regression
solFit3 <- ore.glm(skips ~ ., data = SOLDER, family = MASS::negative.binomial(10))
solFit4 <- glm(skips ~ ., data = solder, family = MASS::negative.binomial(10))
print(summary(solFit3))
print(summary(solFit4))
# Tweedie regression
solFit5 <- ore.glm(skips ~ ., data = SOLDER, family = statmod::tweedie(1.5))
solFit6 <- glm(skips ~ ., data = solder, family = statmod::tweedie(1.5))
print(summary(solFit5))
print(summary(solFit6))
}
\keyword{regression}
% OHA YOOOO