% MINI MINI MANI MO
%
% Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
%
% NAME
% ore.doEval.Rd - embedded functions
%
% DESCRIPTION
% Methods for embedded R execution.
%
% NOTES
%
% MODIFIED (MM/DD/YY)
% qinwan 09/24/15 - update FUN argument description
% ffeli 09/03/15 - add INDEX argument descriptions
% qinwan 06/24/15 - add FUN.OWNER argument
% qinwan 05/21/15 - support lob type
% lbzhang 04/10/14 - add control arg ore.envAsEmptyenv
% paboyoun 12/27/13 - add ore.parallel option
% paboyoun 11/19/13 - add ore.na.omit control argument
% paboyoun 05/09/13 - change parallel argument default value
% lzhang 04/26/13 - bug 16587457: add DOP hint in ore.*Apply functions
% demukhin 11/12/12 - add control arguments
% qinwan 11/06/12 - doc R/Package version script
% demukhin 11/02/12 - bug 14539151: document INDEX
% demukhin 09/15/12 - prj: auto connect
%
\name{ore.doEval}
\alias{ore.doEval}
\alias{ore.groupApply}
\alias{ore.indexApply}
\alias{ore.rowApply}
\alias{ore.tableApply}
\title{Oracle R Enterprise Embedded R Script Execution Functions}
\description{
Runs a function within the Oracle database under various conditions.
}
\usage{
ore.doEval(FUN, ..., FUN.VALUE = NULL, FUN.NAME = NULL, FUN.OWNER = NULL)
ore.groupApply(X, INDEX, FUN, ..., FUN.VALUE = NULL,
FUN.NAME = NULL, FUN.OWNER = NULL,
parallel = getOption("ore.parallel", NULL))
ore.indexApply(times, FUN, ..., FUN.VALUE = NULL,
FUN.NAME = NULL, FUN.OWNER = NULL,
parallel = getOption("ore.parallel", NULL))
ore.rowApply(X, FUN, ..., FUN.VALUE = NULL,
FUN.NAME = NULL, FUN.OWNER = NULL, rows = 1,
parallel = getOption("ore.parallel", NULL))
ore.tableApply(X, FUN, ..., FUN.VALUE = NULL,
FUN.NAME = NULL, FUN.OWNER = NULL)
}
\arguments{
\item{X}{
An \code{\linkS4class{ore.frame}} object.
}
\item{INDEX}{
A \code{\linkS4class{ore.vector}} or \code{\linkS4class{ore.frame}} object
containing \code{\linkS4class{ore.factor}} objects or columns, each of
which is the same length as argument \code{X}. It is used to partition
the data in \code{X} before sending it to function \code{FUN}.
The corresponding supported R types are logical, integer, numeric,
character, and factor.
}
\item{times}{
The number of times to execute the function.
}
\item{FUN}{
The function to be applied. For functions \code{ore.groupApply},
\code{ore.rowApply}, and \code{ore.tableApply} the first argument to
the \code{FUN} argument must represent a
\code{\link[base]{data.frame}} object. For function
\code{ore.indexApply}, the first argument to \code{FUN} must
represent the index number. For function \code{ore.doEval}, no
arguments are required for \code{FUN}. The function specified by
\code{FUN} cannot recursively call embedded R APIs.
Cannot be used with argument \code{FUN.NAME}.
}
\item{\dots}{
Additional arguments to \code{FUN}.
Arguments that start with \code{ore.} are special control
arguments. They are not passed to the function specified by
\code{FUN} or \code{FUN.NAME} arguments, but instead control what
happens before or after the execution of the closure. The following
control arguments are supported:
\enumerate{
\item \code{ore.drop} - controls the object type for the input
data. If \code{TRUE}, a one column \code{data.frame} will be
converted to a \code{vector}. The default value is \code{TRUE}.
\item \code{ore.na.omit} - controls the handling of missing values
in the input data. If \code{TRUE}, rows or vector elements,
depending on the \code{ore.drop} setting, containing missing
values will be removed from the input data. If all the rows in a
chunk contain missing values, the input data for that chunk will
be an empty \code{data.frame} or \code{vector}. The default value
is \code{FALSE}.
\item \code{ore.connect} - controls whether to automatically
connect to Oracle R Enterprise inside the closure. This is
equivalent to doing an \code{ore.connect} call with the same
credentials as the client session. The default value is
\code{FALSE}.
\item \code{ore.graphics} - controls whether to start a graphical
driver and look for images. The default value is \code{TRUE}.
\item \code{ore.png.*} - if \code{ore.graphics} is \code{TRUE},
additional parameters for the \code{\link[grDevices]{png}}
graphics device driver. The naming convention for these arguments
is to add an \code{ore.png.} prefix to the arguments of the
\code{\link[grDevices]{png}} function. For example, if
\code{ore.png.height} is supplied, argument \code{height} will be
passed to the \code{\link[grDevices]{png}} function. If not set,
the standard default values for the \code{\link[grDevices]{png}}
function are used.
\item \code{ore.envAsEmptyenv} - controls whether referenced
environments in an object should be replaced with an empty
environment during serialization. Some types of input parameters
and returned objects, such as \code{list} and \code{formula}, are
serialized before being saved to the database. If \code{TRUE}, the
referenced environment in the object will be replaced with an
empty environment whose parent is \code{.GlobalEnv}, and
therefore, the objects in the original referenced environment will
not be serialized. In some situations, this could significantly
reduce the size of serialized objects. If \code{FALSE}, all the
objects in the referenced environment will be serialized, and
could be unserialized and recovered later. The default value is
regulated by the global option \code{ore.envAsEmptyenv}.
}
}
\item{FUN.VALUE}{
A \code{data.frame} or \code{\linkS4class{ore.frame}} to use as a
template for the return value.
The attribute \code{ora.type} can be applied to a \code{data.frame} column
to specify that the corresponding output column of a
\code{\linkS4class{ore.frame}} uses a \code{CLOB} or \code{BLOB} type.
}
\item{FUN.NAME}{
A character string specifying the name of a serialized \R script,
which contains a single \R function definition, within the Oracle R
Enterprise in-database \R script archive. Cannot be used with
\code{FUN}.
Oracle R Enterprise comes with a number of predefined graphical
scripts. All predefined scripts have a reserved name that starts with
\code{RQG$} followed by a function name from the \pkg{graphics}
package that the script wraps. Depending on the function, it takes
either the first, the first and second, or all columns of the input
\code{\link[base]{data.frame}}. Thus, predefined scripts can only be
used with \code{ore.tableApply}, \code{ore.groupApply}, or
\code{ore.rowApply}. Each function also has a \code{...} so that it
can pass any parameter to the function that it wraps. Here is a list
of predefined graphical scripts:
\enumerate{
\item \code{RQG$plot1d} - a wrapper for \code{\link[graphics]{plot}}.
Works on the first column of the input
\code{\link[base]{data.frame}} object.
\item \code{RQG$plot2d} - a wrapper for \code{\link[graphics]{plot}}.
Works on the first two columns of the input
\code{\link[base]{data.frame}} object.
\item \code{RQG$hist} - a wrapper for \code{\link[graphics]{hist}}.
Works on the first column of the input
\code{\link[base]{data.frame}} object.
\item \code{RQG$boxplot} - a wrapper for \code{\link[graphics]{boxplot}}.
Works on the first column of the input
\code{\link[base]{data.frame}} object.
\item \code{RQG$smoothScatter} - a wrapper for
\code{\link[graphics]{smoothScatter}}. Works on the first two
columns of the input \code{\link[base]{data.frame}} object.
\item \code{RQG$cdplot} - a wrapper for \code{\link[graphics]{cdplot}}.
Works on the first two columns of the input
\code{\link[base]{data.frame}} object.
\item \code{RQG$pairs} - a wrapper for \code{\link[graphics]{pairs}}.
Works on all columns of the input
\code{\link[base]{data.frame}} object.
\item \code{RQG$matplot} - a wrapper for \code{\link[graphics]{matplot}}.
Works on all columns of the input
\code{\link[base]{data.frame}} object.
}
Oracle R Enterprise also comes with a number of predefined \R and
package version scripts. These scripts start with \code{RQ$}
followed by an \R function name that the script wraps and can only
be used with \code{ore.doEval}. Here is a list of these predefined
scripts:
\enumerate{
\item \code{RQ$R.Version} - a wrapper for \code{\link[base]{R.Version}}.
Takes no argument and returns \R version-relevant information.
\item \code{RQ$getRversion} - a wrapper for
\code{\link[base]{getRversion}}. Takes no argument and returns \R
version number.
\item \code{RQ$installed.packages} - a wrapper for
\code{\link[utils]{installed.packages}}. Takes no argument and
returns package name, version number, and package installation
location of installed packages.
\item \code{RQ$packageVersion} - a wrapper for
\code{\link[utils]{packageVersion}}. Takes package name as
argument and returns package version number.
}
}
\item{FUN.OWNER}{
An optional character string specifying the owner of the \code{FUN.NAME}
\R script. The user that creates an \R script with
\code{\link{ore.scriptCreate}} is the owner of that script. The
\code{RQSYS} schema is the owner of the global and pre-defined \R scripts.
When \code{FUN.OWNER} is not specified or is \code{NULL}, then
Oracle R Enterprise looks for the owner in the following order:
the user of the current session, then \code{RQSYS}. Argument \code{FUN.OWNER}
is only used with argument \code{FUN.NAME}.
}
\item{rows}{
The maximum number of rows in each chunk.
}
\item{parallel}{
A preferred degree of parallelism to use in the embedded R job;
either a positive integer greater than or equal to \code{2} for a
specific degree of parallelism,
a value of \code{FALSE} or \code{1} for no parallelism,
a value of \code{TRUE} for the \code{X} argument's default
parallelism, or
\code{NULL} for the database default for the operation.
The default value is regulated by the global option
\code{ore.parallel}.
}
}
\details{
Function \code{ore.doEval} executes a function, either \code{FUN} or
\code{FUN.NAME}, within an \R process running inside the Oracle database.
Function \code{ore.groupApply} partitions an in-database data set by a
(potentially derived) column and executes a function on those
partitions within \R processes running inside the Oracle database. Each
partition must fit wholly within a single \R process.
Function \code{ore.indexApply} executes a function \code{times} number
of times inside the Oracle database.
Function \code{ore.rowApply} partitions an in-database data set into
row chunks and executes a function on those partitions within \R
processes running inside the Oracle database. Each partition must fit
wholly within a single \R process.
Function \code{ore.tableApply} executes a function on an in-database
data set.
Either argument \code{FUN} or \code{FUN.NAME} must be supplied. For
security reasons, use of argument \code{FUN} requires
\option{RQADMIN} Oracle database privileges. Because the \R
script represented by argument \code{FUN.NAME} has to be published by
someone with \option{RQADMIN} credentials, it can be used by anyone
authorized to use Oracle R Enterprise.
Argument \code{FUN.OWNER} can be used with argument \code{FUN.NAME} to
uniquely specify an \R function defined in the \R script repository.
The \code{parallel} argument regulates the use of a
\option{/*+ parallel */}, \option{/*+ parallel(DOP) */}, or a
\option{/*+ no_parallel */} hint being added to the underlying SQL
query. Consult Oracle database documentation for more information.
When control argument \code{ore.connect} is \code{TRUE}, the function to
be applied specified via argument \code{FUN} or \code{FUN.NAME} is
automatically connected to Oracle R Enterprise with the same credentials
as the client session invoking it. Only an equivalent of
\code{ore.connect} is invoked. Functions such as \code{ore.sync},
\code{ore.attach}, and \code{ore.get} should be called explicitly.
}
\value{
If argument \code{FUN.VALUE} is supplied, an
\code{\linkS4class{ore.frame}} object that conforms to the
\code{FUN.VALUE} template is returned.
If argument \code{FUN.VALUE} is not supplied, then functions
\code{ore.doEval} and \code{ore.tableApply} return an
\code{\linkS4class{ore.object}} while functions \code{ore.groupApply},
\code{ore.indexApply}, and \code{ore.rowApply} return an
\code{\linkS4class{ore.list}}.
}
\references{
\href{http://www.oracle.com/technetwork/database/database-technologies/r/r-enterprise/documentation/index.html}{Oracle R Enterprise}
}
\author{
Oracle \email{oracle-r-enterprise@oracle.com}
}
\seealso{
\code{\link{ore.scriptCreate}},
\code{\link[OREbase]{ore.options}}
}
\examples{
## ore.doEval
eval1 <- ore.doEval(function() "Hello, world")
eval2 <-
ore.doEval(function()
data.frame(x = "Hello, world", stringsAsFactors = FALSE))
eval3 <-
ore.doEval(function()
data.frame(x = "Hello, world", stringsAsFactors = FALSE),
FUN.VALUE =
data.frame(x = character(), stringsAsFactors = FALSE))
out.df <- data.frame(x = character(), y = raw(), stringsAsFactors = FALSE)
attr(out.df$x, "ora.type") <- "clob"
attr(out.df$y, "ora.type") <- "blob"
eval4 <-
ore.doEval(function() {
res <- data.frame(x = "Hello, world",stringsAsFactors = FALSE)
res$y[[1L]] <- charToRaw("Hello, world")
res},
FUN.VALUE = out.df)
eval1
class(eval1) # ore.object
eval2
class(eval2) # ore.object
eval3
class(eval3) # ore.frame
eval4$x
rawToChar(ore.pull(eval4$y))
## copy data to the database
IRIS <- ore.push(iris)
## ore.groupApply
grpAp1 <-
ore.groupApply(IRIS, IRIS$Species,
function(df)
if(nrow(df) == 0)
NULL
else
summary(lm(Sepal.Length ~ ., data = df[1:4])),
parallel = TRUE)
grpAp2 <-
ore.groupApply(IRIS, IRIS$Species,
function(df) {
if (nrow(df) == 0) {
species <- character()
cf <- numeric()
names(cf) <- character()
} else {
species <- as.character(df$Species[1])
cf <- coef(lm(Sepal.Length ~ .,
data = df[1:4]))
}
data.frame(Species = species,
CoefName = names(cf),
CoefValue = unname(cf),
stringsAsFactors = FALSE)
},
FUN.VALUE =
data.frame(Species = character(),
CoefName = character(),
CoefValue = numeric(),
stringsAsFactors = FALSE),
parallel = TRUE)
class(grpAp1) # ore.list
class(grpAp2) # ore.frame
## ore.indexApply
ore.indexApply(5, function(i) i)
if (interactive())
ore.indexApply(5, function(i) summary(rnorm(100)), parallel = TRUE)
## ore.rowApply
# create a classification tree for iris data
library(rpart)
irisRpart <- rpart(Species ~ ., data = iris)
irisPred <-
ore.rowApply(IRIS,
function(df, model) {
library(rpart)
cbind(df, PRED = predict(model, df, type = "class"))
}, model = irisRpart,
FUN.VALUE =
cbind(iris[integer(),], PRED = character()),
rows = 50, parallel = TRUE)
## ore.tableApply
ore.tableApply(IRIS, function(df) summary(df))
}
\keyword{data}
\keyword{programming}
\keyword{iteration}
\keyword{category}
\keyword{database}
\keyword{ORE}
% OHA YOOOO