MINI MINI MANI MO

Path : /opt/oracle/product/18c/dbhomeXE/R/library/OREmodels/doc/man/en/
File Upload :
Current File : //opt/oracle/product/18c/dbhomeXE/R/library/OREmodels/doc/man/en/ore.randomForest.Rd

%
% Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
%
\name{ore.randomForest}
\alias{ore.randomForest}
\alias{grabTree}
\alias{grabTree.ore.randomForest}
\alias{predict.ore.randomForest}
\alias{print.ore.randomForest}
\title{Oracle R Enterprise randomForest Function}
\description{
  Create random forest models in parallel on \code{ore.frame} data for
  classification.
}
\usage{
ore.randomForest(formula, data, ntree=500, mtry = NULL,
           replace = TRUE, classwt = NULL, cutoff = NULL,
           sampsize = if(replace) nrow(data) else ceiling(0.632*nrow(data)),
           nodesize = 1L, maxnodes = NULL, confusion.matrix = FALSE,
           groups = getOption("ore.parallel", NULL), na.action = na.fail, ...)

### Specific methods for ore.randomForest objects
\method{grabTree}{ore.randomForest}(object, k = 1L, labelVar = FALSE, ...)
\method{predict}{ore.randomForest}(object, newdata,
                                   type = c("response", "prob", "vote", "all"),
                                   norm.votes = TRUE,
                                   supplemental.cols = NULL,
                                   cache.model = TRUE, ...)
\method{print}{ore.randomForest}(x, ...)
}
\arguments{
  \item{formula}{A \code{\link[stats]{formula}} object representing the
    random forest model to be trained.}
  \item{data}{An \code{ore.frame} object specifying the data for the
    model.}
  \item{ntree}{The total number of trees to grow.}
  \item{mtry}{The number of variables randomly sampled as candidates at each
    tree node split. If not specified, it is set to the floor of the
    square root of the number of attributes in \code{formula}.}
  \item{replace}{A logical value indicating whether to execute sampling
    with replacement.}
  \item{classwt}{A vector of priors of the classes. If specified, the
    length of the vector should be equal to the number of classes in the
    target column. The vector does not need to add up to 1.}
  \item{cutoff}{A vector of cutoff values. If specified, the length of
    the vector should be equal to the number of classes in the target
    column. When determining the prediction class for an observation,
    the one with the maximum ratio of proportion of votes to cutoff is
    selected. If not specified, the default is \code{1/k} where \code{k}
    is the number of classes.}
  \item{sampsize}{The size of the sample to draw for growing trees.}
  \item{nodesize}{The minimum size of terminal nodes.}
  \item{maxnodes}{The maximum number of terminal nodes of each tree to
    be grown. If not specified, trees can be grown to the maximum size
    subject to the limits of \code{nodesize}.}
  \item{confusion.matrix}{A logical value indicating whether to
    calculate the confusion matrix. Note that this confusion matrix is
    not based on OOB (out-of-bag) samples; it is the result of applying the
    built random forest model to the entire training data.}
  \item{groups}{The number of groups into which the total number of
    trees is divided. The default is equal to the value of
    the option \code{ore.parallel}. If the system memory is limited, it
    is recommended to set a large number for this argument, so that the
    size of each group is kept small to avoid running out of memory. Otherwise,
    the argument should be left as the default.}
  \item{na.action}{The manner in which \code{NA} values are
    handled. With the default \code{na.fail}, it fails if the training
    data contains \code{NA}.}
  \item{\dots}{Additional arguments.}
  \item{object, x}{An \code{ore.randomForest} object.}
  \item{k}{An integer indicating which tree's information to extract.}
  \item{labelVar}{A logical value indicating whether the
    \code{split var} and \code{prediction} columns in the returned frame
    use meaningful labels.}
  \item{newdata}{An \code{ore.frame} object, the test data.}
  \item{type}{This argument specifies the type of the output. The value
    can be \code{response}, \code{prob}, \code{vote}, or \code{all},
    indicating the type of output: predicted values, matrix of class
    probabilities, matrix of vote counts, or both the vote matrix and
    predicted values, respectively.}
  \item{norm.votes}{A logical value indicating whether the vote counts
    in the output vote matrix should be normalized. The argument is
    ignored if \code{type} is \code{response} or \code{prob}.}
  \item{supplemental.cols}{Additional columns to include in the prediction
    result from the \code{newdata} data set.}
  \item{cache.model}{A logical value indicating whether the entire
    random forest model is cached in memory during prediction.}
}
\value{
  For \code{ore.randomForest}, it returns an object of class
  \code{ore.randomForest}. Some of its components are as follows:
  \item{forest}{An \code{ore.frame} object storing the serialized grown trees.}
  \item{DOP}{The degree of parallelism used to build the model.}
  \item{confusion}{The confusion matrix as the result of applying the
    built model onto the training data if \code{confusion.matrix} is
    specified as \code{TRUE}.}

  For \code{grabTree.ore.randomForest}, it returns an \code{ore.frame}
  with information about the \code{k}th tree. Each row represents one node,
  and it contains the node id, the child nodes, the split variable, the
  split point, the status of the node (terminal: \code{-1};
  non-terminal: \code{1}), and the prediction.

  For \code{predict.ore.randomForest}, it returns an \code{ore.frame}
  with prediction and/or voting matrix, depending on the argument \code{type}.
}
\details{
  The \code{ore.randomForest} function builds a random forest model by
  growing trees in parallel. The function returns an
  \code{ore.randomForest} object. It requires that the Oracle R Distribution
  (ORD) or \code{randomForest} package be installed. The Oracle R
  Distribution is preferred to the package \code{randomForest} for
  better performance and compatibility. A warning will be issued if the package
  \code{randomForest} is used.
  
  The scoring method \code{predict} runs in parallel. The default value
  of \code{cache.model}, \code{TRUE}, is recommended when sufficient
  memory is available. Otherwise, \code{cache.model} should be set to
  \code{FALSE} to prevent memory overuse.

  The \code{\link[OREbase:ore.options]{"ore.parallel"}} global option is
  used by \code{ore.randomForest} to determine the preferred degree of
  parallelism to use within the Oracle R Enterprise server.
}
\references{
  \href{http://www.oracle.com/technetwork/database/database-technologies/r/r-enterprise/documentation/index.html}{Oracle R Enterprise}
}
\author{
  Oracle \email{oracle-r-enterprise@oracle.com}
}
\seealso{
  \code{\link[randomForest]{randomForest}},
  \code{\link[OREbase:ore.options]{ore.parallel}}
}
\examples{
  IRIS <- ore.push(iris)
  mod <- ore.randomForest(Species~., IRIS)
  tree10 <- grabTree(mod, k = 10, labelVar = TRUE)
  ans <- predict(mod, IRIS, type="all", supplemental.cols="Species")
  table(ans$Species, ans$prediction)
}
\keyword{randomForest}

OHA YOOOO