Rem MINI MINI MANI MO
Rem
Rem Path : /opt/oracle/product/18c/dbhomeXE/ctx/admin/
Rem Current File : //opt/oracle/product/18c/dbhomeXE/ctx/admin/dr0cls.pkh
Rem Copyright (c) 1998, 2017, Oracle and/or its affiliates. 
Rem All rights reserved.
Rem
Rem    NAME
Rem      dr0cls.pkh - Classification Training Service
Rem
Rem    DESCRIPTION
Rem
Rem    RETURNS
Rem 
Rem    NOTES
Rem
Rem 
Rem    BEGIN SQL_FILE_METADATA
Rem      SQL_SOURCE_FILE: ctx_src_2/src/dr/admin/dr0cls.pkh
Rem      SQL_SHIPPED_FILE: ctx/admin/dr0cls.pkh
Rem      SQL_PHASE: DR0CLS_PKH
Rem      SQL_STARTUP_MODE: NORMAL
Rem      SQL_IGNORABLE_ERRORS: NONE
Rem      SQL_CALLING_FILE: ctx/admin/ctxpkh.sql
Rem    END SQL_FILE_METADATA
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem       surman   01/23/15 - 20411134: Add SQL metadata tags
Rem       shuroy   05/01/14 - Adding SA_TRAIN proc
Rem       aczarlin 01/23/14 - enable 128 byte database objects
Rem       surman   03/15/13 - 16473661: Common start and end scripts
Rem       hsarkar  06/08/11 - Logical Standby Support
Rem       shorwitz 08/28/08 - Bug 7237890
Rem       daliao   09/02/03 - bug-3096033, no default preference
Rem       daliao   01/13/03 - add clustering
Rem       daliao   09/24/02 - add another generic training API. 
Rem       daliao   01/31/02 - daliao_classification
Rem	daliao	10/2/2001 - Creation 
Rem

@@?/rdbms/admin/sqlsessstart.sql

CREATE OR REPLACE PACKAGE ctx_cls AUTHID CURRENT_USER AS

  /*
    CTX_CLS: classification training, sentiment model training and
    document clustering entry points.

    This is the specification only. The package is declared with
    AUTHID CURRENT_USER, so it runs with the privileges of the caller.
  */

  /* ======================== TYPE DEFINITIONS ======================== */

  /* One entry of the in-memory document assignment table. */
  TYPE doc_rec IS RECORD (
    docid      NUMBER,  -- identifies the document
    clusterid  NUMBER,  -- cluster the document is assigned to
    score      NUMBER   -- similarity score between document and cluster
  );
  TYPE doc_tab IS TABLE OF doc_rec INDEX BY BINARY_INTEGER;

  /* One entry of the in-memory cluster information table. */
  TYPE cluster_rec IS RECORD (
    clusterid      NUMBER,              -- identifies the cluster
    descript       drvutl.dr_extrabuf,  -- string describing the cluster
    label          drvutl.dr_longbuf,   -- suggested label for the cluster
    sze            NUMBER,              -- number of documents assigned to the cluster
    quality_score  NUMBER,              -- quality score of the cluster
    parent         NUMBER               -- parent cluster id, negative means no parent
  );
  TYPE cluster_tab IS TABLE OF cluster_rec INDEX BY BINARY_INTEGER;

  /* List of document ids, used to restrict in-memory clustering. */
  TYPE docid_tab IS TABLE OF NUMBER INDEX BY BINARY_INTEGER;

  /* ======================= SENTIMENT CONSTANTS ====================== */

  SENTIMENT_POS CONSTANT NUMBER := 1;  -- positive sentiment id
  SENTIMENT_NEG CONSTANT NUMBER := 2;  -- negative sentiment id
  SENTIMENT_NUL CONSTANT NUMBER := 0;  -- neutral sentiment id

  /* ====================== TRAIN FOR CTX-RULES ======================= */
  /*
    NAME
      train : automatically generate ctx-rules from training examples

    DESCRIPTION
      Generates ctx-rules for a given set of training examples. The
      examples are held in two tables.

        Document table, with at least these columns:
          docid        number primary key
          text         document column that can be indexed by a context index

        Category table, with at least these columns:
          docid        CONSTRAINT fk_id REFERENCES doctab(id)
          category_id  number

      The foreign key on the category table is recommended but not
      required.

      The rules are written to the result table named by the caller.
      That table must have these columns:

          category_id  number              the category id
          query        drvutl.dr_extrabuf  the rule
          confidence   number              confidence level (percentage)
                                           that a document is relevant
                                           when this rule is satisfied

      Table and column names do not have to match the names used above;
      the actual names are passed in as arguments.

    ARGUMENTS
      index_name  name of the text index
      docid       name of the docid column in the document table
      cattab      name of the category table
      catdocid    name of the docid column in the category table
      catid       name of the category id column in the category table
      restab      name of the result table
      rescatid    name of the category id column in the result table
      resquery    name of the query column in the result table
      resconfid   name of the confidence column in the result table
      pref_name   name of the preference (defaults to NULL)
  */
  PROCEDURE train (
    index_name  IN VARCHAR2,
    docid       IN VARCHAR2,
    cattab      IN VARCHAR2,
    catdocid    IN VARCHAR2,
    catid       IN VARCHAR2,
    restab      IN VARCHAR2,
    rescatid    IN VARCHAR2,
    resquery    IN VARCHAR2,
    resconfid   IN VARCHAR2,
    pref_name   IN VARCHAR2 DEFAULT NULL
  );
  PRAGMA SUPPLEMENTAL_LOG_DATA(train, AUTO);

  /* ======================= GENERIC TRAIN API ======================== */
  /*
    NAME
      train : automatically generate a predictive model from examples
              for classification

    DESCRIPTION
      Generates a predictive model (classifier) from a given set of
      training examples. The examples are held in two tables.

        Document table, with at least these columns:
          docid        number primary key
          text         document column that can be indexed by a context index

        Category table, with at least these columns:
          docid        number
          category_id  number

      Table and column names do not have to match the names used above;
      the actual names are passed in as arguments.

      The classifier is written to the result table. That table is
      either created by the user before the call, or, when the named
      table does not exist, created by this procedure under the current
      user. A user-created result table (which allows a table in a
      different schema) must have exactly these three columns, with
      these exact names:

          cat_id  number
          type    number(3) not null
          rule    clob

    ARGUMENTS
      index_name  name of the text index
      docid       name of the docid column in the document table
      cattab      name of the category table
      catdocid    name of the docid column in the category table
      catid       name of the category id column in the category table
      restab      name of the generated result table
      pref_name   name of the preference (no default in this overload)
  */
  PROCEDURE train (
    index_name  IN VARCHAR2,
    docid       IN VARCHAR2,
    cattab      IN VARCHAR2,
    catdocid    IN VARCHAR2,
    catid       IN VARCHAR2,
    restab      IN VARCHAR2,
    pref_name   IN VARCHAR2
  );
  PRAGMA SUPPLEMENTAL_LOG_DATA(train, AUTO);

  /* =================== SENTIMENT MODEL TRAIN API ==================== */
  /*
    NAME
      sa_train_model : generate a sentiment model from the supplied
                       training set

    DESCRIPTION
      Generates a sentiment model from training examples. The examples
      and their true labels are supplied in two tables.

        Document table, with at least these columns:
          docid        number primary key
          text         document column that can be indexed by a context index

        Category table, with at least these columns:
          docid        number
          category_id  number (0, 1, 2)

      Table and column names do not have to match the names used above,
      but the actual names must be passed to sa_train_model. The
      category ids, however, must be 0 for neutral, 1 for positive and
      2 for negative (see SENTIMENT_NUL, SENTIMENT_POS, SENTIMENT_NEG).
      A sentiment classifier preference should also be supplied.

      The generated model is written to a classifier table with the
      name provided by the user. A ctxrule index of the form
      DR$clsfier_name$RS is also created on that table. The table
      contains these columns:

          cat_id  number
          type    number(3) not null
          rule    clob

    ARGUMENTS
      clsfier_name  name of the classifier
      index_name    name of the text index on the training set
      docid         name of the docid column in the document table
      cattab        name of the category table
      catdocid      name of the docid column in the category table
      catid         name of the category id column in the category table
      pref_name     name of the sentiment_classifier preference
                    (defaults to NULL)
  */
  PROCEDURE sa_train_model (
    clsfier_name  IN VARCHAR2,
    index_name    IN VARCHAR2,
    docid         IN VARCHAR2,
    cattab        IN VARCHAR2,
    catdocid      IN VARCHAR2,
    catid         IN VARCHAR2,
    pref_name     IN VARCHAR2 DEFAULT NULL
  );
  PRAGMA SUPPLEMENTAL_LOG_DATA(sa_train_model, AUTO);

  /* ==================== SENTIMENT MODEL DROP API ==================== */
  /*
    NAME
      sa_drop_model : drop an existing sentiment model

    DESCRIPTION
      Drops an existing sentiment model.

    ARGUMENTS
      model_name  name of the sentiment model
  */
  PROCEDURE sa_drop_model (
    model_name  IN VARCHAR2
  );
  PRAGMA SUPPLEMENTAL_LOG_DATA(sa_drop_model, AUTO);

  /* ============ CLUSTERING API, PERMANENT TABLE RESULT ============== */
  /*
    NAME
      clustering : cluster a collection of documents

    DESCRIPTION
      Generates a set of sub-groups (clusters) from a collection of
      documents. The collection is given by a table that has a context
      index, built with or without population. The collection table has
      at least the following two columns, whose names need not match:

          docid  number primary key
          text   document column that can be indexed by a context index

      The clustering output is represented by two tables.

        Table 1, document membership table, with these exact column
        names:
          docid      number  identifies a document
          clusterid  number  cluster the document is assigned to
          score      number  similarity score between document and cluster

        Table 2, cluster description table, with these exact column
        names:
          clusterid      number              identifies a cluster
          descript       drvutl.dr_extrabuf  string describing the cluster
          label          drvutl.dr_longbuf   suggested label for the cluster
          sze            number              number of documents assigned
                                             to the cluster
          quality_score  number              quality score of the cluster
          parent         number              parent cluster id, negative
                                             means no parent

        NOTE(review): the document count column is assumed to be named
        sze, as in cluster_rec (SIZE is an Oracle reserved word).
        Confirm against the package body.

      The output tables are either created by the user before the call,
      or, when a named table does not exist, created by this procedure
      under the current user.

    ARGUMENTS
      index_name   name of the text index
      docid        name of the docid column in the document table
      doctab_name  name of the document membership table
      clstab_name  name of the cluster description table
      pref_name    name of the preference (defaults to NULL)
  */
  PROCEDURE clustering (
    index_name   IN VARCHAR2,
    docid        IN VARCHAR2,
    doctab_name  IN VARCHAR2,
    clstab_name  IN VARCHAR2,
    pref_name    IN VARCHAR2 DEFAULT NULL
  );
  PRAGMA SUPPLEMENTAL_LOG_DATA(clustering, AUTO);

  /* ============ CLUSTERING API, IN-MEMORY TABLE RESULT ============== */
  /*
    NAME
      clustering : cluster a collection of documents

    DESCRIPTION
      Generates a set of sub-groups (clusters) from a collection of
      documents. The collection is given by a table that has a context
      index, built with or without population. The collection table has
      at least the following two columns, whose names need not match:

          docid  number primary key
          text   document column that can be indexed by a context index

      The clustering output is represented by two in-memory tables:

          table 1: document membership table, ctx_cls.doc_tab
          table 2: cluster description table, ctx_cls.cluster_tab

    ARGUMENTS
      index_name   name of the text index
      docid        name of the docid column in the document table
      dids         list of docids to be clustered
      doctab_name  in-memory document membership table (IN OUT, doc_tab)
      clstab_name  in-memory cluster description table (IN OUT,
                   cluster_tab)
      pref_name    name of the preference (defaults to NULL)

    NOTE(review): unlike every other subprogram in this package, this
    overload carries no SUPPLEMENTAL_LOG_DATA pragma. The statement
    "PRAGMA SUPPLEMENTAL_LOG_DATA(clustering, AUTO)" was left commented
    out here. Confirm that the omission is intentional.
  */
  PROCEDURE clustering (
    index_name   IN VARCHAR2,
    docid        IN VARCHAR2,
    dids         IN docid_tab,
    doctab_name  IN OUT NOCOPY doc_tab,
    clstab_name  IN OUT NOCOPY cluster_tab,
    pref_name    IN VARCHAR2 DEFAULT NULL
  );

END ctx_cls;
/

@?/rdbms/admin/sqlsessend.sql
Rem OHA YOOOO