
-- Named constant holding 32767, the same value as the SQL_STATEMENT_TYPE size below.
-- It is not referenced in the visible part of this file.
c_long_sql_statement_length CONSTANT INTEGER := 32767; 

-- Buffer type for a single dynamic SQL statement (used by drop_table / drop_view).
SUBTYPE SQL_STATEMENT_TYPE      IS VARCHAR2(32767);
-- Alias for the DBMS_SQL piecewise statement table.
SUBTYPE LONG_SQL_STATEMENT_TYPE IS DBMS_SQL.VARCHAR2A;

-- List of object names; ADD_TEMP_TABLE / DROP_TEMP_TABLES use it to track temporary objects.
TYPE TABLE_ARRAY is TABLE OF VARCHAR2(62);
-- A statement kept as pieces, plus the index bounds handed to DBMS_SQL.PARSE.
TYPE LSTMT_REC_TYPE IS RECORD (
    -- statement pieces; ls_append writes them at consecutive indexes
    lstmt dbms_sql.VARCHAR2A,
    -- index of the first piece
    lb    BINARY_INTEGER DEFAULT 1,
    -- index of the last piece; the default 0 (below lb) describes an empty statement
    ub    BINARY_INTEGER DEFAULT 0);
TYPE LSTMT_REC_TYPE_ARRAY is TABLE OF LSTMT_REC_TYPE;
TYPE QUERY_ARRAY is TABLE OF SQL_STATEMENT_TYPE;
-- Distinct attribute values fetched by GET_STRATIFIED_DISTRIBUTION (at most 32 characters each).
TYPE TARGET_VALUES_LIST      IS TABLE OF  VARCHAR2(32);
-- Row counts, index-aligned with a TARGET_VALUES_LIST when filled by GET_STRATIFIED_DISTRIBUTION.
TYPE VALUE_COUNT_LIST        IS TABLE OF  NUMBER;

-- Debug helper: writes a header line followed by every piece of vc2a,
-- at indexes 1 .. vc2a.COUNT, to DBMS_OUTPUT (one piece per output line).
PROCEDURE dump_varchar2a(vc2a dbms_sql.VARCHAR2A) IS
  v_piece_no PLS_INTEGER := 1;
BEGIN
  DBMS_OUTPUT.PUT_LINE('dump_varchar2a:');
  WHILE v_piece_no <= vc2a.COUNT LOOP
    DBMS_OUTPUT.PUT_LINE(vc2a(v_piece_no));
    v_piece_no := v_piece_no + 1;
  END LOOP;
END;

-- Appends one text piece to a piecewise statement.
--   r_lstmt : statement record; its upper bound (ub) is advanced by one
--   p_txt   : text stored at the new upper-bound index
PROCEDURE ls_append(
  r_lstmt IN OUT NOCOPY LSTMT_REC_TYPE,
  p_txt   VARCHAR2)
IS
  v_slot BINARY_INTEGER := r_lstmt.ub + 1;
BEGIN
  -- advance the bound first, then fill the slot it now points to
  r_lstmt.ub := v_slot;
  r_lstmt.lstmt(v_slot) := p_txt;
END ls_append;

-- Appends every piece of another piecewise statement, in order.
--   r_lstmt : destination record; ub grows by one for each copied piece
--   p_txt   : source record; pieces p_txt.lb .. p_txt.ub are copied
PROCEDURE ls_append(
  r_lstmt IN OUT NOCOPY LSTMT_REC_TYPE,
  p_txt   LSTMT_REC_TYPE)
IS
BEGIN
  -- the FOR bounds are evaluated once, before the first iteration
  FOR v_src_idx IN p_txt.lb .. p_txt.ub
  LOOP
    r_lstmt.ub := r_lstmt.ub + 1;
    r_lstmt.lstmt(r_lstmt.ub) := p_txt.lstmt(v_src_idx);
  END LOOP;
END ls_append;

-- Reports whether p_query can be run through EXECUTE IMMEDIATE without error.
--   p_query : statement text to try
-- Returns TRUE when no exception is raised, FALSE for any exception.
-- Note: the statement really is handed to EXECUTE IMMEDIATE, so DML or DDL
-- passed here would take effect.
FUNCTION query_valid(
  p_query VARCHAR2) RETURN BOOLEAN
IS
BEGIN
  EXECUTE IMMEDIATE p_query;
  RETURN TRUE;
EXCEPTION
  WHEN OTHERS THEN
    RETURN FALSE;
END query_valid;

-- Reports whether "SELECT * FROM <name>" is accepted, i.e. the name can be queried.
--   p_table_name : simple (unqualified) SQL name; DBMS_ASSERT.SIMPLE_SQL_NAME
--                  raises if it is not one, and that error propagates to the caller
FUNCTION table_exist(
  p_table_name  VARCHAR2) RETURN BOOLEAN
IS
  v_probe VARCHAR2(32767);
BEGIN
  v_probe := 'SELECT * FROM ' || dbms_assert.simple_sql_name(p_table_name);
  RETURN query_valid(v_probe);
END table_exist;

-- Reports whether DM_USER_MODELS has a row whose NAME equals the upper-cased model name.
--   p_model_name : model name (compared after UPPER)
-- Returns FALSE as well when the lookup itself raises any exception.
FUNCTION model_exist(
  p_model_name  VARCHAR2) RETURN BOOLEAN
IS
  v_hits NUMBER;
BEGIN
  SELECT COUNT(*)
    INTO v_hits
    FROM DM_USER_MODELS
   WHERE NAME = UPPER(p_model_name);

  RETURN v_hits > 0;
EXCEPTION
  WHEN OTHERS THEN
    RETURN FALSE;
END model_exist;

-- Best-effort DROP TABLE ... PURGE.
--   p_table_name : simple SQL name, validated with DBMS_ASSERT.SIMPLE_SQL_NAME
-- Every exception (invalid name, missing table, ...) is deliberately ignored.
PROCEDURE drop_table(
  p_table_name  VARCHAR2)
IS
BEGIN
  EXECUTE IMMEDIATE
    'DROP TABLE '||dbms_assert.simple_sql_name(p_table_name)||' PURGE';
EXCEPTION
  WHEN OTHERS THEN
    NULL;  -- ignore: the caller only needs the table to be gone if it existed
END drop_table;

-- Best-effort DROP VIEW.
--   p_view_name : simple SQL name, validated with DBMS_ASSERT.SIMPLE_SQL_NAME
-- Every exception (invalid name, missing view, ...) is deliberately ignored.
PROCEDURE drop_view(
  p_view_name  VARCHAR2)
IS
BEGIN
  EXECUTE IMMEDIATE
    'DROP VIEW '||dbms_assert.simple_sql_name(p_view_name);
EXCEPTION
  WHEN OTHERS THEN
    NULL;  -- ignore: the caller only needs the view to be gone if it existed
END drop_view;

-- Best-effort drop of a mining model and of the GLM diagnostics table named in
-- its GLMS_DIAGNOSTICS_TABLE_NAME setting (when that setting is present).
--   p_model_name : name of the model to drop
-- The setting is read BEFORE the model is dropped: once the model is gone its
-- settings can no longer be queried, so looking them up afterwards would always
-- fail and leave the diagnostics table behind.
-- Every exception is deliberately ignored.
PROCEDURE drop_model(
  p_model_name  VARCHAR2)
IS
  -- wide enough that the fetch cannot fail on a long setting value
  v_diagnostics_table VARCHAR2(4000);
BEGIN
  -- Step 1: remember the diagnostics table while the model still exists.
  BEGIN
    SELECT SETTING_VALUE INTO v_diagnostics_table
    FROM TABLE(DBMS_DATA_MINING.GET_MODEL_SETTINGS(p_model_name))
    WHERE SETTING_NAME = 'GLMS_DIAGNOSTICS_TABLE_NAME';
  EXCEPTION WHEN OTHERS THEN
    -- setting absent (NO_DATA_FOUND), model missing, ...: nothing extra to drop
    v_diagnostics_table := NULL;
  END;

  -- Step 2: drop the model; on failure the outer handler skips the table drop.
  DBMS_DATA_MINING.DROP_MODEL(p_model_name);

  -- Step 3: drop the diagnostics table that belonged to the model.
  IF (v_diagnostics_table IS NOT NULL) THEN
    drop_table(v_diagnostics_table);
  END IF;
EXCEPTION WHEN OTHERS THEN
  NULL;  -- best effort
END drop_model;

-- Builds a random temporary object name of the form
--   'DM$T' || first 4 characters of prefix || random characters.
--   prefix : caller tag; only its first 4 characters are used
--   len    : intended total length; len - 8 random characters are appended, so
--            the result is exactly len long when prefix has at least 4 characters
-- The result is held in a VARCHAR2(30), so a len above 30 raises VALUE_ERROR.
FUNCTION create_new_temp_table_name(prefix IN VARCHAR2, len IN NUMBER)
RETURN VARCHAR2 IS
  v_table_name      VARCHAR2(30);
BEGIN
  -- reseed from a fresh GUID on every call
  dbms_random.seed(SYS_GUID());
  -- SUBSTR positions are 1-based (Oracle treats 0 as 1; written explicitly here)
  v_table_name := 'DM$T' || SUBSTR(prefix, 1, 4) || dbms_random.string(NULL, len-8);
  RETURN v_table_name;
END create_new_temp_table_name;

-- Convenience overload: same as the two-argument form with a total length of 30.
--   prefix : caller tag passed through unchanged
FUNCTION create_new_temp_table_name(prefix IN VARCHAR2)
RETURN VARCHAR2 IS
BEGIN
  RETURN create_new_temp_table_name(prefix => prefix, len => 30);
END create_new_temp_table_name;

-- Registers a temporary object name in the tracking list and returns it, so the
-- call can be used inline where the name is needed.
--   tempTables : tracking list; created on demand when not yet initialised
--   temp_table : name to register
FUNCTION ADD_TEMP_TABLE(tempTables IN OUT NOCOPY TABLE_ARRAY, temp_table IN VARCHAR2) RETURN VARCHAR2 IS
BEGIN
  IF tempTables IS NULL THEN
    tempTables := TABLE_ARRAY();
  END IF;
  tempTables.EXTEND;
  -- EXTEND appends after the highest allocated slot, which differs from COUNT
  -- once elements have been deleted; LAST always addresses the new slot.
  tempTables(tempTables.LAST) := temp_table;
  return temp_table;
END ADD_TEMP_TABLE;

-- Drops every tracked temporary object (tried as a table and as a view, both
-- best effort) and then empties the tracking list.
--   tempTables : tracking list; a NULL (uninitialised) list is a no-op
PROCEDURE DROP_TEMP_TABLES(tempTables IN OUT NOCOPY TABLE_ARRAY) IS
  v_idx  PLS_INTEGER;
BEGIN
  IF tempTables IS NULL THEN
    RETURN;
  END IF;

  -- FIRST/NEXT skips deleted slots, so a sparse list is handled too.
  v_idx := tempTables.FIRST;
  WHILE v_idx IS NOT NULL LOOP
    -- the element is passed directly; no narrower local that could overflow
    drop_table(tempTables(v_idx));
    drop_view(tempTables(v_idx));
    v_idx := tempTables.NEXT(v_idx);
  END LOOP;

  -- Remove all elements at once so that a later EXTEND starts again at index 1
  -- instead of appending behind deleted placeholder slots.
  tempTables.DELETE;
END DROP_TEMP_TABLES;

-- Prepares the name of a result object before it is (re)created.
--   drop_output : when TRUE, drop any table and view of that name
--   result_name : result object name; NULL means nothing to check
-- When drop_output is not TRUE and the name can already be queried,
-- raises ORA-20000 'Result table exists: <name>'.
PROCEDURE CHECK_RESULTS(drop_output IN BOOLEAN,
                        result_name IN VARCHAR2) IS
BEGIN
  IF result_name IS NULL THEN
    RETURN;
  END IF;

  IF drop_output THEN
    drop_table(result_name);
    drop_view(result_name);
    RETURN;
  END IF;

  IF table_exist(result_name) THEN
    RAISE_APPLICATION_ERROR(-20000, 'Result table exists: '||result_name);
  END IF;
END CHECK_RESULTS;

-- Prepares a model name before the model is (re)built.
--   drop_output : when TRUE, drop the model via drop_model
--   model_name  : model name; NULL means nothing to check
-- When drop_output is not TRUE and model_exist reports the model,
-- raises ORA-20001 'Model exists: <name>'.
PROCEDURE CHECK_MODEL(drop_output IN BOOLEAN,
                        model_name IN VARCHAR2) IS
BEGIN
  IF model_name IS NULL THEN
    RETURN;
  END IF;

  IF drop_output THEN
    drop_model(model_name);
    RETURN;
  END IF;

  IF model_exist(model_name) THEN
    RAISE_APPLICATION_ERROR(-20001, 'Model exists: '||model_name);
  END IF;
END CHECK_MODEL;

-- Parses and executes a piecewise statement through DBMS_SQL.
--   query : statement record; pieces query.lb .. query.ub are concatenated
--           without inserted line feeds (lfflg => FALSE)
-- The cursor is closed on success and on failure; any error is re-raised.
PROCEDURE create_table_from_query(query IN OUT NOCOPY LSTMT_REC_TYPE)
IS
  v_cur_id    NUMBER;
  v_ignored   INTEGER;  -- return value of EXECUTE, not used further
BEGIN
  v_cur_id := DBMS_SQL.OPEN_CURSOR;

  DBMS_SQL.PARSE(
    c             => v_cur_id,
    statement     => query.lstmt,
    lb            => query.lb,
    ub            => query.ub,
    lfflg         => FALSE,
    language_flag => DBMS_SQL.NATIVE);

  v_ignored := DBMS_SQL.EXECUTE(v_cur_id);

  DBMS_SQL.CLOSE_CURSOR(v_cur_id);
EXCEPTION
  WHEN OTHERS THEN
    -- do not leak the cursor when parse or execute fails
    IF DBMS_SQL.IS_OPEN(v_cur_id) THEN
      DBMS_SQL.CLOSE_CURSOR(v_cur_id);
    END IF;
    RAISE;
END create_table_from_query;

-- Returns the number of rows in a table or view.
--   tableName : object name, optionally schema-qualified; it is validated with
--               DBMS_ASSERT.QUALIFIED_SQL_NAME before being placed into the
--               statement text, so arbitrary SQL fragments are rejected
-- Raises the DBMS_ASSERT error for an invalid name, or the SQL error when the
-- object cannot be queried.
FUNCTION get_row_count(tableName IN VARCHAR2)
RETURN INTEGER IS
   -- sized so that long or qualified names cannot overflow the buffer
   v_stmt  VARCHAR2(32767);
   qcount INTEGER := 0;
BEGIN
  v_stmt := 'SELECT COUNT(*) FROM '|| dbms_assert.qualified_sql_name(tableName);
  EXECUTE  IMMEDIATE v_stmt INTO qcount;
  RETURN qcount;
END get_row_count;

-- Replaces every entry of counts with the smallest positive entry, so that all
-- classes end up with the same sample size.
--   counts : per-class counts, overwritten in place
-- Entries that are zero, negative or NULL are ignored when the minimum is
-- determined, wherever they sit in the list; when no entry is positive every
-- entry becomes 0. A NULL or empty collection is left untouched.
PROCEDURE SET_EQUAL_DISTRIBUTION ( 
   counts IN OUT VALUE_COUNT_LIST )
IS
  v_minvalue          NUMBER;        -- NULL until a positive entry is seen
  v_idx               PLS_INTEGER;
BEGIN
  -- FIRST and LAST are NULL for an empty collection, and NULL FOR-loop bounds raise.
  IF counts IS NULL OR counts.COUNT = 0 THEN
    RETURN;
  END IF;

  -- Pass 1: smallest positive entry (FIRST/NEXT also copes with sparse lists).
  v_idx := counts.FIRST;
  WHILE v_idx IS NOT NULL LOOP
    IF counts(v_idx) > 0
       AND ( v_minvalue IS NULL OR counts(v_idx) < v_minvalue )
    THEN
      v_minvalue := counts(v_idx);
    END IF;
    v_idx := counts.NEXT(v_idx);
  END LOOP;

  v_minvalue := NVL(v_minvalue, 0);

  -- Pass 2: level every entry to that minimum.
  v_idx := counts.FIRST;
  WHILE v_idx IS NOT NULL LOOP
    counts(v_idx) := v_minvalue;
    v_idx := counts.NEXT(v_idx);
  END LOOP;
END SET_EQUAL_DISTRIBUTION;

-- Fetches, for every non-NULL value of an attribute, the number of rows carrying
-- it and that number scaled by a percentage.
--   table_name     : source table, placed into the statement text as given
--   attribute_name : column to group by, placed into the statement text as given
--   percentage     : sampling percentage (e.g. 60 for 60%), passed as a bind
--                    variable so that session NLS numeric settings cannot alter
--                    the statement text
--   attr_values    : out - distinct attribute values
--   counts         : out - COUNT(*) per value
--   counts_sampled : out - ROUND(COUNT(*) * percentage / 100) per value
-- The three collections are filled by BULK COLLECT and are index-aligned.
-- NOTE(review): the hint names alias t, but the FROM clause declares no alias,
-- so the hint presumably has no effect - confirm the intent.
PROCEDURE GET_STRATIFIED_DISTRIBUTION ( 
   table_name         VARCHAR2, 
   attribute_name     VARCHAR2,
   percentage         NUMBER,
   attr_values    IN OUT NOCOPY TARGET_VALUES_LIST,
   counts         IN OUT NOCOPY VALUE_COUNT_LIST,
   counts_sampled IN OUT NOCOPY VALUE_COUNT_LIST )
IS
  v_tmp_stmt           VARCHAR2(32767);
BEGIN
  v_tmp_stmt := 
      'SELECT /*+ noparallel(t)*/ ' || attribute_name || 
      ', count(*), ROUND ( ( count(*) * :pct ) / 100.0 ) FROM '|| table_name || 
      ' WHERE ' || attribute_name ||' IS NOT NULL GROUP BY ' || attribute_name;
  EXECUTE IMMEDIATE v_tmp_stmt
  BULK COLLECT INTO attr_values, counts, counts_sampled
  USING percentage;
END GET_STRATIFIED_DISTRIBUTION;

-- Builds a piecewise CREATE TABLE ... AS SELECT statement that takes a
-- stratified sample of src_table_name.
--   v_2d_temp_view     : name of the table the statement creates
--   src_table_name     : source table
--   attr_names         : columns to select; must not be empty
--   attribute_name     : stratification column
--   percentage         : sampling percentage handed to GET_STRATIFIED_DISTRIBUTION
--   op                 : comparison operator text placed between the ORA_HASH
--                        bucket and the per-value sample count
--   equal_distribution : when TRUE, the sampled counts are levelled with
--                        SET_EQUAL_DISTRIBUTION first
-- Returns the statement as an LSTMT_REC_TYPE.
-- The WHERE clause always keeps the row with RNUM = 1 of every partition and
-- adds one OR-ed predicate per attribute value. When the source has no non-NULL
-- attribute values, only "RNUM = 1" remains and the statement is still valid.
FUNCTION GENERATE_STRATIFIED_SQL ( 
                        v_2d_temp_view    VARCHAR2,
                        src_table_name    VARCHAR2, 
                        attr_names        TARGET_VALUES_LIST,
                        attribute_name    VARCHAR2,
                        percentage        NUMBER,
                        op                VARCHAR2,
                        equal_distribution IN BOOLEAN DEFAULT FALSE) RETURN LSTMT_REC_TYPE
IS
  v_tmp_lstmt           LSTMT_REC_TYPE;
  attr_values_res       TARGET_VALUES_LIST;
  counts_res            VALUE_COUNT_LIST;
  counts_sampled_res    VALUE_COUNT_LIST;
  tmp_str               VARCHAR2(4000);
  sample_count          PLS_INTEGER;

BEGIN
  GET_STRATIFIED_DISTRIBUTION(src_table_name, attribute_name, percentage, attr_values_res, counts_res, counts_sampled_res);
  -- Nothing to level when the distribution is empty.
  IF ( equal_distribution = TRUE AND counts_sampled_res.COUNT > 0 )
  THEN
    SET_EQUAL_DISTRIBUTION(counts_sampled_res);
  END IF;

  v_tmp_lstmt.ub := 0; -- initialize 
  ls_append(v_tmp_lstmt, 'CREATE TABLE ');
  ls_append(v_tmp_lstmt, v_2d_temp_view);
  ls_append(v_tmp_lstmt, ' AS ');
  ls_append(v_tmp_lstmt, '( SELECT ');

  -- Comma-separated select list.
  FOR i IN attr_names.FIRST..attr_names.LAST 
  LOOP
    IF ( i != attr_names.FIRST )
    THEN
       ls_append(v_tmp_lstmt,',');
    END IF;

    ls_append(v_tmp_lstmt, attr_names(i));
  END LOOP;
  
  -- The trailing ' OR ' is now emitted together with each predicate below, so
  -- an empty distribution cannot leave a dangling OR.
  ls_append(v_tmp_lstmt, ' FROM (SELECT /*+ no_merge */ t.*, row_number() over(partition by '||attribute_name||' order by ora_hash(ROWNUM)) RNUM FROM ' || src_table_name || ' t) WHERE RNUM = 1');

  -- The BULK COLLECT results are dense (1 .. COUNT); with COUNT = 0 the loop is
  -- skipped, whereas FIRST..LAST would give NULL bounds and raise.
  FOR i IN 1..attr_values_res.COUNT 
  LOOP
    -- Classes with at most two rows are taken completely.
    IF ( counts_res(i) <= 2 ) THEN
      sample_count := counts_res(i);
    ELSE
      sample_count := counts_sampled_res(i);
    END IF;
    -- Single quotes inside the value are doubled so the literal stays well-formed.
    tmp_str := ' OR ' ||
     '( ' || attribute_name || ' = ''' || REPLACE(attr_values_res(i), '''', '''''') || '''' ||
      ' AND ORA_HASH(RNUM,(' || counts_res(i) || ' -1),12345) ' || op || sample_count || ') ';
    ls_append(v_tmp_lstmt, tmp_str );
  END LOOP;
  ls_append(v_tmp_lstmt, ') ');
  return v_tmp_lstmt;
END GENERATE_STRATIFIED_SQL;
  
