Rem
Rem $Header: dbgendev/src/langdata/plsql/setup/setup_pkg.pkb /main/39 2025/08/13 01:29:21 jiangnhu Exp $
Rem
Rem setup_pkg.pkb
Rem
Rem Copyright (c) 2024, 2025, Oracle and/or its affiliates.
Rem
Rem    NAME
Rem      setup_pkg.pkb - Package body of setup_pkg
Rem
Rem    DESCRIPTION
Rem      Defines the procedures/functions responsible for setting up the
Rem	 database of the user of Lang-Data.
Rem
Rem    NOTES
Rem      NONE
Rem
Rem    BEGIN SQL_FILE_METADATA
Rem    SQL_SOURCE_FILE: dbgendev/src/langdata/plsql/setup/setup_pkg.pkb
Rem    SQL_SHIPPED_FILE:
Rem    SQL_PHASE:
Rem    SQL_STARTUP_MODE: NORMAL
Rem    SQL_IGNORABLE_ERRORS: NONE
Rem    END SQL_FILE_METADATA
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    dadoshi     07/30/25 - Pass mount_dir to populate_config_table
Rem    deveverm    07/28/25 - DBAI-1011: changed rerank function to use cached
Rem                           model
Rem    saloshah    07/24/25 - DBAI-1052 : Add user_search_row_obj,
Rem                           user_search_row_table
Rem    dadoshi     07/22/25 - JIRA_DBAI1056: Add drilldown_row_obj,
Rem                           drilldown_row_table,
Rem                           drilldown_description_row_obj,
Rem                           drilldown_description_table types for APEX
Rem    dadoshi     07/22/25 - JIRA_DBAI1049: Add report_row_obj,
Rem                           report_row_table, report_description_row_obj,
Rem                           report_description_table types for APEX
Rem    deveverm    07/16/25 - DBAI-1050: added cleanup for apex_filters tables
Rem    jiangnhu    07/09/25 - DBAI-314: Add python function 
Rem                           rerank_filter_value_candidates
Rem    pryarla     07/03/25 - DBAI-882: Added filter_rec_type
Rem    jiangnhu    07/02/25 - DBAI-1006: Parameterize hard-coded names in
Rem                           create_resource_plan
Rem    arevathi    07/01/25 - Change onnx_model_dir to mount_dir
Rem    fgurrola    06/27/25 - DBAI-776: Add name entities initialization call.
Rem    deveverm    06/25/25 - DBAI-771: Removed ALL_MINILM_L12_V2, removed
Rem                           grants for test pkgs
Rem    deveverm    06/25/25 - DBAI-939: added dropping and recreating
Rem                           langdatajobclass
Rem    jiangnhu    05/28/25 - DBAI-576, DBAI-828: Schedule periodical check for
Rem                           background metadata and index fixup
Rem    arevathi    05/19/25 - Add role grants procedure
Rem    deveverm    05/19/25 - DBAI-752: added env params HF_HUB_OFFLINE and
Rem                           HF_HUB_CACHE to process_entities
Rem    deveverm    05/15/25 - Bug Fix: python functions should not be created
Rem                           on cloud deployments
Rem    deveverm    04/17/25 - DBAI-658: load multilingual-e5-base model
Rem    jiangnhu    04/11/25 - DBAI-739: Add python function plot_search_vectors
Rem    dadoshi     04/11/25 - JIRA_DBAI525: Update ranked_doc_obj type
Rem    deveverm    04/10/25 - DBAI-720: Added compilation branch for ADB
Rem    dadoshi     03/26/25 - JIRA_DBAI689: Add create_stoplist procedure.
Rem    dadoshi     03/20/25 - JIRA_DBAI525: Add re_rank_search_results python
Rem                           function
Rem    jiangnhu    03/06/25 - DBAI-545: Use config for cpu limit
Rem    dadoshi     02/24/25 - JIRA_DBAI578: Add populate_config_table
Rem    jiangnhu    02/14/25 - Update create_resource_plan to cleanup first
Rem    jiangnhu    02/14/25 - DBAI-575: Remove c_unknown_exception_code
Rem    jiangnhu    02/13/25 - DBAI-524: Remove create_user_table_indexes
Rem    jiangnhu    01/23/25 - Move create_global_temp_tables to
Rem                           create_tables.sql to fix installation issue
Rem    jiangnhu    12/05/24 - DBAI-421: Add create_resource_plan
Rem    saloshah    11/28/24 - DBAI-327: Removed check_annotations_changes_job
Rem    jiangnhu    11/12/24 - JIRA_DBAI-425: Add create_global_temp_tables, 
Rem                           create_python_functions, create_types
Rem    jiangnhu    10/30/24 - DBAI-383: Add create_user_table_indexes
Rem    jiangnhu    10/29/24 - Fix syntax error
Rem    dadoshi     10/21/24 - Remove text wrapping
Rem    dadoshi     10/18/24 - JIRA_DBAI-399: Update template
Rem    pryarla     10/16/24 - Created
Rem

-- Sets the session-level PLSQL_CCFLAGS value Is_Cloud before the package body
-- below is compiled; the body branches on $$Is_Cloud with $IF directives.
-- Is_Cloud is true when USERENV.CLOUD_SERVICE is non-null, false otherwise.
DECLARE
    l_service VARCHAR2(10) := sys_context('USERENV', 'CLOUD_SERVICE');
    l_stmt    VARCHAR2(100);
BEGIN
    l_stmt :=
        CASE
            WHEN l_service IS NULL THEN
                'ALTER SESSION SET PLSQL_CCFLAGS = ''Is_Cloud:false'''
            ELSE
                'ALTER SESSION SET PLSQL_CCFLAGS = ''Is_Cloud:true'''
        END;

    EXECUTE IMMEDIATE l_stmt;
END;
/

create or replace package body lang_data_setup_pkg as

    -- Registers the Python scripts used by Lang-Data through
    -- sys.pyqScriptCreate:
    --   * process_entities
    --   * plot_search_vectors
    --   * re_rank_search_results
    --   * rerank_filter_value_candidates
    -- When the package is compiled with Is_Cloud = TRUE the procedure does
    -- nothing. Each script is created in its own block; on failure the error
    -- is logged through lang_data_logger_pkg and re-raised, so later scripts
    -- are not attempted.
    -- NOTE(review): the two trailing TRUE arguments of pyqScriptCreate are
    -- presumably the "global" and "overwrite" flags - confirm against the
    -- OML4Py documentation.
    PROCEDURE create_python_functions IS
    BEGIN
        $IF $$Is_Cloud = TRUE $THEN
                null;
        $ELSE
        BEGIN
-- local_files_only in GLiNER is set to true to disable Gliner to look online
-- for the model weights, forcing it to use our prepopulated model cache
-- HF_HUB_OFFLINE: This env variable forces hugging_face to not send HTTPS
-- requests to hub
-- HF_HUB_CACHE: This env variable dictates the model cache path to hugging_face
            sys.pyqScriptCreate(
                'process_entities',
                '
def func(query, labels, cache_dir):
    import os
    os.environ["HF_HUB_OFFLINE"]="1"
    os.environ["HF_HUB_CACHE"]=cache_dir
    os.environ["HF_HOME"] = cache_dir
    import oml
    from gliner import GLiNER
    if not hasattr(oml, "model"):
        oml.model = GLiNER.from_pretrained(
            "numind/NuNerZero", 
            local_files_only=True
        )

    # Define the merge_entities logic within the same script
    def merge_entities(entities):
        if not entities:
            return []
        merged = []
        current = entities[0]
        for next_entity in entities[1:]:
            if next_entity["label"] == current["label"] and (
                next_entity["start"] == current["end"] + 1
                or next_entity["start"] == current["end"]
            ):
                current["text"] = current["text"] + " " + next_entity["text"]
                current["end"] = next_entity["end"]
            else:
                merged.append(current)
                current = next_entity
        # Append the last entity
        merged.append(current)
        return [(e["start"], e["end"], e["text"], e["label"]) for e in merged]

    # Load the GLiNER model and predict entities
    label_list = [label.strip().lower() for label in labels.split(",")]
    entities = oml.model.predict_entities(query, label_list)

    # Merge entities and return the result
    return merge_entities(entities)
                ',
                TRUE,
                TRUE
            );
            lang_data_logger_pkg.log_info(
                'Python function process_entities created successfully.'
            );
        EXCEPTION
            WHEN OTHERS THEN
                -- Separator added so SQLERRM is not glued to the script name
                lang_data_logger_pkg.log_error(
                    'Failed to create Python function: process_entities. ' ||
                    SQLERRM
                );
                RAISE;
        END;
        -- This function takes a table name, retrieves search result data
        -- (including vectors), performs dimensionality reduction using PCA,
        -- generates a 2D plot of the vectors, and returns the plot as image
        -- data.
        BEGIN
            sys.pyqScriptCreate(
                'plot_search_vectors',
                q'[
def func(table_name):
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    import io
    import base64
    import oml
    from sklearn.decomposition import PCA

    input_plot_data = oml.sync(table=table_name).pull()

    input_plot_data.columns = input_plot_data.columns.str.lower()
    input_plot_data = input_plot_data.sort_values(by=["report_id", "type"], ascending=[True, True]).reset_index(drop=True)

    query_row = input_plot_data[input_plot_data["type"] == "actual query"].iloc[0]
    query_vector = np.array(query_row["vector"], dtype=float)
    expected_report_id = query_row["report_id"]

    grouped_vectors = {}
    start_index = 0
    for (report_id, report_title), group_df in input_plot_data[input_plot_data["type"] != "actual query"].groupby(["report_id", "report_title"]):
        desc_vectors = group_df[group_df["type"] == "description"]["vector"].apply(lambda v: np.array(v, dtype=float)).tolist()
        query_vectors = group_df[group_df["type"] == "sample query"]["vector"].apply(lambda v: np.array(v, dtype=float)).tolist()
        highlights = group_df[group_df["highlight"] == 1]
        highlight_idx = highlights.index[0] - group_df.index[0] if not highlights.empty else None
        grouped_vectors[report_id] = {
            "desc_vectors": desc_vectors,
            "query_vectors": query_vectors,
            "highlight": highlight_idx,
            "start_index_in_all_vectors": start_index,
            "title": report_title,
        }
        start_index += len(desc_vectors) + len(query_vectors)

    if not any(grouped_vectors.values()):
        return ("No vectors",)

    all_vectors = []
    highlights = []
    for report_id, vectors in grouped_vectors.items():
        all_vectors.extend(vectors["desc_vectors"] + vectors["query_vectors"])
        if vectors["highlight"] is not None:
            highlights.append((vectors["start_index_in_all_vectors"] + vectors["highlight"], report_id))
    all_vectors.append(query_vector)

    pca = PCA(n_components=2)
    transformed_vectors = pca.fit_transform(all_vectors)
    query_vector_transformed = transformed_vectors[-1]

    colormap = plt.get_cmap("tab10")
    colors = [colormap(i / len(grouped_vectors)) for i in range(len(grouped_vectors))]

    plt.figure(figsize=(10, 8))
    expected_report_color = None
    color_index = 0
    for report_id, vectors in grouped_vectors.items():
        color = colors[color_index]
        color_index += 1
        desc_start = vectors["start_index_in_all_vectors"]
        desc_trans = transformed_vectors[desc_start:desc_start + len(vectors["desc_vectors"])]
        query_trans = transformed_vectors[desc_start + len(vectors["desc_vectors"]):desc_start + len(vectors["desc_vectors"]) + len(vectors["query_vectors"])]
        plt.scatter(desc_trans[:, 0], desc_trans[:, 1], color=color, marker="o", label=f"{vectors['title']} (desc)")
        plt.scatter(query_trans[:, 0], query_trans[:, 1], color=color, marker="^", label=f"{vectors['title']} (query)")
        combined = np.concatenate([desc_trans, query_trans])
        centroid = np.mean(combined, axis=0)
        radius = np.max(np.linalg.norm(combined - centroid, axis=1))
        lw = 3 if report_id == expected_report_id else 1.5
        if report_id == expected_report_id:
            expected_report_color = color
        circle = patches.Circle(centroid, radius, edgecolor=color, facecolor='none', linestyle='--', linewidth=lw)
        plt.gca().add_patch(circle)

    plt.scatter(query_vector_transformed[0], query_vector_transformed[1], color='red', marker='o', s=100, label='Query Vector')
    for index, report_id in highlights:
        vec = transformed_vectors[index]
        plt.scatter(vec[0], vec[1], facecolor='none', edgecolor='red', marker='s', s=150)
        plt.plot([query_vector_transformed[0], vec[0]], [query_vector_transformed[1], vec[1]], color='grey', linestyle='--')
        dist = np.linalg.norm(query_vector_transformed - vec)
        plt.text((query_vector_transformed[0] + vec[0]) / 2, (query_vector_transformed[1] + vec[1]) / 2, f'{dist:.2f}', color='black')

    if expected_report_color:
        plt.plot([], [], color=expected_report_color, linestyle='--', linewidth=3, label='Expected Report')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.gca().set_aspect('equal', adjustable='box')
    plt.title("Visualized Search Results")
    plt.tight_layout()

    buffer = io.BytesIO()
    plt.savefig(buffer, format="png")
    buffer.seek(0)
    img_data = buffer.getvalue()
    plt.close()
    return (img_data,)
                ]',
                TRUE,
                TRUE
            );

            lang_data_logger_pkg.log_info('Python function plot_search_vectors created successfully.');
        EXCEPTION
            WHEN OTHERS THEN
                lang_data_logger_pkg.log_error('Failed to create Python function: plot_search_vectors. ' || SQLERRM);
                RAISE;
        END;
        -- re_rank_search_results scores every (QUERY_TEXT, DOC_TEXT) pair of
        -- the input frame with a cross encoder and returns
        -- (DOC_TEXT_ID, RANK, RANK_SCORE) tuples ordered by descending score.
        -- The encoder is cached on the oml module under the same attribute
        -- used by rerank_filter_value_candidates, so both scripts load it
        -- with local_files_only=True.
        BEGIN
            sys.pyqScriptCreate(
                're_rank_search_results',
                '
def re_rank_search_results(input_reranking_pairs, cache_dir):
    import os
    os.environ["HF_HUB_OFFLINE"]="1"
    os.environ["HF_HUB_CACHE"]=cache_dir
    os.environ["HF_HOME"] = cache_dir
    import json
    import oml
    if not hasattr(oml, "cross_encoder"):
        from sentence_transformers import CrossEncoder
        oml.cross_encoder = CrossEncoder(
            "BAAI/bge-reranker-base",
            local_files_only=True
        )
    if input_reranking_pairs.empty:
        raise ValueError("Input DataFrame for reranking is empty.")
    input_pairs = input_reranking_pairs.to_json(orient="records")
    search_results_list = json.loads(input_pairs)

    cross_encoder_inputs = []
    query_text = search_results_list[0]["QUERY_TEXT"]  # Extract query_text from the first row

    for obj in search_results_list:
        cross_encoder_inputs.append([query_text, obj["DOC_TEXT"]])

    cross_scores = oml.cross_encoder.predict(cross_encoder_inputs)

    for idx, score in enumerate(cross_scores):
        search_results_list[idx]["RANK_SCORE"] = score

    search_results_list.sort(key=lambda x: x["RANK_SCORE"], reverse=True)

    for i, obj in enumerate(search_results_list, start=1):
        obj["RANK"] = i

    return [(sr["DOC_TEXT_ID"], sr["RANK"], float(sr["RANK_SCORE"])) for sr in search_results_list]
                ',
                TRUE,
                TRUE
            );
            lang_data_logger_pkg.log_info(
                'Python function re_rank_search_results created successfully.'
            );
        EXCEPTION
            WHEN OTHERS THEN
                -- Separator added so SQLERRM is not glued to the script name
                lang_data_logger_pkg.log_error(
                    'Failed to create Python function: ' ||
                    're_rank_search_results. ' ||
                    SQLERRM
                );
                RAISE;
        END;
        -- rerank_filter_value_candidates picks the best candidate from a
        -- comma-separated list: the first candidate contained in the query
        -- (case-insensitive) wins with score 1.0, otherwise the cross encoder
        -- decides. Returns (candidate, score, 1-based position).
        BEGIN
            sys.pyqScriptCreate(
                'rerank_filter_value_candidates',
                q'[
def rerank_filter_value_candidates(query, candidates_str, cache_dir):
    candidates = [candidate.strip() for candidate in candidates_str.split(",")]
    # str.split always yields at least one element, so test for real content
    if not any(candidates):
        raise ValueError("Input filter value candidates list is empty.")

    def normalize(text):
        return text.lower().strip()

    normalized_query = normalize(query)
    normalized_candidates = [normalize(c) for c in candidates]
    for idx, norm_candidate in enumerate(normalized_candidates):
        if norm_candidate and norm_candidate in normalized_query:
            return (candidates[idx], 1.0, idx + 1)

    import os
    os.environ["HF_HUB_OFFLINE"] = "1"
    os.environ["HF_HUB_CACHE"] = cache_dir
    os.environ["HF_HOME"] = cache_dir

    import oml
    if not hasattr(oml, "cross_encoder"):
        from sentence_transformers import CrossEncoder
        oml.cross_encoder = CrossEncoder(
            "BAAI/bge-reranker-base",
            local_files_only=True
        )
    pairs = [(query, candidate) for candidate in candidates]
    scores = oml.cross_encoder.predict(pairs)
    top_index = scores.argmax()
    return (candidates[top_index], float(scores[top_index]), int(top_index) + 1)
                ]',
                TRUE,
                TRUE
            );

            lang_data_logger_pkg.log_info(
                'Python function rerank_filter_value_candidates created ' || 
                'successfully.'
            );
        EXCEPTION
            WHEN OTHERS THEN
                lang_data_logger_pkg.log_error(
                    'Failed to create Python function: ' || 
                    'rerank_filter_value_candidates. ' ||
                    SQLERRM
                );
                RAISE;
        END;
        $END
    END create_python_functions;

    -- Fills langdata$stoplist with every word found in ctxsys.ctx_stopwords
    -- (source 'CTXSYS') followed by a fixed list of Lang-Data specific words
    -- (source 'LANGDATA'). A word is inserted only when no row with the same
    -- spw_word is already present. Errors are logged and re-raised.
    PROCEDURE create_stoplist IS
        c_extra_words CONSTANT SYS.ODCIVARCHAR2LIST :=
            SYS.ODCIVARCHAR2LIST(
                'show', 'give', 'list', 'find', 'employee', 'employees',
                'profile', 'profiles', 'retrieve', 'find',
                'display', 'listed', 'information', 'need', 'data', 'provide',
                'please', 'exactly', 'many', 'details', 'name'
            );

        CURSOR c_ctx_words IS
            SELECT spw_word FROM ctxsys.ctx_stopwords;

        -- Inserts p_word tagged with p_source unless an equal spw_word exists.
        PROCEDURE add_word(p_word IN VARCHAR2, p_source IN VARCHAR2) IS
        BEGIN
            INSERT INTO langdata$stoplist (spw_word, source)
            SELECT p_word, p_source
            FROM DUAL
            WHERE NOT EXISTS (
                SELECT 1 FROM langdata$stoplist WHERE spw_word = p_word
            );
        END add_word;
    BEGIN
        -- Words taken from the Oracle Text stop word view
        FOR ctx_row IN c_ctx_words LOOP
            add_word(ctx_row.spw_word, 'CTXSYS');
        END LOOP;

        -- Words specific to Lang-Data
        FOR idx IN c_extra_words.FIRST .. c_extra_words.LAST LOOP
            add_word(c_extra_words(idx), 'LANGDATA');
        END LOOP;

    EXCEPTION
        WHEN OTHERS THEN
            lang_data_logger_pkg.log_error(
                'Failed to create stoplist for Lang-Data: ' || SQLERRM
            );
            RAISE;
    END create_stoplist;

    -- Drops any existing ONNX model named modelname and loads it again from
    -- the file `filename` through the LANGDATA_MODEL_DIR directory object.
    --   Is_Cloud = TRUE : LANGDATA_MODEL_DIR points to 'langdata_models'; the
    --                     file is first fetched with DBMS_CLOUD.GET_OBJECT
    --                     from model_path || filename using cred_name.
    --   otherwise       : LANGDATA_MODEL_DIR is (re)created on model_path.
    -- Exceptions are not handled here and propagate to the caller.
    PROCEDURE load_onnx_model(
        model_path IN VARCHAR2,  -- Directory path or Bucket URL
        filename   IN VARCHAR2,  -- ONNX model file name
        modelname  IN VARCHAR2,  -- Target model name
        cred_name  IN VARCHAR2 DEFAULT NULL -- Credential name
    ) IS
    BEGIN
        -- Step 1: Drop the existing ONNX model if it exists
        -- (using force=true to override)
        DBMS_VECTOR.DROP_ONNX_MODEL(
            model_name => modelname,
            force => TRUE
        );

        $IF $$Is_Cloud = TRUE $THEN
            lang_data_logger_pkg.log_info(
                'Autonomous Database environment detected');

            EXECUTE IMMEDIATE 
                'CREATE OR REPLACE DIRECTORY LANGDATA_MODEL_DIR as ''langdata_models''';
            lang_data_logger_pkg.log_info('MODEL_DIR Directory created...');

            -- Copy the model file from the object store into the directory
            DBMS_CLOUD.GET_OBJECT(
                credential_name => cred_name,
                directory_name => 'LANGDATA_MODEL_DIR',
                object_uri => model_path||filename);

            DBMS_VECTOR.LOAD_ONNX_MODEL(
                directory => 'LANGDATA_MODEL_DIR',
                file_name => filename,
                model_name => modelname);
        $ELSE
            -- Step 2: Create or replace the directory LANGDATA_MODEL_DIR
            -- using the provided model_path. Embedded single quotes are
            -- doubled so the path cannot terminate the DDL string literal.
            EXECUTE IMMEDIATE
                'CREATE OR REPLACE DIRECTORY LANGDATA_MODEL_DIR AS ''' ||
                REPLACE(model_path, '''', '''''') || '''';

            lang_data_logger_pkg.log_info('MODEL_DIR Directory created...');

            -- Step 3: Load the ONNX model using DBMS_VECTOR.LOAD_ONNX_MODEL
            DBMS_VECTOR.LOAD_ONNX_MODEL(
                directory => 'LANGDATA_MODEL_DIR',     -- Directory alias
                file_name => filename,      -- ONNX model file name
                model_name => modelname      -- Target model name
            );
        $END

        -- Report success for both deployment flavours
        lang_data_logger_pkg.log_info(
            'ONNX model "' || filename || '" loaded successfully into "' ||
            modelname || '" from ' || model_path
        );

    END load_onnx_model;

    -- Creates the scheduler job class used by Lang-Data, with names and CPU
    -- limits read from lang_data_config_pkg.
    --   Is_Cloud = TRUE : maps LANG_DATA_CPU_LIMIT to one of the consumer
    --                     groups HIGH (> 57), MEDIUM (> 29) or LOW and creates
    --                     the job class on it; an already existing job class
    --                     (ORA-27477) is dropped and created again.
    --   otherwise       : drops the previous plan via
    --                     lang_data_cleanup_pkg.drop_resource_plan, builds a
    --                     resource plan with a Lang-Data consumer group and an
    --                     OTHER_GROUPS directive, switches to it and creates
    --                     the job class on the Lang-Data consumer group.
    -- Failures are logged and re-raised.
    PROCEDURE create_resource_plan
    IS
        v_lang_data_cpu_limit     NUMBER;
        v_other_groups_cpu_limit  NUMBER;
        v_consumer_group          VARCHAR2(10);
        v_job_class_name           VARCHAR2(100);
        v_resource_plan_name       VARCHAR2(100);
        v_consumer_group_name      VARCHAR2(100);
    BEGIN
        v_job_class_name := lang_data_config_pkg.get_config_parameter(
            'LANG_DATA_JOB_CLASS_NAME'
        );
        v_resource_plan_name := lang_data_config_pkg.get_config_parameter(
            'LANG_DATA_RESOURCE_PLAN_NAME'
        );
        v_consumer_group_name := lang_data_config_pkg.get_config_parameter(
            'LANG_DATA_CONSUMER_GROUP_NAME'
        );

        $IF $$Is_Cloud = TRUE $THEN
            v_lang_data_cpu_limit := TO_NUMBER(
                lang_data_config_pkg.get_config_parameter(
                    'LANG_DATA_CPU_LIMIT'));

            -- Pick the consumer group from the configured CPU limit
            IF v_lang_data_cpu_limit > 57 THEN
                v_consumer_group := 'HIGH';
            ELSIF v_lang_data_cpu_limit > 29 THEN
                v_consumer_group := 'MEDIUM';
            ELSE
                v_consumer_group := 'LOW';
            END IF;
            BEGIN

                DBMS_SCHEDULER.CREATE_JOB_CLASS(
                    job_class_name           => v_job_class_name,
                    resource_consumer_group  => v_consumer_group,
                    log_history              => 30
                        -- Retain job logs for 30 days
                );
            EXCEPTION
                WHEN OTHERS THEN
                    -- -27477: the job class already exists; recreate it
                    IF SQLCODE = -27477 THEN
                        lang_Data_logger_pkg.log_debug(
                            v_job_class_name || ' already exists, '||
                            'deleting existing jobclass'
                        );
                        DBMS_SCHEDULER.DROP_JOB_CLASS(v_job_class_name);
                        DBMS_SCHEDULER.CREATE_JOB_CLASS(
                            job_class_name           => v_job_class_name,
                            resource_consumer_group  => v_consumer_group,
                            log_history              => 30
                                -- Retain job logs for 30 days
                        );
                    ELSE
                        -- Logged as an error, with the cause, like the other
                        -- failure paths of this package
                        lang_data_logger_pkg.log_error(
                            'Failed to create ' || v_job_class_name ||
                            ': ' || SQLERRM
                        );
                        RAISE;
                    END IF;
            END;
        $ELSE
        BEGIN
            v_lang_data_cpu_limit := TO_NUMBER(
                lang_data_config_pkg.get_config_parameter('LANG_DATA_CPU_LIMIT')
            );
            v_other_groups_cpu_limit := TO_NUMBER(
                lang_data_config_pkg.get_config_parameter(
                    'LANG_DATA_CPU_LIMIT_OTHER_GROUPS'
                )
            );

            lang_data_cleanup_pkg.drop_resource_plan;
            -- Start the pending area for resource plan modifications
            DBMS_RESOURCE_MANAGER.CREATE_PENDING_AREA;

            -- Create resource plan for langdata with reasonable defaults
            DBMS_RESOURCE_MANAGER.CREATE_PLAN(
                v_resource_plan_name,
                'Resource plan for langdata'
            );

            -- Define a consumer group for langdata
            DBMS_RESOURCE_MANAGER.CREATE_CONSUMER_GROUP(
                v_consumer_group_name,
                'langdata application resource group'
            );

            -- Assign CPU and parallelism limits to langdata consumer group
            DBMS_RESOURCE_MANAGER.CREATE_PLAN_DIRECTIVE(
                plan                     => v_resource_plan_name,
                group_or_subplan         => v_consumer_group_name,
                comment                  => 'Resource limits for langdata ' ||
                                            'application',
                -- CPU limit for langdata group
                cpu_p1                   => v_lang_data_cpu_limit,
                -- Max degree of parallelism for langdata
                parallel_degree_limit_p1 => 10
            );

            -- Add OTHER_GROUPS to handle sessions that don't belong to
            -- the langdata consumer group
            DBMS_RESOURCE_MANAGER.CREATE_PLAN_DIRECTIVE(
                plan                     => v_resource_plan_name,
                group_or_subplan         => 'OTHER_GROUPS',
                comment                  => 'Default group for other sessions',
                cpu_p1                   => v_other_groups_cpu_limit
            );

            -- Validate and submit the changes in the pending area
            DBMS_RESOURCE_MANAGER.VALIDATE_PENDING_AREA;
            DBMS_RESOURCE_MANAGER.SUBMIT_PENDING_AREA;

            -- Activate the resource plan
            DBMS_RESOURCE_MANAGER.SWITCH_PLAN(v_resource_plan_name);

            -- Create a job class associated with the resource consumer group
            DBMS_SCHEDULER.CREATE_JOB_CLASS(
                job_class_name           => v_job_class_name,
                resource_consumer_group  => v_consumer_group_name,
                log_history              => 30 -- Retain job logs for 30 days
            );

            lang_data_logger_pkg.log_info(
                'Resource plan and job class created successfully.'
            );
        EXCEPTION
            WHEN OTHERS THEN
                lang_data_logger_pkg.log_error(
                    'Error creating resource plan: ' || SQLERRM
                );
                -- Best-effort cleanup: a failure while clearing the pending
                -- area must not replace the error being reported.
                BEGIN
                    DBMS_RESOURCE_MANAGER.CLEAR_PENDING_AREA;
                EXCEPTION
                    WHEN OTHERS THEN
                        lang_data_logger_pkg.log_error(
                            'Failed to clear pending area: ' || SQLERRM
                        );
                END;
                RAISE;
        END;
        $END

    END create_resource_plan;

    -- Reads a SQL script from work_dir/file_name through a temporary
    -- directory object SQL_DIR, splits it on `delimiter` and runs every
    -- non-empty piece with EXECUTE IMMEDIATE.
    --   work_dir  : operating-system directory that holds the script
    --   file_name : script file name inside work_dir
    --   delimiter : character(s) used as statement separator; they are placed
    --               inside a regular-expression bracket list, so every
    --               occurrence of any of them in the script splits the text
    -- Reading rules: lines starting with '--', '@@' or 'Rem' are skipped and
    -- reading stops at the first line starting with '@?'.
    -- A failing statement is logged and execution continues with the next
    -- one; errors outside statement execution (directory creation, file
    -- access) are logged and re-raised after the file handle is closed.
    PROCEDURE execute_sql_script (
        work_dir IN VARCHAR2,
        file_name IN VARCHAR2,
        delimiter IN VARCHAR2
    )
    IS
        file_handle UTL_FILE.FILE_TYPE;
        line_text   VARCHAR2(32767);
        sql_script  CLOB := EMPTY_CLOB();
        chunk_size  CONSTANT PLS_INTEGER := 32767;
        oracle_home VARCHAR2(200) := SYS_CONTEXT('USERENV', 'ORACLE_HOME');
    BEGIN
        lang_data_logger_pkg.log_info('Oracle Home = ' || oracle_home);

        -- Single quotes in work_dir are doubled so the path cannot terminate
        -- the DDL string literal.
        EXECUTE IMMEDIATE
                'CREATE OR REPLACE DIRECTORY SQL_DIR AS ''' ||
                REPLACE(work_dir, '''', '''''') || '''';

        -- Open the file for reading; max_linesize matches the line buffer,
        -- otherwise UTL_FILE limits lines to its default of 1024 characters
        file_handle := UTL_FILE.FOPEN(
            'SQL_DIR', file_name, 'r', max_linesize => chunk_size
        );

        -- Read the file line by line and build the SQL script
        LOOP
            BEGIN
                UTL_FILE.GET_LINE(file_handle, line_text);
                IF TRIM(SUBSTR(line_text, 1, 2)) = '@?' THEN
                    -- Drop the trailing newline before stopping
                    IF LENGTH(sql_script) > 0 AND 
                        SUBSTR(sql_script, -1) = CHR(10) THEN
                        sql_script := SUBSTR(
                                            sql_script, 1, 
                                            LENGTH(sql_script) - 1
                                        );
                    END IF;
                    EXIT;
                END IF;
                IF TRIM(SUBSTR(line_text, 1, 2)) = '--' OR 
                   TRIM(SUBSTR(line_text, 1, 2)) = '@@' OR
                   TRIM(SUBSTR(line_text, 1, 3)) = 'Rem' THEN
                    CONTINUE; -- Skip this line
                END IF;
                sql_script := sql_script || line_text || CHR(10);
            EXCEPTION
                WHEN NO_DATA_FOUND THEN
                    EXIT; -- End of file
            END;
        END LOOP;

        -- Close the file handle
        UTL_FILE.FCLOSE(file_handle);

        -- Split the SQL script by delimiter and execute each statement
        FOR statement IN (
            SELECT TRIM(
                REGEXP_SUBSTR(sql_script, '[^' || delimiter || ']+', 1, LEVEL)
            ) AS sql_stmt
            FROM DUAL
            CONNECT BY REGEXP_SUBSTR(
                sql_script, '[^' || delimiter || ']+', 1, LEVEL
            ) IS NOT NULL
        )
        LOOP
            BEGIN
                -- Skip empty or invalid statements
                IF statement.sql_stmt IS NULL OR 
                        LENGTH(TRIM(statement.sql_stmt)) = 0 THEN
                    lang_data_logger_pkg.log_info(
                        'Skipping empty or whitespace-only statement.'
                    );
                    CONTINUE;
                END IF;
                -- Execute the SQL statement
                lang_data_logger_pkg.log_info(
                    'SQL Statement: ' || statement.sql_stmt
                );
                EXECUTE IMMEDIATE statement.sql_stmt;
            EXCEPTION
                WHEN OTHERS THEN
                    -- Deliberate best effort: log and go on with the next
                    -- statement
                    lang_data_logger_pkg.log_error(
                        'Error executing statement: ' || statement.sql_stmt
                    );
                    lang_data_logger_pkg.log_error('Error: ' || SQLERRM);
            END;
        END LOOP;

        EXECUTE IMMEDIATE 'DROP DIRECTORY SQL_DIR';

        lang_data_logger_pkg.log_info('SQL script executed successfully.');
    EXCEPTION
        WHEN OTHERS THEN
            lang_data_logger_pkg.log_error(
                'Failed to execute SQL script ' || file_name || ': ' ||
                SQLERRM
            );
            -- Do not leak the file handle when reading failed part-way
            BEGIN
                IF UTL_FILE.IS_OPEN(file_handle) THEN
                    UTL_FILE.FCLOSE(file_handle);
                END IF;
            EXCEPTION
                WHEN OTHERS THEN
                    NULL; -- keep the original error as the one reported
            END;
            RAISE;
    END execute_sql_script;

    -- Grants EXECUTE on lang_data and lang_data_errors_pkg to the two roles
    -- named by lang_data_auth_pkg.c_lang_data_app_expert and
    -- lang_data_auth_pkg.c_lang_data_user (in that order, both packages for
    -- one role before moving to the next). Errors propagate to the caller.
    procedure grant_langdata_execution_privileges_by_role IS
        l_roles CONSTANT SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST(
            lang_data_auth_pkg.c_lang_data_app_expert,
            lang_data_auth_pkg.c_lang_data_user
        );
        l_grant_prefixes CONSTANT SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST(
            'GRANT EXECUTE ON lang_data TO ',
            'GRANT EXECUTE ON lang_data_errors_pkg TO '
        );
    BEGIN
        FOR role_idx IN 1 .. l_roles.COUNT LOOP
            FOR grant_idx IN 1 .. l_grant_prefixes.COUNT LOOP
                EXECUTE IMMEDIATE
                    l_grant_prefixes(grant_idx) || l_roles(role_idx);
            END LOOP;
        END LOOP;
    END grant_langdata_execution_privileges_by_role;

    procedure init(
        mount_dir IN VARCHAR2,
        p_setup_db IN BOOLEAN DEFAULT false
    ) is
        v_current_user VARCHAR2(100);
        v_backend_schema_version VARCHAR2(20);
        v_schema_version_exists NUMBER := 0;
        v_sql_dir VARCHAR2(2000);
        oracle_home VARCHAR2(200) := SYS_CONTEXT('USERENV', 'ORACLE_HOME');
        v_lookback_minutes VARCHAR2(10);
        onnx_model_dir VARCHAR2(200) := mount_dir || '/models';
    begin

        -- Setup the Configuration Parameter table.
        lang_data_config_pkg.populate_config_table(p_mount_dir => mount_dir);

        v_backend_schema_version := lang_data_config_pkg.get_config_parameter(
            p_name => 'LANG_DATA_SCHEMA_VERSION'
        );
        lang_data_logger_pkg.log_info(
            'LANG_DATA_SCHEMA_VERSION = ' || v_backend_schema_version
        );
        
        -- Get the current session user
        v_current_user := SYS_CONTEXT('USERENV', 'SESSION_USER');
        
		-- Create roles if they do not exist
        begin
            execute immediate 'CREATE ROLE ' || 
            lang_data_auth_pkg.c_lang_data_user;
            lang_data_logger_pkg.log_info(
                'Created ' || lang_data_auth_pkg.c_lang_data_user);
        exception
            when others then
				-- Role already exists error
                if sqlcode != -1921 then
                    raise;
                end if;
        end;

        begin
            execute immediate 'CREATE ROLE ' || 
            lang_data_auth_pkg.c_lang_data_app_expert;
            lang_data_logger_pkg.log_info(
                'Created ' || lang_data_auth_pkg.c_lang_data_app_expert);
        exception
            when others then
				-- Role already exists error
                if sqlcode != -1921 then
                    raise;
                end if;
        end;

        begin
            execute immediate 'grant ' || lang_data_auth_pkg.c_lang_data_user ||
             ' to ' || v_current_user;
            lang_data_logger_pkg.log_info(
                'granted ' || lang_data_auth_pkg.c_lang_data_user || 
                ' to ' || v_current_user);
            execute immediate 'grant ' || 
                lang_data_auth_pkg.c_lang_data_app_expert ||
                ' to ' || v_current_user;
            lang_data_logger_pkg.log_info(
                'granted ' || lang_data_auth_pkg.c_lang_data_app_expert || 
                ' to ' || v_current_user);
        exception
            when others then
                lang_data_logger_pkg.log_error(
                    'failed to grant ' || lang_data_auth_pkg.c_lang_data_user ||
                     ' to ' || v_current_user || ': ' || sqlerrm);
                raise;
        end;
        
        lang_data_setup_pkg.create_python_functions;
        lang_data_setup_pkg.create_stoplist;
        lang_data_setup_pkg.create_resource_plan;

        lang_data_logger_pkg.log_info('Oracle Home = ' || oracle_home);
        
        $IF $$Is_Cloud = TRUE $THEN
            -- Don't run load_onnx_model in case of autonomous database
            null;
        $ELSE
            
            load_onnx_model(
                model_path  => onnx_model_dir,
                filename  => 'multilingual-e5-base.onnx',
                modelname => 'MULTILINGUAL_E5_BASE'
            );
        $END

        -- Execute the SQL Scripts
        IF p_setup_db THEN
            v_sql_dir := oracle_home || 
                        '/dbgendev/src/langdata/install/langdata/backend/sql/';
            execute_sql_script(
                work_dir => v_sql_dir,
                file_name => 'create_tables.sql',
                delimiter => ';'
            );
            execute_sql_script(
                work_dir => v_sql_dir,
                file_name => 'create_triggers.sql',
                delimiter => '/'
            );
        END IF;

        -- Check if SCHEMA_VERSION_TABLE is empty, if so set the version
        BEGIN
            EXECUTE IMMEDIATE 'SELECT COUNT(1) FROM langdata$schemaversion' INTO v_schema_version_exists;
            
            IF v_schema_version_exists = 0 THEN
                EXECUTE IMMEDIATE '
                    INSERT INTO langdata$schemaversion ' || 
                    '(version_number, description, created_at) ' ||
                    'VALUES (:version, :description, SYSTIMESTAMP)' USING
                    v_backend_schema_version, 'Init Version';
                lang_data_logger_pkg.log_info(
                    'Schema version set to ' || v_backend_schema_version);
            END IF;
        EXCEPTION
            WHEN OTHERS THEN
                lang_data_logger_pkg.log_fatal(
                    'Failed to set schema version: ' || SQLERRM);
                RAISE;
        END;

        v_lookback_minutes := lang_data_config_pkg.get_config_parameter(
            p_name => 'LANG_DATA_FLASHBACK_LOOKBACK_MINUTES'
        );
        lang_data_utils_pkg.create_or_replace_job(
            p_job_name        => 'JOB_LANGDATA_BACKGROUND_FIXUP',
            p_job_action      => 'BEGIN lang_data_utils_pkg.background_fixup; END;',
            p_job_class       => 'LangdataJobClass',
            p_comments        => 'Sync drilldown values and indexes',
            p_priority        => 1,
            p_restart_on_fail => TRUE,
            p_restart_on_rec  => TRUE,
            p_repeat_interval => 'FREQ=MINUTELY;INTERVAL=' || v_lookback_minutes
        );
        lang_data_utils_pkg.create_or_replace_job(
            p_job_name        => 'JOB_LANGDATA_APEX_FILTERS_CLEANING',
            p_job_action      => 
                'BEGIN '
                || 'DELETE FROM langdata$apex_filters ' 
                || 'WHERE created_at < SYSTIMESTAMP - INTERVAL ''1'' DAY; '
                || 'COMMIT; END; ',
            p_job_class       => 'LangdataJobClass',
            p_comments        => 'Sync drilldown values and indexes',
            p_priority        => 1,
            p_restart_on_fail => TRUE,
            p_restart_on_rec  => TRUE,
            p_repeat_interval => 'FREQ=DAILY;INTERVAL=' || v_lookback_minutes
        );

        IF lang_data_config_pkg.g_custom_entities = FALSE THEN
            lang_data_named_entities_pkg.initialize_named_entities;
        END IF;

    end init;

end lang_data_setup_pkg;
/

