Rem
Rem $Header: dbgendev/src/langdata/plsql/utils/utils_pkg.pks /main/47 2025/08/17 19:34:27 deveverm Exp $
Rem
Rem utils_pkg.pks
Rem
Rem Copyright (c) 2024, 2025, Oracle and/or its affiliates.
Rem
Rem    NAME
Rem      utils_pkg.pks - Package specification for utility functions and
Rem                      procedures
Rem
Rem    DESCRIPTION
Rem      This package provides a wide range of utility functions and procedures 
Rem      used across the application
Rem
Rem    NOTES
Rem      None
Rem
Rem    BEGIN SQL_FILE_METADATA
Rem    SQL_SOURCE_FILE: dbgendev/src/langdata/plsql/utils/utils_pkg.pks
Rem    SQL_SHIPPED_FILE:
Rem    SQL_PHASE:
Rem    SQL_STARTUP_MODE: NORMAL
Rem    SQL_IGNORABLE_ERRORS: NONE
Rem    END SQL_FILE_METADATA
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    deveverm    08/14/25 - Added validate_vector, modified domain functions
Rem                           to remove min/max, gamma, vector_dimensions
Rem    jiangnhu    08/05/25 - Remove validate_match_document, add
Rem                           validate_match_document_and_grant_table_privileges
Rem                           in lang_data pkg
Rem    dadoshi     08/04/25 - JIRA_DBAI1147/1149: Added get_match_confidence
Rem    dadoshi     08/04/25 - JIRA_DBAI1147: Update get_or_create_domain to
Rem                           take in min/max distance, vector dimension for
Rem                           that namespace and gamma required as optional
Rem                           parameters
Rem    dadoshi     08/04/25 - JIRA_DBAI1147: Added get_domain_details
Rem    jiangnhu    08/04/25 - Move grant_read_on_user_tables to lang_data pkg
Rem    fgurrola    07/29/25 - DBAI-1070: Add normalize_schema_name function.
Rem    ruohli      07/24/25 - DBAI-1076: Added update_domain, delete_domain, and
Rem                           get_all_domains
Rem    dadoshi     07/22/25 - JIRA_DBAI1098: Add get_text_by_id for APEX
Rem    arevathi    07/14/25 - Add grant_read_on_user_tables
Rem    jiangnhu    06/24/25 - DBAI-909: Integrate text augmentation with domain
Rem    jiangnhu    06/09/25 - DBAI-844: Remove
Rem                           update_annotation_and_comment_records
Rem    ruohli      06/06/25 - DBAI-842: Added parallel query for 
Rem                           calculate_report_description_regression and remove
Rem                           top 2 matching logic
Rem    jiangnhu    06/03/25 - Implement check_annotation_changes_for_user_tables
Rem                           replace DDL trigger with polling
Rem    jiangnhu    05/28/25 - DBAI-576, DBAI-828: Implement
Rem                           sync_user_tables_changed_values,
Rem                           manage_langdata_tables_vector_indexes,
Rem                           background_fixup
Rem    jiangnhu    05/22/25 - DBAI-767: Add drop_value_vector_partition
Rem    dadoshi     05/20/25 - JIRA_DBAI804: Add print_clob for debugging
Rem    dadoshi     05/15/25 - JIRA_DBAI804: Add json_array_to_clob() function
Rem    jiangnhu    04/30/25 - DBAI-755: Use COMMENT to augment text
Rem    jiangnhu    04/24/25 - DBAI-662: Extend the logic of filters: support
Rem                           table column in another PDB or database
Rem    dadoshi     03/26/25 - JIRA_DBAI689: Add is_stopword() function.
Rem    dadoshi     04/11/25 - Add fetch_top_n_similairty_search_report_text
Rem    jiangnhu    04/09/25 - DBAI-731: Remove create_context_index,
Rem                           trg_recreate_context_index, 
Rem                           create_user_table_indexes,
Rem                           decrement_context_index_reference,
Rem                           decrement_user_table_index_references
Rem    jiangnhu    04/04/25 - Move get_entities_from_text from utils pkg to 
Rem                           named entities pkg to avoid mutual dependency
Rem    jiangnhu    04/02/25 - DBAI-624: Implement function get_embedding
Rem    jiangnhu    04/01/25 - DBAI-661: Implement
Rem                           drop_enumerable_set_value_vector_tables
Rem    deveverm    04/01/25 - DBAI-523: modified
Rem                           fetch_top_k_similarity_search_reports and
Rem                           fetch_top_k_similarity_search_drilldowns to add
Rem                           p_use_records, removed
Rem                         fetch_good_drilldown_description_change_search_records
Rem    dadoshi     03/26/25 - JIRA_DBAI689: Add is_stopword() function.
Rem    jiangnhu    03/26/25 - DBAI-692: Make enumeration limit config
Rem    jiangnhu    03/24/25 - DBAI-551: Implement augment_text_with_ner_entities
Rem    anisbans    03/24/25 - DBAI-518: Update table name
Rem    anisbans    03/21/25 - Added documentation for
Rem                           decrement_vector_table_references
Rem    jiangnhu    03/19/25 - DBAI-543: Better naming conventions for
Rem                           augmentation/amending
Rem    jiangnhu    03/14/25 - DBAI-661: Implement
Rem                           create_value_vector_from_enumerable_set
Rem    anisbans    03/13/25 - DBAI-518: Update table name
Rem    anisbans    03/13/25 - Added documentation
Rem    deveverm    03/11/25 - DBAI-546: added schema_name for cross_schema
Rem                           support
Rem    anisbans    03/11/25 - DBAI-556: Add decrement_user_table_index_references 
Rem                                    and decrement_context_index_reference  
Rem    jiangnhu    03/06/25 - DBAI-542: Add get_schema_version for backend
Rem    jiangnhu    03/06/25 - DBAI-632: Implement update_annotation_records
Rem    jiangnhu    03/07/25 - Move re_augmented_affected_texts,
Rem                           update_affected_texts to utils pkg
Rem    jiangnhu    02/28/25 - Update augment_text, match_substring_with_context
Rem                           to handle the case when config variables not set
Rem    jiangnhu    02/24/25 - Update check_table_exists to have parameter
Rem                           is_user_table
Rem    jiangnhu    02/18/25 - Add get_annotation
Rem    jiangnhu    02/14/25 - DBAI-575: Remove c_unknown_exception_code
Rem    jiangnhu    02/13/25 - DBAI-524, DBAI-555: Implement
Rem                           create_context_index, create_user_table_indexes,
Rem                           recreate_user_index
Rem    saloshah    02/11/25 - Added update_job_status and get_all_jobs procedure
Rem    jiangnhu    02/03/25 - DBAI-511: Update
Rem                           check_annotation_changes_table_column, update
Rem                           amend_description_no_update to
Rem                           generate_amendment_text
Rem    jiangnhu    01/31/25 - DBAI-511: Update function augment_query to
Rem                           procedure augment_text, add augmented_tokens
Rem    jiangnhu    01/29/25 - DBAI-505: Implement centroid version of
Rem                           search_from_query procedure
Rem    jiangnhu    12/03/24 - JIRA_DBAI-421: Add create_or_replace_job
Rem    saloshah    11/28/24 - DBAI-327: Added get_filter_descriptions and
Rem                           amend_description_no_update
Rem    jiangnhu    11/26/24 - Add check_column_exists
Rem    jiangnhu    11/22/24 - JIRA_DBAI-425: Add get_filter_description,
Rem                           check_table_exists,
Rem                           fetch_top_k_similarity_search_reports
Rem    arevathi    11/12/24 - JIRA_DBAI345: added update_table_column
Rem    arevathi    10/30/24 - Added
Rem                           fetch_good_drilldown_description_change_search_records
Rem    arevathi    10/29/24 - Added fetch_good_search_records and
Rem                           fetch_top_k_similarity_search_reports
Rem    dadoshi     10/29/24 - Format utils_pkg
Rem    dadoshi     10/25/24 - Add fetch_good_drilldown_search_records and
Rem                           fetch_top_k_similarity_search_drilldowns
Rem    dadoshi     10/25/24 - Add update_table_column()
Rem    dadoshi     10/25/24 - Fix get_sample_queries_paginated in utils
Rem    dadoshi     10/25/24 - Update fetch_and_update_filter_description()
Rem    dadoshi     10/25/24 - Add amend_description() to utils
Rem    deveverm    10/24/24 - added get_job
Rem    jiangnhu    10/22/24 - Format code
Rem    jiangnhu    10/18/24 - Fix text wrapping
Rem    jiangnhu    10/17/24 - Modify header 
Rem    pryarla     10/16/24 - Created
Rem

CREATE OR REPLACE PACKAGE lang_data_utils_pkg IS
/*
    PROCEDURE split_cursor
    ----------------------------------------------------------------------
    Description:
        Extracts and splits a cursor string into two components: a timestamp 
        (`created_at`) and an identifier (`id`). The cursor string is expected 
        to follow the format 'YYYY-MM-DD HH24:MI:SS.FF|ID'. This procedure 
        helps separate these two pieces of data for further processing.

        This procedure is particularly useful when dealing with cursor strings
        that contain both a timestamp and an identifier, and there is a need 
        to handle each component independently.

    Parameters:
        - p_cursor (IN VARCHAR2): The input cursor string in the format 
                                  'YYYY-MM-DD HH24:MI:SS.FF|ID', representing 
                                  the timestamp and ID to be split.
        - p_created_at (OUT TIMESTAMP): The extracted `created_at` timestamp 
                                       portion from the cursor string.
        - p_id (OUT VARCHAR2): The extracted `id` portion of the cursor string.

    Returns:
        - p_created_at (OUT TIMESTAMP): The extracted timestamp value from the 
                                        cursor string.
        - p_id (OUT VARCHAR2): The extracted identifier value from the cursor 
                               string.

    Exceptions:
        - None. If the input `p_cursor` is `NULL`, both `p_created_at` and 
          `p_id` will be set to `NULL`, and no further action will be taken.
    
    ----------------------------------------------------------------------
    PROCEDURE validate_enumerable_column
    ----------------------------------------------------------------------
    Description:
        Validates whether a given table column is suitable for use as an 
        enumerable filter. This procedure checks the column's data type and 
        whether it has a unique constraint, which are both prerequisites for 
        treating a column as enumerable in the application.

        It supports both local and remote (via database link) tables.

        This procedure performs the following validations:
        - Retrieves the column's data type from ALL_TAB_COLUMNS (optionally via
          DB link).
        - Checks if the column is part of a UNIQUE index.
        - If the column is neither of a supported data type nor uniquely
          constrained, it raises an error indicating that the column is not
          enumerable.

    Parameters:
        - p_table_name     (IN VARCHAR2): Name of the table containing the column.
        - p_column_name    (IN VARCHAR2): Name of the column to validate.
        - p_schema_name    (IN VARCHAR2): Owner/schema of the table.
        - p_db_link_name   (IN VARCHAR2): Optional database link to use for
                                          accessing remote table metadata.
                                          If NULL, accesses local data.

    Returns:
        - None. 

    Exceptions:
        - c_invalid_match_document (-20005):
            Raised when the column is not found, or when it is not a supported
            enumerable type and lacks a unique constraint. Detailed messages are 
            logged to aid debugging.
    
    ----------------------------------------------------------------------
    FUNCTION get_annotation
    ----------------------------------------------------------------------
    Description:
        Retrieves the annotation value for a specified column in a table, 
        optionally supporting access via a database link. Annotations are
        fetched from the `ALL_ANNOTATIONS_USAGE` view based on the provided 
        table, column, schema, and annotation name (defaults to 'DESCRIPTION').

        Supports querying across database links by dynamically constructing and 
        executing the SQL with the optional `p_db_link_name` parameter.

    Parameters:
        - p_table_name (IN VARCHAR2): The name of the table.
        - p_column_name (IN VARCHAR2): The name of the column.
        - p_schema_name (IN VARCHAR2): The schema that owns the table.
        - p_annotation_name (IN VARCHAR2, default 'DESCRIPTION'): The name of
          the annotation to fetch.
        - p_db_link_name (IN VARCHAR2, default NULL): Optional database link to
          query a remote database.

    Returns:
        - VARCHAR2: The annotation value for the specified column, or NULL if no
          annotation is found.

    Exceptions:
        - Returns NULL in case of any errors (e.g., no data found, database link
          error, etc.).
    
    ----------------------------------------------------------------------
    FUNCTION get_comment
    ----------------------------------------------------------------------
    Description:
        Retrieves the standard column comment for a specified table column,
        optionally through a database link. Column comments are extracted from
        the `ALL_COL_COMMENTS` view using the provided table, column, and schema
        name.

        Supports remote metadata access via dynamic SQL when a database link is
        provided.

    Parameters:
        - p_table_name (IN VARCHAR2): The name of the table.
        - p_column_name (IN VARCHAR2): The name of the column.
        - p_schema_name (IN VARCHAR2): The schema that owns the table.
        - p_db_link_name (IN VARCHAR2, default NULL): Optional database link to
          query a remote database.

    Returns:
        - VARCHAR2: The comment for the specified column, or NULL if no comment
          exists.

    Exceptions:
        - Returns NULL in case of any errors (e.g., no data found, database link
          error, etc.).
    
    ----------------------------------------------------------------------
    FUNCTION get_filter_description
    ----------------------------------------------------------------------
    Description:
        Retrieves the description for a specified filter, optionally including 
        additional information if provided. The function first attempts to fetch
        the description from the `user_annotations_usage` table based on the
        filter's associated table and column (if provided). If no description is
        found or if the description does not contain the additional description,
        the function constructs a combined description using the filter name and
        any additional description passed as a parameter. 

        This function is used in the `get_filter_values` function to retrieve
        the description of a single filter and to select the entity with the
        highest similarity to the filter description when there are multiple
        matches for entities of the same type. It plays a crucial role in
        ensuring that filters are populated with the most relevant descriptions
        when there are multiple matching entities.

    Parameters:
        - p_filter_name (IN VARCHAR2): The name of the filter for which the 
                                        description is being retrieved.
        - p_table_name (IN VARCHAR2): The name of the table that the filter is
                                      associated with (optional).
        - p_column_name (IN VARCHAR2): The name of the column that the filter is
                                       associated with (optional).
        - p_schema_name (IN VARCHAR2): The name of the schema to which 
                                    p_table_name belongs.
        - p_additional_description (IN VARCHAR2): An additional description 
                                                  of filter from the match
                                                  document (optional).
        - p_db_link_name (IN VARCHAR2): Optional. Database link name if the
                                        target table is remote.

    Returns:
        - VARCHAR2: The combined filter description, which may include the
                    original filter annotation and the additional description.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION fetch_and_update_filter_descriptions
    ----------------------------------------------------------------------
    Description:
        Fetches and updates filter descriptions from a given match document 
        in JSON format. The function extracts the filters from the document, 
        retrieves their corresponding annotations from the
        `user_annotations_usage` table, and appends any provided descriptions.
        If the existing annotation differs from the new description, the
        annotation is updated in the database with the new description.

        This function is used by the `expand_text` function to update 
        filter descriptions, ensuring that the latest descriptions are reflected
        in the annotations and that the vector data is updated accordingly.

    Parameters:
        - p_match_document (IN JSON): The match document containing the
                                       `filters` array, which specifies the
                                       filters and their associated descriptions

    Returns:
        - VARCHAR2: A comma-separated list of filter names and their 
                    descriptions.

    Exceptions:
        - None.

    ----------------------------------------------------------------------
    FUNCTION expand_text
    ----------------------------------------------------------------------
    Description:
        Amends an original description by appending filter descriptions. The
        function retrieves the filter descriptions using the
        `fetch_and_update_filter_descriptions` function, and then combines these
        descriptions with the original description. 

        This function ensures that the final description includes relevant
        filter information, enhancing the detail and clarity of the description.

    Parameters:
        - p_match_document (IN JSON): The match document containing the
                                       `filters` array, which specifies the
                                       filters and their associated descriptions
        - p_original_text (IN VARCHAR2): The original text.
        - p_text_type (IN VARCHAR2): The type of text.

    Returns:
        - VARCHAR2: The expanded text, which combines the original
                    text with the filter descriptions.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION check_table_exists
    ----------------------------------------------------------------------
    Description:
        Determines whether a table exists in the specified schema, optionally 
        over a database link. The function queries the `ALL_TABLES` view 
        (local or remote) to verify if a table with the given name exists.
        
        If a `db_link_name` is provided, the check is executed against the 
        remote database. If any error occurs during execution (e.g., invalid 
        DB link, insufficient privileges), the function returns `FALSE`.

    Parameters:
        - p_table_name (IN VARCHAR2): The name of the table to check.
        - p_schema_name (IN VARCHAR2): Optional. The schema that owns the table.
        - p_db_link_name (IN VARCHAR2): Optional. A database link name to use 
                                        when querying remote metadata.

    Returns:
        - BOOLEAN: `TRUE` if the table exists, otherwise `FALSE`.

    Exceptions:
        - None. All exceptions are caught and the function returns `FALSE`.

    ----------------------------------------------------------------------
    FUNCTION check_column_exists
    ----------------------------------------------------------------------
    Description:
        Checks whether a column exists in the specified table, optionally 
        over a database link. The function queries the `ALL_TAB_COLUMNS` 
        view (local or remote) to determine if the column is present.

        If a `db_link_name` is provided, the check is executed against the 
        remote database. If any error occurs (e.g., invalid DB link, 
        insufficient privileges), the function returns `FALSE`.

    Parameters:
        - p_table_name (IN VARCHAR2): The name of the table to check.
        - p_column_name (IN VARCHAR2): The name of the column to check.
        - p_schema_name (IN VARCHAR2): Optional. The schema that owns the table.
        - p_db_link_name (IN VARCHAR2): Optional. A database link name to use 
                                        when querying remote metadata.

    Returns:
        - BOOLEAN: `TRUE` if the column exists, otherwise `FALSE`.

    Exceptions:
        - None. All exceptions are caught and the function returns `FALSE`.
    
    ----------------------------------------------------------------------
    FUNCTION check_report_exists
    ----------------------------------------------------------------------
    Description:
        Checks if a report with the specified ID exists in the 
        `langdata$reports` table. It returns `TRUE` if the report exists, 
        and `FALSE` otherwise.

    Parameters:
        - p_report_id (IN VARCHAR2): The ID of the report to check for existence

    Returns:
        - BOOLEAN: `TRUE` if the report exists in the `langdata$reports` table, 
                   otherwise `FALSE`.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION check_drilldown_exists
    ----------------------------------------------------------------------
    Description:
        Checks if a drilldown document with the specified ID exists in the 
        `langdata$drilldowndocuments` table. It returns `TRUE` if the drilldown
        document exists, and `FALSE` otherwise.

    Parameters:
        - p_drilldown_id (IN VARCHAR2): The ID of the drilldown document to
                                        check for existence.

    Returns:
        - BOOLEAN: `TRUE` if the drilldown document exists in the 
                   `langdata$drilldowndocuments` table, otherwise `FALSE`.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION check_db_link_exists
    ----------------------------------------------------------------------
    Description:
        Determines whether a database link with the specified name exists 
        and is accessible in the current session. The function queries the 
        `ALL_DB_LINKS` view to check for a matching database link that starts 
        with the given name (case-insensitive match via `LIKE ...%`).

    Parameters:
        - p_db_link_name (IN VARCHAR2): The name (or prefix) of the database 
                                        link to check for existence.

    Returns:
        - BOOLEAN: `TRUE` if at least one matching DB link exists, otherwise
          `FALSE`.

    Exceptions:
        - None. 

    ----------------------------------------------------------------------------
    FUNCTION is_stopword
    ----------------------------------------------------------------------------
    Description:
        Checks if the provided word is a stop-word using the
        langdata$stoplist table.
    
    Parameters:
        - p_word (IN VARCHAR2): The word to check.
    
    Returns:
        - BOOLEAN: `TRUE` if the word provided is a stop-word for Lang-Data,
                    otherwise `FALSE`.
    
    Exceptions:
        - None.
    ----------------------------------------------------------------------------
    FUNCTION match_substring_with_context
    ----------------------------------------------------------------------
    Description:
        Extracts a substring from a given text, including a specified number of 
        words before and after the substring for context.
        This function is intended to deal with the following cases:
            - There are multiple entities of the same type
            - There are multiple matches for a token in the text during
              tokenization.
        In this case, match_substring_with_context is used to get entity or
        token with context, which will be used to compute the similarity
        with filter description. This context-aware matching enhances the
        accuracy of operations like fuzzy matching and semantic comparisons.
        
    Parameters:
        - p_text (IN VARCHAR2): The text to search within.
        - p_substring (IN VARCHAR2): The substring to match.
        - p_num_words_before (IN NUMBER): Number of words before the match to 
                                          include.
        - p_num_words_after (IN NUMBER): Number of words after the match to
                                         include.
        
    Returns:
        - VARCHAR2: The substring along with its context or 'No Match' if no 
          match is found.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    PROCEDURE augment_text
    ----------------------------------------------------------------------
    Description:
        Augments an input text by processing each token and finding its 
        corresponding description from relevant database tables. The procedure 
        splits the input text into tokens, normalizes them, and then searches 
        across columns of tables with `VARCHAR2` or `CLOB` types to identify 
        the most relevant description for each token. Context-aware matching 
        is employed to improve the accuracy of matches by considering 
        neighboring tokens in the input.

        For each token, if a matching description is found based on fuzzy 
        matching and vector distance, the token is augmented with the description 
        and added to the final reconstructed text. If no match is found, the 
        token is retained without augmentation. 

        The procedure returns the augmented text and a JSON array of tokens 
        that were augmented.

    Parameters:
        - p_text (IN VARCHAR2): The input text to be augmented.
        - p_domain_id (IN VARCHAR2, optional): An optional domain identifier 
            used to restrict token description lookup to value vector partitions 
            associated with this domain. If NULL, no domain-specific filtering 
            is applied.
        - p_augmented_text (OUT VARCHAR2): The reconstructed text with tokens 
                                           augmented by their corresponding 
                                           descriptions.
        - p_augmented_tokens (OUT JSON): A JSON array containing the list of 
                                         tokens that were augmented during 
                                         processing.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION get_filter_descriptions
    ----------------------------------------------------------------------
    Description:
        Retrieves the descriptions of filters from a given match document 
        in JSON format. It extracts the filters and their associated
        descriptions from the match document and checks the existing annotations
        in the `user_annotations_usage` table for each filter's corresponding
        table and column. If a description is found, it combines it with any
        provided description, ensuring no duplicates. 
        This function is used in `generate_expansion_text`, which supports the
        update-annotation workflow.

        The function returns a comma-separated list of filter names and their
        descriptions.

    Parameters:
        - p_match_document (IN JSON): The match document containing the
                                       `filters` array, which specifies the
                                       filters and their associated descriptions

    Returns:
        - VARCHAR2: A comma-separated list of filter names and their 
                    descriptions.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION generate_expansion_text
    ----------------------------------------------------------------------
    Description:
        Generates an expansion text by extracting filter descriptions from
        the given match document and formatting them according to the 
        text type. This function does not rely on any existing
        description but instead builds the expansion purely from the
        provided match document using `get_filter_descriptions`.

        The expansion text is used to dynamically update descriptions
        or queries in the `update_affected_texts` procedure and related 
        workflows.

    Parameters:
        - p_match_document (IN JSON): The match document containing the
                                      `filters` array, specifying the filters
                                      and their corresponding descriptions.
        - p_text_type (IN VARCHAR2): The type of description being
                                            generated, such as 'report',
                                            'drilldown', or 'query'.

    Returns:
        - VARCHAR2: The generated expansion text that includes the filter
                    descriptions formatted based on the text type.

    Exceptions:
        - None. 
    
    ----------------------------------------------------------------------
    FUNCTION check_search_record_exists
    ----------------------------------------------------------------------
    Description:
        Checks if a search record with the specified ID exists in the 
        `langdata$searchrecords` table. The function queries the table using 
        the provided search ID and returns `TRUE` if the record exists, 
        or `FALSE` otherwise.

    Parameters:
        - p_search_id (IN VARCHAR2): The search ID to check for existence 
                                      in the `langdata$searchrecords` table.

    Returns:
        - BOOLEAN: `TRUE` if the search record with the specified ID exists, 
                   `FALSE` otherwise.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    PROCEDURE drop_value_vector_partition
    ----------------------------------------------------------------------
    Description:
        Drops a specific value vector partition and its corresponding metadata 
        from the unified `langdata$drilldownvalues` table and 
        `langdata$value_vector_partition_descriptions` table.

        This procedure:
        - Deletes all rows in `langdata$drilldownvalues` that belong to the
          specified partition (as identified by `partition_name`).
        - Removes the corresponding row in the partition description metadata 
          table.
        - Issues an `ALTER TABLE DROP PARTITION` to remove the physical 
          partition from the main vector table.

    Parameters:
        - p_partition_name (IN VARCHAR2):
            The name of the partition to drop (should match the value used 
            in the `partition_name` column of `langdata$drilldownvalues`).

    Returns:
        - None.

    Exceptions:
        - None.

    Notes:
        - The partition name is expected to follow the format used in 
          `LANGDATA$DRILLDOWNVALUES_<SCHEMA>_<TABLE>_<COLUMN>[...]`.
        - The actual partition to be dropped is computed as:
              P_<partition_name_suffix>
          where the suffix is derived by stripping the first 25 characters
          and replacing `$` with `_`.
        - This procedure assumes the partition already exists; if it does not,
          `ORA-02149` (specified partition does not exist) or similar errors
          may be raised.
    
    ----------------------------------------------------------------------
    PROCEDURE create_value_vector_table
    ----------------------------------------------------------------------
    Description:
        Creates a value vector table for a given column by embedding its 
        distinct non-null values using VECTOR_EMBEDDING. The vector table is 
        used for filter token similarity matching during query augmentation.

        This procedure:
        - Retrieves unique values from the specified column (optionally from a
          remote DB).
        - Validates the number of unique values against a configured enumeration
          limit.
        - Embeds the values using `lang_data_utils_pkg.get_embedding`.
        - Creates a vector index if the row count exceeds a threshold.
        - Adds a context index if the column is of type VARCHAR2 or CLOB.
        - Drops any preexisting vector table if `p_force_regenerate` is TRUE.

    Parameters:
        - p_table_name (IN VARCHAR2):
            The name of the source table containing the column to embed.

        - p_column_name (IN VARCHAR2):
            The name of the column whose distinct values will be vectorized.

        - p_schema_name (IN VARCHAR2):
            The schema that owns the source table.

        - p_force_regenerate (IN BOOLEAN DEFAULT FALSE):
            If TRUE, the existing vector table will be dropped and recreated 
            if it already exists. If FALSE, the procedure will exit early 
            when the target table exists.

        - p_override_enumeration_limit (IN BOOLEAN DEFAULT FALSE):
            If TRUE, allows bypassing the enumeration limit check for very 
            high cardinality columns. If FALSE and the number of unique 
            values exceeds the configured threshold, an error is raised.

        - p_db_link_name (IN VARCHAR2 DEFAULT NULL):
            Optional. If provided, metadata and source data will be accessed 
            via this database link, enabling support for remote tables.
        
        - p_domain_id (IN VARCHAR2 DEFAULT NULL):
            Optional. If specified, this domain ID is recorded alongside the 
            vector partition and used for downstream filtering and augmentation. 
            If NULL, the vector table will not be associated with any domain.

    Returns:
        - None.

    Exceptions:
        - c_enumeration_limit_exceeded:
            Raised when the column's distinct value count exceeds the configured 
            limit and `p_override_enumeration_limit` is FALSE.

    Notes:
        - The vector table is created with the name format:
              LANGDATA$DRILLDOWNVALUES_<SCHEMA>_<TABLE>_<COLUMN>[_<DBLINK>]
        - A temporary view is created with the same naming pattern using 
          `LANGDATA_TEMP_VIEW_...` for processing.
        - If the column has a `DESCRIPTION` annotation, it is embedded as part 
          of the value string.
        - A vector index (IVF) is created only if the number of distinct values 
          exceeds 10,000.
        - A context index is also created on the value column if the datatype 
          is VARCHAR2 or CLOB.
        - The temporary view is dropped at the end of the process.
    
    ----------------------------------------------------------------------
    Procedure create_value_vector_from_enumerable_set
    ----------------------------------------------------------------------
    Description:
        This procedure creates a value vector table from a given enumerable 
        set by embedding the values using the VECTOR_EMBEDDING function. 
        If the enumerable set contains more than 10,000 values, a vector 
        index is also created to optimize searches.

    Parameters:
        - p_enumerable_set (IN JSON_ARRAY_T):
            The JSON array containing the values to be embedded into the 
            vector table.

        - p_filter_name (IN VARCHAR2):
            The name of the filter used for naming the vector table.

        - p_filter_description (IN VARCHAR2):
            An optional description of the filter. If provided, the description 
            is included in the embedding process.

        - p_force (IN BOOLEAN DEFAULT FALSE):
            A flag to indicate whether the process should be forced.
        
        - p_domain_id (IN VARCHAR2 DEFAULT NULL):
            Optional. If specified, this domain ID is associated with the 
            generated value vector partition and can be used for domain-scoped 
            augmentation. If NULL, no domain is assigned.

    Exceptions:
        - None.

    Notes:
        - The procedure creates a table named
          `LANGDATA$DRILLDOWNVALUES_<FILTER_NAME>`.
        - If `p_filter_description` is provided, the embedding process 
          includes this description to provide better context.
        - If the enumerable set contains more than 10,000 values, an 
          optimized vector index is created on the embedded vector column.
    
    ----------------------------------------------------------------------
    PROCEDURE create_value_vector_job
    ----------------------------------------------------------------------
    Description:
        Creates a job that generates a value vector table for the specified
        table and column. If the table does not exist, a job is created using
        `DBMS_SCHEDULER` to run the `create_value_vector_table` procedure.
        Additionally, the procedure inserts the job details into the 
        `langdata$jobs_history` table for tracking.

    Parameters:
        - p_match_document (IN JSON): The match document containing the filter 
                                       information, which determines the table 
                                       and column to process.
        
        - p_document_id (IN VARCHAR2): 
            A unique document identifier used to generate a stable value vector 
            table name when the filter is based on an enumerable set. This
            ensures that each document's vectors are uniquely tracked. Required 
            only when `enumerable_set` is present in the filter.
        
        - p_domain_id (IN VARCHAR2 DEFAULT NULL):
            Optional. If provided, this domain ID is passed to the vector table 
            creation process, ensuring that generated embeddings are associated 
            with the correct domain scope.

    Returns:
        - None.

    Exceptions:
        - None. If the job already exists (error `-27477`), the procedure logs 
          a debug message. For any other unexpected errors, the procedure logs 
          the error and raises a generic error.

    ----------------------------------------------------------------------
    PROCEDURE get_all_jobs
    ----------------------------------------------------------------------
    Description:
        The get_all_jobs procedure returns all running and the most recently
        finished background jobs that create value vectors.
    Parameters:
        - p_job_cursor (OUT SYS_REFCURSOR):
          A reference cursor used to output the result set containing the
          running and latest finished value vector jobs.

    Exceptions:
        - c_unknown_exception_code (-20001):
        A catch-all exception handler for any unknown issues encountered 
        during the execution. A custom error with code -20001 is raised 
        using lang_data_errors_pkg with the message "Unknown Exception."

    ----------------------------------------------------------------------
    PROCEDURE update_job_status
    ----------------------------------------------------------------------
    Description:
        The update_job_status procedure creates or stops a job that creates
        value vectors, based on the given job name and the action to be
        performed (RUN/STOP) on the job.
        The table and column names for which value vectors are generated are
        extracted from the langdata$jobs_history table.
    Parameters:
        - p_action (IN VARCHAR2): The action to be performed on the job.
        - p_job_name (IN VARCHAR2): Job name on which action has to be performed.

    Returns:
        - None.

    Exceptions:
        - c_resource_not_found (-20004):
        Raised when no job with the given job_name is found. An error with
        code -20004 is raised with a custom error message.
        - c_unknown_exception_code (-20001):
        A catch-all exception handler for any unknown issues encountered 
        during the execution. A custom error with code -20001 is raised 
        using lang_data_errors_pkg with the message "Unknown Exception."

    ----------------------------------------------------------------------
    Procedure get_sample_queries_paginated
    ----------------------------------------------------------------------
    Description:
        Retrieves paginated sample queries for a specific report from the 
        langdata$samplequeries table. It allows controlled retrieval using 
        pagination with a serialized cursor. The sample queries include 
        fields such as ID, query text, and augmented query text.

    Parameters:
    ----------------------------------------------------------------------
    - p_id (IN VARCHAR2):
        The unique identifier of the report for which sample queries 
        are being retrieved.

    - p_limit (IN NUMBER DEFAULT 10):
        Specifies the maximum number of sample query rows to fetch in one 
        call. If not provided, the default value is 10.

    - p_cursor (IN OUT VARCHAR2):
        An optional serialized pagination cursor, represented in the 
        format `created_at|id`. For the first page, this value is NULL, 
        and the procedure will return the next cursor value for subsequent 
        pages.

    - p_sample_queries (OUT SYS_REFCURSOR):
        A cursor that returns the result set of sample queries, including 
        fields such as ID, query text, and augmented query text.

    Exceptions:
    ----------------------------------------------------------------------
    - None.

    ----------------------------------------------------------------------
    PROCEDURE update_table_column
    ----------------------------------------------------------------------
    Description:
        This procedure dynamically updates a specified column in a given table 
        with the given value, filtered by ID. 

    Parameters:
    ----------------------------------------------------------------------
    - p_table_name (IN VARCHAR2):
        The name of the table in which the specified column will be updated. 

    - p_column_name (IN VARCHAR2):
        The name of the column to be updated.

    - p_id (IN VARCHAR2):
        The unique identifier value for the row to be updated. 

    - p_value (IN VARCHAR2):
        The new value to assign to the specified column. The procedure 
        updates the column with this value for the identified row.

    - p_error_message (IN VARCHAR2):
        A custom error message that the procedure raises if no rows 
        are affected by the update. If not provided, a default message is used.

    Exceptions:
    ----------------------------------------------------------------------
    - c_resource_not_found (-20004):
        Raised when no record with the specified ID is found. An error with
        code -20004 is raised with a custom error message.

    ----------------------------------------------------------------------
    Procedure: fetch_good_search_records
    ----------------------------------------------------------------------
    Description:
        Retrieves valid search records that have a positive feedback rating
        and are relevant to the given description.

    Parameters:
    ----------------------------------------------------------------------
    - new_description (IN VARCHAR2):
        The description used to filter search records

    - p_good_search_records (OUT SYS_REFCURSOR):
        An output cursor that contains the resulting set of valid search 
        records. 

    Exceptions:
    ----------------------------------------------------------------------
    - None.

    ----------------------------------------------------------------------------
    Procedure: fetch_top_n_similairty_search_drilldown_text
    ----------------------------------------------------------------------------
    Description:
        Retrieves the top N most similar texts associated with a drilldown,
        i.e., the top-N texts closest to a search query among the sample
        queries, previous search records, and drilldown descriptions
        associated with the drilldown. The procedure returns a list of the
        top N texts for a given drilldown and the given search query.

    Parameters:
        - p_query (IN  VARCHAR2): 
            The search query used.

        - p_drilldown_id (IN  VARCHAR2):
            ID of the drilldown for which Top-N texts are to be fetched.

        - p_n (IN  NUMBER DEFAULT 3):
            The number of top texts associated with a drilldown.

        - p_use_records (IN  BOOLEAN DEFAULT TRUE):
            Boolean representing whether to consider previous search records 
            to fetch top-N texts of a drilldown.

        - p_texts (OUT SYS.ODCIVARCHAR2LIST):
            List of the Top-N texts associated with a drilldown.

    Exceptions:
        None.
    ----------------------------------------------------------------------------
    Procedure: fetch_top_n_similairty_search_report_text
    ----------------------------------------------------------------------------
    Description:
        Retrieves the top N most similar texts associated with a report,
        i.e., the top-N texts closest to a search query among the sample
        queries, previous search records, and report descriptions associated
        with the report. The procedure returns a list of the top N texts for
        a given report and the given search query.

    Parameters:
        - p_query (IN  VARCHAR2): 
            The search query used.

        - p_report_id (IN  VARCHAR2):
            ID of the report for which Top-N texts are to be fetched.

        - p_n (IN  NUMBER DEFAULT 3):
            The number of top texts associated with a report.

        - p_use_records (IN  BOOLEAN DEFAULT TRUE):
            Boolean representing whether to consider previous search records 
            to fetch top-N texts of a report.

        - p_texts (OUT SYS.ODCIVARCHAR2LIST):
            List of the Top-N texts associated with a report.

    Exceptions:
        None.
    ----------------------------------------------------------------------------
    Procedure: fetch_top_k_similarity_search_reports
    ----------------------------------------------------------------------------
    Description:
        Retrieves the top K search reports most similar to a given query. 
        The procedure returns a cursor with the reports that best match the
        input query, ordered by similarity score.

    Parameters:
    ----------------------------------------------------------------------
    - p_query (IN VARCHAR2):
        The search query used to find similar reports.

    - p_k (IN NUMBER):
        The number of top results to fetch based on similarity.
        The default value is 5.

    - p_domain_id (IN VARCHAR2):
        Optional parameter to filter based on domain_id.

    - p_results (OUT SYS_REFCURSOR):
        An output cursor that returns the result set of the top K similarity
        search reports.

    Exceptions:
    ----------------------------------------------------------------------
    - None.
    
    ----------------------------------------------------------------------
    PROCEDURE re_augment_affected_texts
    ----------------------------------------------------------------------
    Description:
        Re-augments affected texts across multiple tables where the specified
        value (`p_value`) exists in the `augmented_tokens` JSON field. It
        updates the augmented text and tokens, and recalculates the vector
        embeddings.

        The procedure targets:
        - `langdata$searchrecords`
        - `langdata$samplequeries`
        - `langdata$reportdescriptions`
        - `langdata$drilldowndescriptions`

        For each matching record, it calls `lang_data_utils_pkg.augment_text`
        to generate new augmented text and tokens, and updates the vector
        embeddings using the `VECTOR_EMBEDDING` function.

    Parameters:
        - p_value (IN VARCHAR2): The token or value to search for within the
                                 `augmented_tokens` JSON array in each table.

    Updates:
        - Updates the augmented text and tokens using `augment_text`.
        - Updates the vector embeddings using `VECTOR_EMBEDDING`.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    PROCEDURE update_affected_texts
    ----------------------------------------------------------------------
    Description:
        Updates affected report descriptions, drilldown descriptions, 
        sample queries and user queries based on table and column references.
        If `p_enumerable` is true, it iterates through all distinct values in
        the specified column of the given table and re-augments the related
        descriptions and queries calling `re_augment_affected_texts`.

        The procedure also re-amends existing descriptions and queries using the
        match document to generate new amendment text and update the vector
        embeddings for:
        - Reports (`langdata$reportdescriptions`)
        - Drilldowns (`langdata$drilldowndescriptions`)
        - Sample queries (`langdata$samplequeries`)

        The procedure supports both local and remote sources (via DB link).

    Parameters:
        - p_table_name (IN VARCHAR2): The name of the table to process.
        - p_column_name (IN VARCHAR2): The column within the table to examine.
        - p_enumerable (IN BOOLEAN): A flag indicating whether to iterate 
                                     through distinct values of the column.
        - p_schema_name (IN VARCHAR2): The name of the schema to which 
                                    p_table_name belongs.
        - p_db_link_name (IN VARCHAR2): Optional. A database link name if the 
                                        table is remote.
    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    PROCEDURE check_annotation_changes_for_table_column
    ----------------------------------------------------------------------
    Description:
        This procedure checks whether the annotation values of a specific
        column, identified by the given table_name and column_name, have
        changed and updates langdata$annotations_catalog, the affected
        descriptions, drilldown values, and affected queries, and also
        regenerates the value vectors. The procedure supports both local and
        remote sources (via DB link).

    Parameters:
        - p_table_name (IN VARCHAR2):
            The table_name for which annotation changes are checked.
        - p_column_name (IN VARCHAR2):
            The column_name for which annotation changes are checked.
        - p_schema_name (IN VARCHAR2): The name of the schema to which 
                                    p_table_name belongs.
        - p_db_link_name (IN VARCHAR2): Optional. A database link name if the 
                                        table is remote.
        - p_immediate (IN BOOLEAN): Set this to FALSE to submit a job for this
                                    procedure. The default is FALSE.
    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    PROCEDURE check_annotation_changes_for_user_tables
    ----------------------------------------------------------------------
    Description:
        Iterates over all rows in the `langdata$annotations_catalog` table and 
        invokes `check_annotation_changes_for_table_column` for each tracked 
        (schema, table, column, db link) entry.

        This procedure is used to perform a full sweep check of all registered 
        columns to determine whether annotation values have changed, and 
        triggers corresponding updates to annotations, descriptions, drilldown 
        values, related queries, and value vector embeddings.

        The check supports both local and remote sources (via DB link) and 
        delegates per-column logic to
        `check_annotation_changes_for_table_column`.

    Parameters:
        - None.

    Exceptions:
        - None.

    ----------------------------------------------------------------------
    PROCEDURE create_or_replace_job
    ----------------------------------------------------------------------
    Description:
        Creates or replaces a job in the Oracle scheduler. The procedure first 
        attempts to drop any existing job with the specified job name. If the
        job does not exist, it proceeds without any issue. Then, the procedure
        creates a new job with the specified attributes, including job name,
        PL/SQL action, job class, comments, priority, and restart settings.

    Parameters:
        - p_job_name (IN VARCHAR2): The name of the job to be created or
                                    replaced.
        - p_job_action (IN VARCHAR2): The PL/SQL block or program to execute as
                                      part of the job.
        - p_job_class (IN VARCHAR2): The job class to associate with the job.
        - p_comments (IN VARCHAR2): Comments for the job.
        - p_priority (IN NUMBER): The priority of the job.
        - p_restart_on_fail (IN BOOLEAN): Whether the job should restart on
                                          failure.
        - p_restart_on_rec (IN BOOLEAN): Whether the job should restart on
                                         recovery.

    Returns:
        - None.

    Exceptions:
        - None.
    ----------------------------------------------------------------------
    FUNCTION generate_id
    ----------------------------------------------------------------------
    Description:
        The generate_id function generates a unique identifier (UUID) in a
        lowercase, hyphen-separated format using SYS_GUID(). 
        It converts the raw GUID to a 36-character UUID by applying RAWTOHEX and
        formatting it with REGEXP_REPLACE.

    Parameters:
        NONE

    Returns:
        - VARCHAR2: Unique Identifier of length 36.

    Exceptions:
        - None.

    ----------------------------------------------------------------------
    PROCEDURE decrement_vector_table_references
    ----------------------------------------------------------------------
    Description:
        Iterates over a JSON document containing filters and manages 
        reference counts for vector tables associated with specified 
        table-column pairs. If a vector table's reference count reaches 
        zero, the table is dropped to free up resources.

    Parameters:
        - p_match_document (IN JSON): A JSON document containing an array 
                                    of filters specifying table and 
                                    column names.

    Returns:
        - None.

    Exceptions:
        - Raises an error if issues occur while dropping a vector table.
        - Logs if the match document has missing key "filters".

    ----------------------------------------------------------------------
    PROCEDURE drop_enumerable_set_value_vector_tables
    ----------------------------------------------------------------------
    Description:
        This procedure drops dynamically generated drilldown vector value tables
        for a given document (report or drilldown) based on its match_document
        JSON.

        It parses the `filters` array in the match_document and drops the
        corresponding LANGDATA$DRILLDOWNVALUES_* tables only for filters
        that are not NER-based and contain an enumerable_set.

    Parameters:
        - p_document_id (IN VARCHAR2):
            The unique identifier of the document (report_id or drilldown_id).
            Used to generate hash for constructing vector table names.

        - p_match_document (IN JSON):
            The JSON object that contains filtering logic for the document.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION get_embedding(p_document IN VARCHAR2)
    ----------------------------------------------------------------------
    Description:
        Generates a VECTOR embedding from the given input text using the 
        default embedding model (`ALL_MINILM_L12_V2`).

    Parameters:
        - p_document (IN VARCHAR2): The input text string to be embedded.

    Returns:
        - VECTOR: The embedding vector generated from the input text.

    Exceptions:
        - None.

    ----------------------------------------------------------------------
    FUNCTION augment_text_with_ner_entities
    ----------------------------------------------------------------------
    Description:
        Enhances a given input text by appending entity labels to recognized 
        named entities within the text. The function receives a list of 
        identified entities in JSON format, each including the start and end 
        positions, the entity text, and its label.

        The function modifies the original text by inserting the label after 
        each entity in the format: `text (label)`. The replacement is performed 
        in reverse order of entity positions to avoid offset shifting.

    Parameters:
        - p_text (IN VARCHAR2): The original input text to augment.
        - p_entities (IN JSON): A JSON array of entity objects containing:
            - start (NUMBER): Start index of the entity in the text.
            - end (NUMBER): End index of the entity in the text.
            - text (VARCHAR2): The entity string.
            - label (VARCHAR2): The type/category of the entity.

    Returns:
        - VARCHAR2: The input text with named entities augmented by their labels.

    Exceptions:
        - None.

    ----------------------------------------------------------------------
    FUNCTION get_or_create_domain
    ----------------------------------------------------------------------
    Description:
        Retrieves the domain_id for the given domain name. If the domain 
        does not exist, it inserts a new entry into langdata$domains with 
        a generated ID (with the domain name normalized to lowercase), and 
        returns the ID.

    Parameters:
        - p_domain_name (IN VARCHAR2): The name of the domain to retrieve or 
                                        create.

    Returns:
        - VARCHAR2: The domain_id associated with the domain name.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION get_match_confidence
    ----------------------------------------------------------------------------
    Description: 
        Computes the match confidence for the given vector distance.
    
    Parameters: 
        - p_distance (IN NUMBER): Vector distance for which the match confidence
                                  is to be computed.
    Returns:
        - NUMBER: Match confidence.

    Exceptions:
        - None.
    ----------------------------------------------------------------------------
    FUNCTION json_array_to_clob
    ----------------------------------------------------------------------
    Description:
        Converts a JSON_ARRAY_T containing JSON_OBJECT_T elements into a
        CLOB representing a valid JSON array. This function is useful when
        working with large JSON arrays that need to be serialized as text
        without hitting VARCHAR2 size limits.

        Each object in the input array is serialized using `to_string` and
        concatenated into a single CLOB string enclosed in square brackets,
        forming a valid JSON array representation.

    Parameters:
        - p_json_array (IN JSON_ARRAY_T): A JSON array of objects to be
          serialized into a single JSON-formatted CLOB.

    Returns:
        - CLOB: A CLOB containing the serialized JSON array.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    Procedure: sync_user_tables_changed_values
    ----------------------------------------------------------------------
    Description:
        Detects data changes (INSERT, UPDATE, DELETE) across all registered 
        tables and columns defined in the metadata table 
        `langdata$value_vector_metadata` using Oracle Flashback Versions Query.

        For each row that has changed in the past 15 minutes, the procedure:
        - Retrieves the earliest and latest value of the target column using 
        Flashback Versions (avoiding reliance on VERSIONS_OPERATION to ensure 
        safety against ROWID reuse).
        - For each of these values (earliest and latest):
            - If the value exists in the user table but not in the vector table,
            it is inserted into `langdata$drilldownvalues` along with its
            embedding.
            - If the value exists in the vector table but no longer exists in
            the user table, it is removed from the vector table.

    Parameters:
        None.

    Exceptions:
        None.
    
    ----------------------------------------------------------------------
    Procedure: manage_langdata_tables_vector_indexes
    ----------------------------------------------------------------------
    Description:
        Manages vector indexes for key langdata tables based on row volume.
        This procedure ensures efficient use of vector indexes by:
        
        - Checking if a specified vector index exists on a target table.
        - Dropping existing vector indexes if the table has fewer than 10,000
          rows.
        - Creating vector indexes if missing and the table has 10,000 or more
          rows.
        
        Vector indexes are created using cosine distance with 95% target
        accuracy and neighbor partitioning.

        The following tables and columns are managed:
        - LANGDATA$REPORTDESCRIPTIONS.DESCRIPTION_VECTOR
        - LANGDATA$DRILLDOWNDESCRIPTIONS.DDD_VECTOR
        - LANGDATA$SEARCHRECORDS.QUERY_VECTOR
        - LANGDATA$REPORTQUERYCLUSTER.CENTROID_VECTOR
        - LANGDATA$DRILLDOWNQUERYCLUSTER.CENTROID_VECTOR

    Parameters:
        None.

    Exceptions:
        None
    
    ----------------------------------------------------------------------
    Procedure: background_fixup
    ----------------------------------------------------------------------
    Description:
        Composite background maintenance procedure that performs:
        
        1. Synchronization of changed values across all registered user tables
        and columns via `sync_user_tables_changed_values`.
        2. Management of vector index presence and optimization for large tables
        via `manage_langdata_tables_vector_indexes`.

        Designed to be scheduled as a background maintenance job to keep the
        value vector infrastructure and index layout up to date.

    Parameters:
        None.

    Exceptions:
        None.

    ----------------------------------------------------------------------
    FUNCTION update_domain
    ----------------------------------------------------------------------
    Description:
        Updates the name of the domain specified by the given domain ID 
        to the new name provided.

    Parameters:
        - p_domain_id (IN VARCHAR2): The unique identifier of the domain to 
                                     update.
        - p_new_name (IN VARCHAR2): The new name for the domain.

    Returns:
        - BOOLEAN: TRUE if the update was successful, FALSE otherwise.

    Exceptions:
        - c_resource_not_found: Raised if the domain does not exist.
        - c_invalid_update: Raised if the new name already exists for 
                            another domain.

    ----------------------------------------------------------------------
    FUNCTION delete_domain
    ----------------------------------------------------------------------
    Description:
        Deletes the domain specified by the given domain ID from the 
        langdata$domains table.

    Parameters:
        - p_domain_id (IN VARCHAR2): The unique identifier of the 
                                     domain to delete.

    Returns:
        - BOOLEAN: TRUE if the deletion was successful, FALSE otherwise.

    Exceptions:
        - c_invalid_update: Raised if the domain does not exist or 
                            if it is referenced in reports or drilldowns.

    ----------------------------------------------------------------------
    FUNCTION get_all_domains
    ----------------------------------------------------------------------
    Description:
        Retrieves all domains from the langdata$domains table.

    Parameters:
        None.

    Returns:
        - SYS_REFCURSOR: A reference cursor containing the list of all domains.

    Exceptions:
        - None.
    
    ----------------------------------------------------------------------
    FUNCTION normalize_schema_name
    ----------------------------------------------------------------------
    Description:
        Capitalizes a schema name only if it doesn't include double quotes, so
        that quoted mixed-case schema names are preserved.

    Parameters:
        - p_schema_name (IN VARCHAR2): The schema name to normalize.

    Returns:
        - VARCHAR2: Schema name capitalized if it doesn't include double quotes,
                    otherwise it returns the schema name unmodified.

    Exceptions:
        None.

    ----------------------------------------------------------------------
    FUNCTION validate_vector
    ----------------------------------------------------------------------
    Description:
        Validates the provided vector to ensure it has the required dimensions.
    
    Parameters:
        - p_vector (IN VECTOR): The vector to validate.
    
    Returns:
        - BOOLEAN: TRUE if the vector is valid, FALSE otherwise.
    
    Exceptions:
        None.
*/

    -- Procedure (results are returned through OUT parameters, not a return
    -- value) that splits a cursor string into created_at and id.
    -- Parameters:
    --   p_cursor     (IN)  cursor string in the format
    --                      'YYYY-MM-DD HH24:MI:SS.FF|ID'
    --   p_created_at (OUT) TIMESTAMP component of the cursor
    --   p_id         (OUT) ID component of the cursor
    PROCEDURE split_cursor (
        p_cursor IN VARCHAR2,
        p_created_at OUT TIMESTAMP,
        p_id OUT VARCHAR2
    );

    -- Validation procedure for a single table column. It has no OUT
    -- parameters, so it presumably reports a failed validation by raising.
    -- NOTE(review): the name suggests it checks that the column qualifies as
    -- "enumerable"; the criteria and the errors raised are defined in the
    -- package body - confirm there.
    -- Parameters:
    --   p_table_name   (IN) table that owns the column
    --   p_column_name  (IN) column to validate
    --   p_schema_name  (IN) schema of the table (required, no default)
    --   p_db_link_name (IN) optional database link name; defaults to NULL
    PROCEDURE validate_enumerable_column (
        p_table_name     IN VARCHAR2,
        p_column_name    IN VARCHAR2,
        p_schema_name    IN VARCHAR2,
        p_db_link_name   IN VARCHAR2 DEFAULT NULL
    );

    -- Returns a VARCHAR2 for the annotation p_annotation_name of a table
    -- column.
    -- NOTE(review): presumably the annotation's value; the result when the
    -- annotation is absent is decided in the package body - confirm.
    -- Parameters (no mode given, so all are IN):
    --   p_table_name      table name
    --   p_column_name     column name
    --   p_schema_name     schema name (required, no default)
    --   p_annotation_name annotation to read; defaults to 'DESCRIPTION'
    --   p_db_link_name    optional database link name; defaults to NULL
    FUNCTION get_annotation(
        p_table_name        VARCHAR2,
        p_column_name       VARCHAR2,
        p_schema_name       VARCHAR2,
        p_annotation_name   VARCHAR2 DEFAULT 'DESCRIPTION',
        p_db_link_name      VARCHAR2 DEFAULT NULL
    ) RETURN VARCHAR2;

    -- Returns a VARCHAR2 for the comment of a table column.
    -- NOTE(review): presumably the column comment text; the result when no
    -- comment exists is decided in the package body - confirm.
    -- Parameters (no mode given, so all are IN):
    --   p_table_name   table name
    --   p_column_name  column name
    --   p_schema_name  optional schema name; defaults to NULL (unlike
    --                  get_annotation, where the schema is required)
    --   p_db_link_name optional database link name; defaults to NULL
    FUNCTION get_comment(
        p_table_name VARCHAR2,
        p_column_name VARCHAR2,
        p_schema_name VARCHAR2 DEFAULT NULL,
        p_db_link_name  VARCHAR2 DEFAULT NULL
    ) RETURN VARCHAR2;

    -- Returns a VARCHAR2 description for the filter p_filter_name.
    -- NOTE(review): how the optional table/column/schema and
    -- p_additional_description contribute to the result is decided in the
    -- package body - confirm there.
    -- Parameters:
    --   p_filter_name            (IN) filter name; the only required argument
    --   p_table_name             (IN) optional table name; defaults to NULL
    --   p_column_name            (IN) optional column name; defaults to NULL
    --   p_schema_name            (IN) optional schema name; defaults to NULL
    --   p_additional_description (IN) optional extra text; defaults to NULL
    --   p_db_link_name           (IN) optional database link name; defaults
    --                                 to NULL
    FUNCTION get_filter_description (
        p_filter_name           IN  VARCHAR2,
        p_table_name            IN  VARCHAR2 DEFAULT NULL,
        p_column_name           IN  VARCHAR2 DEFAULT NULL,
        p_schema_name           IN  VARCHAR2 DEFAULT NULL,
        p_additional_description IN  VARCHAR2 DEFAULT NULL,
        p_db_link_name           IN  VARCHAR2 DEFAULT NULL
    ) RETURN VARCHAR2;

    -- Function to fetch filter annotations of the filters in match_document.
    -- NOTE(review): the "update" in the name implies a side effect beyond the
    -- fetch; what is updated is not visible in this specification - confirm
    -- in the package body.
    -- Parameters:
    --   p_match_document (JSON) match document whose filters are processed
    -- Returns: VARCHAR2
    FUNCTION fetch_and_update_filter_descriptions (
        p_match_document JSON
    ) RETURN VARCHAR2;

    -- Function to expand a text based on filters in match document.
    -- Parameters:
    --   p_match_document (JSON)     match document supplying the filters
    --   p_original_text  (VARCHAR2) text to expand
    --   p_text_type      (VARCHAR2) kind of text being expanded
    --                               NOTE(review): the accepted values are not
    --                               visible in this specification
    -- Returns: VARCHAR2 - the expanded text
    FUNCTION expand_text (
        p_match_document JSON,
        p_original_text VARCHAR2,
        p_text_type VARCHAR2
    ) RETURN VARCHAR2;

    -- Existence check for a table.
    -- Parameters:
    --   p_table_name   (IN) table name to look for
    --   p_schema_name  (IN) optional schema name; defaults to NULL
    --   p_db_link_name (IN) optional database link name; defaults to NULL
    -- Returns: BOOLEAN - presumably TRUE when the table exists; how NULL
    --          schema / db link are resolved is decided in the package body.
    FUNCTION check_table_exists (
        p_table_name    IN VARCHAR2,
        p_schema_name   IN VARCHAR2 DEFAULT NULL,
        p_db_link_name  IN VARCHAR2 DEFAULT NULL
    ) RETURN BOOLEAN;

    -- Existence check for a column of a table.
    -- Parameters:
    --   p_table_name   (IN) table expected to own the column
    --   p_column_name  (IN) column name to look for
    --   p_schema_name  (IN) optional schema name; defaults to NULL
    --   p_db_link_name (IN) optional database link name; defaults to NULL
    -- Returns: BOOLEAN - presumably TRUE when the column exists; how NULL
    --          schema / db link are resolved is decided in the package body.
    FUNCTION check_column_exists (
        p_table_name    IN VARCHAR2,
        p_column_name   IN VARCHAR2,
        p_schema_name   IN VARCHAR2 DEFAULT NULL,
        p_db_link_name  IN VARCHAR2 DEFAULT NULL
    ) RETURN BOOLEAN;
    
    -- Function to check if the report ID exists in langdata$reports.
    -- Parameters:
    --   p_report_id (IN) report ID to look up
    -- Returns: BOOLEAN result of the existence check
    FUNCTION check_report_exists (
        p_report_id IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Function to check if the drilldown document ID exists in
    -- langdata$drilldowndocuments.
    -- Parameters:
    --   p_drilldown_id (IN) drilldown document ID to look up
    -- Returns: BOOLEAN result of the existence check
    FUNCTION check_drilldown_exists (
        p_drilldown_id IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Existence check for a database link.
    -- Parameters:
    --   p_db_link_name (IN) database link name to look for (required)
    -- Returns: BOOLEAN - presumably TRUE when the link exists; which
    --          dictionary view is consulted is decided in the package body.
    FUNCTION check_db_link_exists (
        p_db_link_name  IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Predicate over a single word.
    -- Parameters:
    --   p_word (IN) word to test
    -- Returns: BOOLEAN - presumably TRUE when p_word is a stopword.
    -- NOTE(review): the stopword source and any case handling are not visible
    -- in this specification - confirm in the package body.
    FUNCTION is_stopword (
        p_word IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Returns a VARCHAR2 derived from p_text and p_substring.
    -- NOTE(review): judging by the names, the result is the matched substring
    -- together with up to p_num_words_before words before it and
    -- p_num_words_after words after it; the no-match result is not visible
    -- here - confirm in the package body.
    -- Parameters:
    --   p_text             (IN) text to search in
    --   p_substring        (IN) substring to locate
    --   p_num_words_before (IN) context size before the match; defaults to 5
    --   p_num_words_after  (IN) context size after the match; defaults to 2
    FUNCTION match_substring_with_context (
        p_text           IN VARCHAR2,
        p_substring      IN VARCHAR2,
        p_num_words_before IN NUMBER DEFAULT 5,
        p_num_words_after  IN NUMBER DEFAULT 2
    ) RETURN VARCHAR2;

    -- Procedure to augment a text or description.
    -- Parameters:
    --   p_text             (IN)  text to augment
    --   p_domain_id        (IN)  optional domain ID; defaults to NULL
    --   p_augmented_text   (OUT) the augmented text
    --   p_augmented_tokens (OUT) JSON value describing the augmented tokens
    --                            NOTE(review): the JSON layout is not visible
    --                            in this specification
    -- Note: a defaulted IN parameter sits before the OUT parameters, so
    -- callers that omit p_domain_id must use named notation.
    PROCEDURE augment_text(
        p_text               IN VARCHAR2,
        p_domain_id          IN VARCHAR2 DEFAULT NULL,
        p_augmented_text     OUT VARCHAR2,
        p_augmented_tokens   OUT JSON
    );

    -- Returns a VARCHAR2 derived from the filters of a match document.
    -- NOTE(review): presumably the combined filter descriptions; the output
    -- format and how this differs from fetch_and_update_filter_descriptions
    -- are not visible in this specification - confirm in the package body.
    -- Parameters:
    --   p_match_document (JSON) match document to read the filters from
    FUNCTION get_filter_descriptions (
        p_match_document JSON
    ) RETURN VARCHAR2;

    -- Returns a VARCHAR2 generated from a match document for a given text
    -- type.
    -- NOTE(review): presumably the text that expand_text adds to the original
    -- text; the accepted p_text_type values are not visible here - confirm
    -- in the package body.
    -- Parameters:
    --   p_match_document (IN) match document (JSON)
    --   p_text_type      (IN) kind of text the expansion is generated for
    FUNCTION generate_expansion_text (
        p_match_document IN JSON,
        p_text_type IN VARCHAR2
    ) RETURN VARCHAR2;

    -- Function to check if the search record exists in
    -- langdata$searchrecords.
    -- Parameters:
    --   p_search_id (IN) search record ID to look up
    -- Returns: BOOLEAN result of the existence check
    FUNCTION check_search_record_exists (
        p_search_id IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Drops the value-vector partition named p_partition_name.
    -- NOTE(review): the owning table and the behavior for an unknown
    -- partition name are not visible in this specification - confirm in the
    -- package body.
    PROCEDURE drop_value_vector_partition(p_partition_name IN VARCHAR2);

    -- Creates the value-vector storage for one table column.
    -- NOTE(review): purpose inferred from the name; the exact objects created
    -- and the effect of each flag are defined in the package body - confirm.
    -- Parameters:
    --   p_table_name                 (IN) source table name
    --   p_column_name                (IN) source column name
    --   p_schema_name                (IN) schema of the source table
    --                                     (required, no default)
    --   p_force_regenerate           (IN) defaults to FALSE
    --   p_override_enumeration_limit (IN) defaults to FALSE
    --   p_db_link_name               (IN) optional database link name;
    --                                     defaults to NULL
    --   p_domain_id                  (IN) optional domain ID; defaults to NULL
    PROCEDURE create_value_vector_table (
        p_table_name                  IN VARCHAR2,
        p_column_name                 IN VARCHAR2,
        p_schema_name                 IN VARCHAR2,
        p_force_regenerate            IN BOOLEAN DEFAULT FALSE,
        p_override_enumeration_limit  IN BOOLEAN DEFAULT FALSE,
        p_db_link_name                IN VARCHAR2 DEFAULT NULL,
        p_domain_id                   IN VARCHAR2 DEFAULT NULL
    );

    -- Creates value vectors from an explicitly supplied set of values rather
    -- than from a table column.
    -- NOTE(review): purpose inferred from the name and parameters; the
    -- element type expected in p_enumerable_set and the meaning of p_force
    -- are defined in the package body - confirm.
    -- Parameters:
    --   p_enumerable_set     (IN) JSON_ARRAY_T holding the values
    --   p_document_id        (IN) ID of the owning document
    --   p_filter_name        (IN) name of the filter the set belongs to
    --   p_filter_description (IN) description of that filter
    --   p_force              (IN) defaults to FALSE
    --   p_domain_id          (IN) optional domain ID; defaults to NULL
    PROCEDURE create_value_vector_from_enumerable_set (
        p_enumerable_set      IN JSON_ARRAY_T,
        p_document_id         IN VARCHAR2,
        p_filter_name         IN VARCHAR2,
        p_filter_description  IN VARCHAR2,
        p_force               IN BOOLEAN DEFAULT FALSE,
        p_domain_id           IN VARCHAR2 DEFAULT NULL
    );
    
    -- Procedure to create value vector job.
    -- Parameters:
    --   p_match_document (IN) match document (JSON) the job is created for
    --   p_document_id    (IN) ID of the document owning the match document
    --   p_domain_id      (IN) optional domain ID; defaults to NULL
    -- NOTE(review): whether one job or one job per filter is created is not
    -- visible in this specification.
    PROCEDURE create_value_vector_job(
        p_match_document        IN JSON,
        p_document_id           IN VARCHAR2,
        p_domain_id             IN VARCHAR2 DEFAULT NULL
    );
    
    -- Returns all jobs through a ref cursor.
    -- Parameters:
    --   p_job_cursor (OUT) SYS_REFCURSOR over the jobs
    -- NOTE(review): the cursor's column list and which jobs are included are
    -- defined in the package body - confirm there.
    PROCEDURE get_all_jobs (
        p_job_cursor OUT SYS_REFCURSOR
    );

    -- Applies an action to the job named p_job_name.
    -- Parameters:
    --   p_action   (IN) action to apply
    --                   NOTE(review): the accepted action values are not
    --                   visible in this specification
    --   p_job_name (IN) name of the job to act on
    PROCEDURE update_job_status(
        p_action   IN VARCHAR2,
        p_job_name IN VARCHAR2
    );

    -- Procedure to get sample queries based on either report_id or
    -- drilldown_id, one page at a time.
    -- Parameters:
    --   p_report_id      (IN)     optional report ID; defaults to NULL
    --   p_drilldown_id   (IN)     optional drilldown ID; defaults to NULL
    --   p_limit          (IN)     page size; defaults to 10
    --   p_cursor         (IN OUT) paging cursor string, read on entry and
    --                             overwritten on exit
    --                             NOTE(review): presumably in the
    --                             'created_at|id' format handled by
    --                             split_cursor - confirm
    --   p_sample_queries (OUT)    SYS_REFCURSOR over the page of sample queries
    -- NOTE(review): behavior when both or neither ID is supplied is defined
    -- in the package body.
    PROCEDURE get_sample_queries_paginated (
        p_report_id      IN VARCHAR2 DEFAULT NULL,
        p_drilldown_id   IN VARCHAR2 DEFAULT NULL,
        p_limit          IN NUMBER DEFAULT 10,
        p_cursor         IN OUT VARCHAR2,
        p_sample_queries OUT SYS_REFCURSOR
    );
    
    -- Procedure to get background job details for a table column.
    -- Inputs identify the job; every detail is returned through an OUT
    -- parameter.
    -- Parameters:
    --   p_table_name   (IN)  table name
    --   p_column_name  (IN)  column name
    --   p_schema_name  (IN)  optional schema name; defaults to NULL
    --   p_db_link_name (IN)  optional database link name; defaults to NULL
    --   p_job_name     (OUT) name of the job
    --   p_status       (OUT) job status
    --   p_start_date   (OUT) job start timestamp
    --   p_is_running   (OUT) whether the job is running
    --   p_error_msg    (OUT) error message, if any
    --   p_run_duration (OUT) run duration as INTERVAL DAY TO SECOND
    -- NOTE(review): the OUT values when no matching job exists are not
    -- visible in this specification - confirm in the package body.
    PROCEDURE get_job(
        p_table_name    IN VARCHAR2,
        p_column_name   IN VARCHAR2,
        p_schema_name   IN VARCHAR2 DEFAULT NULL,
        p_db_link_name  IN VARCHAR2 DEFAULT NULL,
        p_job_name      OUT VARCHAR2,
        p_status        OUT VARCHAR2,
        p_start_date    OUT TIMESTAMP,
        p_is_running    OUT BOOLEAN,
        p_error_msg     OUT VARCHAR2,
        p_run_duration  OUT INTERVAL DAY TO SECOND
    );

    -- Fetches up to p_n texts for one drilldown, selected by similarity to
    -- p_query.
    -- NOTE: the procedure name spells "similarity" as "similairty". It is
    -- part of the public interface, so it is kept as is; renaming requires
    -- updating the package body and every caller together.
    -- Parameters:
    --   p_query        (IN)  query text to compare against
    --   p_drilldown_id (IN)  drilldown whose texts are searched
    --   p_n            (IN)  number of texts to return; defaults to 3
    --   p_use_records  (IN)  defaults to TRUE
    --                        NOTE(review): presumably includes search records
    --                        in the candidate set - confirm
    --   p_texts        (OUT) SYS.ODCIVARCHAR2LIST of the selected texts
    PROCEDURE fetch_top_n_similairty_search_drilldown_text (
        p_query             IN  VARCHAR2,
        p_drilldown_id      IN  VARCHAR2,
        p_n                 IN  NUMBER DEFAULT 3,
        p_use_records       IN  BOOLEAN DEFAULT TRUE,
        p_texts             OUT SYS.ODCIVARCHAR2LIST
    );

    -- Fetches up to p_n texts for one report, selected by similarity to
    -- p_query.
    -- NOTE: the procedure name spells "similarity" as "similairty". It is
    -- part of the public interface, so it is kept as is; renaming requires
    -- updating the package body and every caller together.
    -- Parameters:
    --   p_query       (IN)  query text to compare against
    --   p_report_id   (IN)  report whose texts are searched
    --   p_n           (IN)  number of texts to return; defaults to 3
    --   p_use_records (IN)  defaults to TRUE
    --                       NOTE(review): presumably includes search records
    --                       in the candidate set - confirm
    --   p_texts       (OUT) SYS.ODCIVARCHAR2LIST of the selected texts
    PROCEDURE fetch_top_n_similairty_search_report_text (
        p_query             IN  VARCHAR2,
        p_report_id         IN  VARCHAR2,
        p_n                 IN  NUMBER DEFAULT 3,
        p_use_records       IN  BOOLEAN DEFAULT TRUE,
        p_texts             OUT SYS.ODCIVARCHAR2LIST
    );

    -- Procedure to get top k similarity search reports for a given query.
    -- Parameters:
    --   p_query       (IN)  query text
    --   p_k           (IN)  number of reports to return; defaults to 5
    --   p_use_records (IN)  defaults to TRUE
    --                       NOTE(review): presumably includes search records
    --                       in the comparison - confirm
    --   p_domain_id   (IN)  optional domain ID; defaults to NULL
    --   p_results     (OUT) SYS_REFCURSOR over the matching reports
    --                       NOTE(review): the column list is defined in the
    --                       package body
    PROCEDURE fetch_top_k_similarity_search_reports (
        p_query             IN  VARCHAR2,
        p_k                 IN  NUMBER DEFAULT 5,
        p_use_records       IN  BOOLEAN DEFAULT TRUE,
        p_domain_id         IN  VARCHAR2 DEFAULT NULL,
        p_results           OUT SYS_REFCURSOR
    );

    -- Centroid-based variant of fetch_top_k_similarity_search_reports; it has
    -- no p_use_records parameter.
    -- NOTE(review): presumably compares the query against the centroid
    -- vectors in LANGDATA$REPORTQUERYCLUSTER - confirm in the package body.
    -- Parameters:
    --   p_query     (IN)  query text
    --   p_k         (IN)  number of reports to return; defaults to 5
    --   p_domain_id (IN)  optional domain ID; defaults to NULL
    --   p_results   (OUT) SYS_REFCURSOR over the matching reports
    PROCEDURE fetch_top_k_similarity_search_reports_using_centroid (
        p_query             IN  VARCHAR2,
        p_k                 IN  NUMBER DEFAULT 5,
        p_domain_id         IN  VARCHAR2 DEFAULT NULL,
        p_results           OUT SYS_REFCURSOR
    );

    -- Procedure to update value of the given column of a table based on the
    -- given id.
    -- Parameters:
    --   p_id            (IN) ID identifying the row to update
    --   p_table_name    (IN) table to update
    --   p_column_name   (IN) column to set
    --   p_value         (IN) new value, passed as VARCHAR2
    --   p_error_message (IN) optional; defaults to NULL
    --                        NOTE(review): presumably the message used when
    --                        the update fails - confirm in the package body
    -- NOTE(review): table and column names arrive as strings, so the body
    -- most likely builds dynamic SQL; verify that callers pass trusted or
    -- validated identifiers.
    PROCEDURE update_table_column (
        p_id            IN VARCHAR2,
        p_table_name    IN VARCHAR2,
        p_column_name   IN VARCHAR2,
        p_value         IN VARCHAR2,
        p_error_message IN VARCHAR2 DEFAULT NULL
    );

    -- Procedure to fetch top k similarity drilldown documents based on the
    -- given query and the matched report id.
    -- Parameters:
    --   p_query             (IN)  query text
    --   p_matched_report_id (IN)  ID of the report already matched
    --   p_k                 (IN)  number of drilldowns to return; defaults
    --                             to 5
    --   p_use_records       (IN)  defaults to TRUE
    --                             NOTE(review): presumably includes search
    --                             records in the comparison - confirm
    --   p_domain_id         (IN)  optional domain ID; defaults to NULL
    --   p_results           (OUT) SYS_REFCURSOR over the matching drilldowns
    PROCEDURE fetch_top_k_similarity_search_drilldowns (
        p_query             IN  VARCHAR2,
        p_matched_report_id IN  VARCHAR2,
        p_k                 IN  NUMBER DEFAULT 5,
        p_use_records       IN  BOOLEAN DEFAULT TRUE,
        p_domain_id         IN  VARCHAR2 DEFAULT NULL,
        p_results           OUT SYS_REFCURSOR
    );

    -- Centroid-based variant of fetch_top_k_similarity_search_drilldowns; it
    -- has no p_use_records parameter.
    -- NOTE(review): presumably compares the query against the centroid
    -- vectors in LANGDATA$DRILLDOWNQUERYCLUSTER - confirm in the package body.
    -- Parameters:
    --   p_query             (IN)  query text
    --   p_matched_report_id (IN)  ID of the report already matched
    --   p_k                 (IN)  number of drilldowns to return; defaults
    --                             to 5
    --   p_domain_id         (IN)  optional domain ID; defaults to NULL
    --   p_results           (OUT) SYS_REFCURSOR over the matching drilldowns
    PROCEDURE fetch_top_k_similarity_search_drilldowns_using_centroid (
        p_query             IN  VARCHAR2,
        p_matched_report_id IN  VARCHAR2,
        p_k                 IN  NUMBER DEFAULT 5,
        p_domain_id         IN  VARCHAR2 DEFAULT NULL,
        p_results           OUT SYS_REFCURSOR
    );
    

    -- Re-augments the texts affected by a given value.
    -- NOTE(review): purpose inferred from the name; how "affected" texts are
    -- found and which tables are rewritten is defined in the package body.
    -- Parameters:
    --   p_value (VARCHAR2, IN by default) the value whose dependent texts
    --           are re-augmented
    PROCEDURE re_augment_affected_texts(
        p_value VARCHAR2 
    );
    -- Updates the texts affected by a change to one table column.
    -- NOTE(review): purpose inferred from the name; the exact update rules
    -- and the role of p_enumerable are defined in the package body.
    -- Parameters:
    --   p_table_name   (IN) table name
    --   p_column_name  (IN) column name
    --   p_enumerable   (IN) BOOLEAN flag (required, no default)
    --   p_schema_name  (IN) schema name (required, no default)
    --   p_db_link_name (IN) optional database link name; defaults to NULL
    PROCEDURE update_affected_texts(
        p_table_name  IN VARCHAR2,
        p_column_name IN VARCHAR2,
        p_enumerable  IN BOOLEAN,
        p_schema_name IN VARCHAR2,
        p_db_link_name  IN VARCHAR2 DEFAULT NULL
    );

    -- Checks one table column for annotation changes.
    -- NOTE(review): what happens when a change is detected, and what
    -- p_immediate switches between (presumably inline processing versus
    -- deferring to a background job), is defined in the package body.
    PROCEDURE check_annotation_changes_for_table_column(
        p_table_name  IN VARCHAR2,  -- Table name to check
        p_column_name IN VARCHAR2,  -- Column name to check
        p_schema_name IN VARCHAR2,  -- Schema of the table (required)
        p_db_link_name  IN VARCHAR2 DEFAULT NULL,  -- Optional database link
        p_immediate   IN BOOLEAN DEFAULT FALSE  -- Defaults to FALSE
    );

    -- Parameterless procedure that checks user tables for annotation changes.
    -- NOTE(review): presumably applies
    -- check_annotation_changes_for_table_column to every registered
    -- table/column - confirm in the package body.
    PROCEDURE check_annotation_changes_for_user_tables;

    -- Public Procedure to create or replace background job.
    -- All parameters except p_repeat_interval are required.
    -- NOTE(review): the parameter names mirror DBMS_SCHEDULER job attributes
    -- (job class, priority, restart flags, repeat interval), so the body
    -- presumably wraps DBMS_SCHEDULER - confirm there.
    PROCEDURE create_or_replace_job (
        p_job_name        IN VARCHAR2,   -- Name of the job
        p_job_action      IN VARCHAR2,   -- PL/SQL block or program to execute
        p_job_class       IN VARCHAR2,   -- Job class to associate
        p_comments        IN VARCHAR2,   -- Comments for the job
        p_priority        IN NUMBER,     -- Job priority (1 = highest)
        p_restart_on_fail IN BOOLEAN,    -- Restart on failure
        p_restart_on_rec  IN BOOLEAN,    -- Restart on recovery
        p_repeat_interval  IN VARCHAR2 DEFAULT NULL  -- Optional repeat interval
    );

    -- Parameterless function returning a newly generated ID as VARCHAR2.
    -- NOTE(review): the ID format and its uniqueness guarantees are defined
    -- in the package body.
    FUNCTION generate_id RETURN VARCHAR2;

    -- Updates the query cluster of the report p_report_id.
    -- NOTE(review): presumably refreshes the CENTROID_VECTOR rows kept in
    -- LANGDATA$REPORTQUERYCLUSTER - confirm in the package body.
    -- Parameters:
    --   p_report_id (VARCHAR2, IN by default) report whose cluster is updated
    PROCEDURE update_report_query_cluster(
        p_report_id VARCHAR2
    );

    -- Updates the query cluster of the drilldown p_drilldown_id.
    -- NOTE(review): presumably refreshes the CENTROID_VECTOR rows kept in
    -- LANGDATA$DRILLDOWNQUERYCLUSTER - confirm in the package body.
    -- Parameters:
    --   p_drilldown_id (VARCHAR2, IN by default) drilldown whose cluster is
    --                  updated
    PROCEDURE update_drilldown_query_cluster(
        p_drilldown_id VARCHAR2
    );

    -- Decrements the reference counts of the value-vector tables used by a
    -- match document.
    -- NOTE(review): purpose inferred from the name; what happens when a
    -- count reaches zero is defined in the package body.
    -- Parameters:
    --   p_match_document (IN) match document (JSON) whose references are
    --                         released
    PROCEDURE decrement_vector_table_references (
        p_match_document  IN JSON
    );

    -- Parameterless function returning the schema version as VARCHAR2.
    -- NOTE(review): where the version is stored and its format are defined
    -- in the package body.
    FUNCTION get_schema_version
    RETURN VARCHAR2;

    -- Drops the value-vector storage created from enumerable sets for one
    -- document.
    -- NOTE(review): purpose inferred from the name; presumably the cleanup
    -- counterpart of create_value_vector_from_enumerable_set - confirm in the
    -- package body.
    -- Parameters:
    --   p_document_id    (IN) ID of the owning document
    --   p_match_document (IN) match document (JSON) listing the filters
    PROCEDURE drop_enumerable_set_value_vector_tables (
        p_document_id     IN VARCHAR2,
        p_match_document  IN JSON
    );

    -- Returns p_text augmented with the supplied named-entity data.
    -- Parameters:
    --   p_text     (IN) text to augment
    --   p_entities (IN) JSON value holding the entities
    --                   NOTE(review): the expected JSON layout is not visible
    --                   in this specification
    -- Returns: VARCHAR2 - the augmented text
    FUNCTION augment_text_with_ner_entities (
        p_text IN VARCHAR2,
        p_entities IN JSON
    ) RETURN VARCHAR2;

    -- Returns the embedding of p_document as a VECTOR.
    -- NOTE(review): the embedding model and the vector dimension are chosen
    -- in the package body.
    FUNCTION get_embedding(p_document IN VARCHAR2)
    RETURN VECTOR;

    -- Looks up the domain named p_domain_name, creating it when it is
    -- missing.
    -- The function takes only the domain name; no distance, gamma or vector
    -- dimension arguments are part of this signature.
    -- Parameters:
    --   p_domain_name (IN) name of the domain
    -- Returns: VARCHAR2 - presumably the domain ID (confirm in the body)
    FUNCTION get_or_create_domain(
        p_domain_name IN VARCHAR2
    ) RETURN VARCHAR2;

    -- Returns the ID of the domain named p_domain_name as VARCHAR2.
    -- NOTE(review): the result for an unknown domain name (NULL versus a
    -- raised exception) is defined in the package body.
    FUNCTION get_domain_id(
        p_domain_name IN VARCHAR2
    ) RETURN VARCHAR2;

    -- Converts a distance into a match confidence.
    -- Parameters:
    --   p_distance (IN) distance value
    --                   NOTE(review): presumably a vector distance from a
    --                   similarity search - confirm
    -- Returns: NUMBER - the confidence; its scale and the mapping formula are
    --          defined in the package body.
    FUNCTION get_match_confidence (
        p_distance     IN NUMBER
    ) RETURN NUMBER;

    -- Renames the domain identified by p_domain_id to p_new_name.
    -- Parameters:
    --   p_domain_id (IN) unique identifier of the domain to update
    --   p_new_name  (IN) new name for the domain
    -- Returns: BOOLEAN - TRUE if the update was successful, FALSE otherwise
    -- Exceptions (per the package header documentation):
    --   c_resource_not_found - the domain does not exist
    --   c_invalid_update     - the new name already exists for another domain
    FUNCTION update_domain(
        p_domain_id IN VARCHAR2,
        p_new_name IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Deletes the domain identified by p_domain_id from langdata$domains.
    -- Parameters:
    --   p_domain_id (IN) unique identifier of the domain to delete
    -- Returns: BOOLEAN - TRUE if the deletion was successful, FALSE otherwise
    -- Exceptions (per the package header documentation):
    --   c_invalid_update - the domain does not exist, or it is referenced in
    --                      reports or drilldowns
    FUNCTION delete_domain(
        p_domain_id IN VARCHAR2
    ) RETURN BOOLEAN;

    -- Retrieves all domains from langdata$domains.
    -- Returns: SYS_REFCURSOR containing the list of all domains.
    FUNCTION get_all_domains RETURN SYS_REFCURSOR;
    
    -- Converts a JSON_ARRAY_T into a CLOB.
    -- Parameters:
    --   p_json_array (JSON_ARRAY_T, IN by default) array to convert
    -- Returns: CLOB
    -- NOTE(review): presumably the serialized JSON text of the array; the
    -- result for a NULL or empty array is defined in the package body.
    FUNCTION json_array_to_clob (
        p_json_array JSON_ARRAY_T
    ) RETURN CLOB;

    -- Utility method to print a CLOB for debugging purposes.
    -- Parameters:
    --   p_clob (IN) CLOB to print
    -- NOTE(review): the output channel (presumably DBMS_OUTPUT) is chosen in
    -- the package body.
    PROCEDURE print_clob(p_clob IN CLOB);

    -- Synchronizes changed values across all registered user tables and
    -- columns. Takes no parameters; background_fixup runs it as its first
    -- step, per the package header documentation.
    PROCEDURE sync_user_tables_changed_values;

    -- Manages vector indexes on the key langdata tables based on row volume,
    -- per the package header documentation: an existing index is dropped
    -- when the table has fewer than 10,000 rows, and a missing index is
    -- created when it has 10,000 or more. Takes no parameters.
    PROCEDURE manage_langdata_tables_vector_indexes;

    -- Composite background maintenance procedure. Per the package header
    -- documentation it performs, in order:
    --   1. sync_user_tables_changed_values
    --   2. manage_langdata_tables_vector_indexes
    -- It is designed to be scheduled as a background job. Takes no
    -- parameters.
    PROCEDURE background_fixup;

    -- Utility method to retrieve the text of a description, sample query or
    -- search record using its ID.
    -- Needed for APEX: passing the "long" description texts directly breaks
    -- when they contain characters like ',', because the text following that
    -- character leaks into other page items. Passing the ID and fetching the
    -- text here avoids that.
    -- Parameters:
    --   p_description_id (IN) ID of the description, sample query or search
    --                         record
    -- Returns: VARCHAR2 - the text belonging to that ID
    FUNCTION get_text_by_id(
        p_description_id IN VARCHAR2
    ) RETURN VARCHAR2;

    -- Capitalizes a schema name only if it does not include double quotes,
    -- so that quoted mixed-case schema names are preserved.
    -- Parameters:
    --   p_schema_name (IN) schema name to normalize
    -- Returns: VARCHAR2 - the capitalized schema name, or the unmodified
    --          input when it includes double quotes
    FUNCTION normalize_schema_name (
        p_schema_name IN VARCHAR2
    ) RETURN VARCHAR2;

    -- Validates the provided vector to ensure it has the required dimensions.
    -- Parameters:
    --   p_vector (IN) vector to validate
    -- Returns: BOOLEAN - TRUE if the vector is valid, FALSE otherwise
    -- NOTE(review): the required dimension count is defined in the package
    -- body.
    FUNCTION validate_vector(
        p_vector IN VECTOR
    ) RETURN BOOLEAN;

END lang_data_utils_pkg;
/
