Rem
Rem $Header: dbgendev/src/langdata/plsql/analytics/analytics_pkg.pks /main/15 2025/07/23 21:20:40 ruohli Exp $
Rem
Rem analytics_pkg.pks
Rem
Rem Copyright (c) 2024, 2025, Oracle and/or its affiliates.
Rem
Rem    NAME
Rem      analytics_pkg.pks - Lang data analytics package
Rem
Rem    DESCRIPTION
Rem      This package contains specifications of the procedures for analytics 
Rem      of the lang data package.
Rem
Rem    NOTES
Rem      None
Rem
Rem    BEGIN SQL_FILE_METADATA
Rem    SQL_SOURCE_FILE: dbgendev/src/langdata/plsql/analytics/analytics_pkg.pks
Rem    SQL_SHIPPED_FILE:
Rem    SQL_PHASE:
Rem    SQL_STARTUP_MODE: NORMAL
Rem    SQL_IGNORABLE_ERRORS: NONE
Rem    END SQL_FILE_METADATA
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    ruohli      07/21/25 - DBAI-1111: get_matched_queries_by_drilldown_rank
Rem                           and get_drilldown_filter_usage
Rem    sathyavc    07/07/25 - DBAI-883: Add functions to track API metrics over 
Rem                           specified time windows. Remove update_api_metrics
Rem    pryarla     07/03/25 - DBAI-882: Added get_top_filters function
Rem    ruohli      07/01/25 - DBAI-884: Added get_matched_queries_by_report_rank
Rem    sathyavc    06/26/25 - DBAI-881: Add top reports, bottom reports and
Rem                           most asked questions
Rem    sathyavc    06/25/25 - DBAI-881: Modify feedback count APIs to be
Rem                           filtered based on time.
Rem    anisbans    06/13/25 - Added the function 
Rem                           get_top_k_most_searched_reports
Rem    saloshah    05/19/25 - DBAI-746: Added the update_api_metrics
Rem    arevathi    04/07/25 - Added Drilldown Metrics
Rem    anisbans    02/21/25 - Return JSON_OBJECT_T in get_metrics
Rem    jiangnhu    02/14/25 - DBAI-575: Remove c_unknown_exception_code
Rem    deveverm    10/30/24 - Created
Rem

CREATE OR REPLACE PACKAGE lang_data_analytics_pkg IS

/*
    ----------------------------------------------------------------------
    Package Name: lang_data_analytics_pkg
    Description: This package contains procedures related to analytics of 
    langdata. Each procedure returns different metrics over the langdata search 
    record tables
    ----------------------------------------------------------------------
    Author: Deepanshu Verma
    Created: 10/30/2024
    ----------------------------------------------------------------------
    Public Procedures:
    ----------------------------------------------------------------------
    
    PROCEDURE get_metrics
    ----------------------------------------------------------------------
    Description:
        Calculates the performance of lang data, using different metrics.
        Note: It uses get_all_user_search_records internally, which fetches the 
        records of the current user (Unless you have LANG_DATA_APP_EXPERT 
        role). Hence the metrics are calculated on the records returned by 
        get_all_user_search_records procedure.
        
        Parameters:
        - p_metrics (OUT JSON_OBJECT_T):
            JSON object containing the following metrics:
                - success_rate
                - precision_at_1
                - precision_at_3
                - mean_reciprocal_rank
                - positive_feedback_percentage
                - precision_drilldown
                - precision_drilldown_within_report
                - filter_success_rate
                - filter_failure_no_data
                - filter_failure_no_fuzzy
                - filter_failure_no_entity
                - total_positive_feedback
                - total_negative_feedback
                - total_no_feedback

        - p_timestamp (IN TIMESTAMP DEFAULT NULL)
            Timestamp at or after which the feedback counts for the 
            `total_positive_feedback`, `total_negative_feedback`, and 
            `total_no_feedback` metrics will be tallied. By default (NULL), 
            these metrics will be measured for all time. 

    Exceptions:        
        - None.
    ----------------------------------------------------------------------
    Metrics: 
    
    1. Success rate:
        This metric is the percentage of queries with feedback/acceptance whose 
        accepted report belongs to the list of reports suggested by langdata. 
    
    2. Precision Metrics (k):
        This metric is the percentage of queries with feedback/acceptance whose 
        accepted report belongs to the top k reports suggested by langdata. 
    
    3. Mean_reciprocal_rank:
        This metric is the mean of the reciprocal of the rank of the accepted 
        report across all the queries. This measures the overall ranking 
        process of langdata. A higher score means a better ranking process.
    
    4. Positive feedback percentage:
        This metric shows the percentage of feedbacks given by the end user 
        that were Positive. 

    5. Drilldown Precision Metrics (k):
        Percentage of queries with user feedback where the top-ranked drilldown
        matches the expected drilldown.
    
    6. Drilldown Precision Metrics with-in Accepted Report (k): 
        Percentage of queries where the expected drilldown matches the 
        top-ranked drilldown, given that its corresponding report is the 
        expected report.

    7. Identified Filters (filter_success_rate): This metric is the percentage 
        of filters with non-default values in all accepted reports.

    8. Filter Failure due to no data (filter_failure_no_data): This metric is 
        the percentage of filter identification that failed due to missing or 
        empty value vector tables.

    9. Filter Failure due to no match with fuzzy (filter_failure_no_fuzzy): 
        This metric is the percentage of filter identifications that failed 
        and fell back to default values because the top 3 results failed 
        fuzzy matching.

    10. Filter Failure due to no entity (filter_failure_no_entity):
        This metric is the percentage of filter identifications that failed 
        and fell back to default values because no entity of the given type 
        was found in the search record.
    
    11. Total number of positive feedbacks :
        This metric is used to retrieve the total number of positive feedback 
        ratings for the report matches corresponding to search records.

    12. Total number of negative feedbacks :
        This metric is used to retrieve the total number of negative feedback 
        ratings for the report matches corresponding to search records.

    13. Total number of no feedbacks :
        This metric is used to retrieve the total number of records that do 
        not have any feedback ratings for the report matches corresponding to 
        search records.
    
    14. Top report metrics :
        This metric is used to show the number of times a report with the given
        ID has been identified as top 1, top 3 and top 5 using its similarity 
        rank.

    15. Top drilldown metrics :
        This metric is used to show the number of times a drilldown document 
        with the given ID has been identified as top 1, top 3 and top 5 
        using its similarity rank.
         
    PROCEDURE update_api_logs
----------------------------------------------------------------------
    Description:
        Logs every time an external API call is made. It calculates the time
        taken by the latest API call, the average time taken, the number of 
        times the API is called and the number of times the call fails.

    Parameters:
----------------------------------------------------------------------
        - p_api_name (IN VARCHAR2):
            The name of the external API whose call is being logged.

        - p_start_time (IN NUMBER):
            This parameter is used to record the start time of the API.
            
        - p_end_time  (IN NUMBER):
            This parameter is used to record when the API call ended.

        - p_failed (IN BOOLEAN DEFAULT FALSE):
            This field is used to indicate success or failure of an API call.
            By default, it is set to FALSE. In case the API call has failed,
            this value would be TRUE.            
        
        - p_failure_code (IN NUMBER DEFAULT NULL):
            This parameter is used to record the error code (SQLCODE) in case of
            a failure. By default, it is set to NULL (in cases of success).
        
        - p_failure_message (IN VARCHAR2 DEFAULT NULL):
            This parameter is used to record the error message (SQLERRM) in case 
            of a failure. By default, it is set to NULL (in cases of success).  

    Exceptions:
----------------------------------------------------------------------
        - None  

    FUNCTION get_all_api_stats
----------------------------------------------------------------------
    Description:
        Calculates number of calls, average call time, last call duration,
        last called timestamp, and failure count for all external APIs.

    Parameters:
----------------------------------------------------------------------
        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp at or after which API statistics are calculated.

    Returns:
----------------------------------------------------------------------
        - CLOB representing JSON array of one object containing statistics for 
        each external API as key-value pairs.   

    Exceptions:
----------------------------------------------------------------------
        - None   

    FUNCTION get_api_daily_stats
----------------------------------------------------------------------
    Description:
        Calculates daily number of calls and average call time for the specified 
        external API.

    Parameters:
----------------------------------------------------------------------
        - p_api_name (IN VARCHAR2)
            The name of the API for which the API statistics are calculated.

        - p_num_error_messages (IN NUMBER DEFAULT 5)
            The maximum number of chronologically latest error messages per day
            returned. 

        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp at or after which API statistics are calculated.

    Returns:
----------------------------------------------------------------------
        - CLOB representing JSON array of one object containing statistics of 
        the external API for each day as key-value pairs.   

    Exceptions:
----------------------------------------------------------------------
        - None   

    FUNCTION get_api_failures
----------------------------------------------------------------------
    Description:
        Calculates failure statistics grouped by error code for the specified 
        external API.

    Parameters:
----------------------------------------------------------------------
        - p_api_name (IN VARCHAR2)
            The name of the API for which the failure statistics are calculated.

        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp at or after which failure statistics are calculated.

    Returns:
----------------------------------------------------------------------
        - CLOB representing JSON array of one object containing failure 
        statistics of the external API for each failure code as key-value pairs.   

    Exceptions:
----------------------------------------------------------------------
        - None   

    FUNCTION get_top_k_most_searched_reports
------------------------------------------------------------------------------
    Description:
        Returns the top K most searched reports based on the `search_count`
        stored in the `analytics_data` JSON column of the `langdata$reports` 
        table. This function assumes that `update_report_search_count` 
        has already been called for relevant search records, ensuring that 
        the `search_count` is up-to-date.
        
    Parameters:
----------------------------------------------------------------------
        - p_k  (IN NUMBER):
            The number of top searched reports to return. 

        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp at or after which the search count for the most 
            searched reports should be considered.
        
    Returns:
----------------------------------------------------------------------
        - CLOB representing a JSON array containing the top K most searched 
            reports. Each report object includes the following keys:
            - "id"           : Report ID
            - "report_title" : Title of the report
            - "search_count" : Number of times the report was searched

    Exceptions: 
----------------------------------------------------------------------       
        - None.

    FUNCTION get_top_filters
------------------------------------------------------------------------------
    Description:
        This function analyzes recent report match documents to identify the 
        most frequently used filters, separating them into NER (Named Entity 
        Recognition) and enumerated filters based on the `use_ner` flag. It 
        returns the top-K filters in each category based on their frequency 
        of use.
        
    Parameters:
----------------------------------------------------------------------
        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            If provided, only report matches created at or after this timestamp 
            will be considered. If NULL, all report matches will be processed.

        - p_top_k (IN NUMBER):
            The number of top filters to return for each category 
            (NER and enumerated). The output will include at most K filters 
            per category, sorted by usage frequency in descending order.

    Returns:
----------------------------------------------------------------------
        - CLOB representing a JSON object containing two arrays: 
            - `top_ner_filters`: An array of JSON objects, each with a 
            `filter_name` and `count`, representing the most frequent NER 
            filters.
            - `top_enumerated_filters`: Same structure, but for filters that 
            do not use NER (i.e., `use_ner = false`).

    Exceptions: 
----------------------------------------------------------------------       
        - None.

    FUNCTION get_bottom_k_least_searched_reports
------------------------------------------------------------------------------
    Description:
        Returns the bottom K least searched reports based on the `search_count`
        stored in the `analytics_data` JSON column of the `langdata$reports` 
        table. This function assumes that `update_report_search_count` 
        has already been called for relevant search records, ensuring that 
        the `search_count` is up-to-date.
        
    Parameters:
----------------------------------------------------------------------
        - p_k  (IN NUMBER):
            The number of least searched reports to return.

        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp at or after which the search count for the least 
            searched reports should be considered.
        
    Returns:
----------------------------------------------------------------------
        - CLOB representing a JSON array containing the bottom K least 
            searched reports. Each report object includes the following keys:
            - "id"           : Report ID
            - "report_title" : Title of the report
            - "search_count" : Number of times the report was searched

    Exceptions: 
----------------------------------------------------------------------       
        - None.

    FUNCTION get_top_k_most_asked_questions
------------------------------------------------------------------------------
    Description:
        Returns the top K most asked questions, last asked after the specified 
        timestamp, based on the `asked_count` stored in the 
        `langdata$question_stats` table.
        
    Parameters:
----------------------------------------------------------------------
        - p_k  (IN NUMBER):
            The number of most asked questions to return. 
        
        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp after which the most asked questions should be
            `last_asked`.
        
    Returns:
----------------------------------------------------------------------
        - CLOB representing a JSON array containing the top K most asked 
            questions. Each object includes the following keys:
            - "question_id"   : Question ID
            - "question_text" : Text of the question
            - "asked_count"   : Number of times the question was asked

    Exceptions: 
----------------------------------------------------------------------       
        - None.

    FUNCTION get_top_k_most_negative_feedback_questions
------------------------------------------------------------------------------
    Description:
        Returns the top K questions with the highest count of negative feedback,
        last asked after the specified timestamp.
        
    Parameters:
----------------------------------------------------------------------
        - p_k  (IN NUMBER):
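            The number of questions to return. 
        
        - p_timestamp (IN TIMESTAMP DEFAULT NULL):
            The timestamp after which the returned questions should be
            `last_asked`.
        
    Returns:
----------------------------------------------------------------------
        - CLOB representing a JSON array containing the top K questions with 
            the highest count of negative feedback. Each object includes the 
            following keys:
            - "question_id"   : Question ID
            - "question_text" : Text of the question
            - "asked_count"   : Number of times the question was asked 
              (NOTE(review): this key list appears to be copied from 
              get_top_k_most_asked_questions; confirm the key names against 
              the package body)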

    Exceptions: 
----------------------------------------------------------------------       
        - None.

    PROCEDURE get_matched_queries_by_report_rank
------------------------------------------------------------------------------
    Description:
        This procedure retrieves a list of queries matched by report rank.
        
    Parameters:
----------------------------------------------------------------------
        - p_report_id (IN VARCHAR2): The ID of the report to retrieve 
                                     queries from. 

        - p_rank      (IN PLS_INTEGER): The rank of the report matches 
                                        to retrieve (1-based).
        - p_k         (IN PLS_INTEGER): The maximum number of queries to return. 
        - p_queries   (OUT SYS.ODCIVARCHAR2LIST): The list of retrieved queries. 

    Exceptions: 
----------------------------------------------------------------------       
        - None.


    PROCEDURE get_report_filter_usage
------------------------------------------------------------------------------
    Description:
        This procedure retrieves the filter usage statistics for a given report.
        
    Parameters:
----------------------------------------------------------------------
        - p_report_id (IN VARCHAR2): The ID of the report to retrieve 
                                     filter usage statistics from.
        - p_default_count (OUT NUMBER): The count of default filters 
                                         used in the report. 
        - p_non_default_count (OUT NUMBER): The count of non-default 
                                            filters used in the report.

    Exceptions: 
----------------------------------------------------------------------       
        - c_invalid_parameters_code: Raised when p_report_id does not exist.

    PROCEDURE get_matched_queries_by_drilldown_rank
------------------------------------------------------------------------------
    Description:
        This procedure retrieves a list of queries matched by drilldown rank.
        
    Parameters:
----------------------------------------------------------------------
        - p_drilldown_id (IN VARCHAR2): The ID of the drilldown to retrieve 
                                     queries from. 

        - p_rank      (IN PLS_INTEGER): The rank of the drilldown matches 
                                        to retrieve (1-based).
        - p_k         (IN PLS_INTEGER): The maximum number of queries to return. 
        - p_queries   (OUT SYS.ODCIVARCHAR2LIST): The list of retrieved queries. 

    Exceptions: 
----------------------------------------------------------------------       
        - None.


    PROCEDURE get_drilldown_filter_usage
------------------------------------------------------------------------------
    Description:
        This procedure retrieves the filter usage statistics for a given 
        drilldown.
        
    Parameters:
----------------------------------------------------------------------
        - p_drilldown_id (IN VARCHAR2): The ID of the drilldown to retrieve 
                                     filter usage statistics from.
        - p_default_count (OUT NUMBER): The count of default filters 
                                         used in the drilldown. 
        - p_non_default_count (OUT NUMBER): The count of non-default 
                                            filters used in the drilldown.

    Exceptions: 
----------------------------------------------------------------------       
        - c_invalid_parameters_code: Raised when p_drilldown_id does not exist.

*/

    -- Computes the "success rate" metric over a set of records.
    -- Per the package header, success rate is the percentage of queries with
    -- feedback/acceptance whose accepted report belongs to the list of
    -- reports suggested by langdata.
    --   p_records      : JSON array of records to evaluate (presumably the
    --                    search records gathered by get_metrics -- confirm
    --                    against the package body).
    --   p_success_rate : the computed success rate.
    PROCEDURE get_success_rate_metrics (
        p_records               IN JSON_ARRAY_T,
        p_success_rate          OUT NUMBER
    );

    -- Computes the "precision at k" metric over a set of records.
    -- Per the package header, this is the percentage of queries with
    -- feedback/acceptance whose accepted report belongs to the top k reports
    -- suggested by langdata (get_metrics reports it as precision_at_1 and
    -- precision_at_3).
    --   p_records   : JSON array of records to evaluate (presumably search
    --                 records -- confirm against the package body).
    --   k           : rank cutoff; only the top k suggested reports count.
    --   p_precision : the computed precision.
    PROCEDURE get_precision_metrics (
        p_records               IN JSON_ARRAY_T,
        k                       IN INTEGER,
        p_precision             OUT NUMBER
    );

    -- Computes the mean reciprocal rank (MRR) over a set of records.
    -- Per the package header, this is the mean of the reciprocal of the rank
    -- of the accepted report across all queries; a higher score means a
    -- better ranking process.
    --   p_records              : JSON array of records to evaluate
    --                            (presumably search records -- confirm).
    --   p_mean_reciprocal_rank : the computed MRR.
    PROCEDURE get_mean_reciprocal_rank (
        p_records               IN JSON_ARRAY_T,
        p_mean_reciprocal_rank  OUT NUMBER
    );
    
    -- Computes the positive feedback percentage over a set of records.
    -- Per the package header, this is the percentage of feedback given by
    -- the end user that was positive.
    --   p_records                      : JSON array of records to evaluate
    --                                    (presumably search records --
    --                                    confirm).
    --   p_positive_feedback_percentage : the computed percentage.
    PROCEDURE get_positive_feedback_percentage(
        p_records               IN JSON_ARRAY_T,
        p_positive_feedback_percentage    OUT NUMBER
    );

    -- Computes the drilldown precision metric over a set of records.
    -- Per the package header, this is the percentage of queries with user
    -- feedback where the top-ranked drilldown matches the expected drilldown.
    --   p_records   : JSON array of records to evaluate (presumably search
    --                 records -- confirm).
    --   k           : rank cutoff. NOTE(review): the header only describes
    --                 the top-ranked case; confirm how k is applied in the
    --                 package body.
    --   p_precision : the computed precision.
    PROCEDURE get_drilldown_precision_metrics(
        p_records               IN JSON_ARRAY_T,
        k                       IN INTEGER,
        p_precision             OUT NUMBER
    );

    -- Computes the drilldown precision metric restricted to the accepted
    -- report. Per the package header, this is the percentage of queries where
    -- the expected drilldown matches the top-ranked drilldown, given that its
    -- corresponding report is the expected report.
    --   p_records   : JSON array of records to evaluate (presumably search
    --                 records -- confirm).
    --   k           : rank cutoff. NOTE(review): the header only describes
    --                 the top-ranked case; confirm how k is applied in the
    --                 package body.
    --   p_precision : the computed precision.
    PROCEDURE get_drilldown_within_report_metrics (
        p_records               IN JSON_ARRAY_T,
        k                       IN INTEGER,
        p_precision             OUT NUMBER
    );

    -- Computes the filter identification metrics over a set of records.
    -- The OUT parameters presumably map onto the header metrics 7-10
    -- (filter_success_rate, filter_failure_no_data, filter_failure_no_fuzzy,
    -- filter_failure_no_entity) -- confirm the mapping in the package body.
    --   p_records            : JSON array of records to evaluate (presumably
    --                          search records -- confirm).
    --   p_success_rate       : share of filters identified with non-default
    --                          values.
    --   p_default_no_data    : share of filters that fell back to defaults
    --                          due to missing or empty value vector tables.
    --   p_default_fuzzy_fail : share of filters that fell back to defaults
    --                          because fuzzy matching failed.
    --   p_default_no_entity  : share of filters that fell back to defaults
    --                          because no entity of the given type was found.
    PROCEDURE get_filter_metrics(
        p_records               IN JSON_ARRAY_T,
        p_success_rate          OUT NUMBER,
        p_default_no_data       OUT NUMBER,
        p_default_fuzzy_fail    OUT NUMBER,
        p_default_no_entity     OUT NUMBER
    );
    
    -- Returns top report metrics as a JSON object.
    -- Per the package header, these metrics show the number of times the
    -- report with the given ID has been identified as top 1, top 3 and top 5
    -- using its similarity rank.
    --   p_report_id : ID of the report to compute the metrics for.
    --   p_metrics   : JSON object holding the metrics (key names are defined
    --                 in the package body, not visible here).
    PROCEDURE get_top_report_metrics(
        p_report_id     IN  VARCHAR2,
        p_metrics       OUT JSON_OBJECT_T
    );

    -- Returns the total number of positive feedback ratings for the report
    -- matches corresponding to search records (header metric 11).
    --   p_timestamp               : timestamp at or after which feedback is
    --                               counted; NULL (default) counts all time.
    --   p_total_positive_feedback : the resulting count.
    -- The defaulted p_timestamp precedes a required OUT parameter, so callers
    -- that want the default must use named notation.
    PROCEDURE get_total_positive_feedback(
        p_timestamp IN TIMESTAMP DEFAULT NULL,
        p_total_positive_feedback     OUT NUMBER
    );

    -- Returns the total number of negative feedback ratings for the report
    -- matches corresponding to search records (header metric 12).
    --   p_timestamp               : timestamp at or after which feedback is
    --                               counted; NULL (default) counts all time.
    --   p_total_negative_feedback : the resulting count.
    -- The defaulted p_timestamp precedes a required OUT parameter, so callers
    -- that want the default must use named notation.
    PROCEDURE get_total_negative_feedback(
        p_timestamp IN TIMESTAMP DEFAULT NULL,
        p_total_negative_feedback     OUT NUMBER
    );

    -- Returns the total number of records that do not have any feedback
    -- rating for the report matches corresponding to search records
    -- (header metric 13).
    --   p_timestamp         : timestamp at or after which records are
    --                         counted; NULL (default) counts all time.
    --   p_total_no_feedback : the resulting count.
    -- The defaulted p_timestamp precedes a required OUT parameter, so callers
    -- that want the default must use named notation.
    PROCEDURE get_total_no_feedback(
        p_timestamp IN TIMESTAMP DEFAULT NULL,
        p_total_no_feedback     OUT NUMBER
    );

    -- Logs one external API call (see "PROCEDURE update_api_logs" in the
    -- package header).
    --   p_api_name        : name of the external API that was called.
    --   p_start_time      : start time of the call (NUMBER; the unit is not
    --                       stated in this spec -- confirm with callers).
    --   p_end_time        : end time of the call, same unit as p_start_time.
    --   p_failed          : TRUE when the call failed; defaults to FALSE.
    --   p_failure_code    : error code (SQLCODE) on failure; NULL on success.
    --   p_failure_message : error message (SQLERRM) on failure; NULL on
    --                       success.
    PROCEDURE update_api_logs (
        p_api_name          IN VARCHAR2, 
        p_start_time        IN NUMBER,  
        p_end_time          IN NUMBER,
        p_failed            IN BOOLEAN DEFAULT FALSE,
        p_failure_code      IN NUMBER DEFAULT NULL,
        p_failure_message   IN VARCHAR2 DEFAULT NULL
    );

    -- Calculates number of calls, average call time, last call duration,
    -- last called timestamp and failure count for all external APIs.
    --   p_timestamp : timestamp at or after which API statistics are
    --                 calculated (defaults to NULL).
    -- Returns a CLOB holding a JSON array with the per-API statistics.
    FUNCTION get_all_api_stats (
        p_timestamp IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Calculates the daily number of calls and average call time for the
    -- specified external API.
    --   p_api_name           : name of the API to report on.
    --   p_num_error_messages : maximum number of chronologically latest
    --                          error messages returned per day (default 5).
    --   p_timestamp          : timestamp at or after which API statistics
    --                          are calculated (defaults to NULL).
    -- Returns a CLOB holding a JSON array with the per-day statistics.
    FUNCTION get_api_daily_stats (
        p_api_name           IN VARCHAR2,
        p_num_error_messages IN NUMBER DEFAULT 5,
        p_timestamp          IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Calculates failure statistics, grouped by error code, for the
    -- specified external API.
    --   p_api_name  : name of the API to report on.
    --   p_timestamp : timestamp at or after which failure statistics are
    --                 calculated (defaults to NULL).
    -- Returns a CLOB holding a JSON array with the per-failure-code
    -- statistics.
    FUNCTION get_api_failures (
        p_api_name  IN VARCHAR2,
        p_timestamp IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Returns top drilldown document metrics as a JSON object.
    -- Per the package header, these metrics show the number of times the
    -- drilldown document with the given ID has been identified as top 1,
    -- top 3 and top 5 using its similarity rank.
    --   p_drilldown_id : ID of the drilldown document to compute metrics for.
    --   p_metrics      : JSON object holding the metrics (key names are
    --                    defined in the package body, not visible here).
    PROCEDURE get_top_drilldown_metrics(
        p_drilldown_id  IN  VARCHAR2,
        p_metrics       OUT JSON_OBJECT_T
    );

    -- Calculates the langdata performance metrics and returns them in
    -- p_metrics as a JSON object (success_rate, precision_at_1,
    -- precision_at_3, mean_reciprocal_rank, ...; the package header lists
    -- every key).
    --   p_timestamp : timestamp at or after which the total_positive_feedback,
    --                 total_negative_feedback and total_no_feedback counts
    --                 are tallied; NULL (default) measures them for all time.
    --   p_metrics   : JSON object holding the metrics.
    -- The defaulted p_timestamp precedes a required OUT parameter, so callers
    -- that want the default must use named notation (p_metrics => ...).
    PROCEDURE get_metrics(
        p_timestamp IN TIMESTAMP DEFAULT NULL,
        p_metrics               OUT JSON_OBJECT_T
    );
    
    -- Returns the top K most searched reports, based on the search count
    -- described in the package header.
    --   p_k         : number of top searched reports to return.
    --   p_timestamp : timestamp at or after which the search count should be
    --                 considered (defaults to NULL).
    -- Returns a CLOB holding a JSON array of report objects with the keys
    -- "id", "report_title" and "search_count" (per the package header).
    FUNCTION get_top_k_most_searched_reports(
        p_k                     IN NUMBER,
        p_timestamp             IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Returns the bottom K least searched reports, based on the search count
    -- described in the package header.
    --   p_k         : number of least searched reports to return.
    --   p_timestamp : timestamp at or after which the search count should be
    --                 considered (defaults to NULL).
    -- Returns a CLOB holding a JSON array of report objects with the keys
    -- "id", "report_title" and "search_count" (per the package header).
    FUNCTION get_bottom_k_least_searched_reports(
        p_k                     IN NUMBER,
        p_timestamp             IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Returns the top K most asked questions, last asked after the specified
    -- timestamp, based on the asked count described in the package header.
    --   p_k         : number of most asked questions to return.
    --   p_timestamp : timestamp after which the questions must have been
    --                 last asked (defaults to NULL).
    -- Returns a CLOB holding a JSON array of question objects with the keys
    -- "question_id", "question_text" and "asked_count" (per the package
    -- header).
    FUNCTION get_top_k_most_asked_questions(
        p_k                     IN NUMBER,
        p_timestamp             IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Returns the top K questions with the highest count of negative
    -- feedback, last asked after the specified timestamp.
    --   p_k         : number of questions to return.
    --   p_timestamp : timestamp after which the questions must have been
    --                 last asked (defaults to NULL).
    -- Returns a CLOB holding a JSON array of question objects.
    -- NOTE(review): the key list in the package header looks copied from
    -- get_top_k_most_asked_questions; confirm the actual keys in the body.
    FUNCTION get_top_k_most_negative_feedback_questions(
        p_k                     IN NUMBER,
        p_timestamp             IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;

    -- Identifies the most frequently used filters in report match documents,
    -- split into NER and enumerated filters by the `use_ner` flag (see the
    -- package header).
    --   p_top_k     : number of top filters to return per category; at most
    --                 K per category, sorted by usage frequency descending.
    --   p_timestamp : when provided, only report matches created at or after
    --                 this timestamp are considered; NULL processes all.
    -- Returns a CLOB holding a JSON object with the arrays `top_ner_filters`
    -- and `top_enumerated_filters`, whose entries carry `filter_name` and
    -- `count`.
    FUNCTION get_top_filters(
        p_top_k         IN NUMBER,
        p_timestamp     IN TIMESTAMP DEFAULT NULL
    ) RETURN CLOB;
    

    -- Retrieves a list of queries matched by report rank.
    --   p_report_id : ID of the report to retrieve queries from.
    --   p_rank      : rank of the report matches to retrieve (1-based).
    --   p_k         : maximum number of queries to return.
    --   p_queries   : the list of retrieved queries.
    PROCEDURE get_matched_queries_by_report_rank (
        p_report_id IN VARCHAR2,
        p_rank      IN PLS_INTEGER,
        p_k         IN PLS_INTEGER,
        p_queries   OUT SYS.ODCIVARCHAR2LIST
    );

    -- Retrieves the filter usage statistics for a given report.
    --   p_report_id         : ID of the report to retrieve filter usage
    --                         statistics from.
    --   p_default_count     : count of default filters used in the report.
    --   p_non_default_count : count of non-default filters used in the
    --                         report.
    -- Raises c_invalid_parameters_code when p_report_id does not exist (per
    -- the package header).
    PROCEDURE get_report_filter_usage(
        p_report_id         IN  VARCHAR2,
        p_default_count     OUT NUMBER,
        p_non_default_count OUT NUMBER
    );

    -- Retrieves a list of queries matched by drilldown rank.
    --   p_drilldown_id : ID of the drilldown to retrieve queries from.
    --   p_rank         : rank of the drilldown matches to retrieve (1-based).
    --   p_k            : maximum number of queries to return.
    --   p_queries      : the list of retrieved queries.
    PROCEDURE get_matched_queries_by_drilldown_rank (
        p_drilldown_id IN VARCHAR2,
        p_rank      IN PLS_INTEGER,
        p_k         IN PLS_INTEGER,
        p_queries   OUT SYS.ODCIVARCHAR2LIST
    );

    -- Retrieves the filter usage statistics for a given drilldown.
    --   p_drilldown_id      : ID of the drilldown to retrieve filter usage
    --                         statistics from.
    --   p_default_count     : count of default filters used in the
    --                         drilldown.
    --   p_non_default_count : count of non-default filters used in the
    --                         drilldown.
    -- Raises c_invalid_parameters_code when p_drilldown_id does not exist
    -- (per the package header).
    PROCEDURE get_drilldown_filter_usage(
        p_drilldown_id      IN  VARCHAR2,
        p_default_count     OUT NUMBER,
        p_non_default_count OUT NUMBER
    );

    
END lang_data_analytics_pkg;
/
