Rem
Rem $Header: dbgendev/src/langdata/plsql/llm/llm_pkg.pks /main/4 2025/08/13 01:29:21 jiangnhu Exp $
Rem
Rem llm_pkg.pks
Rem
Rem Copyright (c) 2025, Oracle and/or its affiliates.
Rem
Rem    NAME
Rem      llm_pkg.pks - Langdata LLM Package
Rem
Rem    DESCRIPTION
Rem      This package handles all interactions with the LLMs.
Rem
Rem    NOTES
Rem      None
Rem
Rem    BEGIN SQL_FILE_METADATA
Rem    SQL_SOURCE_FILE: dbgendev/src/langdata/plsql/llm/llm_pkg.pks
Rem    SQL_SHIPPED_FILE:
Rem    SQL_PHASE:
Rem    SQL_STARTUP_MODE: NORMAL
Rem    SQL_IGNORABLE_ERRORS: NONE
Rem    END SQL_FILE_METADATA
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    jiangnhu    07/23/25 - DBAI-764: Implement
Rem                           rerank_filter_value_candidates
Rem    pryarla     07/21/25 - DBAI-1075: Generate sample queries using LLM
Rem    pryarla     05/15/25 - DBAI-737: Created LLM package
Rem

CREATE OR REPLACE PACKAGE lang_data_llm_pkg IS

    -- Public specification of the Langdata LLM package. It declares the
    -- entry points used to interact with an LLM: profile validation, raw
    -- prompting, named-entity detection, filter-value reranking and sample
    -- query generation. Behavioral details in the comments below describe
    -- the intended contract; the implementation lives in the package body.

    /**
    Function: validate_llm_profile

    Description:
        Validates if a given DBMS_CLOUD_AI profile name is functional.
        It does this by invoking a simple prompt using the profile and
        checking for successful execution.

    Parameters:
        - p_profile_name (IN VARCHAR2):
            The name of the DBMS_CLOUD_AI profile to validate.

    Returns:
        - BOOLEAN:
            TRUE if the LLM call succeeds with the given profile;
            otherwise, an exception is raised.

    Exceptions:
        - lang_data_errors_pkg.c_invalid_dbms_cloud_ai_profile:
            If the DBMS_CLOUD_AI call fails.
    */
    FUNCTION validate_llm_profile (
        p_profile_name IN VARCHAR2
    ) RETURN BOOLEAN;


    /**
    Function: query_llm

    Description:
        Uses LANG_DATA_DBMS_CLOUD_AI_PROFILE_NAME from lang_data_config_pkg
        to chat with the LLM using the DBMS_CLOUD_AI package.

    Parameters:
        - p_prompt (IN CLOB):
            Prompt to query the LLM.

    Returns:
        - CLOB:
            LLM response if the call is successful.

    Exceptions:
        - Not listed in this specification.
          TODO: confirm against the package body which errors are raised
          when the LLM call fails.
    */
    FUNCTION query_llm (
        p_prompt IN CLOB
    ) RETURN CLOB;


    /**
    Function: detect_named_entities

    Description:
        Detects and extracts named entities from a given input text using an
        LLM.

        This function sends a structured JSON prompt to the LLM, requesting
        entity extraction based on the provided labels. It expects a strict
        JSON response containing a list of entities with their start offset,
        length, text, and label. The response is cleaned, parsed, and
        restructured into a new JSON format where each entity contains:
            - start: Start character index (0-indexed)
            - end: End character index (calculated as offset + length - 1)
            - text: Extracted text segment
            - label: Type or label of the entity
        The function returns a JSON array of extracted entities.

    Parameters:
        - p_query (IN VARCHAR2):
            The input text from which named entities should be detected.

        - p_labels (IN VARCHAR2):
            Comma-separated list of labels (entity types) that the LLM
            should use when extracting entities.

    Returns:
        - JSON:
            A JSON array containing extracted named entities, each
            represented as a JSON object with 'start', 'end', 'text', and
            'label' keys.

    Example:
        Input:
            p_query  = 'Emma works at Oracle.'
            p_labels = 'person,org'

        Output:
            [
                {"start":0, "end":3, "text":"Emma", "label":"person"},
                {"start":14, "end":19, "text":"Oracle", "label":"org"}
            ]

    Notes:
        - Assumes the LLM returns a JSON object, optionally wrapped in
          ```json``` markers, which are cleaned before parsing.
        - Assumes the LLM returns strictly the requested format without
          additional explanations.
    */
    FUNCTION detect_named_entities(
        p_query  IN VARCHAR2,
        p_labels IN VARCHAR2
    ) RETURN JSON;

    /**
    Procedure: rerank_filter_value_candidates

    Description:
        Reranks a list of candidate filter values for a given user query using
        a Large Language Model (LLM). The procedure constructs a structured
        system prompt and calls the LLM to score the semantic similarity
        between the query and each candidate. The top-ranked candidate (by
        score) is returned along with its score and index.

        This procedure is intended for use in Autonomous Database environments
        where LLMs are configured via internal services.

    Parameters:
        - p_query (IN VARCHAR2):
            The user query string for which candidate filter values are to be
            reranked.

        - p_filter_candidates (IN JSON_ARRAY_T):
            A JSON array of candidate strings to be scored against the input
            query.

        - p_top_candidate (OUT VARCHAR2):
            The candidate string with the highest semantic similarity score.

        - p_top_score (OUT NUMBER):
            The score (float between 0 and 1) of the top-ranked candidate.

        - p_top_index (OUT INTEGER):
            The 1-based index of the top-ranked candidate within the input
            array.

    Behavior:
        - Constructs a system prompt including the query and candidate strings.
        - Calls `query_llm` to obtain scores from an LLM.
        - Parses and validates the response, ensuring one score per candidate
          and full coverage.
        - Selects the candidate with the highest score and returns it.

    Example:
        Input (p_filter_candidates is a JSON_ARRAY_T; its content is shown
        here as JSON text):
            p_query = 'Show me large transactions',
            p_filter_candidates =
                '["Balance Inquiry", "Wire Transfer", "Large Withdrawals"]'

        Output:
            p_top_candidate = 'Large Withdrawals'
            p_top_score = 0.94
            p_top_index = 3

    Notes:
        - The LLM must return a JSON object like:
            {
              "scores": [
                { "score": 0.75, "candidate_str": "Balance Inquiry" },
                { "score": 0.91, "candidate_str": "Wire Transfer" },
                { "score": 0.94, "candidate_str": "Large Withdrawals" }
              ]
            }

        - If the LLM output is invalid (missing or extra candidates), an error
          is raised.
    */
    PROCEDURE rerank_filter_value_candidates(
        p_query              IN VARCHAR2,
        p_filter_candidates  IN JSON_ARRAY_T,
        p_top_candidate      OUT VARCHAR2,
        p_top_score          OUT NUMBER,
        p_top_index          OUT INTEGER
    );

    /**
    Function: get_prompt_additional_info

    Description:
        Generates a section for the LLM prompt describing the already
        existing sample questions for any report/drilldown.

    Parameters:
        - p_sample_queries (IN SYS_REFCURSOR):
            List of the sample questions.

    Returns:
        - CLOB:
            Prompt section with the sample queries listed.
    */
    FUNCTION get_prompt_additional_info (
        p_sample_queries IN SYS_REFCURSOR
    ) RETURN CLOB;

    /**
    Function: generate_sample_queries

    Description:
        Generates a set of user queries that could be answered by a given
        report definition using an LLM.

        The function constructs a structured system prompt using the report's
        title, description, and filter information. It instructs the LLM to
        generate a specified number of distinct, natural-language queries
        that would logically return results from the report.

    Parameters:
        - p_title (IN VARCHAR2):
            The title of the report for which user queries should be
            generated.

        - p_description (IN VARCHAR2):
            A description explaining what the report covers and what kinds of
            questions it can answer.

        - p_filter_information (IN VARCHAR2):
            A JSON string describing the report's filters, including
            enumerable and named-entity filters.

        - p_sample_queries (IN SYS_REFCURSOR DEFAULT NULL):
            Optional list of existing sample queries, to be taken into
            consideration while generating new ones.

        - p_count (IN NUMBER DEFAULT 10):
            The exact number of user queries to generate. Defaults to 10
            when omitted.

    Returns:
        - CLOB:
            A JSON array of user queries (as strings), with exactly
            `p_count` entries. No additional text or metadata is included.

    Example:
        Input:
            p_title = 'Customer Transactions Report',
            p_description = 'Summarizes debit and credit transactions
                             across all accounts.',
            p_filter_information = '[{
                                        "filter_name":"Account Type",
                                        "enumerable_set":["Checking","Savings"],
                                        "default_value":"Checking"
                                    }]',
            p_count = 3

        Output:
            [
              "Show me recent credit transactions for my Checking account.",
              "List all debit transactions from my Savings account last month.",
              "What were the highest deposits in my Checking account this year?"
            ]

    Notes:
        - The output returned from the LLM is expected to be a valid JSON array
          of strings.
    */    

    FUNCTION generate_sample_queries(
        p_title                 IN VARCHAR2,
        p_description           IN VARCHAR2,
        p_filter_information    IN VARCHAR2,
        p_sample_queries        IN SYS_REFCURSOR DEFAULT NULL,
        p_count                 IN NUMBER DEFAULT 10
    ) RETURN CLOB;

END lang_data_llm_pkg;
/
