The following examples illustrate BigQuery queries for common analytics, assuming logs have been copied to a table called glean_customer_event_table.
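Every example below follows the same skeleton: script variables (such as an optional project_id filter) are declared up front, and the log table is wrapped in a base CTE so the rest of the query never references the raw table directly. A minimal sketch of that skeleton, assuming a hypothetical fully-qualified table name my-project.my_dataset.glean_customer_event_table (substitute your own table wherever the <glean_customer_event_table> placeholder appears in the queries below):

DECLARE project_id STRING DEFAULT NULL;  -- leave as NULL to include all projects

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- hypothetical fully-qualified name; replaces the <glean_customer_event_table> placeholder
    `my-project.my_dataset.glean_customer_event_table`
  WHERE
    -- Optional project filtering - only filter if project_id is specified
    (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- quick sanity check: event volume by event type
SELECT
  jsonPayload.Type AS event_type
  , COUNT(*) AS num_events
FROM glean_customer_event
GROUP BY 1
ORDER BY 2 DESC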

User Activity & Engagement

Output: Count of unique users per day who performed qualifying actions
Grain: Daily aggregation
Use Case: Track overall platform engagement and user activity trends
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH
glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- change date ranges as needed
    CAST(DATE_TRUNC(`timestamp`, DAY) AS DATE) >= CURRENT_DATE - 30
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, processed_ca_logs AS (
  SELECT
    TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
    , jsonPayload.clientevent.event AS event_name
    , timestamp AS event_timestamp
    , jsonPayload.user.userid AS user_id
    , jsonPayload.clientevent.category AS category
    , jsonPayload.clientevent.label AS label
    , jsonPayload.clientevent.pagepath AS page_path
    , jsonPayload.clientevent.SessionTrackingToken AS stt
  FROM
    glean_customer_event
  WHERE jsonPayload.Type = 'CLIENT_EVENT'
)
, -- We do not attribute most of the events fired to DAU unless the event corresponds to a
-- user action that leads to some level of value creation. The filter below captures workflows
-- corresponding to such greenlisted user actions
filtered_ca_logs AS (
  SELECT
    *
  FROM
    processed_ca_logs
  WHERE
    (
      -- Depending on category, opening links/documents counts towards DAU
      event_name IN (
        'OpenDocument'
        , 'OpenLink'
      )
      AND category IN (
        -- Autocomplete and Search interactions count towards DAU
        'Search Result'
        , 'Autocomplete'
        -- New tab page(NTP) and Homepage interactions count towards DAU
        , 'Feed'
        , 'Calendar'
        , 'New Tab Page'
        -- Directory tab interactions count towards DAU
        , 'Org Chart'
        , 'Person Card'
        , 'Teams'
        , 'Profile'
        , 'People Celebrations'
        , 'Person Attribution'
        -- User Generated Content(UGC) interactions count towards DAU
        , 'Announcements'
        , 'Answers'
        , 'Collections'
        , 'Featured Question and Answer'
        , 'Generated Question and Answer'
        , 'Pins'
        -- Golinks interactions count towards DAU
        , 'Shortcuts'
        -- Admin and Setup page interactions count towards DAU
        , 'Verification'
        , 'Datasource Auth'
        , 'Insights'
        -- Feature related interactions count towards DAU
        , 'Chat'
        , 'Result Preview'
        , 'App Card'
        , 'Customer Card'
        , 'Search'
        -- Other tangible user interactions that count towards DAU
        , 'Feedback'
      )
    )
    OR (
      -- Depending on category, certain feature related clicks count towards DAU
      event_name IN (
        'Click'
      )
      AND category IN (
        -- Autocomplete and Search interactions count towards DAU
        'Autocomplete'
        , 'Search Result'
        , 'Datasource Filter'
        , 'Facets'
        -- New tab page(NTP) and Homepage interactions count towards DAU
        , 'Feed'
        , 'Calendar'
        -- Directory tab interactions count towards DAU
        , 'Org Chart'
        , 'Person Card'
        , 'Teams'
        , 'Profile'
        , 'People Celebrations'
        , 'Person Attribution'
        -- Sidebar interactions count towards DAU
        , 'Sidebar Tabs'
        -- User Generated Content(UGC) interactions count towards DAU
        , 'Announcements'
        , 'Answers'
        , 'Collections'
        -- Golinks interactions count towards DAU
        , 'Shortcuts'
        -- Admin and Setup page interactions count towards DAU
        , 'Datasource Auth'
        , 'User Menu'
        , 'Admin Console'
        -- Other tangible user interactions that count towards DAU
        , 'Feedback'
      )
    )
    -- CRUD operations on User Generated Content (UGC) always count towards DAU
    OR (
      event_name IN (
        'Add'
        , 'Create'
        , 'Delete'
      ) AND category IN (
        'Announcements'
        , 'Answers'
        , 'Collections'
      )
    )
    OR (
      event_name IN (
        'View'
      )
      AND category IN (
        -- User Generated Content(UGC) interactions count towards DAU
        'Announcements'
        , 'Answers'
        , 'Collections'
        -- Directory tab interactions count towards DAU
        , 'Person Card'
        , 'Team Card'
        , 'Org Chart'
      )
      AND page_path NOT IN ('/', '/ntp', '/search')
    )
)
, feature_logs AS (
  SELECT
    DISTINCT jsonPayload.user.userid AS user_id
    , TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
  FROM
    glean_customer_event
  WHERE
    ((
      jsonPayload.Type IN ('SEARCH')
      -- User initiated search queries only
      AND jsonpayload.Search.initiator = 'USER'
    )
    OR (
      jsonPayload.Type IN ('CHAT')
      -- User initiated chat queries only
      AND jsonpayload.Chat.initiator = 'USER'
    )
    OR (
      jsonPayload.Type IN ('SEARCH_CLICK','CHAT_FEEDBACK','AI_SUMMARY','AI_ANSWER','SEARCH_FEEDBACK')
    )
    OR (
      jsonPayload.Type IN ('SHORTCUT')
      AND jsonPayload.shortcut.event IN ('REDIRECT', 'CREATE', 'DELETE', 'UPDATE')
    )
    OR (
      jsonPayload.Type IN ('AUTOCOMPLETE')
      -- Only include autocomplete queries with a query length of at least 1 character to ensure only intentional engagement is considered.
      AND jsonPayload.autocomplete.querylength >= 1.0
    ))
)
SELECT
  utc_day
  , COUNT(DISTINCT user_id) AS DAU
FROM (
  SELECT
    utc_day
    , user_id
  FROM
    filtered_ca_logs
  UNION ALL
  SELECT
    utc_day
    , user_id
  FROM
    feature_logs
)
GROUP BY
  1
ORDER BY
  1 DESC
Output: Daily counts of searches and unique users performing webapp searches
Grain: Daily aggregation
Use Case: Monitor webapp search adoption and usage patterns
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- Optional project filtering - only filter if project_id is specified
    (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
  TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
  , COUNT(DISTINCT jsonPayload.User.UserId) AS num_users
  , COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_searches
FROM glean_customer_event
WHERE jsonPayload.Type = 'SEARCH'
  AND jsonPayload.Search.Initiator = 'USER'
  AND jsonPayload.Search.Modality = 'FULLPAGE'
GROUP BY 1
ORDER BY 1
Output: Daily counts of embedded searches and users by host domain
Grain: Daily aggregation by embedding domain
Use Case: Track embedded search component adoption across different platforms and domains
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- Optional project filtering - only filter if project_id is specified
    (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
  TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
  , jsonPayload.Search.HostDomain
  , COUNT(DISTINCT jsonPayload.User.UserId) AS num_users
  , COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_searches
FROM glean_customer_event
WHERE jsonPayload.Type = 'SEARCH'
  AND jsonPayload.Search.Initiator = 'USER'
  AND jsonPayload.Search.Modality = 'EMBEDDED_SEARCH'
GROUP BY 1, 2
ORDER BY 1, 2

Search Analytics

Output: Daily search metrics including queries, sessions, queries per session, and unique users
Grain: Daily aggregation over 7-day period
Use Case: Analyze search engagement depth and session patterns
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 7 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
  TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
  ,COUNT(DISTINCT jsonPayload.User.UserId) AS num_users
  ,COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_queries
  ,COUNT(DISTINCT jsonPayload.Search.SessionTrackingToken) AS num_sessions
  ,COUNT(DISTINCT jsonPayload.Search.TrackingToken)/COUNT(DISTINCT jsonPayload.Search.SessionTrackingToken) AS Queries_per_session
FROM
  glean_customer_event
WHERE
  jsonPayload.Type = 'SEARCH'
  -- filter for user queries (exclude Gleanbot requests)
  AND LOWER(jsonPayload.Search.Initiator) IN ('user')
  AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
GROUP BY 1
ORDER BY 1;
Output: Daily counts of search result clicks and queries that received clicks
Grain: Daily aggregation over 7-day period
Use Case: Measure search result relevance and click-through rates
DECLARE lookback_period INT64 DEFAULT 7;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID, or leave as NULL to include all projects

WITH 
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
  SELECT 
    DATE(timestamp) AS datepartition
    , resource.labels.project_id AS project_id
    , jsonPayload.User.UserId AS userid
    , jsonPayload
    , timestamp
  FROM 
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= CASE 
      WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
      WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
      WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
      WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
      ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))  -- Default to DAY
    END
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)

SELECT
  TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
  ,count(jsonPayload.SearchClick.TrackingToken) AS num_clicks
  ,count(DISTINCT jsonPayload.SearchClick.TrackingToken) AS num_queries_with_clicks
FROM
  glean_customer_event
WHERE
  jsonPayload.Type = 'SEARCH_CLICK'
GROUP BY 1
ORDER BY 1
Output: Daily clicks, queries with clicks, and average position by datasource
Grain: Daily aggregation by datasource over 7-day period
Use Case: Evaluate datasource performance and result positioning effectiveness
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 7 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
  TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
  ,jsonPayload.searchclick.datasource
  ,count(jsonPayload.SearchClick.TrackingToken) AS num_clicks
  ,count(DISTINCT jsonPayload.SearchClick.TrackingToken) AS num_queries_with_clicks
  ,avg(jsonPayload.searchclick.position)+1 as average_position
FROM
  glean_customer_event
WHERE
  jsonPayload.Type = 'SEARCH_CLICK'
GROUP BY 1,2
ORDER BY 1,3 desc;
Output: List of search queries ranked by frequency and user count
Grain: Query-level aggregation over 14-day period
Use Case: Identify popular search topics and content gaps
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 14 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
  SELECT
    DISTINCT `timestamp`
    , jsonPayload.Search.SessionTrackingToken AS stt
    , jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
    , jsonPayload.Search.Query AS search_query
    , jsonPayload.user.userid
    , jsonPayload.user.useremail
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH'
    -- filter for user queries (exclude Gleanbot requests)
    AND LOWER(jsonPayload.Search.Initiator) IN ('user')
    AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
SELECT
  search_query
  , COUNT(DISTINCT qtt) AS frequency
  , COUNT(DISTINCT userid) AS user_count
FROM
  queries_data
GROUP BY
  1
ORDER BY
  2 DESC
Output: Individual search terms ranked by frequency and user count
Grain: Search term-level aggregation over 14-day period
Use Case: Understand user vocabulary and optimize search suggestions
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 14 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
  SELECT
    DISTINCT `timestamp`
    , jsonPayload.Search.SessionTrackingToken AS stt
    , jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
    , jsonPayload.Search.Query AS search_query
    , jsonPayload.user.userid
    , jsonPayload.user.useremail
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH'
    -- filter for user queries (exclude Gleanbot requests)
    AND LOWER(jsonPayload.Search.Initiator) IN ('user')
    AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
, query_terms_list AS (
  SELECT
    *
    , SPLIT(search_query, ' ') AS query_terms
  FROM
    queries_data
)
, query_terms_flattened AS (
  SELECT
    timestamp
    , stt
    , qtt
    , userid
    , useremail
    , query_term
  FROM
    query_terms_list
    , UNNEST(query_terms) AS query_term
)
SELECT
  query_term
  , COUNT(*) AS frequency
  , COUNT(DISTINCT userid) AS user_count
FROM
  query_terms_flattened
GROUP BY
  1
ORDER BY
  2 DESC

Search Quality & Performance

Output: Daily ratio of satisfied sessions (sessions with clicks) to total sessions
Grain: Daily aggregation over 14-day period
Use Case: Measure search quality and user satisfaction trends
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 14 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
  SELECT
    DISTINCT TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
    , jsonPayload.Search.SessionTrackingToken AS stt
    , jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH'
    -- filter for user queries (exclude Gleanbot requests)
    AND LOWER(jsonPayload.Search.Initiator) IN ('user')
    AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
, clicks_data AS (
  SELECT
    DISTINCT jsonPayload.SearchClick.TrackingToken AS qtt
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH_CLICK'
)
SELECT
  q.utc_day --metric date
  , COUNT(DISTINCT IF(c.qtt IS NOT NULL, q.stt, NULL)) / GREATEST(COUNT(DISTINCT q.stt), 1) AS session_satisfaction --Ratio of satisfied sessions to all sessions
FROM
  queries_data q
LEFT JOIN
  clicks_data c
ON
  q.qtt = c.qtt
GROUP BY 1
ORDER BY 1;
Output: Daily mean average precision score based on click positions
Grain: Daily aggregation over 14-day period
Use Case: Evaluate search result ranking quality and relevance
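As a worked illustration of the precision formula used below (hypothetical numbers, not taken from any log): if a query received clicks at positions 0 and 2, the per-click precision values are 1/(1+0) = 1.00 and 2/(1+2) ≈ 0.67, so that query's average precision is about 0.83; the daily mean average precision is the mean of these per-query scores across all clicked queries that day.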
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 14 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, clicks_data AS (
  SELECT
    jsonPayload.SearchClick.TrackingToken AS qtt
    ,jsonPayload.searchclick.position AS position
    ,MAX(TIMESTAMP_TRUNC(`timestamp`, DAY)) AS utc_day
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH_CLICK'
  GROUP BY
    1,2
)
, precision_data AS (
  SELECT
    utc_day
    , qtt
    , position
    , COUNT(*) OVER (PARTITION BY qtt ORDER BY position) / (1 + position) AS precision
  FROM
    clicks_data
)
, average_precision_data AS (
  SELECT
    utc_day
    , qtt
    , AVG(precision) AS average_precision
  FROM
    precision_data
  GROUP BY
    1, 2
)
SELECT
  utc_day --metric_date
  , AVG(average_precision) AS mean_average_precision
FROM
  average_precision_data
GROUP BY
  1
ORDER BY
  1;
Output: Daily median time in milliseconds from first query to last click in a session
Grain: Daily aggregation over 14-day period
Use Case: Measure search session efficiency and time-to-information
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- filter for data from the last 14 days (adjust as needed)
    `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
  SELECT
    DISTINCT TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
    , jsonPayload.Search.SessionTrackingToken AS stt
    , jsonPayload.Search.TrackingToken AS qtt
    , timestamp
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH'
    -- filter for user queries (exclude background and slackbot requests)
    AND LOWER(jsonPayload.Search.Initiator) IN ('user')
    AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
, clicks_data AS (
  SELECT
    jsonPayload.SearchClick.TrackingToken AS qtt
    , MAX(timestamp) as timestamp
  FROM
    glean_customer_event
  WHERE
    jsonPayload.Type = 'SEARCH_CLICK'
  GROUP BY 1
)
, sessions_w_clicks AS (
  SELECT
    utc_day --metric_date
    , stt
    , first_query_ts
    , last_click_ts
    , timestamp_diff(last_click_ts, first_query_ts, millisecond) AS time_from_first_query_to_last_click
  FROM (
    SELECT
      stt
      , MIN(utc_day) AS utc_day
      , MIN(q.timestamp) AS first_query_ts
      , MIN(c.timestamp) AS first_click_ts
      , MAX(c.timestamp) AS last_click_ts
      , COUNTIF(c.qtt IS NOT NULL) AS num_clicks
    FROM
      queries_data q
    LEFT JOIN
      clicks_data c
    ON
      q.qtt = c.qtt
    GROUP BY
      1
  )
  WHERE
    num_clicks > 0
)
SELECT
  utc_day --metric_date
  -- , avg(time_from_first_query_to_last_click) as mean_time_from_first_query_to_last_click -- surface average if desired
  , approx_quantiles(time_from_first_query_to_last_click, 100)[offset(50)] AS p50_time_first_query_to_last_click_ms
FROM
  sessions_w_clicks
GROUP BY
  1
ORDER BY
  1
Output: A list of all search queries annotated with whether an AI-generated answer was present for each query.
Grain: Individual query-answer pairs with configurable time periods
Use Case: Understand AI answer usage patterns, track which searches trigger AI responses, and analyze AI answer effectiveness
-- Parameterized lookback period using variables
DECLARE lookback_period INT64 DEFAULT 1;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL 

WITH 
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
  SELECT 
    DATE(timestamp) AS datepartition
    , resource.labels.project_id AS project_id
    , jsonPayload.User.UserId AS userid
    , jsonPayload
    , timestamp
  FROM 
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= CASE 
      WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
      WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
      WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
      WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
      ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))  -- Default to DAY
    END
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, search_events AS (
  SELECT 
    datepartition
    , project_id
    , userid
    , jsonPayload.Search.Query AS query
    , jsonPayload.Search.Modality AS modality
    , jsonPayload.Search.Initiator AS initiator
    , jsonPayload.Search.TrackingToken AS trackingtoken
  FROM 
    glean_customer_event
  WHERE 
    jsonPayload.Type = 'SEARCH'
    AND LOWER(jsonPayload.Search.Initiator) IN ('user')
)
, -- Direct AI Answer events
direct_ai_answers AS (
  SELECT 
    datepartition
    , project_id
    , userid
    , jsonPayload.AIAnswer.TrackingToken AS trackingtoken
    , jsonPayload.AIAnswer.Response AS response
  FROM 
    glean_customer_event
  WHERE 
    jsonPayload.Type = 'AI_ANSWER'
    AND jsonPayload.AIAnswer.TrackingToken != ''
)
, workflow_conversation AS (
  SELECT 
    jsonPayload.workflowconversation.workflowrunid as runid
    , MAX(CASE WHEN message.author='USER' THEN text END) as user_query
    , MAX(CASE WHEN message.author='GLEAN' THEN text END) as response
  FROM
    glean_customer_event
	, UNNEST(jsonPayload.workflowconversation.messages) message
  WHERE
    jsonPayload.type='WORKFLOW_CONVERSATION'
  GROUP BY
    runid
)
, -- Workflow-based AI Answer events
workflow_ai_answers AS (
  SELECT 
    datepartition
    , project_id
    , userid
    , jsonPayload.WorkflowRun.SourceTrackingToken AS trackingtoken
    , wc.response
  FROM 
    glean_customer_event ai_answer
  LEFT JOIN 
    workflow_conversation wc
  ON
    ai_answer.jsonPayload.workflowrun.runid = wc.runid
  WHERE 
    ai_answer.jsonPayload.Type = 'WORKFLOW_RUN'
    AND ai_answer.jsonPayload.WorkflowRun.Feature = 'AI_ANSWER'
    AND ai_answer.jsonPayload.WorkflowRun.Initiator = 'GLEAN'
    AND ai_answer.jsonPayload.WorkflowRun.Platform = 'WEB'
)
, -- Combine both AI answer types
all_ai_answers AS (
  SELECT * FROM direct_ai_answers
  UNION ALL
  SELECT * FROM workflow_ai_answers
)
SELECT 
    -- Search Information (from original queries)
    s.datepartition
  , s.trackingtoken
  , s.query
  , s.modality
  , s.userid
  -- AI Answer Information
  , aa.response
  , CASE WHEN aa.trackingtoken IS NOT NULL THEN TRUE ELSE FALSE END AS is_ai_answer_present
FROM 
  search_events s 
LEFT JOIN 
  all_ai_answers aa 
ON 
  aa.datepartition = s.datepartition 
  AND aa.project_id = s.project_id
  AND aa.userid = s.userid 
  AND aa.trackingtoken = s.trackingtoken
ORDER BY 
  aa.datepartition DESC
  , aa.project_id
  , aa.userid
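A possible follow-up, shown as a sketch: wrap the query above as a CTE or view (here called ai_answer_presence, a hypothetical name) and roll it up into a daily share of searches that received an AI answer.

-- Sketch only: ai_answer_presence is a hypothetical wrapper around the query above
SELECT
  datepartition
  , COUNT(DISTINCT IF(is_ai_answer_present, trackingtoken, NULL))
      / COUNT(DISTINCT trackingtoken) AS ai_answer_coverage
FROM ai_answer_presence
GROUP BY 1
ORDER BY 1 DESC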
Output: Search queries linked to AI-generated answers that have an upvote or downvote associated with them, with user and project context
Grain: Individual query-answer pairs with configurable time periods
Use Case: Understand distribution of AI answer feedback (upvote/downvote), and analyze AI answer feedback rate
-- Parameterized lookback period using variables
DECLARE lookback_period INT64 DEFAULT 1;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID, or leave as NULL to include all projects

BEGIN
WITH 
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
  SELECT 
    DATE(timestamp) AS datepartition
    , resource.labels.project_id AS project_id
    , jsonPayload.User.UserId AS userid
    , jsonPayload
    , timestamp
  FROM 
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= CASE 
      WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
      WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
      WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
      WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
      ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))  -- Default to DAY
    END
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)

, search_events AS (
  SELECT 
    datepartition
    , project_id
    , userid
    , jsonPayload.Search.Query AS query
    , jsonPayload.Search.Modality AS modality
    , jsonPayload.Search.Initiator AS initiator
    , jsonPayload.Search.TrackingToken AS trackingtoken
  FROM 
    glean_customer_event
  WHERE 
    jsonPayload.Type = 'SEARCH'
)
, -- Direct AI Answer events
direct_ai_answers AS (
  SELECT 
    datepartition
    , project_id
    , userid
    , jsonPayload.AIAnswer.TrackingToken AS trackingtoken
  FROM 
    glean_customer_event
  WHERE 
    jsonPayload.Type = 'AI_ANSWER'
    AND jsonPayload.AIAnswer.TrackingToken != ''
)
, -- Workflow-based AI Answer events
workflow_ai_answers AS (
  SELECT 
    datepartition
    , project_id
    , userid
    , jsonPayload.WorkflowRun.SourceTrackingToken AS trackingtoken
  FROM 
    glean_customer_event ai_answer
  WHERE 
    ai_answer.jsonPayload.Type = 'WORKFLOW_RUN'
    AND ai_answer.jsonPayload.WorkflowRun.Feature = 'AI_ANSWER'
    AND ai_answer.jsonPayload.WorkflowRun.Initiator = 'GLEAN'
    AND ai_answer.jsonPayload.WorkflowRun.Platform = 'WEB'
)
, -- Combine both AI answer types
all_ai_answers AS (
  SELECT * FROM direct_ai_answers
  UNION ALL
  SELECT * FROM workflow_ai_answers
)


, -- Get all AI answers that have an upvote (thumbs up) or downvote (thumbs down)
ai_answer_votes AS (
  SELECT
      jsonpayload.aianswervote.trackingtoken
      ,jsonpayload.aianswervote.vote
  FROM glean_customer_event
  WHERE
      jsonpayload.type = 'AI_ANSWER_VOTE'
)


SELECT 
  -- Dimensional Context
  aa.datepartition
  , aa.project_id
  , aa.userid
  
  -- Search Information (from original queries)
  , s.query AS search_query
  , s.modality AS search_modality
  , s.initiator AS search_initiator
  
  -- AI Answer Vote Information
  , aav.vote AS ai_answer_vote
FROM 
  all_ai_answers aa
LEFT JOIN 
  search_events s 
ON 
  aa.datepartition = s.datepartition 
  AND aa.project_id = s.project_id
  AND aa.userid = s.userid 
  AND aa.trackingtoken = s.trackingtoken
LEFT JOIN 
  ai_answer_votes aav
ON 
  aa.trackingtoken = aav.trackingtoken
WHERE
  -- Filter out AI answers without corresponding search events or missing initiators
  s.initiator != ''
  -- Only keep AI answers that received a vote (upvote/downvote).
  -- Note: To analyze the AI answer feedback rate, comment out this filter so you get the entire list of AI answers along with their vote status
  AND aav.vote != ''
ORDER BY 
  aa.datepartition DESC
  , aa.project_id
  , aa.userid;
END;

Chat & AI Analytics

Metrics: Active Chat Users, # of Chats
Grain: Per Day, Per User
This query identifies users who had at least one chat interaction on a given date; one row per user per day.
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL
DECLARE workflow_ids ARRAY<STRING> DEFAULT [
  'DOC_CONTEXT_READER',
  'ORIGINAL_MESSAGE_SEARCH',
  'DIRECT_LLM_RESPONSE',
  'REACT_TOOLS_2_HOPS',
  'DEFAULT_CHAT',
  'WORLD_MODE_V2',
  'DEEP_RESEARCH_PYAGENT',
  'DEEP_RESEARCH_PREVIEW'
];

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- Optional project filtering - only filter if project_id is specified
    (project_id IS NULL OR resource.labels.project_id = project_id)
)

, -- Extract the user level data from the most recent date to get most recent user attributes
-- Concatenate aliasids at a userid level

product_snapshot_agg AS (
  SELECT
    jsonPayload.productsnapshot.user.id AS userid,
    MAX(jsonPayload.productsnapshot.user.signuptime) AS signuptime,
    ARRAY_CONCAT_AGG(jsonPayload.productsnapshot.user.aliasids) AS aliasids,
    MAX(jsonPayload.productsnapshot.user.department) AS department

  FROM glean_customer_event
  WHERE

  -- Adjust as needed
  DATE(timestamp) = CURRENT_DATE - INTERVAL 1 DAY
  AND jsonPayload.type = 'PRODUCT_SNAPSHOT'
  GROUP BY 1
)

, -- [Optional]: For users belonging to departments with less than 5 members, assign them a single bucket
latest_orgchart_data AS (
  SELECT
    userid,
    signuptime,
    aliasids,
    CASE WHEN COUNT(userid) OVER (PARTITION BY department) < 5
    THEN "Departments less than 5 Users"
    ELSE department
    END AS department
  FROM product_snapshot_agg
)

, -- Map each alias to the canonical UserID
id_to_alias AS (
  SELECT
    DISTINCT aliasid,
    userid AS canonicalid
  FROM
    latest_orgchart_data, UNNEST(aliasids) AS aliasid
)

, -- [IMP]: Calculate chat usage metrics.
/*
This CTE merges data across three event types: CHAT, WORKFLOW, and WORKFLOW_RUN.
The WORKFLOW event type was active from mid-February to mid-June 2025, after which it was replaced by WORKFLOW_RUN.
Before that period, all chat-related activity was logged under the CHAT event type.

To calculate accurate Chat usage metrics, we first UNION WORKFLOW and WORKFLOW_RUN. Since only one is ever populated
at a given time, this is a safe operation. We then perform a FULL OUTER JOIN with CHAT to deduplicate any overlapping
runs that appear in both CHAT and WORKFLOW/WORKFLOW_RUN logs.
*/

chat_usage as (
  -- Workflow data + Workflow run data
  SELECT
    COALESCE(wf.datepartition, chat.datepartition) AS datepartition,
    COALESCE(wf.userid, chat.userid) AS userid,
    COUNT(DISTINCT wf.run_id) + COUNT(DISTINCT CASE WHEN wf.run_id IS NULL THEN chat.qtt END) AS num_chat_queries
  FROM (
    SELECT
      jsonPayload.workflow.runid AS run_id,
      jsonPayload.user.userid,
      MIN(DATE(timestamp)) AS datepartition
    FROM glean_customer_event
    WHERE
	  jsonPayload.type = 'WORKFLOW'
      -- These workflowIDs correspond to Chats
      AND jsonPayload.workflow.workflowid IN UNNEST(workflow_ids)
      AND jsonPayload.workflow.initiator = 'USER'
    GROUP BY 1,2

    UNION ALL

    SELECT
      DISTINCT jsonPayload.workflowrun.runid AS run_id,
      jsonPayload.user.userid,
      DATE(timestamp) AS datepartition
    FROM glean_customer_event,

    UNNEST(jsonPayload.workflowrun.workflowexecutions) AS wfe
    WHERE jsonPayload.type = 'WORKFLOW_RUN'
    AND jsonPayload.workflowrun.initiator = 'USER'
    AND wfe.workflowid IN UNNEST(workflow_ids)
  ) wf
  -- Join the unioned workflow data with chat data
  FULL OUTER JOIN (
    SELECT
      DATE(timestamp) AS datepartition,
      jsonPayload.user.userid,
      jsonPayload.chat.qtt AS qtt,
      jsonPayload.chat.workflowrunid AS workflowrunid
    FROM glean_customer_event
    WHERE jsonPayload.type = 'CHAT'
    AND jsonPayload.chat.initiator = 'USER'
  ) chat
  ON wf.run_id = chat.workflowrunid
  GROUP BY 1, 2
)

, feature_usage AS (
  SELECT
    datepartition,
    userid,
    COALESCE(chat_usage.num_chat_queries, 0) AS _num_chats
  FROM chat_usage
)

, canonicalized AS (
  SELECT
    datepartition,
    COALESCE(id_to_alias.canonicalid, feature_usage.userid) AS userid,
    COALESCE(SUM(_num_chats), 0) AS num_chats
  FROM feature_usage
  LEFT JOIN id_to_alias
  ON feature_usage.userid = id_to_alias.aliasid
  WHERE
    COALESCE(id_to_alias.canonicalid, feature_usage.userid) IS NOT NULL
  GROUP BY 1, 2
  HAVING COALESCE(SUM(_num_chats), 0) > 0
)

SELECT * FROM canonicalized ORDER BY datepartition DESC, userid
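To surface the per-day metrics named above (Active Chat Users, # of Chats), a sketch of a rollup over the final canonicalized CTE; you can swap it in for the final SELECT of the query above:

-- Sketch only: replaces the final SELECT above to produce per-day totals
SELECT
  datepartition,
  COUNT(DISTINCT userid) AS active_chat_users,
  SUM(num_chats) AS num_chats
FROM canonicalized
GROUP BY 1
ORDER BY 1 DESC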
Output: Daily feedback metrics (upvotes, downvotes, manual feedback) by AI application
Grain: Daily aggregation by AI application
Use Case: Monitor custom AI app performance and user satisfaction
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- Optional project filtering - only filter if project_id is specified
    (project_id IS NULL OR resource.labels.project_id = project_id)
)
, gleanchat_feedback AS (
  SELECT
    DISTINCT TIMESTAMP_TRUNC(m.`timestamp`, DAY) AS utc_day
    , m.jsonPayload.User.UserId
    , m.jsonPayload.Chat.Qtt --Query Tracking Token
    , COALESCE(m.jsonPayload.Chat.ApplicationId, 'Default Gleanchat') AS ApplicationId
    , f.jsonPayload.ChatFeedback.Event
  FROM
    glean_customer_event m
  LEFT JOIN glean_customer_event f
  ON m.jsonPayload.Chat.ResponseMessageId = f.jsonPayload.ChatFeedback.MessageId
  WHERE
    m.jsonPayload.Type = 'CHAT'
    AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
)
SELECT
  ApplicationId
  , utc_day --datepartition
  , COUNT(DISTINCT Qtt) AS num_chat_messages
  , COUNT(DISTINCT CASE WHEN Event = 'UPVOTE' THEN Qtt END) AS upvotes
  , COUNT(DISTINCT CASE WHEN Event = 'DOWNVOTE' THEN Qtt END) AS downvotes
  , COUNT(DISTINCT CASE WHEN Event = 'MANUAL_FEEDBACK' THEN Qtt END) AS shared_feedback
FROM
  gleanchat_feedback
WHERE
  COALESCE(NULLIF(applicationid,''), 'Default Gleanchat') != 'Default Gleanchat'
GROUP BY
  1, 2
Output: Chat messages with feedback ratings and comments
Grain: Individual chat message level, showing ratings and comments where they exist
Use Case: Analyze chat response quality and gather detailed user feedback for improvements
Note: Requires raw logs.
-- Parameterized lookback period using variables
DECLARE lookback_period INT64 DEFAULT 14;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

BEGIN
WITH 
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
  SELECT 
    DATE(timestamp) AS datepartition
    , resource.labels.project_id AS project_id
    , jsonPayload.User.UserId AS userid
    , jsonPayload
    , timestamp
  FROM 
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= CASE 
      WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
      WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
      WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
      WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
      ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))  -- Default to DAY
    END
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)

,chat_events AS (
  SELECT DISTINCT
      m.`timestamp` AS message_time 
    , m.jsonPayload.User.Userid
    , f.`timestamp` AS feedback_time
    , f.jsonPayload.ChatFeedback.Event AS rating
    , fc.jsonPayload.ChatFeedback.comments AS comments
    , m.jsonPayload.Chat.Qtt AS run_id
  FROM
    glean_customer_event m
  LEFT JOIN
    glean_customer_event f
  ON m.jsonPayload.Chat.ResponseMessageId = f.jsonPayload.ChatFeedback.MessageId
  LEFT JOIN
    -- For the same chat message we might receive both upvote/downvote and manual feedback. This is to collect comments from the user.
    glean_customer_event fc
  ON m.jsonPayload.Chat.ResponseMessageId = fc.jsonPayload.ChatFeedback.MessageId
  WHERE
    m.jsonPayload.Type = 'CHAT'
    AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
    AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
    AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
    AND fc.jsonPayload.ChatFeedback.Event = ('MANUAL_FEEDBACK')
)

,workflow_events AS (
  SELECT DISTINCT
    m.`timestamp` AS message_time
    , m.jsonPayload.User.Userid
    , f.`timestamp` AS feedback_time
    , f.jsonPayload.ChatFeedback.Event AS rating
    , fc.jsonPayload.ChatFeedback.comments AS comments
    , m.jsonPayload.workflow.RunID AS run_id
  FROM
    glean_customer_event m
  LEFT JOIN
    glean_customer_event f
  ON m.jsonPayload.Workflow.RunId = f.jsonPayload.ChatFeedback.RunID
  LEFT JOIN
    -- For the same chat message we might receive both upvote/downvote and manual feedback. This is to collect comments from the user.
    glean_customer_event fc
  ON m.jsonPayload.Workflow.RunId = fc.jsonPayload.ChatFeedback.RunID
  WHERE
    m.jsonPayload.Type = 'WORKFLOW'
    AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
    AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
    AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
    AND fc.jsonPayload.ChatFeedback.Event = ('MANUAL_FEEDBACK')
)

,workflowrun_events AS (
  SELECT DISTINCT
    m.`timestamp` AS message_time
    , m.jsonPayload.User.Userid
    , f.`timestamp` AS feedback_time
    , f.jsonPayload.ChatFeedback.Event AS rating
    , fc.jsonPayload.ChatFeedback.comments AS comments
    , m.jsonPayload.workflowRun.RunID AS run_id
  FROM
    glean_customer_event m
  LEFT JOIN
    glean_customer_event f
  ON m.jsonPayload.WorkflowRun.RunId = f.jsonPayload.ChatFeedback.RunID
  LEFT JOIN
    -- For the same chat message we might receive both upvote/downvote and manual feedback. This is to collect comments from the user.
    glean_customer_event fc
  ON m.jsonPayload.WorkflowRun.RunId = fc.jsonPayload.ChatFeedback.RunID
  WHERE
    m.jsonPayload.Type = 'WORKFLOW_RUN'
    AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
    AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
    AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
    AND fc.jsonPayload.ChatFeedback.Event = ('MANUAL_FEEDBACK')
)


SELECT * 
FROM chat_events

UNION ALL 

SELECT * 
FROM workflow_events

UNION ALL

SELECT * 
FROM workflowrun_events;
END;
Output: All chat citation records from both CHAT_CITATIONS events and WORKFLOW_RUN citations
Grain: Individual citation level over configurable time period
Use Case: Extract all chat citations for analysis, including source documents, files, and people cited in chat responses across both legacy and modern workflow implementations
DECLARE lookback_period INT64 DEFAULT 7;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID, or leave as NULL to include all projects
DECLARE workflow_ids ARRAY<STRING> DEFAULT [
  'DOC_CONTEXT_READER',
  'ORIGINAL_MESSAGE_SEARCH',
  'DIRECT_LLM_RESPONSE',
  'REACT_TOOLS_2_HOPS',
  'DEFAULT_CHAT',
  'WORLD_MODE_V2',
  'DEEP_RESEARCH_PYAGENT',
  'DEEP_RESEARCH_PREVIEW'
];

WITH 
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
  SELECT 
    DATE(timestamp) AS datepartition
    , resource.labels.project_id AS project_id
    , jsonPayload.User.UserId AS userid
    , jsonPayload
    , timestamp
  FROM 
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= CASE 
      WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
      WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
      WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
      WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
      ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))  -- Default to DAY
    END
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)

-- Legacy CHAT_CITATIONS events  
, chat_citations_events AS (
  SELECT
    DATE(timestamp) as datepartition,
    jsonPayload.user.userid,
    jsonPayload.user.useremail,
    jsonPayload.chatcitations.chatsessionid,
    jsonPayload.chatcitations.workflowrunid,
    citation.trackingtoken,
    citation.sourcedocument.datasource,
    citation.sourcedocument.doctype,
    citation.sourcedocument.id AS source_document_id,
    citation.sourcedocument.title AS source_document_title,
    citation.sourcedocument.url AS source_document_url,
    citation.sourcefile.id AS source_file_id,
    citation.sourcefile.name AS source_file_name,  
    citation.sourceperson.id AS source_person_id,
    citation.sourceperson.name AS source_person_name,
    'CHAT_CITATIONS' AS event_source
  FROM glean_customer_event,
  UNNEST(jsonPayload.chatcitations.citations) AS citation
  WHERE jsonPayload.Type = 'CHAT_CITATIONS'
)

-- WORKFLOW_RUN citations for chat workflows
, workflow_run_citations AS (
  SELECT
    DATE(timestamp) as datepartition,
    jsonPayload.user.userid,
    jsonPayload.user.useremail,
    jsonPayload.workflowrun.chatsessionid,
    jsonPayload.workflowrun.runid AS workflowrunid,
    citation.trackingtoken,
    citation.sourcedocument.datasource,
    citation.sourcedocument.doctype,
    citation.sourcedocument.id AS source_document_id,
    citation.sourcedocument.title AS source_document_title,
    citation.sourcedocument.url AS source_document_url,
    citation.sourcefile.id AS source_file_id,
    citation.sourcefile.name AS source_file_name,
    citation.sourceperson.id AS source_person_id,
    citation.sourceperson.name AS source_person_name,
    'WORKFLOW_RUN' AS event_source
  FROM glean_customer_event,
  UNNEST(jsonPayload.workflowrun.workflowexecutions) AS workflow_execution,
  UNNEST(jsonPayload.workflowrun.stepexecutions) AS step_execution,
  UNNEST(step_execution.citations) AS citation
  WHERE jsonPayload.Type = 'WORKFLOW_RUN'
    AND jsonPayload.workflowrun.initiator = 'USER'
    -- Filter for chat-related workflows
    AND workflow_execution.workflowid IN UNNEST(workflow_ids)
)

-- Combine all chat citations
SELECT * FROM chat_citations_events
UNION DISTINCT
SELECT * FROM workflow_run_citations
ORDER BY datepartition DESC
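A common follow-up, sketched here with the combined result above wrapped as a CTE or view named chat_citations (a hypothetical name): rank datasources by how often they are cited and by how many distinct users cite them.

-- Sketch only: chat_citations is a hypothetical wrapper around the combined result above
SELECT
  datasource
  , COUNT(*) AS num_citations
  , COUNT(DISTINCT userid) AS num_citing_users
FROM chat_citations
GROUP BY 1
ORDER BY 2 DESC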
Output: Complete chat conversations with user queries, Glean responses, and user metadata
Grain: Individual chat run level over configurable time period
Use Case: Analyze chat conversation patterns, response quality, and user engagement across departments and user segments
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL
DECLARE start_date DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE end_date DATE DEFAULT CURRENT_DATE();
DECLARE chat_workflow_ids ARRAY<STRING> DEFAULT [
  'DOC_CONTEXT_READER',
  'ORIGINAL_MESSAGE_SEARCH',
  'DIRECT_LLM_RESPONSE',
  'REACT_TOOLS_2_HOPS',
  'DEFAULT_CHAT',
  'WORLD_MODE_V2',
  'DEEP_RESEARCH_PYAGENT',
  'DEEP_RESEARCH_PREVIEW'
];

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    -- Optional project filtering - only filter if project_id is specified
    (project_id IS NULL OR resource.labels.project_id = project_id)
    AND timestamp BETWEEN TIMESTAMP(start_date) and TIMESTAMP(end_date)
)
, chat_usage as (
  -- Workflow data + Workflow run data
  SELECT
    COALESCE(wf.datepartition, chat.datepartition) AS date_partition
    , COALESCE(wf.run_id, chat.workflow_run_id) as run_id
    , COALESCE(wf.useremail, chat.useremail) as user_email
    , COALESCE(wf.department, chat.department) as department
  FROM (
    SELECT
      DISTINCT jsonPayload.workflow.runid AS run_id
      , jsonPayload.user.*
      , DATE(timestamp) AS datepartition
    FROM glean_customer_event
    WHERE
      jsonPayload.workflow.workflowid IN UNNEST(chat_workflow_ids)
      AND jsonPayload.workflow.initiator = 'USER'

    UNION ALL

    SELECT
      DISTINCT jsonPayload.workflowrun.runid AS run_id
      , jsonPayload.user.*
      , DATE(timestamp) AS datepartition
    FROM glean_customer_event, UNNEST(jsonPayload.workflowrun.workflowexecutions) AS wfe
    WHERE jsonPayload.type = 'WORKFLOW_RUN'
    AND jsonPayload.workflowrun.initiator = 'USER'
    AND wfe.workflowid IN UNNEST(chat_workflow_ids)
  ) wf
  -- Join the unioned workflow data with chat data
  FULL OUTER JOIN (
    SELECT
      DATE(timestamp) AS datepartition
      , jsonPayload.user.*
      , jsonPayload.chat.workflowrunid AS workflow_run_id
    FROM glean_customer_event
    WHERE jsonPayload.type = 'CHAT'
    AND jsonPayload.chat.initiator = 'USER'
  ) chat
  ON wf.run_id = chat.workflow_run_id
)
, workflow_conversation AS(
  SELECT 
    jsonPayload.workflowconversation.workflowrunid as run_id
    , MAX(CASE WHEN message.author='USER' THEN text END) as user_query
    , MAX(CASE WHEN message.author='GLEAN' THEN text END) as response
  FROM
    glean_customer_event, UNNEST(jsonPayload.workflowconversation.messages) as message
  WHERE
    COALESCE(jsonPayload.workflowconversation.workflowrunid, '') != ''
  GROUP BY 1
)
SELECT
  chat_usage.* except(run_id)
  , user_query
  , response
  , chat_usage.run_id
FROM chat_usage
LEFT JOIN workflow_conversation
USING(run_id)
ORDER by date_partition DESC
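To slice this by department (one of the segments mentioned in the use case), a sketch assuming the query above is wrapped as a CTE or view named chat_conversations (a hypothetical name):

-- Sketch only: chat_conversations is a hypothetical wrapper around the query above
SELECT
  date_partition
  , department
  , COUNT(DISTINCT run_id) AS num_chats
  , COUNT(DISTINCT user_email) AS num_users
FROM chat_conversations
GROUP BY 1, 2
ORDER BY 1 DESC, 3 DESC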
Output: Complete chat citation clicks log with user, datasource, and doc information
Grain: Individual chat run level over configurable time period
Use Case: Analyze chat citation click data
DECLARE lookback_period INT64 DEFAULT 25;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID, or leave as NULL to include all projects

WITH 
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
  SELECT 
    DATE(timestamp) AS datepartition
    , resource.labels.project_id AS project_id
    , jsonPayload.User.UserId AS userid
    , jsonPayload
    , timestamp
  FROM 
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= CASE 
      WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
      WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
      WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
      WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
      ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))  -- Default to DAY
    END
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)

,ccc AS (
  SELECT
    project_id,
    jsonPayload.user.userid     AS user_id,
    jsonPayload.chatcitationclick.datasource AS citation_datasource,
    jsonPayload.chatcitationclick.trackingtoken AS citation_click_tracking_token,
    timestamp
  FROM glean_customer_event
  WHERE jsonPayload.Type = 'CHAT_CITATION_CLICK'
),

wr AS (
  SELECT
    project_id,
    jsonPayload.user.userid    AS user_id,
    jsonPayload.workflowrun.chatsessionid AS chat_session_id,
    jsonPayload.workflowrun.runid        AS run_id,
    search_tracking_token
  FROM glean_customer_event, UNNEST(jsonPayload.workflowrun.actionexecutions) AS ae,
       UNNEST(ae.searchtrackingtokens) AS search_tracking_token
  WHERE jsonPayload.Type = 'WORKFLOW_RUN'
),

cc AS (
  SELECT
    jsonPayload.chatcitations.workflowrunid  AS run_id,
    jsonPayload.chatcitations.chatsessionid  AS chat_session_id,
    c.trackingtoken                          AS citation_tracking_token,
    c.sourcedocument.datasource              AS cited_datasource,
    c.sourcedocument.id                      AS doc_id,
    c.sourcedocument.title                   AS doc_title,  -- NULL in scrubbed logs
    c.sourcedocument.url                     AS doc_url     -- NULL in scrubbed logs
  FROM glean_customer_event,
       UNNEST(jsonPayload.chatcitations.citations) AS c
  WHERE jsonPayload.Type = 'CHAT_CITATIONS'
)

SELECT
  ccc.timestamp,
  ccc.user_id,
  ccc.citation_datasource,
  wr.chat_session_id,
  wr.run_id,
  cc.doc_id,
  cc.doc_title,
  cc.doc_url
FROM ccc
LEFT JOIN wr
  ON ccc.project_id = wr.project_id
 AND ccc.user_id    = wr.user_id
 AND ccc.citation_click_tracking_token = wr.search_tracking_token
LEFT JOIN cc
  ON wr.run_id         = cc.run_id
 AND wr.chat_session_id = cc.chat_session_id
 AND ccc.citation_click_tracking_token = cc.citation_tracking_token

Agent Metrics

Agent Types Explained: When viewing query results in this section, you’ll see agents categorized by type:
  • AGENT: Standard conversational agents created and managed by users. These are the primary agents that end-users interact with directly.
  • SUBAGENT: Internal helper agents that are called by other agents to perform specialized tasks. These run behind the scenes and are not directly invoked by users. Common examples include search agents, data retrieval agents, and tool execution agents. Note: In production environments, you may see a high volume of subagent runs as they are triggered by parent agents during workflow execution.
  • AGENT_TEMPLATE: Pre-built agent configurations provided by Glean that users can clone, customize, and deploy as their own agents. These serve as starting points for common use cases.
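To get a quick sense of how these types are distributed in your environment, a minimal sketch that counts distinct published agents by type, using the same PRODUCT_SNAPSHOT fields as the agent queries below (replace the table placeholder and add a date filter as needed):

SELECT
  jsonPayload.productsnapshot.workflow.namespaceenum AS agent_type
  , COUNT(DISTINCT jsonPayload.productsnapshot.workflow.workflowid) AS num_agents
FROM
  -- replace <glean_customer_event_table> with your firm's table name
  `<glean_customer_event_table>`
WHERE
  jsonPayload.type = 'PRODUCT_SNAPSHOT'
  AND jsonPayload.productsnapshot.type = 'WORKFLOW'
  AND jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
  -- Filter out draft-only agents that were never published/deployed
  AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
GROUP BY 1
ORDER BY 2 DESC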
Output: Agent-level metrics including total runs, first and last usage dates, and agent type
Grain: Per agent aggregation over configurable time period (default: 185 days)
Use Case: Track agent lifecycle from creation through usage, analyze agent popularity and engagement trends, identify stale agents
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 185;

WITH glean_customer_event AS (
  SELECT
    resource,
    DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
    jsonPayload,
    timestamp,
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Get agent metadata from latest snapshots
, agent_info AS (
  SELECT
    resource.labels.project_id,
    jsonPayload.productsnapshot.workflow.workflowid AS workflow_id,
    ARRAY_AGG(jsonPayload.productsnapshot.workflow.name ORDER BY timestamp DESC LIMIT 1)[OFFSET(0)] AS name,
    ARRAY_AGG(jsonPayload.productsnapshot.workflow.namespaceenum ORDER BY timestamp DESC LIMIT 1)[OFFSET(0)] AS workflow_type,
    ARRAY_AGG(
      PARSE_DATE('%Y-%m-%d', SUBSTR(jsonPayload.productsnapshot.workflow.createdat, 1, 10)) 
      ORDER BY timestamp DESC LIMIT 1
    )[OFFSET(0)] AS created_date,
    ARRAY_AGG(jsonPayload.productsnapshot.workflow.createdby ORDER BY timestamp DESC LIMIT 1)[OFFSET(0)] AS createdby
  FROM glean_customer_event
  WHERE 
    jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
    -- Filter out draft-only agents that were never published/deployed for actual use
    AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
  GROUP BY 1, 2
)
-- Agent runs from WORKFLOW_RUN (must be in agent_info)
, agent_runs AS (
  SELECT
    DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
    resource.labels.project_id,
    workflow_execution.workflowid AS workflow_id,
    jsonpayload.workflowrun.runid AS run_id
  FROM glean_customer_event, UNNEST(jsonpayload.workflowrun.workflowexecutions) AS workflow_execution
  WHERE
    jsonPayload.type = 'WORKFLOW_RUN'
    AND workflow_execution.status = 'SUCCESS'
)
-- Filter agent runs to only those in agent_info
, agent_runs_filtered AS (
  SELECT ar.*
  FROM agent_runs ar
  INNER JOIN agent_info ai ON ar.workflow_id = ai.workflow_id
)
-- App runs from CHAT (don't require agent_info)
, app_runs_from_chat AS (
  SELECT
    DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
    resource.labels.project_id,
    jsonpayload.chat.applicationid AS workflow_id,
    IF(
      jsonpayload.chat.workflowrunid IS NOT NULL AND jsonpayload.chat.workflowrunid != '',
      jsonpayload.chat.workflowrunid,
      jsonpayload.chat.qtt
    ) AS run_id
  FROM glean_customer_event
  WHERE
    jsonPayload.type = 'CHAT'
    -- Filter for valid agent/app IDs: Glean assigns 16-character IDs to custom apps/agents
    AND LENGTH(jsonpayload.chat.applicationid) = 16
)
-- App runs from WORKFLOW_RUN.applicationid (don't require agent_info)
, app_runs_from_workflow AS (
  SELECT
    DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
    resource.labels.project_id,
    jsonPayload.workflowrun.applicationid AS workflow_id,
    jsonpayload.workflowrun.runid AS run_id
  FROM glean_customer_event, UNNEST(jsonpayload.workflowrun.workflowexecutions) AS workflow_execution
  WHERE
    jsonPayload.type = 'WORKFLOW_RUN'
    AND jsonPayload.workflowrun.applicationid IS NOT NULL
    AND jsonPayload.workflowrun.applicationid != ''
    AND workflow_execution.status = 'SUCCESS'
  GROUP BY 1, 2, 3, 4
)
-- Combine all runs (both agent and app)
, all_runs AS (
  SELECT * FROM agent_runs_filtered
  UNION ALL
  SELECT * FROM app_runs_from_chat
  UNION ALL
  SELECT * FROM app_runs_from_workflow
)
-- Add creation dates as activity (only for agents with metadata)
, all_activity AS (
  SELECT datepartition, project_id, workflow_id, run_id
  FROM all_runs
  
  UNION ALL
  
  SELECT 
    created_date AS datepartition,
    project_id,
    workflow_id,
    CONCAT('creation_', workflow_id) AS run_id
  FROM agent_info
  WHERE
    created_date >= CURRENT_DATE - INTERVAL lookback_days DAY
    AND created_date <= CURRENT_DATE - INTERVAL '1' DAY
    AND created_date != DATE('0001-01-01')
    AND createdby IS NOT NULL
)

-- Final aggregation with LEFT JOIN to include apps without metadata
SELECT
  aa.project_id,
  aa.workflow_id AS agent_id,
  COALESCE(ai.name, aa.workflow_id) AS agent_name,
  ai.workflow_type AS agent_type,
  COUNT(DISTINCT CASE WHEN aa.run_id NOT LIKE 'creation_%' THEN aa.run_id END) AS num_agent_runs,
  MAX(aa.datepartition) AS last_usage_date,
  MIN(aa.datepartition) AS first_usage_date
FROM all_activity aa
LEFT JOIN agent_info ai ON aa.workflow_id = ai.workflow_id
GROUP BY 1, 2, 3, 4
ORDER BY 5 DESC;
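Both parameters at the top of this query are optional knobs. As a minimal illustration (the values below are placeholders, not recommendations), restricting the results to a single project and a 90-day window only requires changing the two DECLARE statements:
-- Placeholder values for illustration only; substitute your own project ID and window.
DECLARE project_id STRING DEFAULT 'your-gcp-project-id';  -- placeholder: your logging project ID
DECLARE lookback_days INT64 DEFAULT 90;                   -- placeholder: 90-day analysis window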
Output: Detailed agent creation records with creator, permissions, and metadata
Grain: Individual agent level for agents created within the lookback period (default: 185 days)
Use Case: Track who is creating agents, analyze permission settings, identify agent creation trends, and monitor agent lifecycle from creation
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 185;

WITH glean_customer_event AS (
  SELECT
    resource,
    DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
    jsonPayload,
    timestamp,
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Get latest workflow snapshot info (filter to agents only)
, workflows_last_snapshot_date AS (
  SELECT
    resource.labels.project_id,
    jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
    MAX(DATE(DATE_TRUNC(timestamp, DAY))) AS latest_snapshot_date
  FROM glean_customer_event
  WHERE 
    jsonPayload.type = 'PRODUCT_SNAPSHOT'
    AND jsonPayload.productsnapshot.type = 'WORKFLOW'
    AND jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
    -- Filter out draft-only agents that were never published/deployed for actual use
    AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
  GROUP BY 1, 2
)
-- Extract all workflow snapshot data with parsed dates (agents only)
, all_workflow_snapshots AS (
  SELECT
    resource.labels.project_id,
    jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
    DATE(DATE_TRUNC(timestamp, DAY)) AS snapshot_date,
    jsonPayload.productsnapshot.workflow.name AS name,
    -- Parse dates once to reuse
    PARSE_DATE('%Y-%m-%d', SUBSTR(jsonPayload.productsnapshot.workflow.createdat, 1, 10)) AS created_date,
    jsonPayload.productsnapshot.workflow.createdby AS createdby,
    jsonPayload.productsnapshot.workflow.trigger.type AS trigger_type,
    jsonPayload.productsnapshot.workflow.namespaceenum AS workflow_type,
    permissions.role AS role,
    permissions.id AS permission_id,
    permissions.type AS permission_type
  FROM glean_customer_event, UNNEST(jsonPayload.productsnapshot.workflow.roles) AS permissions
  WHERE
    jsonPayload.type = 'PRODUCT_SNAPSHOT'
    AND jsonPayload.productsnapshot.type = 'WORKFLOW'
    AND jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
)
-- Get most recent workflow metadata
, all_workflows_info AS (
  SELECT
    ws.project_id,
    ws.agent_id,
    ws.name,
    ws.created_date,
    ws.createdby,
    ws.trigger_type,
    ws.workflow_type,
    ws.role,
    ws.permission_id,
    ws.permission_type,
    -- Mark as deleted if snapshot is older than yesterday
    ws.snapshot_date < CURRENT_DATE - INTERVAL '1' DAY AS is_deleted
  FROM all_workflow_snapshots ws
  INNER JOIN workflows_last_snapshot_date lsd
    ON ws.agent_id = lsd.agent_id
    AND ws.snapshot_date = lsd.latest_snapshot_date
)
-- Aggregate workflow metadata with permission levels
, all_workflows AS (
  SELECT DISTINCT
    created_date,
    project_id,
    -- Handle system-created workflows (date = 0001-01-01)
    CASE 
      WHEN created_date = DATE('0001-01-01') THEN NULL
      ELSE createdby
    END AS creator_id,
    agent_id,
    name,
    trigger_type,
    workflow_type,
    created_date = DATE('0001-01-01') AS is_system_created_workflow,
    is_deleted,
    -- Determine viewer audience based on permissions
    MAX(CASE
      WHEN role IN ('VIEWER', 'EDITOR') AND permission_type = 'ALL' THEN '4) All users'
      WHEN role IN ('VIEWER', 'EDITOR') AND permission_type = 'DEPARTMENT' THEN '3) Users in select departments'
      WHEN role IN ('VIEWER', 'EDITOR') AND permission_type = 'USER' THEN '2) Select users'
      ELSE '1) Owner only'
    END) AS viewer_audience,
    -- Determine editor audience
    MAX(CASE
      WHEN role = 'EDITOR' AND permission_type = 'ALL' THEN '4) All users'
      WHEN role = 'EDITOR' AND permission_type = 'DEPARTMENT' THEN '3) Users in select departments'
      WHEN role = 'EDITOR' AND permission_type = 'USER' THEN '2) Select users'
      ELSE '1) Owner only'
    END) AS editor_audience
  FROM all_workflows_info
  GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
)

-- Agent creation metrics
SELECT
  created_date,
  creator_id,
  project_id,
  agent_id,
  name,
  workflow_type AS agent_type,
  trigger_type,
  viewer_audience,
  editor_audience,
  is_deleted
FROM all_workflows
WHERE
  workflow_type IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
  -- Exclude system-created workflows
  AND is_system_created_workflow = FALSE
  AND creator_id IS NOT NULL
  -- Only include creations within our time window
  AND created_date >= CURRENT_DATE - INTERVAL lookback_days DAY
  AND created_date <= CURRENT_DATE - INTERVAL '1' DAY
ORDER BY
  created_date DESC,
  creator_id;
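If a trend view is more useful than individual creation records, the output above can be rolled up by day and creator. A minimal sketch, assuming the detailed query has been saved as a view named agent_creations (a hypothetical name):
-- Sketch only: daily agent creation counts per creator, assuming the detailed
-- query above is saved as a view called `agent_creations` (hypothetical name).
SELECT
  created_date,
  creator_id,
  COUNT(DISTINCT agent_id) AS agents_created,
  -- How many of that day's creations have since disappeared from snapshots
  COUNTIF(is_deleted) AS agents_since_deleted
FROM agent_creations
GROUP BY 1, 2
ORDER BY created_date DESC, agents_created DESC;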
Output: Agent feedback data including upvotes, downvotes, and sentiment analysis per user and agent
Grain: Per user per agent aggregation over configurable time period (default: 28 days)
Use Case: Track agent response quality through user feedback, identify poorly performing agents, measure user satisfaction trends
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 28;  -- Number of days to look back (default: 28 days)

WITH glean_customer_event AS (
  SELECT
    resource,
    DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
    jsonPayload,
    timestamp,
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE 
    timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Get latest workflow snapshot info (filter to agents only)
, workflows_last_snapshot_date AS (
  SELECT
    resource.labels.project_id,
    jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
    MAX(DATE(DATE_TRUNC(timestamp, DAY))) AS latest_snapshot_date
  FROM glean_customer_event
  WHERE 
    jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
    -- Filter out draft-only agents that were never published/deployed for actual use
    AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
  GROUP BY 1, 2
)
-- Extract workflow snapshot data (agents only)
, all_workflow_snapshots AS (
  SELECT
    resource.labels.project_id,
    jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
    DATE(DATE_TRUNC(timestamp, DAY)) AS snapshot_date,
    jsonPayload.productsnapshot.workflow.name AS name,
    jsonPayload.productsnapshot.workflow.namespaceenum AS workflow_type
  FROM glean_customer_event
  WHERE
    jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
    -- Filter out draft-only agents that were never published/deployed for actual use
    AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
)
-- Get most recent agent metadata
, agent_info AS (
  SELECT
    ws.project_id,
    ws.agent_id,
    ws.name,
    ws.workflow_type
  FROM all_workflow_snapshots ws
  INNER JOIN workflows_last_snapshot_date lsd
    ON ws.agent_id = lsd.agent_id
    AND ws.snapshot_date = lsd.latest_snapshot_date
)
-- Agent feedback from CHAT_FEEDBACK events
, agent_feedback_raw AS (
  SELECT
    gce.jsonPayload.chatfeedback.runid AS run_id,
    gce.jsonPayload.chatfeedback.agentid AS agent_id,
    gce.jsonPayload.user.userid AS user_id,
    gce.resource.labels.project_id AS project_id,
    MAX(ai.name) AS agent_name,
    MAX(ai.workflow_type) AS agent_type,
    -- Take latest vote, disregard manual feedback
    MAX_BY(gce.jsonPayload.chatfeedback.event, gce.timestamp) AS vote
  FROM agent_info ai
  INNER JOIN glean_customer_event gce
    ON gce.jsonPayload.chatfeedback.agentid = ai.agent_id
  WHERE
    gce.jsonPayload.type = 'CHAT_FEEDBACK'
    AND gce.jsonPayload.chatfeedback.event IN ('UPVOTE', 'DOWNVOTE')
  GROUP BY 1, 2, 3, 4
)
-- Aggregate feedback by date, agent, and user
, agent_feedback AS (
  SELECT
    agent_id,
    user_id,
    project_id,
    agent_name,
    agent_type,
    SUM(CASE WHEN vote = 'UPVOTE' THEN 1 ELSE 0 END) AS num_upvotes,
    SUM(CASE WHEN vote = 'DOWNVOTE' THEN 1 ELSE 0 END) AS num_downvotes,
    COUNT(DISTINCT run_id) AS num_feedback_instances
  FROM agent_feedback_raw
  GROUP BY 1, 2, 3, 4, 5
)

-- Final output with feedback metrics
SELECT
  project_id,
  agent_id,
  agent_name,
  agent_type,
  user_id,
  num_upvotes,
  num_downvotes,
  num_feedback_instances,
  -- Calculate net sentiment
  num_upvotes - num_downvotes AS net_sentiment,
  -- Calculate upvote rate
  CASE 
    WHEN (num_upvotes + num_downvotes) > 0 
    THEN CAST(num_upvotes AS FLOAT64) / (num_upvotes + num_downvotes)
    ELSE NULL 
  END AS upvote_rate
FROM agent_feedback
WHERE 
  -- Only include agents with actual feedback
  (num_upvotes + num_downvotes) > 0
ORDER BY
  num_feedback_instances DESC;
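Because the output above is one row per user per agent, judging an agent's overall response quality usually needs one more rollup to the agent level. A minimal sketch, assuming the query above has been saved as a view named agent_feedback_by_user (a hypothetical name):
-- Sketch only: roll per-user feedback up to one row per agent, assuming the
-- query above is saved as a view called `agent_feedback_by_user` (hypothetical name).
SELECT
  project_id,
  agent_id,
  ANY_VALUE(agent_name) AS agent_name,
  COUNT(DISTINCT user_id) AS users_giving_feedback,
  SUM(num_upvotes) AS total_upvotes,
  SUM(num_downvotes) AS total_downvotes,
  -- SAFE_DIVIDE returns NULL instead of erroring if an agent has no votes
  SAFE_DIVIDE(SUM(num_upvotes), SUM(num_upvotes) + SUM(num_downvotes)) AS overall_upvote_rate
FROM agent_feedback_by_user
GROUP BY 1, 2
ORDER BY total_downvotes DESC;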

GleanBot User Level Activity

Output: Proactive GleanBot messages with user interaction details and engagement metrics
Grain: Individual message level with user interaction data
Use Case: Analyze bot effectiveness, user engagement patterns, and proactive messaging success rates
DECLARE project_id STRING DEFAULT NULL;  -- Replace with your project ID or leave as NULL

WITH glean_customer_event AS (
  SELECT *
  FROM
    -- replace <glean_customer_event_table> with your firm's table name
    `<glean_customer_event_table>`
  WHERE
    timestamp > TIMESTAMP(CURRENT_DATE() - INTERVAL 1 DAY)
    -- Optional project filtering - only filter if project_id is specified
    AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, gleanbot_activity AS (
  SELECT
    jsonPayload.user.userid AS user_id
    ,jsonPayload.gleanbotactivity.eventtrackingtoken AS event_tracking_token
    ,jsonPayload.gleanbotactivity.stt AS stt
    ,DATE(timestamp) AS datepartition
    ,jsonPayload.gleanbotactivity.latenciesmillismap.proactive_message AS latency_proactive_message
    ,jsonPayload.gleanbotactivity.eventtype AS event_type_verbose
    ,jsonPayload.gleanbotactivity.responseevents AS response_events_verbose
    ,jsonPayload.gleanbotactivity.workflowexecutionpoints AS workflow_execution_points_verbose
    ,(jsonPayload.gleanbotactivity.applicationid is NOT NULL AND LENGTH(jsonPayload.gleanbotactivity.applicationid) != 0) AS is_app_session
  FROM
    glean_customer_event
  WHERE
    jsonPayload.type = 'GLEAN_BOT_ACTIVITY'
)
, message_data AS (
  SELECT *
  FROM gleanbot_activity
  WHERE
    (
      -- Identify messages where proactive workflow started
      'PROACTIVE_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
      -- Identify messages where proactive workflow for assistant type started
      OR 'PROACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
      -- Identify messages where proactive workflow for search type started
      OR 'PROACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
      -- All messages that were not DMs and did not mention @Glean or \glean fall in this category
      OR event_type_verbose IN (
        'NON_TAGGED_MESSAGE'
        -- All events for which proactive thread summarizer workflow is initiated
        , 'PROACTIVE_DISCUSSION_SUMMARIZER'
        -- All events where proactive digest was clicked
        , 'VIEW_DIGEST_CLICK'
        -- All events where digest was subscribed to and thus proactively sent over DM
        , 'DAILY_DIGEST_REMINDER'
      )
    )
    -- Remove the events where a proactive workflow cannot be triggered (e.g., an invalid channel)
    AND 'PROACTIVE_BOT_DISABLED_FAILURE' NOT IN UNNEST(workflow_execution_points_verbose)
    -- Remove the events that represent messages sent by other bots, so we don't converse with Slack automations
    AND 'DROP_BOT_MESSAGE' NOT IN UNNEST(workflow_execution_points_verbose)
    AND 'DROP_EXTERNAL_CHANNEL_MESSAGE' NOT IN UNNEST(workflow_execution_points_verbose)
)
, interaction_data AS (
  SELECT *
  FROM gleanbot_activity
  WHERE
    event_type_verbose IN (
      -- All events where a user clicked on show sources modal
      'SHOW_SOURCES',
      -- All events for which helpful/not helpful buttons click was initiated
      'SHARE_HELPFULNESS',
      -- All events for which sharing workflow was started
      'SHARE_CLICK',
      -- All events for which manual feedback workflow was started
      'SUBMIT_FEEDBACK',
      -- All events for which dismiss suggestion workflow was initiated
      'DISMISS_SUGGESTION'
    )

)

SELECT
  message_data.datepartition
  -- ID of the user who received the proactive message
  ,message_data.user_id
  -- Unique identifier for the proactive message
  ,message_data.event_tracking_token
  -- Whether the user was in an active app session.
  ,message_data.is_app_session
  -- Latency in milliseconds for the proactive message delivery
  ,message_data.latency_proactive_message
  -- Tracking token of the user's interaction with the proactive message
  ,interaction_data.event_tracking_token AS interaction_event_tracking_token
  -- ID of the user who interacted with the proactive message
  ,interaction_data.user_id AS interacting_user_id
  -- Whether the workflow was triggered (via non-tagged or summarizer event)
  ,(CASE
    WHEN message_data.event_type_verbose = 'NON_TAGGED_MESSAGE' THEN TRUE
    WHEN message_data.event_type_verbose = 'PROACTIVE_DISCUSSION_SUMMARIZER' THEN TRUE
    ELSE FALSE
    END) AS workflow_started
  -- Whether the message was classified as a question. Commented out for now
  -- ,IF(question_detection_data.isquestion = TRUE, TRUE, FALSE) AS question_detected
  -- Bot responded: includes proactive message, or summarization thread/DM
  ,(CASE
    WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    WHEN 'PROACTIVE_MESSAGE' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    ELSE FALSE
    END) AS bot_responded
  -- Bot response was explicitly triggered by user (e.g., digest opened or summary clicked)
  ,(CASE
    WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    WHEN 'VIEW_DIGEST' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    ELSE FALSE
    END) AS bot_responded_on_users_request
  -- Response was shared by the user
  ,IF(interaction_data.event_type_verbose = 'SHARE_CLICK', TRUE, FALSE) AS response_shared
  -- Response was dismissed or downvoted
  ,(CASE
    WHEN 'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflow_execution_points_verbose) THEN TRUE
    WHEN interaction_data.event_type_verbose = 'DISMISS_SUGGESTION' THEN TRUE
    ELSE FALSE
    END) AS response_downvoted
  -- Citations shown to the user
  ,IF(interaction_data.event_type_verbose = 'SHOW_SOURCES', TRUE, FALSE) AS citations_shown
  -- Response was marked helpful
  ,IF('SHARE_HELPFUL' IN UNNEST(interaction_data.response_events_verbose), TRUE, FALSE) AS voted_helpful
  -- Response was marked not helpful
  ,IF('SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.response_events_verbose), TRUE, FALSE) AS voted_not_helpful
  -- Daily digest proactively sent to the user
  ,IF('DAILY_DIGEST_REMINDER_SENT' IN UNNEST(message_data.response_events_verbose), TRUE, FALSE) AS digest_sent
  -- User consumed/viewed the digest
  ,IF('VIEW_DIGEST' IN UNNEST(message_data.response_events_verbose), TRUE, FALSE) AS digest_consumed
  -- Bot sent a proactive thread or DM summary
  ,(CASE
    WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
    ELSE FALSE
    END) AS proactive_summary_sent
FROM message_data
LEFT JOIN interaction_data
ON message_data.event_tracking_token = interaction_data.stt;
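Since the result is one row per proactive message joined to any recorded interaction, a daily engagement summary is a natural follow-up. A minimal sketch, assuming the query above has been saved as a view named gleanbot_proactive_messages (a hypothetical name):
-- Sketch only: daily engagement summary over the message-level output above,
-- assuming it is saved as a view called `gleanbot_proactive_messages` (hypothetical name).
SELECT
  datepartition,
  COUNT(DISTINCT event_tracking_token) AS proactive_messages,
  COUNT(DISTINCT IF(bot_responded, event_tracking_token, NULL)) AS messages_with_bot_response,
  COUNT(DISTINCT IF(interaction_event_tracking_token IS NOT NULL, event_tracking_token, NULL)) AS messages_with_user_interaction,
  -- Helpful rate among messages that received an explicit helpful / not-helpful vote
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(voted_helpful, event_tracking_token, NULL)),
    COUNT(DISTINCT IF(voted_helpful OR voted_not_helpful, event_tracking_token, NULL))
  ) AS helpful_rate
FROM gleanbot_proactive_messages
GROUP BY 1
ORDER BY datepartition DESC;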