Sample Queries
The following examples illustrate BigQuery queries for common analytics, assuming the logs have been copied to a BigQuery table, referenced in the queries below as <glean_customer_event_table>.
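-- Session satisfaction: share of search sessions with at least one result click, per day (last 14 days)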
WITH queries_data AS (
SELECT
DISTINCT TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
),
clicks_data AS (
SELECT
DISTINCT jsonPayload.SearchClick.TrackingToken AS qtt
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
)
SELECT
q.utc_day --metric date
, COUNT(DISTINCT IF(c.qtt IS NOT NULL, q.stt, NULL)) / GREATEST(COUNT(DISTINCT q.stt), 1) AS session_satisfaction --Ratio of satisfied sessions to all sessions
FROM
queries_data q
LEFT JOIN
clicks_data c
ON
q.qtt = c.qtt
GROUP BY 1
ORDER BY 1;
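-- Mean average precision (MAP) of clicked search results, per day (last 14 days)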
WITH clicks_data AS (
SELECT
jsonPayload.SearchClick.TrackingToken AS qtt
,jsonPayload.searchclick.position AS position
,MAX(TIMESTAMP_TRUNC(`timestamp`, DAY)) AS utc_day
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
GROUP BY
1,2
),
precision_data AS (
SELECT
utc_day
, qtt
, position
, COUNT(*) OVER (PARTITION BY qtt ORDER BY position) / (1 + position) AS precision
FROM
clicks_data
),
average_precision_data AS (
SELECT
utc_day
, qtt
, AVG(precision) AS average_precision
FROM
precision_data
GROUP BY
1, 2
)
SELECT
utc_day --metric_date
, AVG(average_precision) AS mean_average_precision
FROM
average_precision_data
GROUP BY
1
ORDER BY
1;
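-- Median (p50) time, in milliseconds, from first query to last click for sessions with at least one click, per day (last 14 days)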
WITH queries_data AS (
SELECT
DISTINCT TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- filter for user queries (exclude background and slackbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
),
clicks_data AS (
SELECT
jsonPayload.SearchClick.TrackingToken AS qtt
, MAX(timestamp) as timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
GROUP BY 1
),
sessions_w_clicks AS (
SELECT
utc_day --metric_date
, stt
, first_query_ts
, last_click_ts
, timestamp_diff(last_click_ts, first_query_ts, millisecond) AS time_from_first_query_to_last_click
FROM (
SELECT
stt
, MIN(utc_day) AS utc_day
, MIN(q.timestamp) AS first_query_ts
, MIN(c.timestamp) AS first_click_ts
, MAX(c.timestamp) AS last_click_ts
, COUNTIF(c.qtt IS NOT NULL) AS num_clicks
FROM
queries_data q
LEFT JOIN
clicks_data c
ON
q.qtt = c.qtt
GROUP BY
1
)
WHERE
num_clicks > 0
)
SELECT
utc_day --metric_date
-- , avg(time_from_first_query_to_last_click) as mean_time_from_first_query_to_last_click -- surface average if desired
, approx_quantiles(time_from_first_query_to_last_click, 100)[offset(50)] AS p50_time_from_first_query_to_last_click
FROM
sessions_w_clicks
GROUP BY
1
ORDER BY
1
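-- Chat message volume and feedback (upvotes, downvotes, shared feedback) per application, per day, excluding the default Gleanchat application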
WITH gleanchat_feedback AS (
SELECT
DISTINCT TIMESTAMP_TRUNC(m.`timestamp`, DAY) AS utc_day
, m.jsonPayload.User.UserId
, m.jsonPayload.Chat.Qtt --Query Tracking Token
, COALESCE(m.jsonPayload.Chat.ApplicationId, 'Default Gleanchat') AS ApplicationId
, f.jsonPayload.ChatFeedback.Event
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>` m
LEFT JOIN `<glean_customer_event_table>` f
ON m.jsonPayload.Chat.ResponseMessageId = f.jsonPayload.ChatFeedback.MessageId
WHERE
m.jsonPayload.Type = 'CHAT'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
)
SELECT
ApplicationId
, utc_day --date partition
, COUNT(DISTINCT Qtt) AS num_chat_messages
, COUNT(DISTINCT CASE WHEN Event = 'UPVOTE' THEN Qtt END) AS upvotes
, COUNT(DISTINCT CASE WHEN Event = 'DOWNVOTE' THEN Qtt END) AS downvotes
, COUNT(DISTINCT CASE WHEN Event = 'MANUAL_FEEDBACK' THEN Qtt END) AS shared_feedback
FROM
gleanchat_feedback
WHERE
COALESCE(NULLIF(applicationid,''), 'Default Gleanchat') != 'Default Gleanchat'
GROUP BY
1, 2
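-- Daily active users (DAU): distinct users with a qualifying client event or feature event, per day (last 30 days)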
WITH
processed_ca_logs AS (
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.clientevent.event AS event_name
, timestamp AS event_timestamp
, jsonPayload.user.userid AS user_id
, jsonPayload.clientevent.category AS category
, jsonPayload.clientevent.label AS label
, jsonPayload.clientevent.pagepath AS page_path
, jsonPayload.clientevent.SessionTrackingToken AS stt
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE jsonPayload.Type = 'CLIENT_EVENT'
-- change date ranges as needed
AND CAST(DATE_TRUNC(`timestamp`, DAY) AS DATE) >= CURRENT_DATE - 30
),
-- We do not attribute most of the events fired to DAU unless the event corresponds to a
-- user action that leads to some level of value creation. The filter below captures workflows
-- corresponding to such greenlisted user actions
filtered_ca_logs AS (
SELECT
*
FROM
processed_ca_logs
WHERE
(
-- Depending on category, opening links/documents counts towards DAU
event_name IN (
'OpenDocument'
, 'OpenLink'
)
AND category IN (
-- Autocomplete and Search interactions count towards DAU
'Search Result'
, 'Autocomplete'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
, 'New Tab Page'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
, 'Featured Question and Answer'
, 'Generated Question and Answer'
, 'Pins'
-- Golinks interactions count towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Verification'
, 'Datasource Auth'
, 'Insights'
-- Feature related interactions count towards DAU
, 'Chat'
, 'Result Preview'
, 'App Card'
, 'Customer Card'
, 'Search'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
OR (
-- Depending on category, certain feature related clicks count towards DAU
event_name IN (
'Click'
)
AND category IN (
-- Autocomplete and Search interactions count towards DAU
'Autocomplete'
, 'Search Result'
, 'Datasource Filter'
, 'Facets'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- Sidebar interactions count towards DAU
, 'Sidebar Tabs'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
-- Golinks interactions count towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Datasource Auth'
, 'User Menu'
, 'Admin Console'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
-- CRUD operations on User Generated Content (UGC) always count towards DAU
OR (
event_name IN (
'Add'
, 'Create'
, 'Delete'
) AND category IN (
'Announcements'
, 'Answers'
, 'Collections'
)
)
OR (
event_name IN
(
'View'
)
AND category IN
(
-- User Generated Content(UGC) interactions count towards DAU
'Announcements',
'Answers',
'Collections',
-- Directory tab interactions count towards DAU
'Person Card',
'Team Card',
'Org Chart'
) and page_path not in ('/', '/ntp', '/search')
)
),
feature_logs AS (
SELECT
DISTINCT jsonPayload.user.userid AS user_id
, TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
((
jsonPayload.Type IN ('SEARCH')
-- User initiated search queries only
AND jsonpayload.Search.initiator = 'USER'
)
OR (
jsonPayload.Type IN ('CHAT')
-- User initiated chat queries only
AND jsonpayload.Chat.initiator = 'USER'
)
OR (
jsonPayload.Type IN ('SEARCH_CLICK','CHAT_FEEDBACK','AI_SUMMARY','AI_ANSWER','SEARCH_FEEDBACK')
)
OR (
jsonPayload.Type IN ('SHORTCUT')
AND jsonPayload.shortcut.event IN ('REDIRECT', 'CREATE', 'DELETE', 'UPDATE')
)
OR (
jsonPayload.Type IN ('AUTOCOMPLETE')
-- Include only autocomplete events with a query length of at least 1 so that only intentional engagement is counted.
AND jsonPayload.autocomplete.querylength >= 1.0
))
-- change date ranges as needed
AND CAST(DATE_TRUNC(`timestamp`, DAY) AS DATE) >= CURRENT_DATE - 30
)
SELECT
utc_day
, COUNT(DISTINCT user_id) AS DAU
FROM (
SELECT
utc_day
, user_id
FROM
filtered_ca_logs
UNION ALL
SELECT
utc_day
, user_id
FROM
feature_logs
)
GROUP BY
1
ORDER BY
1 DESC
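-- Daily search volume: distinct searchers, queries, sessions, and queries per session (last 7 days)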
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
,COUNT(DISTINCT jsonPayload.User.UserId) AS num_searchers
,COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_queries
,COUNT(DISTINCT jsonPayload.Search.SessionTrackingToken) AS num_sessions
,COUNT(DISTINCT jsonPayload.Search.TrackingToken)/COUNT(DISTINCT jsonPayload.Search.SessionTrackingToken) AS Queries_per_session
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for data from the last 7 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
GROUP BY 1
ORDER BY 1;
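-- Daily search clicks and the number of queries with at least one click (last 7 days)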
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
,count(jsonPayload.SearchClick.TrackingToken) AS num_clicks
,count(DISTINCT jsonPayload.SearchClick.TrackingToken) AS num_queries_with_clicks
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
-- filter for data from the last 7 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
GROUP BY 1
ORDER BY 1;
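-- Daily search clicks by datasource, with the average clicked position (1-based, last 7 days)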
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
,jsonPayload.searchclick.datasource
,count(jsonPayload.SearchClick.TrackingToken) AS num_clicks
,count(DISTINCT jsonPayload.SearchClick.TrackingToken) AS num_queries_with_clicks
,avg(jsonPayload.searchclick.position)+1 as average_position
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
-- filter for data from the last 7 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
GROUP BY 1,2
ORDER BY 1,3 desc;
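-- Daily full-page search usage: distinct searchers and searches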
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, COUNT(DISTINCT jsonPayload.User.UserId) AS num_searchers
, COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_searches
FROM `<glean_customer_event_table>`
WHERE jsonPayload.Type = 'SEARCH'
AND jsonPayload.Search.Initiator = 'USER'
AND jsonPayload.Search.Modality = 'FULLPAGE'
GROUP BY 1
ORDER BY 1
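-- Daily embedded search usage by host domain: distinct searchers and searches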
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.Search.HostDomain
, COUNT(DISTINCT jsonPayload.User.UserId) AS num_searchers
, COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_searches
FROM `<glean_customer_event_table>`
WHERE jsonPayload.Type = 'SEARCH'
AND jsonPayload.Search.Initiator = 'USER'
AND jsonPayload.Search.Modality = 'EMBEDDED_SEARCH'
GROUP BY 1, 2
ORDER BY 1, 2
Note: the following query requires raw logs.
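-- Chat messages that received both a vote (upvote/downvote) and written feedback, with the comment text (last 14 days)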
SELECT DISTINCT
m.`timestamp` AS message_time
, m.jsonPayload.User.Userid
, m.jsonPayload.Chat.UserQuery
, m.jsonPayload.Chat.Response
, f.`timestamp` AS feedback_time
, f.jsonPayload.ChatFeedback.Event AS rating
, fc.jsonPayload.ChatFeedback.comments AS comments
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>` m
LEFT JOIN
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>` f
ON m.jsonPayload.Chat.ResponseMessageId = f.jsonPayload.ChatFeedback.MessageId
LEFT JOIN
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>` fc
ON m.jsonPayload.Chat.ResponseMessageId = fc.jsonPayload.ChatFeedback.MessageId
WHERE
m.jsonPayload.Type = 'CHAT'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
AND fc.jsonPayload.ChatFeedback.Event = 'MANUAL_FEEDBACK'
-- filter for data from the last 14 days (adjust as needed)
AND m.`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
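-- Top search queries by frequency and distinct user count (last 14 days)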
WITH queries_data AS (
SELECT
DISTINCT `timestamp`
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
, jsonPayload.Search.Query AS search_query
, jsonPayload.user.userid
, jsonPayload.user.useremail
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
SELECT
search_query
, COUNT(DISTINCT qtt) AS frequency
, COUNT(DISTINCT userid) AS user_count
FROM
queries_data
GROUP BY
1
ORDER BY
2 DESC
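-- Top individual query terms (whitespace-split) by frequency and distinct user count (last 14 days)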
WITH queries_data AS (
SELECT
DISTINCT `timestamp`
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
, jsonPayload.Search.Query AS search_query
, jsonPayload.user.userid
, jsonPayload.user.useremail
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for data from the last 14 days (adjust as needed)
AND `timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
),
query_terms_list AS (
SELECT
*
, SPLIT(search_query, ' ') AS query_terms
FROM
queries_data
),
query_terms_flattened AS (
SELECT
timestamp
, stt
, qtt
, userid
, useremail
, query_term
FROM
query_terms_list
, UNNEST(query_terms) AS query_term
)
SELECT
query_term
, COUNT(*) AS frequency
, COUNT(DISTINCT userid) AS user_count
FROM
query_terms_flattened
GROUP BY
1
ORDER BY
2 DESC
Each row of this query represents a proactive Gleanbot message, enriched with whether the bot responded, whether the user engaged with it (e.g., shared, dismissed, voted), and whether it was a question or digest. The query supports analytics around bot effectiveness and user engagement.
WITH gleanbot_activity AS (
SELECT
jsonPayload.user.userid AS user_id
,jsonPayload.gleanbotactivity.eventtrackingtoken AS event_tracking_token
,jsonPayload.gleanbotactivity.stt AS stt
,DATE(timestamp) AS datepartition
,jsonPayload.gleanbotactivity.latenciesmillismap.proactive_message AS latency_proactive_message
,jsonPayload.gleanbotactivity.eventtype AS event_type_verbose
,jsonPayload.gleanbotactivity.responseevents AS response_events_verbose
,jsonPayload.gleanbotactivity.workflowexecutionpoints AS workflow_execution_points_verbose
,(jsonPayload.gleanbotactivity.applicationid is NOT NULL AND LENGTH(jsonPayload.gleanbotactivity.applicationid) != 0) AS is_app_session
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- Adjust length of time as needed
DATE(timestamp) = CURRENT_DATE - INTERVAL 1 DAY
AND jsonPayload.type = 'GLEAN_BOT_ACTIVITY'
-- replace <glean_project_id> with your deployment's GCP project ID
AND resource.labels.project_id = '<glean_project_id>'
)
,message_data AS (
SELECT *
FROM gleanbot_activity
WHERE
(
-- Identify messages where proactive workflow started
'PROACTIVE_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
-- Identify messages where proactive workflow for assistant type started
OR 'PROACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
-- Identify messages where proactive workflow for search type started
OR 'PROACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
-- All messages that were not DMs and did not mention @Glean or \glean fall in this category
OR event_type_verbose IN (
'NON_TAGGED_MESSAGE'
-- All events for which proactive thread summarizer workflow is initiated
, 'PROACTIVE_DISCUSSION_SUMMARIZER'
-- All events where proactive digest was clicked
, 'VIEW_DIGEST_CLICK'
-- All events where digest was subscribed to and thus proactively sent over DM
, 'DAILY_DIGEST_REMINDER'
)
)
-- Remove the events where proactive workflow cannot be triggered. Eg: Invalid Channel
AND 'PROACTIVE_BOT_DISABLED_FAILURE' NOT IN UNNEST(workflow_execution_points_verbose)
-- Remove the events which represent messages sent by other bots, so we don't converse with slack automations
AND 'DROP_BOT_MESSAGE' NOT IN UNNEST(workflow_execution_points_verbose)
AND 'DROP_EXTERNAL_CHANNEL_MESSAGE' NOT IN UNNEST(workflow_execution_points_verbose)
)
,interaction_data AS (
SELECT *
FROM gleanbot_activity
WHERE
event_type_verbose IN (
-- All events where a user clicked on show sources modal
'SHOW_SOURCES',
-- All events for which helpful/not helpful buttons click was initiated
'SHARE_HELPFULNESS',
-- All events for which sharing workflow was started
'SHARE_CLICK',
-- All events for which manual feedback workflow was started
'SUBMIT_FEEDBACK',
-- All events for which dismiss suggestion workflow was initiated
'DISMISS_SUGGESTION'
)
)
SELECT
message_data.datepartition
-- ID of the user who received the proactive message
,message_data.user_id
-- Unique identifier for the proactive message
,message_data.event_tracking_token
-- Whether the user was in an active app session.
,message_data.is_app_session
-- Latency in milliseconds for the proactive message delivery
,message_data.latency_proactive_message
-- Tracking token of the user's interaction with the proactive message
,interaction_data.event_tracking_token AS interaction_event_tracking_token
-- ID of the user who interacted with the proactive message
,interaction_data.user_id AS interacting_user_id
-- Whether the workflow was triggered (via non-tagged or summarizer event)
,(CASE
WHEN message_data.event_type_verbose = 'NON_TAGGED_MESSAGE' THEN TRUE
WHEN message_data.event_type_verbose = 'PROACTIVE_DISCUSSION_SUMMARIZER' THEN TRUE
ELSE FALSE
END) AS workflow_started
-- Whether the message was classified as a question. Commented out for now
-- ,IF(question_detection_data.isquestion = TRUE, TRUE, FALSE) AS question_detected
-- Bot responded: includes proactive message, or summarization thread/DM
,(CASE
WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'PROACTIVE_MESSAGE' IN UNNEST(message_data.response_events_verbose) THEN TRUE
ELSE FALSE
END) AS bot_responded
-- Bot response was explicitly triggered by user (e.g., digest opened or summary clicked)
,(CASE
WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'VIEW_DIGEST' IN UNNEST(message_data.response_events_verbose) THEN TRUE
ELSE FALSE
END) AS bot_responded_on_users_request
-- Response was shared by the user
,IF(interaction_data.event_type_verbose = 'SHARE_CLICK', TRUE, FALSE) AS response_shared
-- Response was dismissed or downvoted
,(CASE
WHEN 'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflow_execution_points_verbose) THEN TRUE
WHEN interaction_data.event_type_verbose = 'DISMISS_SUGGESTION' THEN TRUE
ELSE FALSE
END) AS response_downvoted
-- Citations shown to the user
,IF(interaction_data.event_type_verbose = 'SHOW_SOURCES', TRUE, FALSE) AS citations_shown
-- Response was marked helpful
,IF('SHARE_HELPFUL' IN UNNEST(interaction_data.response_events_verbose), TRUE, FALSE) AS voted_helpful
-- Response was marked not helpful
,IF('SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.response_events_verbose), TRUE, FALSE) AS voted_not_helpful
-- Daily digest proactively sent to the user
,IF('DAILY_DIGEST_REMINDER_SENT' IN UNNEST(message_data.response_events_verbose), TRUE, FALSE) AS digest_sent
-- User consumed/viewed the digest
,IF('VIEW_DIGEST' IN UNNEST(message_data.response_events_verbose), TRUE, FALSE) AS digest_consumed
-- Bot sent a proactive thread or DM summary
,(CASE
WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
ELSE FALSE
END) AS proactive_summary_sent
FROM message_data
LEFT JOIN interaction_data
ON message_data.event_tracking_token = interaction_data.stt
Metrics: Active Chat Users, # of Chats
Grain: Per Day, Per User
This query identifies users who had at least one chat interaction on a given date; one row per user per day.
WITH glean_customer_event AS (
SELECT *
-- replace <glean_customer_event_table> with your firm's table name
FROM `<glean_customer_event_table>`
),
-- Extract the user level data from the most recent date to get most recent user attributes
-- Concatenate aliasids at a userid level
product_snapshot_agg AS (
SELECT
jsonPayload.productsnapshot.user.id AS userid,
MAX(jsonPayload.productsnapshot.user.signuptime) AS signuptime,
ARRAY_CONCAT_AGG(jsonPayload.productsnapshot.user.aliasids) AS aliasids,
MAX(jsonPayload.productsnapshot.user.department) AS department
-- replace <glean_customer_event_table> with your firm's table name
FROM `<glean_customer_event_table>`
WHERE
-- Adjust as needed
DATE(timestamp) = CURRENT_DATE - INTERVAL 1 DAY
AND jsonPayload.type = 'PRODUCT_SNAPSHOT'
GROUP BY 1
),
-- [Optional]: For users belonging to departments with fewer than 5 members, assign them to a single bucket
latest_orgchart_data AS (
SELECT
userid,
signuptime,
aliasids,
CASE WHEN COUNT(userid) OVER (PARTITION BY department) < 5
THEN "Departments less than 5 Users"
ELSE department
END AS department
FROM product_snapshot_agg
),
-- Map each alias to the canonical UserID
id_to_alias AS (
SELECT
DISTINCT aliasid,
userid AS canonicalid
FROM
latest_orgchart_data, UNNEST(aliasids) AS aliasid
),
-- [IMP]: Calculate chat usage metrics.
/*
This CTE merges data across three event types: CHAT, WORKFLOW, and WORKFLOW_RUN.
The WORKFLOW event type was active from mid-February to mid-June 2025, after which it was replaced by WORKFLOW_RUN.
Before that period, all chat-related activity was logged under the CHAT event type.
To calculate accurate Chat usage metrics, we first UNION WORKFLOW and WORKFLOW_RUN. Since only one is ever populated
at a given time, this is a safe operation. We then perform a FULL OUTER JOIN with CHAT to deduplicate any overlapping
runs that appear in both CHAT and WORKFLOW/WORKFLOW_RUN logs.
*/
chat_usage as (
-- Workflow data + Workflow run data
SELECT
COALESCE(wf.datepartition, chat.datepartition) AS datepartition,
COALESCE(wf.userid, chat.userid) AS userid,
COUNT(DISTINCT wf.run_id) + COUNT(DISTINCT CASE WHEN wf.run_id IS NULL THEN chat.qtt END) AS num_chat_queries
FROM (
SELECT
jsonPayload.workflow.runid AS run_id,
jsonPayload.user.userid,
MIN(DATE(timestamp)) AS datepartition
FROM glean_customer_event
WHERE
-- These four workflowIDs correspond to Chats
jsonPayload.workflow.workflowid IN (
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS'
)
AND jsonPayload.workflow.initiator = 'USER'
GROUP BY 1,2
UNION ALL
SELECT
DISTINCT jsonPayload.workflowrun.runid AS run_id,
jsonPayload.user.userid,
DATE(timestamp) AS datepartition
FROM glean_customer_event,
UNNEST(jsonPayload.workflowrun.workflowexecutions) AS wfe
WHERE jsonPayload.type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
AND wfe.workflowid IN (
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS'
)
) wf
-- Join the unioned workflow data with chat data
FULL OUTER JOIN (
SELECT
DATE(timestamp) AS datepartition,
jsonPayload.user.userid,
jsonPayload.chat.qtt AS qtt,
jsonPayload.chat.workflowrunid AS workflowrunid
FROM glean_customer_event
WHERE jsonPayload.type = 'CHAT'
AND jsonPayload.chat.initiator = 'USER'
) chat
ON wf.run_id = chat.workflowrunid
GROUP BY 1, 2
),
feature_usage AS (
SELECT
datepartition,
userid,
COALESCE(chat_usage.num_chat_queries, 0) AS _num_chats
FROM chat_usage
),
canonicalized AS (
SELECT
COALESCE(id_to_alias.canonicalid, feature_usage.userid) AS userid,
datepartition,
COALESCE(SUM(_num_chats), 0) AS num_chats
FROM feature_usage
LEFT JOIN id_to_alias
ON feature_usage.userid = id_to_alias.aliasid
WHERE
COALESCE(id_to_alias.canonicalid, feature_usage.userid) IS NOT NULL
GROUP BY 1, 2
HAVING COALESCE(SUM(_num_chats), 0) > 0
)
SELECT * FROM canonicalized
Metrics: Agentic active users, # of agents created, # of agent runs
Grain: Per Day, Per User
This query identifies daily active agent users based on their agent usage, which includes both running and creating agents. It also provides a daily count of agents run or created by each user.
WITH glean_customer_event AS (
SELECT *
-- replace <glean_customer_event_table> with your firm's table name
FROM `<glean_customer_event_table>`
WHERE
-- adjust as needed
DATE(timestamp) = CURRENT_DATE - INTERVAL 1 DAY
),
-- Get the latest snapshot of existing workflows. Note that any workflow deleted before today will not appear in the product_snapshot table, so you will need to persist a historical table to capture all workflows
product_snapshot AS (
SELECT *
-- For illustration, this sample restricts the snapshot to a single day
FROM glean_customer_event
WHERE
jsonpayload.type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.type = 'WORKFLOW'
AND DATE(timestamp) = CURRENT_DATE - INTERVAL 1 DAY
),
all_workflows_info AS (
SELECT
resource.labels.project_id
, jsonPayload.workflow.createdat
, jsonPayload.workflow.createdby
, jsonPayload.workflow.workflowid
, jsonPayload.workflow.triggertype
-- AGENT, AGENT_TEMPLATE, STATIC_WORKFLOW etc
, jsonPayload.workflow.namespaceenum
-- EDITOR, OWNER, VIEWER
, permissions.role AS role
-- user id, department id, or 'All' for public
, permissions.id AS id
-- USER, DEPARTMENT, ALL
, permissions.type AS id_type
FROM product_snapshot
, UNNEST(jsonpayload.workflow.roles) AS permissions
),
all_workflows AS (
SELECT DISTINCT
PARSE_DATE('%Y-%m-%d', SUBSTR(createdat, 1, 10)) AS created_at
, project_id
-- Glean-created workflows have a creator ID that is a salted version of the developer email and hence won't map to a legitimate user ID
, IF(PARSE_DATE('%Y-%m-%d', SUBSTR(createdat, 1, 10)) = '0001-01-01', NULL, createdby) AS creator_id
, workflowid AS workflow_id
-- Indicates how agentic workflows would be invoked/triggered
, triggertype AS trigger_type
-- AGENT, SUBAGENT and AGENT_TEMPLATE correspond to various agentic workflows
-- STATIC_WORKFLOW corresponds to all other workflows
-- PROMPT_TEMPLATE represents non-migrated prompts that will be sunset gradually
-- ROUTER refers to the assistant router, which routes to either an agent or chat; each deployment has exactly one such workflow
, namespaceenum AS workflow_type
, IF(PARSE_DATE('%Y-%m-%d', SUBSTR(createdat, 1, 10)) = '0001-01-01', TRUE, FALSE) AS is_glean_created_workflow
FROM all_workflows_info
GROUP BY
1, 2, 3, 4, 5, 6, 7
)
,agent_creation AS (
SELECT
DATE(DATE_TRUNC(created_at, DAY)) AS datepartition
, creator_id AS user_id
, project_id
, COUNT(DISTINCT workflow_id) AS num_agents_created
FROM all_workflows
WHERE
creator_id IS NOT NULL
AND workflow_type IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
GROUP BY
1,2,3
),
agent_info AS (
SELECT DISTINCT
workflow_id,
project_id,
workflow_type,
creator_id
FROM all_workflows
),
all_workflow_steps AS (
SELECT
-- Primary Key, concatenate run_id and step_id
concat(jsonpayload.workflowrun.runid, '_', step_exec.stepid) as step_instance_id
-- ORIGINAL_MESSAGE_SEARCH, AGENTIC_WORKFLOW_ID etc.
, step_exec.stepid as step_id
, jsonpayload.workflowrun.runid as run_id
, jsonpayload.workflowrun.chatsessionid AS chat_id
, jsonPayload.workflowrun.sessiontrackingtoken as stt
, resource.labels.project_id as project_id
, min_by(jsonPayload.user.userid, timestamp) as user_id
, min(DATE(timestamp)) as datepartition
, any_value(workflow_execution.workflowid) AS workflow_id
, max(step_exec.status) as step_status
FROM glean_customer_event
JOIN UNNEST(jsonpayload.workflowrun.StepExecutions) AS step_exec
JOIN UNNEST(jsonpayload.workflowrun.WorkflowExecutions) AS workflow_execution
ON step_exec.workflowid = workflow_execution.workflowid
WHERE
jsonPayload.type = 'WORKFLOW_RUN'
AND step_exec.stepid IS NOT NULL
GROUP BY 1,2,3,4,5,6
),
workflow_steps AS (
SELECT
ws.*,
ai.workflow_type,
IF(ai.workflow_type IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE') AND ai.creator_id IS NOT NULL, TRUE, FALSE) AS is_agent_step
FROM all_workflow_steps AS ws
LEFT JOIN agent_info AS ai
ON ws.workflow_id = ai.workflow_id AND ws.project_id = ai.project_id
),
executed_workflow_steps AS (
SELECT
DISTINCT datepartition
, user_id
, project_id
, run_id
, chat_id
FROM workflow_steps
WHERE
step_status = 'EXECUTED'
AND is_agent_step = TRUE
),
agent_usage AS (
SELECT
datepartition
, user_id
, project_id
, COUNT(DISTINCT run_id) AS num_agent_runs
, COUNT(DISTINCT chat_id) AS num_agent_sessions
FROM executed_workflow_steps
GROUP BY
1,2,3
),
all_users_dup AS (
SELECT
DATE(timestamp) AS datepartition
,resource.labels.project_id AS project_id
,jsonpayload.user.userid AS user_id
FROM glean_customer_event
UNION ALL
SELECT
datepartition
,project_id
,user_id
FROM agent_creation
WHERE
datepartition = CURRENT_DATE - INTERVAL 1 DAY
)
,all_users AS (
SELECT DISTINCT
datepartition
,project_id
,user_id
FROM all_users_dup
)
,semi_final AS (
SELECT
au.datepartition,
au.user_id,
au.project_id,
COALESCE(num_agent_runs, 0) + COALESCE(num_agents_created, 0) > 0 AS is_agents_active_1d,
COALESCE(num_agent_runs, 0) > 0 AS is_agent_runs_active_1d,
COALESCE(num_agents_created, 0) > 0 AS is_agents_creator_active_1d,
COALESCE(num_agent_runs,0) AS num_agent_runs,
COALESCE(num_agents_created, 0) AS num_agents_created
FROM all_users au
LEFT JOIN agent_creation a
ON au.datepartition = a.datepartition
AND au.project_id = a.project_id
AND au.user_id = a.user_id
LEFT JOIN agent_usage
ON au.user_id = agent_usage.user_id
AND au.datepartition = agent_usage.datepartition
AND au.project_id = agent_usage.project_id
)
SELECT *
FROM semi_final
WHERE
num_agent_runs + num_agents_created > 0