Overview
This section provides pre-built BigQuery queries specifically designed for creating executive dashboards and generating key insights from Glean usage data. These queries focus on high-level metrics, trend analysis, and actionable insights that help organizations measure the impact and adoption of their Glean deployment. The queries in this section are optimized for:- Executive Reporting: High-level KPIs and trend analysis
- ROI Measurement: Productivity gains and adoption metrics
- Strategic Planning: Department performance and content strategy insights
- User Experience: Journey analysis and feature effectiveness
Production-Ready Insights Queries
These are the complete, production-quality enhanced queries used to power the Glean Insights Overview dashboard. Each query includes comprehensive business logic and user canonicalization.Department Adoption & Engagement Overview
Department Adoption & Engagement Overview
Output: Comprehensive department-level metrics including employee counts, signup rates, active users (monthly/weekly), search/chat activity volumes and user counts by department
Grain: Department-level aggregation with activity breakdowns across multiple time windows (monthly, weekly)
Use Case: Executive dashboard for tracking Glean adoption and engagement across organizational departments, identifying high/low adoption areas, and measuring feature usage penetration
Grain: Department-level aggregation with activity breakdowns across multiple time windows (monthly, weekly)
Use Case: Executive dashboard for tracking Glean adoption and engagement across organizational departments, identifying high/low adoption areas, and measuring feature usage penetration
Copy
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE startDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE endDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY);
-- Define workflow IDs as variables for easy management
DECLARE chat_workflow_ids ARRAY<STRING> DEFAULT [
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS',
'DEFAULT_CHAT',
'WORLD_MODE_V2',
'DEEP_RESEARCH_PYAGENT',
'DEEP_RESEARCH_PREVIEW'
];
WITH
glean_customer_event AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE DATE(DATE_TRUNC(timestamp, DAY)) BETWEEN DATE(startDate) AND DATE(endDate)
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, id_to_alias AS (
SELECT
DISTINCT aliasid,
userid AS canonicalid
FROM (
SELECT
userid,
aliasids
FROM (
SELECT
DISTINCT jsonPayload.productsnapshot.user.id AS userid,
FIRST_VALUE(jsonPayload.productsnapshot.user.aliasids) OVER (PARTITION BY jsonPayload.productsnapshot.user.id ORDER BY datepartition DESC) AS aliasids
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.Type = 'USER'
)
), UNNEST(aliasids) as aliasid
)
, latest_orgchart_data AS (
SELECT
userid,
loggingid,
signuptime,
-- Privacy protection: anonymize departments with <5 users
CASE WHEN COUNT(userid) OVER (PARTITION BY department) < 5
THEN 'ALL_DEPTS_LT_5_USERS'
ELSE department
END AS department
FROM (
SELECT
jsonPayload.productsnapshot.user.id AS userid,
MAX(jsonPayload.productsnapshot.user.loggingid) AS loggingid,
MAX(jsonPayload.productsnapshot.user.signuptime) AS signuptime,
MAX(jsonPayload.productsnapshot.user.department) AS department
FROM (
-- Get latest snapshot per user
SELECT
jsonPayload,
ROW_NUMBER() OVER (
PARTITION BY jsonPayload.productsnapshot.user.id
ORDER BY timestamp DESC
) as rn
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.Type = 'USER'
AND jsonPayload.productsnapshot.user.id IS NOT NULL
) latest_snapshots
WHERE rn = 1
GROUP BY 1
)
)
, workflow_data AS (
SELECT
jsonPayload.workflow.runid AS run_id,
jsonPayload.user.userid,
MIN(datepartition) AS datepartition
FROM glean_customer_event
WHERE jsonPayload.workflow.workflowid IN UNNEST(chat_workflow_ids)
AND jsonPayload.workflow.initiator = 'USER'
GROUP BY 1, 2
)
, workflowrun_data AS (
SELECT
DISTINCT jsonPayload.workflowrun.runid AS run_id,
jsonPayload.user.userid,
datepartition
FROM glean_customer_event, UNNEST(jsonPayload.workflowrun.workflowexecutions) as workflow_execution
WHERE jsonPayload.Type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
AND workflow_execution.workflowid IN UNNEST(chat_workflow_ids)
)
, chat_data AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.chat.qtt AS qtt,
jsonPayload.chat.workflowrunid AS workflowrunid
FROM glean_customer_event
WHERE jsonPayload.Type = 'CHAT'
AND jsonPayload.chat.initiator = 'USER'
)
, combined_workflow_data AS (
SELECT run_id, userid, datepartition FROM workflow_data
UNION ALL
SELECT run_id, userid, datepartition FROM workflowrun_data
)
, chat_usage as (
SELECT
COALESCE(wf.datepartition, chat.datepartition) AS datepartition,
COALESCE(wf.userid, chat.userid) AS userid,
COUNT(DISTINCT wf.run_id) + COUNT(DISTINCT CASE WHEN wf.run_id IS NULL THEN chat.qtt END) AS num_chat_queries
FROM combined_workflow_data wf
FULL OUTER JOIN chat_data chat
ON wf.run_id = chat.workflowrunid
GROUP BY 1, 2
)
, other_feature_usage AS (
SELECT
gce.jsonPayload.user.userid,
gce.datepartition,
COUNT(
DISTINCT CASE WHEN (
gce.jsonPayload.Type = 'SEARCH'
AND (
gce.jsonPayload.search.initiator IN ('USER', 'PAGE_LOAD')
AND LOWER(gce.jsonPayload.search.modality) NOT IN ('gleanbot', 'slackbot_command', 'slackbot_leaderboard', 'slackbot_retry', '')
)
) THEN gce.jsonPayload.search.trackingtoken END
) +
COUNT(
CASE WHEN (
gce.jsonPayload.Type = 'SEARCH'
AND gce.jsonPayload.search.isrestclientapi
) THEN 1 END
) AS num_searches,
COUNT(
DISTINCT CASE WHEN (
-- Depending on category, opening links/documents counts towards DAU
gce.jsonPayload.clientevent.event IN ('OpenDocument', 'OpenLink')
AND gce.jsonPayload.clientevent.category IN (
-- Autocomplete and Search interactions count towards DAU
'Search Result'
, 'Autocomplete'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
, 'New Tab Page'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
, 'Featured Question and Answer'
, 'Generated Question and Answer'
, 'Pins'
-- Golinks interactions counts towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Verification'
, 'Datasource Auth'
, 'Insights'
-- Feature related interactions count towards DAU
, 'Chat'
, 'Result Preview'
, 'App Card'
, 'Customer Card'
, 'Search'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
OR (
-- Depending on category, certain feature related clicks count towards DAU
gce.jsonPayload.clientevent.event IN ('Click')
AND gce.jsonPayload.clientevent.category IN (
-- Autocomplete and Search interactions count towards DAU
'Autocomplete'
, 'Search Result'
, 'Datasource Filter'
, 'Facets'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- Sidebar interactions count towards DAU
, 'Sidebar Tabs'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
-- Golinks interactions counts towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Datasource Auth'
, 'User Menu'
, 'Admin Console'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
-- CRUD operations on User Generated Content(UGC) always count always count towards DAU
OR (
gce.jsonPayload.clientevent.event IN ('Add', 'Create', 'Delete')
AND gce.jsonPayload.clientevent.category IN ('Announcements', 'Answers', 'Collections')
)
OR (
gce.jsonPayload.clientevent.event IN ('View')
AND gce.jsonPayload.clientevent.category IN (
-- User Generated Content(UGC) interactions count towards DAU
'Announcements'
,'Answers'
,'Collections'
-- Directory tab interactions count towards DAU
,'Person Card'
,'Team Card'
,'Org Chart'
)
AND gce.jsonPayload.clientevent.pagepath NOT IN ('/', '/ntp', '/search')
)
/* Summarization expansion */
OR ( gce.jsonPayload.clientevent.event = 'Expand' AND gce.jsonPayload.clientevent.category = 'Summary' AND jsonPayload.clientevent.uielement = 'summarize-card')
/* Counts Start events (user chat message in Chat tab) and Expand events (prompt expansion in Discover tab) from Sidebar V2 */
OR ( gce.jsonPayload.clientevent.event in ('Start', 'Expand') AND gce.jsonPayload.clientevent.category = 'Sidebar V2')
/* Counts Start events (user query or preset click) from Inline Menu */
OR ( gce.jsonPayload.clientevent.event = 'Start' AND gce.jsonPayload.clientevent.category = 'Inline Menu')
/* Counts visits to past chats via GleanChat Conversation History */
OR ( gce.jsonPayload.clientevent.event = 'Click' AND gce.jsonPayload.clientevent.category = 'Chat' AND jsonPayload.clientevent.uielement = 'chats-menu')
THEN gce.jsonPayload.clientevent.SessionTrackingToken END
) AS num_client_active_sessions,
COUNT(
CASE WHEN (
gce.jsonPayload.Type IN ('SEARCH_CLICK','CHAT_FEEDBACK','AI_SUMMARY','AI_ANSWER','SEARCH_FEEDBACK')
OR gce.jsonPayload.Type IN ('SHORTCUT')
AND gce.jsonPayload.shortcut.event IN ('REDIRECT', 'CREATE', 'DELETE', 'UPDATE')
) THEN 1 END
) >= 1 AS is_active_other
FROM
glean_customer_event gce
WHERE
gce.jsonPayload.Type not in ('CHAT','WORKFLOW')
GROUP BY
1, 2
)
, event_traces AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.gleanbotactivity.eventtrackingtoken,
jsonPayload.gleanbotactivity.responseevents,
jsonPayload.gleanbotactivity.eventtype,
jsonPayload.gleanbotactivity.workflowexecutionpoints,
jsonPayload.gleanbotactivity.stt
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'GLEAN_BOT_ACTIVITY'
AND LOWER(jsonPayload.gleanbotactivity.source) = 'slack'
)
, slack_reactive_bot_activity AS (
SELECT
datepartition,
userid,
COUNT(DISTINCT CASE
WHEN response_downvoted THEN NULL
WHEN bot_responded THEN eventtrackingtoken
WHEN response_shared THEN eventtrackingtoken
ELSE NULL END) AS num_useful_responses
FROM (
SELECT
message_data.datepartition,
message_data.userid,
message_data.eventtrackingtoken,
'REACTIVE_ACT_MESSAGE' IN UNNEST(message_data.responseevents) -- Messages that got a successful response from the bot should be counted
OR 'REACTIVE_GENERATION_RESPONSE_MODAL' IN UNNEST(message_data.responseevents) -- Gleanbot added value to users other than the question asker by generating responses for them
OR 'COMMAND_SEARCH_RESULTS_MESSAGE' IN UNNEST(message_data.responseevents) -- Messages that were triggered through a /glean command and got results as response
OR 'COMMAND_SEARCH_UPDATE_MODAL_WITH_RESULTS' IN UNNEST(message_data.responseevents) -- Messages that were triggered through a /glean command and got results as response on retry
OR 'COMMAND_WORKFLOW_SUCCESS' IN UNNEST(message_data.responseevents) -- Response was successfully generated after user queried through glean console
OR 'DISCUSSION_SUMMARY_SENT' IN UNNEST(message_data.responseevents) -- [Slack Compete] Response to summarize a thread discussion was successfully sent through glean DM
OR 'SINGLE_CHANNEL_SUMMARY_SENT' IN UNNEST(message_data.responseevents) -- [Slack Compete] Response to summarize a channel discussion was successfully sent through glean DM
OR 'DOC_URL_SUMMARY_SENT' IN UNNEST(message_data.responseevents) -- [Slack Compete] Response to summarize a uploaded doc discussion was successfully sent through glean DM
AS bot_responded,
interaction_data.eventtype = 'SHARE_CLICK' AS response_shared, -- Response was shared by the user
'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflowexecutionpoints) AS response_downvoted, -- Response was dismissed by the user
'SHOW_SOURCES_CLICK_SUCCESS' IN UNNEST(interaction_data.workflowexecutionpoints) AS citations_shown, -- Response was checked for its citation source
'SHARE_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_helpful, -- Response was voted as helpful
'SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_not_helpful -- Response was voted as not helpful
FROM (
SELECT
*
FROM
event_traces
WHERE
-- Identify the set of valid slack reactive bot workflow starting points that could potentitally contribute to active users definition
(
'REACTIVE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where reactive workflow started successfully
OR 'REACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where reactive workflow for assistant type started
OR 'REACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where reactive workflow for search type started
OR 'COMMAND_SEARCH_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where /glean command was used to initiate a reactive search
OR 'COMMAND_DISCUSSION_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where glean console was used to summarize a discussion/query
OR 'COMMAND_CHANNEL_SUMMARIZE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where glean console was used to summarize a channel
OR 'COMMAND_DOC_SUMMARIZE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where glean console was used to summarize an attached/shared doc
OR eventtype IN (
'TAGGED_MESSAGE' -- Identify messages where @glean was mentioned
, 'GENERATE_ANSWER' -- Identify messages where someone clicked on generate answer button
, 'COMMAND' -- Identify messages where /glean was mentioned
, 'DM_TO_GLEANBOT_MESSAGE' -- [Slack Compete] Identify messages where user sent DM to glean bot with a query
)
)
AND NOT 'REACTIVE_DISABLED_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove the events where reactive workflow cannont be triggered. Eg: Invalid Channel
AND NOT 'REACTIVE_DISABLED_FAILURE_FOR_USER' IN UNNEST(workflowexecutionpoints) -- Remove the events where reactive workflow cannont be triggered for the user Eg: Insufficient permissions
AND NOT 'COMMAND_BAD_REQUEST' IN UNNEST(workflowexecutionpoints) -- Remove the event where /glean command was triggered in an invalid channel/by invalid user
AND NOT 'COMMAND_SEARCH_RESULTS_MODAL_UPDATE_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove cases where /glean command failed to load results
AND NOT 'COMMAND_SEARCH_NO_RESULTS_MODAL_UPDATE_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove cases where /glean command failed to update and dispaly results
) AS message_data
LEFT JOIN (
SELECT
*
FROM
event_traces
WHERE eventtype IN (
'SHOW_SOURCES' -- All events where a user clicked on show sources modal
, 'SHARE_HELPFULNESS' -- All events for which helpful/not helpful buttons click was initiated
, 'SHARE_CLICK' -- All events for which sharing workflow was started
, 'SUBMIT_FEEDBACK' -- All events for which manual feedback workflow was started
)
) AS interaction_data
ON
message_data.eventtrackingtoken = interaction_data.stt
)
GROUP BY
datepartition,
userid
)
, slack_proactive_bot_activity AS (
SELECT
datepartition,
userid,
COUNT(DISTINCT CASE
WHEN response_downvoted THEN NULL
WHEN voted_helpful THEN eventtrackingtoken
WHEN citations_shown THEN eventtrackingtoken
WHEN response_shared THEN eventtrackingtoken
WHEN bot_responded_on_users_request AND NOT voted_not_helpful THEN eventtrackingtoken
ELSE NULL END) AS num_useful_responses,
COUNT(DISTINCT CASE
WHEN response_downvoted THEN NULL
WHEN voted_helpful THEN eventtrackingtoken
WHEN citations_shown THEN eventtrackingtoken
WHEN response_shared THEN eventtrackingtoken
WHEN bot_responded_on_users_request AND NOT voted_not_helpful AND NOT digest_consumed THEN eventtrackingtoken
ELSE NULL END) AS num_useful_responses_without_digest
FROM (
SELECT
message_data.datepartition,
message_data.userid,
message_data.eventtrackingtoken,
message_data.eventtype IN ('NON_TAGGED_MESSAGE', 'PROACTIVE_DISCUSSION_SUMMARIZER') AS workflow_started,
'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.responseevents)
OR 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.responseevents)
OR 'PROACTIVE_MESSAGE' IN UNNEST(message_data.responseevents)
AS bot_responded,
'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.responseevents)
OR 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.responseevents)
OR 'VIEW_DIGEST' IN UNNEST(message_data.responseevents)
AS bot_responded_on_users_request,
interaction_data.eventtype = 'SHARE_CLICK' AS response_shared, -- Response was shared by the user
'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflowexecutionpoints)
OR interaction_data.eventtype = 'DISMISS_SUGGESTION'
AS response_downvoted, -- Response was dismissed by the user
interaction_data.eventtype = 'SHOW_SOURCES' AS citations_shown, -- Response was checked for its citation source
'SHARE_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_helpful, -- Response was voted as helpful
'SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_not_helpful, -- Response was voted as not helpful
'DAILY_DIGEST_REMINDER_SENT' IN UNNEST(message_data.responseevents) AS digest_sent, -- Bot sent user daily digest over DM
'VIEW_DIGEST' IN UNNEST(message_data.responseevents) AS digest_consumed, -- Digest was opened and viewed by the user
'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.responseevents)
OR 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.responseevents)
AS proactive_summary_sent
FROM (
SELECT
*
FROM
event_traces
WHERE
-- Identify the set of valid slack proactive bot workflow starting points that could potentitally contribute to active users definition
(
'PROACTIVE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where proactive workflow started
OR 'PROACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where proactive workflow for assistant type started
OR 'PROACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where proactive workflow for search type started
OR eventtype IN (
'NON_TAGGED_MESSAGE' -- All messages that were not DMs and did not mention @Glean or \glean fall in this category
, 'PROACTIVE_DISCUSSION_SUMMARIZER' -- All events for which proactive thread summarizer workflow is initiated
, 'VIEW_DIGEST_CLICK' -- All events where proactive digest was clicked
, 'DAILY_DIGEST_REMINDER' -- All events where digest was subscribed to and thus proactively sent over DM
)
)
AND NOT 'PROACTIVE_BOT_DISABLED_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove the events where proactive workflow cannont be triggered. Eg: Invalid Channel
AND NOT 'DROP_BOT_MESSAGE' IN UNNEST(workflowexecutionpoints) -- Remove the events which represent messages sent by other bots, so we dont converse with slack automations
AND NOT 'DROP_EXTERNAL_CHANNEL_MESSAGE' IN UNNEST(workflowexecutionpoints) -- Remove the events that correspond to messages sent on external channels.
) AS message_data
LEFT JOIN (
SELECT
*
FROM
event_traces
WHERE
eventtype IN (
'SHOW_SOURCES' -- All events where a user clicked on show sources modal
, 'SHARE_HELPFULNESS' -- All events for which helpful/not helpful buttons click was initiated
, 'SHARE_CLICK' -- All events for which sharing workflow was started
, 'SUBMIT_FEEDBACK' -- All events for which manual feedback workflow was started
, 'DISMISS_SUGGESTION' -- All events for which dismiss suggestion workflow was initiated
)
) AS interaction_data
ON
message_data.eventtrackingtoken = interaction_data.stt
)
GROUP BY
datepartition, userid
)
, slackbot_usage as (
SELECT
COALESCE(p.datepartition, r.datepartition) AS datepartition,
COALESCE(p.userid, r.userid) AS userid,
COALESCE(r.num_useful_responses, 0) AS reactive_num_useful_responses,
COALESCE(p.num_useful_responses, 0) AS proactive_num_useful_responses,
COALESCE(p.num_useful_responses_without_digest, 0) AS proactive_num_useful_responses_without_digest,
COALESCE(r.num_useful_responses, 0) + COALESCE(p.num_useful_responses, 0) AS total_num_useful_responses
FROM slack_proactive_bot_activity p
FULL OUTER JOIN slack_reactive_bot_activity r
ON p.datepartition = r.datepartition
AND p.userid = r.userid
)
, feature_usage as(
WITH base_join AS (
SELECT
COALESCE(chat_usage.datepartition, other_feature_usage.datepartition) AS datepartition,
COALESCE(chat_usage.userid, other_feature_usage.userid) AS userid,
COALESCE(chat_usage.num_chat_queries, 0) AS _num_chats,
COALESCE(other_feature_usage.num_searches, 0) AS _num_searches,
COALESCE(other_feature_usage.num_client_active_sessions, 0) AS _num_client_active_sessions,
COALESCE(other_feature_usage.is_active_other, FALSE) AS _is_active_other
FROM
other_feature_usage
FULL OUTER JOIN
chat_usage
ON
other_feature_usage.datepartition = chat_usage.datepartition
AND other_feature_usage.userid = chat_usage.userid
)
SELECT
COALESCE(slackbot_usage.datepartition, base_join.datepartition) AS datepartition,
COALESCE(slackbot_usage.userid, base_join.userid) AS userid,
COALESCE(base_join._num_chats, 0) AS _num_chats,
COALESCE(base_join._num_searches, 0) AS _num_searches,
COALESCE(base_join._num_client_active_sessions, 0) AS _num_client_active_sessions,
COALESCE(base_join._is_active_other, FALSE) AS _is_active_other,
COALESCE(slackbot_usage.total_num_useful_responses, 0) AS _num_slackbot_useful_responses
FROM
base_join
FULL OUTER JOIN
slackbot_usage
ON
base_join.datepartition = slackbot_usage.datepartition
AND base_join.userid = slackbot_usage.userid
)
, canonicalized_feature_usage AS (
SELECT
COALESCE(latest_orgchart_data.userid, id_to_alias.canonicalid, feature_usage.userid) AS userid,
datepartition,
COALESCE(SUM(_num_searches), 0) AS num_searches,
COALESCE(SUM(_num_chats), 0) AS num_chats,
COALESCE(SUM(_num_client_active_sessions), 0) AS num_client_active_sessions,
COALESCE(SUM(_num_slackbot_useful_responses), 0) AS num_slackbot_useful_responses,
COALESCE(LOGICAL_OR(_is_active_other), FALSE) AS is_active_other
FROM
feature_usage
LEFT JOIN
id_to_alias
ON
feature_usage.userid = id_to_alias.aliasid
LEFT JOIN
latest_orgchart_data
ON
COALESCE(id_to_alias.canonicalid, feature_usage.userid) = latest_orgchart_data.loggingid
WHERE
COALESCE(id_to_alias.canonicalid, feature_usage.userid) IS NOT NULL
GROUP BY
1, 2
HAVING
COALESCE(SUM(_num_searches), 0) > 0
OR COALESCE(SUM(_num_chats), 0) > 0
OR COALESCE(SUM(_num_client_active_sessions), 0) > 0
OR COALESCE(SUM(_num_slackbot_useful_responses), 0) > 0
OR COALESCE(LOGICAL_OR(_is_active_other), FALSE)
)
SELECT
o.department,
COUNT(DISTINCT o.userid) AS NumEmployees,
-- Users with AU-qualifying activity should never be considered not-signed-up.
-- TODO: resolve the KI where users can be active on gleanbot without a signup time
COUNT(DISTINCT CASE WHEN o.signuptime IS NOT NULL OR c.userid IS NOT NULL THEN o.userid END) AS NumSignups,
COUNT(DISTINCT CASE WHEN c.datepartition BETWEEN DATE(endDate) - INTERVAL '27' DAY AND DATE(endDate) THEN c.userid END) AS MonthlyActiveUsers,
COUNT(DISTINCT CASE WHEN c.datepartition BETWEEN DATE(endDate) - INTERVAL '6' DAY AND DATE(endDate) THEN c.userid END) AS WeeklyActiveUsers,
COALESCE(SUM(CASE WHEN c.datepartition BETWEEN DATE(startDate) AND DATE(endDate) THEN c.num_searches ELSE 0 END), 0) AS NumSearches,
COUNT(DISTINCT CASE WHEN c.num_searches > 0 AND c.datepartition BETWEEN DATE(startDate) AND DATE(endDate) THEN c.userid END) AS NumSearchUsers,
COALESCE(SUM(CASE WHEN c.datepartition BETWEEN DATE(startDate) AND DATE(endDate) THEN c.num_chats ELSE 0 END), 0) AS NumChats,
COUNT(DISTINCT CASE WHEN c.num_chats > 0 AND c.datepartition BETWEEN DATE(startDate) AND DATE(endDate) THEN c.userid END) AS NumChatUsers,
COUNT(DISTINCT CASE WHEN c.datepartition BETWEEN DATE(endDate) - INTERVAL '27' DAY AND DATE(endDate) AND c.num_searches > 0 THEN c.userid END) AS MonthlySearchActiveUsers,
COUNT(DISTINCT CASE WHEN c.datepartition BETWEEN DATE(endDate) - INTERVAL '6' DAY AND DATE(endDate) AND c.num_searches > 0 THEN c.userid END) AS WeeklySearchActiveUsers,
COUNT(DISTINCT CASE WHEN c.datepartition BETWEEN DATE(endDate) - INTERVAL '27' DAY AND DATE(endDate) AND c.num_chats > 0 THEN c.userid END) AS MonthlyChatActiveUsers,
COUNT(DISTINCT CASE WHEN c.datepartition BETWEEN DATE(endDate) - INTERVAL '6' DAY AND DATE(endDate) AND c.num_chats > 0 THEN c.userid END) AS WeeklyChatActiveUsers
FROM
latest_orgchart_data o -- require orgchart on left side of join to obtain non-user employees
LEFT JOIN
canonicalized_feature_usage c
ON
c.userid = o.userid
GROUP BY
1
Most Referenced Chat Datasources
Most Referenced Chat Datasources
Output: Top 10 datasources by citation count in AI chat responses over specified date range
Grain: Datasource-level aggregation across all chat interactions in date range
Use Case: Identify which content sources AI assistant references most frequently to optimize knowledge curation and data source prioritization
Grain: Datasource-level aggregation across all chat interactions in date range
Use Case: Identify which content sources AI assistant references most frequently to optimize knowledge curation and data source prioritization
Copy
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE startDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE endDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY);
-- Define workflow IDs as variables for easy management
DECLARE chat_workflow_ids ARRAY<STRING> DEFAULT [
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS',
'DEFAULT_CHAT',
'WORLD_MODE_V2',
'DEEP_RESEARCH_PYAGENT',
'DEEP_RESEARCH_PREVIEW'
];
WITH
glean_customer_event AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE DATE(DATE_TRUNC(timestamp, DAY)) BETWEEN DATE(startDate) AND DATE(endDate)
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Extract chat citations from CHAT_CITATIONS events
, chat_citations AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.chatcitations.chatsessionid,
chat_citation.sourcedocument.id AS docid,
chat_citation.sourcedocument.datasource
FROM
glean_customer_event
CROSS JOIN
UNNEST(jsonPayload.chatcitations.citations) AS chat_citation
WHERE
jsonPayload.Type = 'CHAT_CITATIONS'
AND jsonPayload.chatcitations.chatsessionid IS NOT NULL
AND chat_citation.sourcedocument.id IS NOT NULL
AND chat_citation.sourcedocument.datasource IS NOT NULL
),
-- Extract workflow citations from WORKFLOW events
workflow_citations AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.workflow.chatsessionid,
workflow_citation.sourcedocument.id AS docid,
workflow_citation.sourcedocument.datasource
FROM
glean_customer_event, UNNEST(jsonPayload.workflow.citations) AS workflow_citation
WHERE
jsonPayload.workflow.workflowid IN UNNEST(chat_workflow_ids)
AND jsonPayload.Type = 'WORKFLOW'
AND jsonPayload.workflow.initiator = 'USER'
AND jsonPayload.workflow.chatsessionid IS NOT NULL
AND workflow_citation.sourcedocument.id IS NOT NULL
AND workflow_citation.sourcedocument.datasource IS NOT NULL
),
-- Extract workflow run citations from WORKFLOW_RUN events
workflowrun_citations AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.workflowrun.chatsessionid,
step_citation.sourcedocument.id AS docid,
step_citation.sourcedocument.datasource
FROM
glean_customer_event,
UNNEST(jsonPayload.workflowrun.workflowexecutions) AS workflow_execution,
UNNEST(jsonPayload.workflowrun.stepexecutions) AS step_execution,
UNNEST(step_execution.citations) AS step_citation
WHERE
jsonPayload.Type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
AND workflow_execution.workflowid IN UNNEST(chat_workflow_ids)
AND jsonPayload.workflowrun.chatsessionid IS NOT NULL
AND step_citation.sourcedocument.id IS NOT NULL
AND step_citation.sourcedocument.datasource IS NOT NULL
),
-- Combine workflow and workflowrun citations
combined_workflow_citations AS (
SELECT
DISTINCT datepartition,
userid,
chatsessionid,
docid,
datasource
FROM (
SELECT * FROM workflow_citations
UNION ALL
SELECT * FROM workflowrun_citations
)
),
-- Combine all citation sources (chat + workflow)
all_citations AS (
SELECT
COALESCE(c.datepartition, w.datepartition) AS datepartition,
COALESCE(c.userid, w.userid) AS userid,
COALESCE(c.chatsessionid, w.chatsessionid) AS chatsessionid,
COALESCE(c.docid, w.docid) AS docid,
COALESCE(c.datasource, w.datasource) AS datasource
FROM
chat_citations c
FULL OUTER JOIN
combined_workflow_citations w
ON
c.datepartition = w.datepartition
AND c.userid = w.userid
AND c.chatsessionid = w.chatsessionid
AND c.docid = w.docid
AND c.datasource = w.datasource
)
SELECT
citations.datasource,
COUNT(DISTINCT CONCAT(citations.chatsessionid, '|', citations.docid)) AS num_citations
FROM
all_citations AS citations
WHERE
COALESCE(citations.datasource, '') != ''
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
Most Clicked Search Result Datasources
Most Clicked Search Result Datasources
Output: Top 10 datasources by search result click count over specified date range
Grain: Datasource-level aggregation across all search click interactions in date range
Use Case: Identify which content sources users click on most frequently in search results to optimize content relevance and data source prioritization
Grain: Datasource-level aggregation across all search click interactions in date range
Use Case: Identify which content sources users click on most frequently in search results to optimize content relevance and data source prioritization
Copy
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE startDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE endDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY);
WITH
glean_customer_event AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE DATE(DATE_TRUNC(timestamp, DAY)) BETWEEN DATE(startDate) AND DATE(endDate)
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
jsonPayload.searchclick.datasource,
COALESCE(COUNT(DISTINCT jsonPayload.searchclick.trackingtoken), 0) AS num_clicks
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
AND COALESCE(jsonPayload.searchclick.datasource, '') != ''
GROUP BY
1
ORDER BY
2 DESC
LIMIT 10
Search Satisfaction Rate (SSAT)
Search Satisfaction Rate (SSAT)
Output: Overall search satisfaction rate calculated as percentage of searches that result in user clicks
Grain: Aggregated satisfaction rate across all searches in the specified date range
Use Case: Measure search effectiveness and user satisfaction by tracking click-through rates on search results to identify search quality trends
Grain: Aggregated satisfaction rate across all searches in the specified date range
Use Case: Measure search effectiveness and user satisfaction by tracking click-through rates on search results to identify search quality trends
Copy
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE startDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE endDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY);
WITH
glean_customer_event AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE DATE(DATE_TRUNC(timestamp, DAY)) BETWEEN DATE(startDate) AND DATE(endDate)
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, id_to_alias AS (
SELECT
DISTINCT aliasid,
userid AS canonicalid,
department
FROM (
SELECT
userid,
aliasids,
department
FROM (
SELECT
DISTINCT jsonPayload.productsnapshot.user.id AS userid,
FIRST_VALUE(jsonPayload.productsnapshot.user.aliasids) OVER (PARTITION BY jsonPayload.productsnapshot.user.id ORDER BY datepartition DESC) AS aliasids,
FIRST_VALUE(jsonPayload.productsnapshot.user.departmentid) OVER (PARTITION BY jsonPayload.productsnapshot.user.id ORDER BY datepartition DESC) AS department
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.Type = 'USER'
)
), UNNEST(aliasids) as aliasid
)
, user_searches AS(
SELECT
DISTINCT jsonPayload.user.userid,
DATE(timestamp) as datepartition,
jsonPayload.search.sessiontrackingtoken AS stt,
jsonPayload.search.trackingtoken AS qtt
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
AND jsonPayload.search.initiator IN ('USER', 'PAGE_LOAD')
AND LOWER(jsonPayload.search.modality) NOT IN ('gleanbot', 'slackbot_command', 'slackbot_leaderboard', 'slackbot_retry', '')
)
, search_clicks AS(
SELECT
DISTINCT jsonPayload.user.userid,
DATE(timestamp) as datepartition,
jsonPayload.searchclick.trackingtoken AS qtt
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
)
, search_interaction AS (
SELECT
searches.datepartition,
id_to_alias.department,
COUNT(DISTINCT IF(clicks.qtt IS NOT NULL, searches.stt, NULL)) AS num_searches_with_clicks,
COUNT(DISTINCT searches.stt) AS num_searches
FROM
user_searches searches
LEFT JOIN
search_clicks clicks
ON
searches.userid = clicks.userid
AND searches.qtt = clicks.qtt
AND searches.datepartition = clicks.datepartition
LEFT JOIN
id_to_alias
ON
searches.userid = id_to_alias.aliasid
GROUP BY 1,2
)
SELECT
1.0 * SUM(num_searches_with_clicks)/GREATEST(SUM(num_searches), 1) AS SearchSSAT
FROM
search_interaction
Top Users by Chat and Search Usage
Top Users by Chat and Search Usage
Output: List of users ranked by total chat and search activity over specified date range
Grain: User-level aggregation showing individual usage patterns across chat and search features
Use Case: Identify power users and usage patterns to understand feature adoption, target training programs, and recognize high-engagement users
Grain: User-level aggregation showing individual usage patterns across chat and search features
Use Case: Identify power users and usage patterns to understand feature adoption, target training programs, and recognize high-engagement users
Copy
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE startDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE endDate DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY);
-- Define workflow IDs as variables for easy management
DECLARE chat_workflow_ids ARRAY<STRING> DEFAULT [
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS',
'DEFAULT_CHAT',
'WORLD_MODE_V2',
'DEEP_RESEARCH_PYAGENT',
'DEEP_RESEARCH_PREVIEW'
];
WITH
glean_customer_event AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE DATE(DATE_TRUNC(timestamp, DAY)) BETWEEN DATE(startDate) AND DATE(endDate)
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, id_to_alias AS (
SELECT
DISTINCT aliasid,
userid AS canonicalid,
department,
signuptime
FROM (
SELECT
userid,
aliasids,
department,
signuptime
FROM (
SELECT
DISTINCT jsonPayload.productsnapshot.user.id AS userid,
FIRST_VALUE(jsonPayload.productsnapshot.user.aliasids) OVER (PARTITION BY jsonPayload.productsnapshot.user.id ORDER BY datepartition DESC) AS aliasids,
FIRST_VALUE(jsonPayload.productsnapshot.user.departmentid) OVER (PARTITION BY jsonPayload.productsnapshot.user.id ORDER BY datepartition DESC) AS department,
FIRST_VALUE(jsonPayload.productsnapshot.user.signuptime IGNORE NULLS) OVER(PARTITION BY jsonPayload.productsnapshot.user.id ORDER BY datepartition) AS signuptime
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.Type = 'USER'
)
), UNNEST(aliasids) as aliasid
)
, workflow_data AS (
SELECT
jsonPayload.workflow.runid AS run_id,
jsonPayload.user.userid,
jsonPayload.user.useremail,
MIN(datepartition) AS datepartition
FROM glean_customer_event
WHERE jsonPayload.workflow.workflowid IN UNNEST(chat_workflow_ids)
AND jsonPayload.workflow.initiator = 'USER'
GROUP BY 1, 2, 3
)
, workflowrun_data AS (
SELECT
DISTINCT jsonPayload.workflowrun.runid AS run_id,
jsonPayload.user.userid,
jsonPayload.user.useremail,
datepartition
FROM glean_customer_event, UNNEST(jsonPayload.workflowrun.workflowexecutions) as workflow_execution
WHERE jsonPayload.Type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
AND workflow_execution.workflowid IN UNNEST(chat_workflow_ids)
)
, chat_data AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.user.useremail,
jsonPayload.chat.qtt AS qtt,
jsonPayload.chat.workflowrunid AS workflowrunid
FROM glean_customer_event
WHERE jsonPayload.Type = 'CHAT'
AND jsonPayload.chat.initiator = 'USER'
)
, combined_workflow_data AS (
SELECT run_id, userid, useremail, datepartition FROM workflow_data
UNION ALL
SELECT run_id, userid, useremail, datepartition FROM workflowrun_data
)
, chat_usage as (
SELECT
COALESCE(wf.datepartition, chat.datepartition) AS datepartition,
COALESCE(wf.userid, chat.userid) AS userid,
COALESCE(wf.useremail, chat.useremail) as useremail,
COUNT(DISTINCT wf.run_id) + COUNT(DISTINCT CASE WHEN wf.run_id IS NULL THEN chat.qtt END) AS num_chat_queries
FROM combined_workflow_data wf
FULL OUTER JOIN chat_data chat
ON wf.run_id = chat.workflowrunid
GROUP BY 1, 2, 3
)
, other_feature_usage AS (
SELECT
gce.jsonPayload.user.userid,
gce.jsonPayload.user.useremail,
gce.datepartition,
COUNT(
DISTINCT CASE WHEN (
gce.jsonPayload.Type = 'SEARCH'
AND (
gce.jsonPayload.search.initiator IN ('USER', 'PAGE_LOAD')
AND LOWER(gce.jsonPayload.search.modality) NOT IN ('gleanbot', 'slackbot_command', 'slackbot_leaderboard', 'slackbot_retry', '')
)
) THEN gce.jsonPayload.search.trackingtoken END
) +
COUNT(
CASE WHEN (
gce.jsonPayload.Type = 'SEARCH'
AND gce.jsonPayload.search.isrestclientapi
) THEN 1 END
) AS num_searches,
COUNT(
DISTINCT CASE WHEN (
-- Depending on category, opening links/documents counts towards DAU
gce.jsonPayload.clientevent.event IN ('OpenDocument', 'OpenLink')
AND gce.jsonPayload.clientevent.category IN (
-- Autocomplete and Search interactions count towards DAU
'Search Result'
, 'Autocomplete'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
, 'New Tab Page'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
, 'Featured Question and Answer'
, 'Generated Question and Answer'
, 'Pins'
-- Golinks interactions counts towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Verification'
, 'Datasource Auth'
, 'Insights'
-- Feature related interactions count towards DAU
, 'Chat'
, 'Result Preview'
, 'App Card'
, 'Customer Card'
, 'Search'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
OR (
-- Depending on category, certain feature related clicks count towards DAU
gce.jsonPayload.clientevent.event IN ('Click')
AND gce.jsonPayload.clientevent.category IN (
-- Autocomplete and Search interactions count towards DAU
'Autocomplete'
, 'Search Result'
, 'Datasource Filter'
, 'Facets'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- Sidebar interactions count towards DAU
, 'Sidebar Tabs'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
-- Golinks interactions counts towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Datasource Auth'
, 'User Menu'
, 'Admin Console'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
-- CRUD operations on User Generated Content(UGC) always count always count towards DAU
OR (
gce.jsonPayload.clientevent.event IN ('Add', 'Create', 'Delete')
AND gce.jsonPayload.clientevent.category IN ('Announcements', 'Answers', 'Collections')
)
OR (
gce.jsonPayload.clientevent.event IN ('View')
AND gce.jsonPayload.clientevent.category IN (
-- User Generated Content(UGC) interactions count towards DAU
'Announcements'
,'Answers'
,'Collections'
-- Directory tab interactions count towards DAU
,'Person Card'
,'Team Card'
,'Org Chart'
)
AND gce.jsonPayload.clientevent.pagepath NOT IN ('/', '/ntp', '/search')
)
/* Summarization expansion */
OR ( gce.jsonPayload.clientevent.event = 'Expand' AND gce.jsonPayload.clientevent.category = 'Summary' AND jsonPayload.clientevent.uielement = 'summarize-card')
/* Counts Start events (user chat message in Chat tab) and Expand events (prompt expansion in Discover tab) from Sidebar V2 */
OR ( gce.jsonPayload.clientevent.event in ('Start', 'Expand') AND gce.jsonPayload.clientevent.category = 'Sidebar V2')
/* Counts Start events (user query or preset click) from Inline Menu */
OR ( gce.jsonPayload.clientevent.event = 'Start' AND gce.jsonPayload.clientevent.category = 'Inline Menu')
/* Counts visits to past chats via GleanChat Conversation History */
OR ( gce.jsonPayload.clientevent.event = 'Click' AND gce.jsonPayload.clientevent.category = 'Chat' AND jsonPayload.clientevent.uielement = 'chats-menu')
THEN gce.jsonPayload.clientevent.SessionTrackingToken END
) AS num_client_active_sessions,
COUNT(
CASE WHEN (
gce.jsonPayload.Type IN ('SEARCH_CLICK','CHAT_FEEDBACK','AI_SUMMARY','AI_ANSWER','SEARCH_FEEDBACK')
OR gce.jsonPayload.Type IN ('SHORTCUT')
AND gce.jsonPayload.shortcut.event IN ('REDIRECT', 'CREATE', 'DELETE', 'UPDATE')
) THEN 1 END
) >= 1 AS is_active_other
FROM
glean_customer_event gce
WHERE
gce.jsonPayload.Type not in ('CHAT','WORKFLOW')
GROUP BY
1, 2, 3
)
, event_traces AS (
SELECT
datepartition,
jsonPayload.user.userid,
jsonPayload.user.useremail,
jsonPayload.gleanbotactivity.eventtrackingtoken,
jsonPayload.gleanbotactivity.responseevents,
jsonPayload.gleanbotactivity.eventtype,
jsonPayload.gleanbotactivity.workflowexecutionpoints,
jsonPayload.gleanbotactivity.stt
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'GLEAN_BOT_ACTIVITY'
AND LOWER(jsonPayload.gleanbotactivity.source) = 'slack'
)
, slack_reactive_bot_activity AS (
SELECT
datepartition,
userid,
useremail,
COUNT(DISTINCT CASE
WHEN response_downvoted THEN NULL
WHEN bot_responded THEN eventtrackingtoken
WHEN response_shared THEN eventtrackingtoken
ELSE NULL END) AS num_useful_responses
FROM (
SELECT
message_data.datepartition,
message_data.userid,
message_data.useremail,
message_data.eventtrackingtoken,
'REACTIVE_ACT_MESSAGE' IN UNNEST(message_data.responseevents) -- Messages that got a successful response from the bot should be counted
OR 'REACTIVE_GENERATION_RESPONSE_MODAL' IN UNNEST(message_data.responseevents) -- Gleanbot added value to users other than the question asker by generating responses for them
OR 'COMMAND_SEARCH_RESULTS_MESSAGE' IN UNNEST(message_data.responseevents) -- Messages that were triggered through a /glean command and got results as response
OR 'COMMAND_SEARCH_UPDATE_MODAL_WITH_RESULTS' IN UNNEST(message_data.responseevents) -- Messages that were triggered through a /glean command and got results as response on retry
OR 'COMMAND_WORKFLOW_SUCCESS' IN UNNEST(message_data.responseevents) -- Response was successfully generated after user queried through glean console
OR 'DISCUSSION_SUMMARY_SENT' IN UNNEST(message_data.responseevents) -- [Slack Compete] Response to summarize a thread discussion was successfully sent through glean DM
OR 'SINGLE_CHANNEL_SUMMARY_SENT' IN UNNEST(message_data.responseevents) -- [Slack Compete] Response to summarize a channel discussion was successfully sent through glean DM
OR 'DOC_URL_SUMMARY_SENT' IN UNNEST(message_data.responseevents) -- [Slack Compete] Response to summarize a uploaded doc discussion was successfully sent through glean DM
AS bot_responded,
interaction_data.eventtype = 'SHARE_CLICK' AS response_shared, -- Response was shared by the user
'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflowexecutionpoints) AS response_downvoted, -- Response was dismissed by the user
'SHOW_SOURCES_CLICK_SUCCESS' IN UNNEST(interaction_data.workflowexecutionpoints) AS citations_shown, -- Response was checked for its citation source
'SHARE_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_helpful, -- Response was voted as helpful
'SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_not_helpful -- Response was voted as not helpful
FROM (
SELECT
*
FROM
event_traces
WHERE
-- Identify the set of valid slack reactive bot workflow starting points that could potentitally contribute to active users definition
(
'REACTIVE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where reactive workflow started successfully
OR 'REACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where reactive workflow for assistant type started
OR 'REACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where reactive workflow for search type started
OR 'COMMAND_SEARCH_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where /glean command was used to initiate a reactive search
OR 'COMMAND_DISCUSSION_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where glean console was used to summarize a discussion/query
OR 'COMMAND_CHANNEL_SUMMARIZE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where glean console was used to summarize a channel
OR 'COMMAND_DOC_SUMMARIZE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where glean console was used to summarize an attached/shared doc
OR eventtype IN (
'TAGGED_MESSAGE' -- Identify messages where @glean was mentioned
, 'GENERATE_ANSWER' -- Identify messages where someone clicked on generate answer button
, 'COMMAND' -- Identify messages where /glean was mentioned
, 'DM_TO_GLEANBOT_MESSAGE' -- [Slack Compete] Identify messages where user sent DM to glean bot with a query
)
)
AND NOT 'REACTIVE_DISABLED_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove the events where reactive workflow cannont be triggered. Eg: Invalid Channel
AND NOT 'REACTIVE_DISABLED_FAILURE_FOR_USER' IN UNNEST(workflowexecutionpoints) -- Remove the events where reactive workflow cannont be triggered for the user Eg: Insufficient permissions
AND NOT 'COMMAND_BAD_REQUEST' IN UNNEST(workflowexecutionpoints) -- Remove the event where /glean command was triggered in an invalid channel/by invalid user
AND NOT 'COMMAND_SEARCH_RESULTS_MODAL_UPDATE_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove cases where /glean command failed to load results
AND NOT 'COMMAND_SEARCH_NO_RESULTS_MODAL_UPDATE_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove cases where /glean command failed to update and dispaly results
) AS message_data
LEFT JOIN (
SELECT
*
FROM
event_traces
WHERE eventtype IN (
'SHOW_SOURCES' -- All events where a user clicked on show sources modal
, 'SHARE_HELPFULNESS' -- All events for which helpful/not helpful buttons click was initiated
, 'SHARE_CLICK' -- All events for which sharing workflow was started
, 'SUBMIT_FEEDBACK' -- All events for which manual feedback workflow was started
)
) AS interaction_data
ON
message_data.eventtrackingtoken = interaction_data.stt
)
GROUP BY
datepartition,
userid,
useremail
)
, slack_proactive_bot_activity AS (
SELECT
datepartition,
userid,
useremail,
COUNT(DISTINCT CASE
WHEN response_downvoted THEN NULL
WHEN voted_helpful THEN eventtrackingtoken
WHEN citations_shown THEN eventtrackingtoken
WHEN response_shared THEN eventtrackingtoken
WHEN bot_responded_on_users_request AND NOT voted_not_helpful THEN eventtrackingtoken
ELSE NULL END) AS num_useful_responses,
COUNT(DISTINCT CASE
WHEN response_downvoted THEN NULL
WHEN voted_helpful THEN eventtrackingtoken
WHEN citations_shown THEN eventtrackingtoken
WHEN response_shared THEN eventtrackingtoken
WHEN bot_responded_on_users_request AND NOT voted_not_helpful AND NOT digest_consumed THEN eventtrackingtoken
ELSE NULL END) AS num_useful_responses_without_digest
FROM (
SELECT
message_data.datepartition,
message_data.userid,
message_data.useremail,
message_data.eventtrackingtoken,
message_data.eventtype IN ('NON_TAGGED_MESSAGE', 'PROACTIVE_DISCUSSION_SUMMARIZER') AS workflow_started,
'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.responseevents)
OR 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.responseevents)
OR 'PROACTIVE_MESSAGE' IN UNNEST(message_data.responseevents)
AS bot_responded,
'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.responseevents)
OR 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.responseevents)
OR 'VIEW_DIGEST' IN UNNEST(message_data.responseevents)
AS bot_responded_on_users_request,
interaction_data.eventtype = 'SHARE_CLICK' AS response_shared, -- Response was shared by the user
'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflowexecutionpoints)
OR interaction_data.eventtype = 'DISMISS_SUGGESTION'
AS response_downvoted, -- Response was dismissed by the user
interaction_data.eventtype = 'SHOW_SOURCES' AS citations_shown, -- Response was checked for its citation source
'SHARE_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_helpful, -- Response was voted as helpful
'SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.responseevents) AS voted_not_helpful, -- Response was voted as not helpful
'DAILY_DIGEST_REMINDER_SENT' IN UNNEST(message_data.responseevents) AS digest_sent, -- Bot sent user daily digest over DM
'VIEW_DIGEST' IN UNNEST(message_data.responseevents) AS digest_consumed, -- Digest was opened and viewed by the user
'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.responseevents)
OR 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.responseevents)
AS proactive_summary_sent
FROM (
SELECT
*
FROM
event_traces
WHERE
-- Identify the set of valid slack proactive bot workflow starting points that could potentitally contribute to active users definition
(
'PROACTIVE_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where proactive workflow started
OR 'PROACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where proactive workflow for assistant type started
OR 'PROACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflowexecutionpoints) -- Identify messages where proactive workflow for search type started
OR eventtype IN (
'NON_TAGGED_MESSAGE' -- All messages that were not DMs and did not mention @Glean or \glean fall in this category
, 'PROACTIVE_DISCUSSION_SUMMARIZER' -- All events for which proactive thread summarizer workflow is initiated
, 'VIEW_DIGEST_CLICK' -- All events where proactive digest was clicked
, 'DAILY_DIGEST_REMINDER' -- All events where digest was subscribed to and thus proactively sent over DM
)
)
AND NOT 'PROACTIVE_BOT_DISABLED_FAILURE' IN UNNEST(workflowexecutionpoints) -- Remove the events where proactive workflow cannont be triggered. Eg: Invalid Channel
AND NOT 'DROP_BOT_MESSAGE' IN UNNEST(workflowexecutionpoints) -- Remove the events which represent messages sent by other bots, so we dont converse with slack automations
AND NOT 'DROP_EXTERNAL_CHANNEL_MESSAGE' IN UNNEST(workflowexecutionpoints) -- Remove the events that correspond to messages sent on external channels.
) AS message_data
LEFT JOIN (
SELECT
*
FROM
event_traces
WHERE
eventtype IN (
'SHOW_SOURCES' -- All events where a user clicked on show sources modal
, 'SHARE_HELPFULNESS' -- All events for which helpful/not helpful buttons click was initiated
, 'SHARE_CLICK' -- All events for which sharing workflow was started
, 'SUBMIT_FEEDBACK' -- All events for which manual feedback workflow was started
, 'DISMISS_SUGGESTION' -- All events for which dismiss suggestion workflow was initiated
)
) AS interaction_data
ON
message_data.eventtrackingtoken = interaction_data.stt
)
GROUP BY
datepartition, userid, useremail
)
, slackbot_usage as (
SELECT
COALESCE(p.datepartition, r.datepartition) AS datepartition,
COALESCE(p.userid, r.userid) AS userid,
COALESCE(p.useremail, r.useremail) AS useremail,
COALESCE(r.num_useful_responses, 0) AS reactive_num_useful_responses,
COALESCE(p.num_useful_responses, 0) AS proactive_num_useful_responses,
COALESCE(p.num_useful_responses_without_digest, 0) AS proactive_num_useful_responses_without_digest,
COALESCE(r.num_useful_responses, 0) + COALESCE(p.num_useful_responses, 0) AS total_num_useful_responses
FROM slack_proactive_bot_activity p
FULL OUTER JOIN slack_reactive_bot_activity r
ON p.datepartition = r.datepartition
AND p.userid = r.userid
)
, feature_usage as(
WITH base_join AS (
SELECT
COALESCE(chat_usage.datepartition, other_feature_usage.datepartition) AS datepartition,
COALESCE(chat_usage.userid, other_feature_usage.userid) AS userid,
COALESCE(chat_usage.useremail, other_feature_usage.useremail) AS useremail,
COALESCE(chat_usage.num_chat_queries, 0) AS _num_chats,
COALESCE(other_feature_usage.num_searches, 0) AS _num_searches,
COALESCE(other_feature_usage.num_client_active_sessions, 0) AS _num_client_active_sessions,
COALESCE(other_feature_usage.is_active_other, FALSE) AS _is_active_other
FROM
other_feature_usage
FULL OUTER JOIN
chat_usage
ON
other_feature_usage.datepartition = chat_usage.datepartition
AND other_feature_usage.userid = chat_usage.userid
)
SELECT
COALESCE(slackbot_usage.datepartition, base_join.datepartition) AS datepartition,
COALESCE(slackbot_usage.userid, base_join.userid) AS userid,
COALESCE(slackbot_usage.useremail, base_join.useremail) AS useremail,
COALESCE(base_join._num_chats, 0) AS _num_chats,
COALESCE(base_join._num_searches, 0) AS _num_searches,
COALESCE(base_join._num_client_active_sessions, 0) AS _num_client_active_sessions,
COALESCE(base_join._is_active_other, FALSE) AS _is_active_other,
COALESCE(slackbot_usage.total_num_useful_responses, 0) AS _num_slackbot_useful_responses
FROM
base_join
FULL OUTER JOIN
slackbot_usage
ON
base_join.datepartition = slackbot_usage.datepartition
AND base_join.userid = slackbot_usage.userid
)
, canonicalized_feature_usage AS (
SELECT
COALESCE(id_to_alias.canonicalid, feature_usage.userid) AS userid,
feature_usage.useremail,
datepartition,
COALESCE(SUM(_num_searches), 0) AS num_searches,
COALESCE(SUM(_num_chats), 0) AS num_chats,
COALESCE(SUM(_num_client_active_sessions), 0) AS num_client_active_sessions,
COALESCE(SUM(_num_slackbot_useful_responses), 0) AS num_slackbot_useful_responses,
COALESCE(LOGICAL_OR(_is_active_other), FALSE) AS is_active_other
FROM
feature_usage
LEFT JOIN
id_to_alias
ON
feature_usage.userid = id_to_alias.aliasid
WHERE
COALESCE(id_to_alias.canonicalid, feature_usage.userid) IS NOT NULL
GROUP BY
1, 2, 3
HAVING
COALESCE(SUM(_num_searches), 0) > 0
OR COALESCE(SUM(_num_chats), 0) > 0
OR COALESCE(SUM(_num_client_active_sessions), 0) > 0
OR COALESCE(SUM(_num_slackbot_useful_responses), 0) > 0
OR COALESCE(LOGICAL_OR(_is_active_other), FALSE)
)
SELECT
o.canonicalid AS UserId,
COALESCE(SUM(c.num_searches), 0) AS NumSearches,
COALESCE(SUM(c.num_chats), 0) AS NumChats
FROM
id_to_alias o
LEFT JOIN
canonicalized_feature_usage c
ON
c.userid = o.canonicalid
WHERE
o.signuptime IS NOT NULL OR c.userid IS NOT NULL
GROUP BY
1
ORDER BY
COALESCE(SUM(c.num_searches), 0) + COALESCE(SUM(c.num_chats), 0) DESC