Page MenuHomePhabricator
Paste P108

analytics queries for flow
ActivePublic

Authored by EBernhardson on Nov 26 2014, 3:37 AM.
Tags
None
Referenced Files
F16412: analytics_queries_for_flow
Nov 26 2014, 4:05 PM
F16234: analytics_queries_for_flow
Nov 26 2014, 3:37 AM
Subscribers
None
CREATE FUNCTION uuid_for_time (foo DATETIME)
RETURNS binary(11) DETERMINISTIC
RETURN unhex(rpad(lpad(HEX((1000*UNIX_TIMESTAMP(foo))<<2),12,'0'),22,'0'));
CREATE FUNCTION time_for_uuid (uuid BINARY(11))
RETURNS CHAR(22) DETERMINISTIC
RETURN FROM_UNIXTIME((conv(substring(hex(rev_id),1,12),16,10)>>2)/1000);
CREATE FUNCTION time_for_mwts (ts BINARY(14))
RETURNS DATETIME DETERMINISTIC
RETURN STR_TO_DATE( ts, '%Y%m%d%H%i%s' );
# 1. Number of boards with at least 1 created post within the week
SELECT z.yearweek,
count(distinct workflow_wiki, workflow_namespace, workflow_title_text) as num_boards
FROM flow_workflow
JOIN (
SELECT a.tree_ancestor_id, YEARWEEK(time_for_uuid(a.tree_descendant_id)) as yearweek
FROM flow_tree_node a
JOIN (
SELECT b.tree_descendant_id, MAX(b.tree_depth) as max
FROM flow_tree_node b
GROUP BY b.tree_descendant_id
) y ON y.max = a.tree_depth AND y.tree_descendant_id = a.tree_descendant_id
) z ON z.tree_ancestor_id = workflow_id
GROUP BY z.yearweek;
# 2. Active topics
#
# aggregate: Number of topics with at least 1 created revision within the
# week
SELECT z.yearweek, count(distinct workflow_id) as num_topics
FROM flow_workflow
JOIN (
SELECT a.tree_ancestor_id, YEARWEEK(time_for_uuid(a.tree_descendant_id)) as yearweek
FROM flow_tree_node a
JOIN (
SELECT b.tree_descendant_id, MAX(b.tree_depth) as max
FROM flow_tree_node b
GROUP BY b.tree_descendant_id
) y ON y.max = a.tree_depth AND y.tree_descendant_id = a.tree_descendant_id
) z ON z.tree_ancestor_id = workflow_id
GROUP BY z.yearweek;
# broken down per board
SELECT z.yearweek, count(distinct workflow_id) as num_topics,
workflow_wiki, workflow_namespace, workflow_title_text
FROM flow_workflow
JOIN (
SELECT a.tree_ancestor_id, YEARWEEK(time_for_uuid(a.tree_descendant_id)) as yearweek
FROM flow_tree_node a
JOIN (
SELECT b.tree_descendant_id, MAX(b.tree_depth) as max
FROM flow_tree_node b
GROUP BY b.tree_descendant_id
) y ON y.max = a.tree_depth AND y.tree_descendant_id = a.tree_descendant_id
) z ON z.tree_ancestor_id = workflow_id
GROUP BY z.yearweek, workflow_wiki, workflow_namespace, workflow_title_text;
# 3. Messages posted
#
# aggregate: Number of replies across all boards in a week
select YEARWEEK(time_for_uuid(rev_id)) as yearweek, count( rev_id ) as num_replies
FROM flow_revision
WHERE rev_change_type = 'reply'
GROUP BY yearweek;
# broken down by board: Number of replies created per board
SELECT z.yearweek, workflow_wiki, workflow_namespace, workflow_title_text, count(workflow_wiki) as num_replies
FROM flow_workflow
JOIN (
SELECT a.tree_ancestor_id, YEARWEEK(time_for_uuid(a.tree_descendant_id)) as yearweek
FROM flow_tree_node a
JOIN (
SELECT b.tree_descendant_id, MAX(b.tree_depth) as max
FROM flow_tree_node b
JOIN (
SELECT rev_type_id
FROM flow_revision
WHERE rev_change_type = 'reply'
) x ON x.rev_type_id = b.tree_descendant_id
GROUP BY b.tree_descendant_id
) y ON y.max = a.tree_depth AND y.tree_descendant_id = a.tree_descendant_id
) z ON z.tree_ancestor_id = workflow_id
GROUP BY z.yearweek, workflow_wiki, workflow_namespace, workflow_title_text;
# 4. Unique users posting messages
#
# aggregate: across all boards
SELECT yearweek, SUM(user) as unique_users
FROM (
SELECT 1 as user, YEARWEEK(time_for_uuid(rev_id)) as yearweek
FROM flow_revision
WHERE rev_change_type = 'reply'
GROUP BY rev_user_wiki, rev_user_id, rev_user_ip
) x
GROUP BY yearweek;
# broken down by board
SELECT yearweek, workflow_wiki, workflow_namespace, workflow_title_text,
count(distinct rev_user_wiki, rev_user_id, IF(rev_user_ip IS NULL, "", rev_user_ip)) as unique_users
FROM flow_workflow
JOIN (
SELECT rev_user_wiki, rev_user_id, rev_user_ip, a.tree_ancestor_id,
YEARWEEK(time_for_uuid(a.tree_descendant_id)) as yearweek
FROM flow_tree_node a
JOIN (
SELECT rev_user_wiki, rev_user_id, rev_user_ip,
MAX(b.tree_depth) as max, b.tree_descendant_id
FROM flow_tree_node b
JOIN (
SELECT rev_type_id, rev_user_wiki, rev_user_id, rev_user_ip
FROM flow_revision
WHERE rev_change_type = 'reply'
) x ON x.rev_type_id = tree_descendant_id
GROUP BY b.tree_descendant_id, rev_user_wiki, rev_user_id, rev_user_ip
) y ON y.max = a.tree_depth AND y.tree_descendant_id = a.tree_descendant_id
) z ON z.tree_ancestor_id = workflow_id
GROUP BY yearweek, workflow_wiki, workflow_namespace, workflow_title_text;
# 5. Moderation actions
#
# aggregate: Number of moderation actions performed in a week per type
SELECT rev_change_type, count(rev_change_type) as num_actions,
YEARWEEK(time_for_uuid(rev_id)) as yearweek
FROM flow_revision
WHERE rev_change_type IN (
'restore-post', 'hide-post', 'delete-post',
'restore-topic', 'hide-topic', 'delete-topic'
)
GROUP BY yearweek, rev_change_type;
# broken down by board: Number of moderation actions performed in a week
# per type per board
SELECT yearweek, workflow_wiki, workflow_namespace, workflow_title_text,
rev_change_type, count(rev_change_type) as num_actions
FROM flow_workflow
JOIN (
SELECT y.rev_change_type, a.tree_ancestor_id,
YEARWEEK(time_for_uuid(a.tree_descendant_id)) as yearweek
FROM flow_tree_node a
JOIN (
SELECT x.rev_change_type, MAX(b.tree_depth) as max, b.tree_descendant_id
FROM flow_tree_node b
JOIN (
SELECT rev_type_id, rev_change_type
FROM flow_revision
WHERE rev_change_type IN (
'restore-post', 'hide-post', 'delete-post',
'restore-topic', 'hide-topic', 'delete-topic'
)
) x ON x.rev_type_id = b.tree_descendant_id
GROUP BY b.tree_descendant_id, x.rev_change_type
) y ON y.max = a.tree_depth AND y.tree_descendant_id = a.tree_descendant_id
) z ON z.tree_ancestor_id = workflow_id
GROUP BY yearweek, workflow_wiki, workflow_namespace, workflow_title_text, rev_change_type;

Event Timeline

EBernhardson changed the title of this paste from untitled to analytics queries for flow.
EBernhardson updated the paste's language from autodetect to mysql.