Page MenuHomePhabricator
Authored By
Ironholds
Oct 19 2015, 3:56 PM
Size
1 KB
Referenced Files
None
Subscribers
None

verify_and_data.R

library(wmf)
library(ggplot2)
library(data.table)
library(uaparser)
# Get data
# Run the query through wmf::mysql_read() (second argument "log") and hold the
# result as a data.table. The query text is a multi-line string literal, so
# its continuation lines deliberately start at column 0.
data <- as.data.table(wmf::mysql_read("SELECT timestamp, userAgent, wiki,
event_searchSessionId AS session_id, event_subTest AS subtest
FROM TestSearchSatisfaction2_14098806", "log"))
# Flag rows whose subtest label contains "enabled". grepl() already returns a
# logical vector and yields FALSE for a missing subtest, so unlabelled rows
# are FALSE rather than NA.
data[, terms_enabled := grepl(pattern = "enabled", x = subtest)]
# Strip the trailing ":enabled" / ":disabled" marker from the subtest label.
data[, subtest := gsub(pattern = "\\:(dis|en)abled$", replacement = "", x = subtest)]
# Check the sampling rate
# TRUE for rows that carry a non-missing subtest label.
has_subtest <- !is.na(data$subtest)
# Fraction of distinct session IDs that occur on at least one labelled row.
sampling_rate_session <- uniqueN(data$session_id[has_subtest]) /
  uniqueN(data$session_id)
# Fraction of all rows that are labelled.
sampling_rate_events <- sum(has_subtest) / nrow(data)
# Row count per subtest label (rows with a missing label form their own group).
per_group_sampling_rate <- data[, .(event_count = .N), by = subtest]
# NOTE(review): only the event-level rate is written out; the session-level
# rate and the per-group counts are computed above but never saved.
write.table(sampling_rate_events, file = "relaxer_sampling_rate.tsv",
            row.names = FALSE)
# Check the user agents
# Parse the raw user-agent strings, then count how often each
# "<browser> <browser_major>" combination occurs.
parsed_agents <- parse_agents(data$userAgent)
# paste() stays inline inside table() so the label column of the resulting
# data frame keeps its default name ("Var1") next to "Freq".
ua_data <- as.data.frame(
  table(paste(parsed_agents$browser, parsed_agents$browser_major)),
  stringsAsFactors = FALSE
)
# Most frequent combinations first.
ua_data <- ua_data[order(ua_data$Freq, decreasing = TRUE), ]
# Keep at most the top 20 rows. head() stops at the rows that exist, whereas
# ua_data[1:20, ] pads the output with all-NA rows when fewer than 20 distinct
# combinations are present.
# sep = "\t" makes the file content match its .tsv extension; write.table()
# otherwise separates fields with a single space.
write.table(head(ua_data, 20), file = "relaxer_ua_data.tsv", sep = "\t",
            row.names = FALSE)
# Check the projects
# Count labelled rows (non-missing subtest) for each wiki / subtest pair.
project_data <- data[!is.na(subtest), .(events = .N), by = .(wiki, subtest)]
# sep = "\t" makes the file content match its .tsv extension; write.table()
# otherwise separates fields with a single space.
write.table(project_data, file = "relaxer_project_data.tsv", sep = "\t",
            row.names = FALSE)

File Metadata

Mime Type
text/plain
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2749956
Default Alt Text
verify_and_data.R (1 KB)

Event Timeline