diff --git a/DESCRIPTION b/DESCRIPTION index bf174e6..1f04758 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,20 +1,22 @@ Package: wmf Type: Package Title: R Code for Wikimedia Foundation Internal Usage Version: 0.2.1 Date: 2015-09-30 Authors@R: c( person("Oliver", "Keyes", email = "oliver@wikimedia.org", role = "cre"), person("Mikhail", "Popov", email = "mpopov@wikimedia.org", role = "aut")) Description: More about what it does (maybe more than one line). License: What license is it under? LazyData: TRUE Imports: - RMySQL, - urltools, - pwr, - ggplot2, - ggthemes + RMySQL, + urltools, + pwr, + ggplot2, + ggthemes URL: https://github.com/Ironholds/wmf BugReports: https://github.com/Ironholds/wmf/issues -Suggests: testthat +Suggests: + testthat +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index ec0cbb7..3cf30e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,32 +1,33 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand +export(date_clause) export(from_log) export(from_mediawiki) export(get_logfile) export(global_query) -export(hive_query) export(mysql_close) export(mysql_connect) export(mysql_disconnect) export(mysql_exists) export(mysql_read) export(mysql_write) +export(query_hive) export(read_sampled_log) export(sample_size_effect) export(sample_size_odds) export(set_proxies) export(theme_fivethirtynine) export(to_log) export(to_mediawiki) import(ggplot2) import(ggthemes) importFrom(pwr,pwr.chisq.test) importFrom(urltools,url_decode) importMethodsFrom(RMySQL,dbClearResult) importMethodsFrom(RMySQL,dbConnect) importMethodsFrom(RMySQL,dbDisconnect) importMethodsFrom(RMySQL,dbExistsTable) importMethodsFrom(RMySQL,dbListResults) importMethodsFrom(RMySQL,dbSendQuery) importMethodsFrom(RMySQL,dbWriteTable) importMethodsFrom(RMySQL,fetch) diff --git a/R/dataviz.R b/R/dataviz.R index 35b27b9..4773b0b 100644 --- a/R/dataviz.R +++ b/R/dataviz.R @@ -1,33 +1,38 @@ #'@title Theme inspired by fivethirtyeight.com plots #'@description A modification of \code{ggthemes::theme_fivethirtyeight} #' #'@param base_size base font size #'@param base_family base font family #' #'@details Basically it adds axis titles (with some modification on the y to #' allow for long titles) back in and does a small amount of reduction of the #' overall plot size to avoid an absolute ton of extraneous spacing. #' #'@name FiveThirtyNine #'@rdname FiveThirtyNine #'@import ggplot2 #'@import ggthemes #' #'@export #' theme_fivethirtynine <- function(base_size = 12, base_family = "sans"){ (theme_foundation(base_size = base_size, base_family = base_family) + - theme(line = element_line(), rect = element_rect(fill = ggthemes::ggthemes_data$fivethirtyeight["ltgray"], + theme(line = element_line(), rect = element_rect(fill = ggthemes:::ggthemes_data$fivethirtyeight["ltgray"], linetype = 0, colour = NA), - text = element_text(colour = ggthemes::ggthemes_data$fivethirtyeight["dkgray"]), - axis.title.y = element_text(size = rel(1.5), angle = 90, vjust = 1.5), axis.text = element_text(), - axis.title.x = element_text(size = rel(1.5)), + text = element_text(colour = ggthemes:::ggthemes_data$fivethirtyeight["dkgray"], margin = ggplot2::margin(), debug = FALSE), + axis.title.y = element_text(size = rel(2), angle = 90, vjust = 1.5, margin = ggplot2::margin(12), debug = FALSE), + axis.text = element_text(size=rel(1.5)), + axis.title.x = element_text(size = rel(2), margin = ggplot2::margin(12), debug = FALSE), axis.ticks = element_blank(), axis.line = element_blank(), legend.background = element_rect(), legend.position = "bottom", legend.direction = "horizontal", legend.box = "vertical", panel.grid = element_line(colour = NULL), - panel.grid.major = element_line(colour = ggthemes_data$fivethirtyeight["medgray"]), + panel.grid.major = element_line(colour = ggthemes:::ggthemes_data$fivethirtyeight["medgray"]), panel.grid.minor = element_blank(), - plot.title = element_text(hjust = 0, size = rel(1.5), face = "bold"), - strip.background = element_rect())) + plot.title = element_text(hjust = 0, size = rel(1.5), face = "bold", margin = ggplot2::margin(), debug = FALSE), + strip.background = element_rect(), + legend.text = element_text(size=18), legend.title = element_text(size=rel(1.5), margin = ggplot2::margin(4), debug = FALSE), + legend.key.size = unit(1,"in"), + panel.background = element_rect(fill = "transparent", color = NA), + plot.background = element_rect(fill = "transparent", color = NA))) } diff --git a/R/hive.R b/R/hive.R index facfec9..8eaa034 100644 --- a/R/hive.R +++ b/R/hive.R @@ -1,75 +1,80 @@ #'@title hive_query #'@details Hive querying function #'@description this is the "old" hive querying function - it's deprecated as all hell and waiting #'until Andrew sticks the hive server on a dedicated and more powerful machine. #' -#'@param query a query, or the location of a .hql file containing a query. -#' -#'@param file a file name. If this is provided, the results of the query will be written straight -#'there, and a boolean TRUE returned. If not provided (it's NULL by default), the results of the query -#'will be returned as a data.frame -#' -#'@param dt Whether to return it as a data.table or not. -#' -#'@param ... other arguments to pass to read.delim. +#'@param query a Hive query #' #'@section escaping: #'\code{hive_query} works by running the query you provide through the CLI via a system() call. #'As a result, single escapes for meaningful characters (such as quotes) within the query will not work: #'R will interpret them only as escaping that character /within R/. Double escaping (\\\) is thus necessary, #'in the same way that it is for regular expressions. #' #'@return a data.frame containing the results of the query, or a boolean TRUE if the user has chosen #'to write straight to file. #' #'@section handling our hadoop/hive setup: #' #'The \code{webrequests} table is documented #'\href{https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Hive}{on Wikitech}, which also provides #'\href{https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Hive/Queries}{a set of example #'queries}. #' #'When it comes to manipulating the rows with Java before they get to you, Nuria has written a #'\href{https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Hive/QueryUsingUDF}{brief tutorial on loading UDFs} #'which should help if you want to engage in that; the example provided is a user agent parser, allowing you to #'get the equivalent of \code{\link{ua_parse}}'s output further upstream. #'@seealso \code{\link{log_strptime}} for converting the "dt" column in the webrequests table to POSIXlt, #'and \code{\link{mysql_query}} and \code{\link{global_query}} for querying our MySQL databases. #' +#'@examples +#'\dontrun{ +#'query_hive("USE wmf; DESCRIBE webrequest;") +#'} +#' #'@export -hive_query <- function(query, file = NULL, ...){ +query_hive <- function(query){ - to_R <- FALSE - - #If the user wants it passed straight to R... - if(is.null(file)){ + # Write query out to tempfile and create tempfile for results. + query_dump <- tempfile() + cat(query, file = query_dump) + results_dump <- tempfile() - #Create temp file - file <- tempfile(pattern = "file", fileext = ".tsv") - - #Note - to_R <- TRUE - - } + # Query and read in the results + try({ + system(paste0("export HADOOP_HEAPSIZE=1024 && hive -S -f ", query_dump, " > ", results_dump)) + results <- read.delim(results_dump, sep = "\t", quote = "", as.is = TRUE, header = TRUE) + }) - #Run query. If the query is /not/ a file, make it one - if(!grepl(x = query, pattern = "\\.hql$")){ - query_file <- tempfile(fileext = ".hql") - cat(query, file = query_file) - query <- query_file - } + # Clean up and return + file.remove(query_dump, results_dump) + stop_on_empty(results) + return(results) - #Run - system(paste("export HADOOP_HEAPSIZE=1024 && hive -f", query, ">", file)) +} - #Read, remove the file and return(if appropriate) - if(to_R){ - data <- read.delim(file = file, header = TRUE, as.is = TRUE, quote = "", ...) - file.remove(file) - return(data) +#'@title Generate a Date Clause for a Hive query +#'@description what it says on the tin; generates a "WHERE year = foo AND month = bar" using lubridate +#'that can then be combined with other elements to form a Hive query. +#' +#'@param date the date to use. If NULL, yesterday will be used. +#' +#'@return a list containing two elements, "date_clause" and "date"; the returning of +#'the date allows you to include it with. +#' +#'@export +date_clause <- function(date) { + if (is.null(date)) { + date <- Sys.Date() - 1 } - return(TRUE) + split_date <- unlist(strsplit(as.character(date), "-")) + fragment <- (paste("WHERE year =", split_date[1], + "AND month =",split_date[2], + "AND day =", split_date[3], " ")) -} + output <- list(date_clause = fragment, date = date) + return(output) +} \ No newline at end of file diff --git a/R/mysql.R b/R/mysql.R index 9d30151..cffc47d 100644 --- a/R/mysql.R +++ b/R/mysql.R @@ -1,118 +1,128 @@ RMySQL_version <- function() { # Returns 93 if the installed version of RMySQL is 0.9.3 return(as.numeric(paste0(unlist(packageVersion("RMySQL")), collapse = ""))) } +# Ensure that we recognise and error on 0 rows +stop_on_empty <- function(data){ + if(nrow(data) == 0){ + stop("No rows were returned from the database") + } + return(invisible()) +} + + #'@title Work with MySQL databases #'@description Read from, write to, and check data from the MySQL databases and #' tables in the Wikimedia cluster. Assumes the presence of a validly #' formatted configuration file. #' #'@param query A SQL query. #' #'@param database The name of the database to query. #' #'@param con A MySQL connection returned by \code{mysql_connect}. #' Optional -- if not provided, a temporary connection will be opened up. #' #'@param table The name of a table to check for the existence of or create, #' depending on the function. #' #'@param ... Further arguments to pass to dbWriteTable. See ?dbWriteTable for more details. #' #'@name mysql #'@rdname mysql #'@importMethodsFrom RMySQL dbConnect #' #'@seealso \code{\link{hive_query}} or \code{\link{global_query}} #' #'@export mysql_connect <- function(database) { if (RMySQL_version() > 93) { con <- dbConnect(drv = RMySQL::MySQL(), host = "analytics-store.eqiad.wmnet", dbname = database, default.file = "/etc/mysql/conf.d/analytics-research-client.cnf") } else { # Using version RMySQL 0.9.3 or older: con <- dbConnect(drv = "MySQL", host = "analytics-store.eqiad.wmnet", dbname = database, default.file = "/etc/mysql/conf.d/analytics-research-client.cnf") } return(con) } #'@rdname mysql #'@importMethodsFrom RMySQL dbSendQuery dbDisconnect dbListResults dbClearResult fetch #'@export mysql_read <- function(query, database, con = NULL) { already_connected <- !is.null(con) if (!already_connected) { #Open a temporary connection to the db con <- mysql_connect(database) } to_fetch <- dbSendQuery(con, query) data <- fetch(to_fetch, -1) message(sprintf("Fetched %.0f rows and %.0f columns.", nrow(data), ncol(data))) dbClearResult(dbListResults(con)[[1]]) if (!already_connected) { #Close temporary connection dbDisconnect(con) } + stop_on_empty(data) return(data) } #'@rdname mysql #'@importMethodsFrom RMySQL dbExistsTable dbDisconnect #'@export mysql_exists <- function(database, table_name, con = NULL) { already_connected <- !is.null(con) if (!already_connected) { #Open a temporary connection to the db con <- mysql_connect(database) } #Grab the results and close off table_exists <- dbExistsTable(conn = con, name = table_name) if (!already_connected) { #Close temporary connection dbDisconnect(con) } #Return return(table_exists) } #'@rdname mysql #'@importMethodsFrom RMySQL dbWriteTable dbDisconnect #'@export mysql_write <- function(x, database, table_name, con = NULL, ...){ already_connected <- !is.null(con) if (!already_connected) { #Open a temporary connection to the db con <- mysql_connect(database) } #Write result <- dbWriteTable(conn = con, name = table_name, value = x, row.names = FALSE, ...) if (!already_connected) { #Close temporary connection dbDisconnect(con) } #Return the success/failure return(result) } #'@rdname mysql #'@importMethodsFrom RMySQL dbDisconnect #'@export mysql_close <- function(con) { dbDisconnect(con) return(invisible()) } #'@rdname mysql #'@export mysql_disconnect <- function(con) { mysql_close(con) } diff --git a/man/FiveThirtyNine.Rd b/man/FiveThirtyNine.Rd index feed3a6..fb66530 100644 --- a/man/FiveThirtyNine.Rd +++ b/man/FiveThirtyNine.Rd @@ -1,23 +1,23 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dataviz.R \name{FiveThirtyNine} \alias{FiveThirtyNine} \alias{theme_fivethirtynine} \title{Theme inspired by fivethirtyeight.com plots} \usage{ theme_fivethirtynine(base_size = 12, base_family = "sans") } \arguments{ \item{base_size}{base font size} \item{base_family}{base font family} } \description{ A modification of \code{ggthemes::theme_fivethirtyeight} } \details{ Basically it adds axis titles (with some modification on the y to allow for long titles) back in and does a small amount of reduction of the overall plot size to avoid an absolute ton of extraneous spacing. } diff --git a/man/date_clause.Rd b/man/date_clause.Rd new file mode 100644 index 0000000..b2736b6 --- /dev/null +++ b/man/date_clause.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hive.R +\name{date_clause} +\alias{date_clause} +\title{Generate a Date Clause for a Hive query} +\usage{ +date_clause(date) +} +\arguments{ +\item{date}{the date to use. If NULL, yesterday will be used.} +} +\value{ +a list containing two elements, "date_clause" and "date"; the returning of +the date allows you to include it with. +} +\description{ +what it says on the tin; generates a "WHERE year = foo AND month = bar" using lubridate +that can then be combined with other elements to form a Hive query. +} + diff --git a/man/get_logfile.Rd b/man/get_logfile.Rd index 4b6102d..00e28d9 100644 --- a/man/get_logfile.Rd +++ b/man/get_logfile.Rd @@ -1,27 +1,27 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/logs.R \name{get_logfile} \alias{get_logfile} \title{retrieve a vector of sampled log files} \usage{ get_logfile(earliest = NULL, latest = NULL) } \arguments{ \item{earliest}{a "Date" object. Set to NULL by default, which triggers the retrieval of all log file names.} \item{latest}{a "Date" object; set to NULL by default. In the event that \code{earliest} is set but \code{latest} is not, the files retrieved will span from \code{earliest} to the current date; in the event that both arguments are set, the retrieved files will be those in that range.} } \value{ A vector of filenames that can be passed into \code{\link{read_sampled_log}}. } \description{ Grab sampled log files to be piped into \code{\link{read_sampled_log}}. By default this retrieves all sampled log files; it can be used to retrieve a particular date range of files through the "earliest" and "latest" arguments. } diff --git a/man/global_query.Rd b/man/global_query.Rd index 9b7bfe5..f07525c 100644 --- a/man/global_query.Rd +++ b/man/global_query.Rd @@ -1,26 +1,26 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/global.R \name{global_query} \alias{global_query} \title{global SQL queries for analytics-store.eqiad.wmnet} \usage{ global_query(query, project_type = "all") } \arguments{ \item{query}{the SQL query you want to run} \item{project_type}{what class of wiki (wikisource, wiktionary..) you want to run against. Set to "all" by default.} } \description{ \code{global_query} is a simple wrapper around the mysql queries that allows a useR to send a query to all production dbs on analytics-store.eqiad.wmnet, joining the results from each query into a single object. } \author{ Oliver Keyes } \seealso{ \code{\link{mysql_read}} for querying an individual db, \code{\link{mw_strptime}} for converting MediaWiki timestamps into POSIXlt timestamps, or \code{\link{hive_query}} for accessing the Hive datastore. } diff --git a/man/mysql.Rd b/man/mysql.Rd index df53578..bd3475e 100644 --- a/man/mysql.Rd +++ b/man/mysql.Rd @@ -1,46 +1,46 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mysql.R \name{mysql} \alias{mysql} \alias{mysql_close} \alias{mysql_connect} \alias{mysql_disconnect} \alias{mysql_exists} \alias{mysql_read} \alias{mysql_write} \title{Work with MySQL databases} \usage{ mysql_connect(database) mysql_read(query, database, con = NULL) mysql_exists(database, table_name, con = NULL) mysql_write(x, database, table_name, con = NULL, ...) mysql_close(con) mysql_disconnect(con) } \arguments{ \item{database}{The name of the database to query.} \item{query}{A SQL query.} \item{con}{A MySQL connection returned by \code{mysql_connect}. - Optional -- if not provided, a temporary connection will be opened up.} +Optional -- if not provided, a temporary connection will be opened up.} \item{...}{Further arguments to pass to dbWriteTable. See ?dbWriteTable for more details.} \item{table}{The name of a table to check for the existence of or create, - depending on the function.} +depending on the function.} } \description{ Read from, write to, and check data from the MySQL databases and tables in the Wikimedia cluster. Assumes the presence of a validly formatted configuration file. } \seealso{ \code{\link{hive_query}} or \code{\link{global_query}} } diff --git a/man/hive_query.Rd b/man/query_hive.Rd similarity index 75% rename from man/hive_query.Rd rename to man/query_hive.Rd index 4e69bd8..4203a83 100644 --- a/man/hive_query.Rd +++ b/man/query_hive.Rd @@ -1,56 +1,54 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hive.R -\name{hive_query} -\alias{hive_query} +\name{query_hive} +\alias{query_hive} \title{hive_query} \usage{ -hive_query(query, file = NULL, ...) +query_hive(query) } \arguments{ -\item{query}{a query, or the location of a .hql file containing a query.} - -\item{file}{a file name. If this is provided, the results of the query will be written straight -there, and a boolean TRUE returned. If not provided (it's NULL by default), the results of the query -will be returned as a data.frame} - -\item{...}{other arguments to pass to read.delim.} - -\item{dt}{Whether to return it as a data.table or not.} +\item{query}{a Hive query} } \value{ a data.frame containing the results of the query, or a boolean TRUE if the user has chosen to write straight to file. } \description{ this is the "old" hive querying function - it's deprecated as all hell and waiting until Andrew sticks the hive server on a dedicated and more powerful machine. } \details{ Hive querying function } \section{escaping}{ \code{hive_query} works by running the query you provide through the CLI via a system() call. As a result, single escapes for meaningful characters (such as quotes) within the query will not work: R will interpret them only as escaping that character /within R/. Double escaping (\\\) is thus necessary, in the same way that it is for regular expressions. } \section{handling our hadoop/hive setup}{ The \code{webrequests} table is documented \href{https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Hive}{on Wikitech}, which also provides \href{https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Hive/Queries}{a set of example queries}. When it comes to manipulating the rows with Java before they get to you, Nuria has written a \href{https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Hive/QueryUsingUDF}{brief tutorial on loading UDFs} which should help if you want to engage in that; the example provided is a user agent parser, allowing you to get the equivalent of \code{\link{ua_parse}}'s output further upstream. +} +\examples{ +\dontrun{ +query_hive("USE wmf; DESCRIBE webrequest;") +} + } \seealso{ \code{\link{log_strptime}} for converting the "dt" column in the webrequests table to POSIXlt, and \code{\link{mysql_query}} and \code{\link{global_query}} for querying our MySQL databases. } diff --git a/man/read_sampled_log.Rd b/man/read_sampled_log.Rd index 207e912..3baa690 100644 --- a/man/read_sampled_log.Rd +++ b/man/read_sampled_log.Rd @@ -1,28 +1,28 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/logs.R \name{read_sampled_log} \alias{read_sampled_log} \title{read a sampled log file} \usage{ read_sampled_log(file, transparent = FALSE, nrows = NULL) } \arguments{ \item{file}{a filename, retrieved with \code{\link{get_logfile}}} \item{transparent}{a logical flag whether to gunzip the log file explicitly first (default) or read it in directly.} \item{nrows}{Number of rows to read in. (Optional)} } \value{ a data.frame containing 16 columns - "squid", "sequence_no", "timestamp", "servicetime", "ip_address", "status_code", "reply_size", "request_method", "url", "squid_status", "mime_type", "referer", "x_forwarded", "user_agent", "lang" and "x_analytics". } \description{ read a sampled log file identified with \code{\link{get_logfile}}. The sampled logs are returned as a data.frame with 16 columns - see the "Value" documentation. } diff --git a/man/sample_size_effect.Rd b/man/sample_size_effect.Rd index 30e775f..88ddd2b 100644 --- a/man/sample_size_effect.Rd +++ b/man/sample_size_effect.Rd @@ -1,34 +1,35 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/power.R \name{sample_size_effect} \alias{sample_size_effect} \title{calculate sample size given effect size} \usage{ sample_size_effect(w = NULL, groups = 2, sig_level = 0.05, power = 0.8) } \arguments{ \item{w}{Effect size you want the test to be able to detect. (Optional)} \item{groups}{Number of groups. Used in degrees of freedom calculation. Defaults to 2 (e.g. control group vs treatment group).} \item{sig_level}{Probability of Type 1 error. Usually called alpha. Defaults to 0.05.} \item{power}{Ability to detect the effect. (1 - probability of Type 2 error) Defaults to 80\%.} } \value{ If \code{w} was not provided, returns a data frame containing possible values of w and the corresponding sample size estimates. } \description{ Uses Cohen's w for effect size to calculate sample size for a chi-squared test of independence. } \examples{ sample_size_effect() sample_size_effect(0.1) sample_size_effect(w = 0.1, groups = 3, sig_level = 0.001, power = 0.9) + } diff --git a/man/sample_size_odds.Rd b/man/sample_size_odds.Rd index 77289fa..ad73b92 100644 --- a/man/sample_size_odds.Rd +++ b/man/sample_size_odds.Rd @@ -1,58 +1,56 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/power.R \name{sample_size_odds} \alias{sample_size_odds} \title{calculate sample size given odds ratio} \usage{ sample_size_odds(odds_ratio = NULL, p_control = NULL, p_treatment = NULL, power = NULL, conf_level = 0.95, sample_ratio = 1, visualize = FALSE) } \arguments{ \item{odds_ratio}{The expected odds ratio. That is, the ratio of the odds of the outcome in the test group relative to the control group. Optional, but see \strong{Details}.} \item{p_control}{Your guess for prevalence of outcome in the control group. Optional but see \strong{Details}.} \item{p_treatment}{Your guess for prevalence of outcome in the test group. Optional but see \strong{Details}.} \item{power}{The ability of the test to detect an effect where there is one. Power = 1 - Prob(Type 2 error). Optional. See \strong{Value} for details.} \item{conf_level}{Desired confidence level. Defaults to 95\%.} \item{sample_ratio}{Ratio of test group to control group. 1 is even split.} \item{visualize}{Whether to plot power or prevalence of outcome in the - control group vs sample size. Can be used to help make a decision.} +control group vs sample size. Can be used to help make a decision.} } \value{ If \code{power} was not provided, returns vector containing possible power values and the appropriate sample size for each \%. If all values were provided, returns a single sample size estimate. } -\description{ -calculate sample size given odds ratio -} \details{ The function only needs to know two of the following three: \code{odds_ratio}, \code{p_control}, and \code{p_treatment}. If given all three, it will check to make sure the odds ratio is correct. It will figure out the missing third value from the other two. } \section{References}{ Wang, H., Chow, S.-C., & Li, G. (2002). On sample size calculation based on odds ratio in clinical trials. \emph{Journal of Biopharmaceutical Statistics}, \strong{12}(4), 471–483. \url{http://doi.org/10.1081/BIP-120016231} } \examples{ sample_size_odds(p_treatment = 0.4, p_control = 0.25, power = 0.8) sample_size_odds(odds_ratio = 2, p_control = 0.4, power = c(0.8, 0.9, 0.95)) sample_size_odds(odds_ratio = 2, p_control = 0.4) sample_size_odds(odds_ratio = 2, p_control = 0.4, visualize = TRUE) + } diff --git a/man/set_proxies.Rd b/man/set_proxies.Rd index 3bce545..685932e 100644 --- a/man/set_proxies.Rd +++ b/man/set_proxies.Rd @@ -1,23 +1,24 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/proxies.R \name{set_proxies} \alias{set_proxies} \title{set HTTP and HTTPS proxies} \usage{ set_proxies() } \description{ set the HTTP and HTTPS proxies when running R on one of the Wikimedia servers } \examples{ \dontrun{ #This will fail in the cluster devtools::install_github("ironholds/urltools") #This will work set_proxies() devtools::install_github("ironholds/urltools") } + } diff --git a/man/timeconverters.Rd b/man/timeconverters.Rd index 4c831ff..d192809 100644 --- a/man/timeconverters.Rd +++ b/man/timeconverters.Rd @@ -1,28 +1,28 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/time.R \name{timeconverters} \alias{from_log} \alias{from_mediawiki} \alias{timeconverters} \alias{to_log} \alias{to_mediawiki} \title{convert to and from common timestamp formats} \usage{ from_mediawiki(x) from_log(x) to_mediawiki(x) to_log(x) } \arguments{ \item{x}{a vector of timestamps} } \description{ convert to and from MediaWiki and request log timestamp formats } \examples{ from_mediawiki("20150101010301") }