Another way to get R download stats
Published:
This is code from Mark Scheuerell that was adapted from this post by Felix Schonbrodt for a different way to get download stats: http://www.nicebread.de/finally-tracking-cran-packages-downloads/
## adadpted from code by Felix Schonbrodt ## http://www.nicebread.de/finally-tracking-cran-packages-downloads/ ## ====================================================================== ## Step 1: Download all log files ## ====================================================================== # start & end dates 12 months prior to current date this.year = as.numeric(format(Sys.time(), "%Y")) start <- as.Date( paste(this.year-1,"-",format(Sys.time(), "%m-%d"),sep="") ) today <- as.Date(Sys.time()) all_days <- seq(start, today, by = 'day') year <- as.POSIXlt(all_days)$year + 1900 urls <- paste0('http://cran-logs.rstudio.com/', year, '/', all_days, '.csv.gz') # only download the files you don't have: missing_days <- setdiff(as.character(all_days), tools::file_path_sans_ext(dir("CRANlogs"), TRUE)) dir.create("CRANlogs") for (i in 1:length(missing_days)) { print(paste0(i, "/", length(missing_days))) download.file(urls[i], paste0('CRANlogs/', missing_days[i], '.csv.gz')) } ## ====================================================================== ## Step 2: Load single data files into one big data.table ## ## NOTE: this step takes FOREVER to run ## ====================================================================== file_list <- list.files("CRANlogs", full.names=TRUE) logs <- list() for (file in file_list) { print(paste("Reading", file, "...")) logs[[file]] <- read.table(file, header = TRUE, sep = ",", quote = "\"", dec = ".", fill = TRUE, comment.char = "", as.is=TRUE) } # rbind together all files library(data.table) dat <- rbindlist(logs) # add some keys and define variable types dat[, date:=as.Date(date)] dat[, package:=factor(package)] dat[, country:=factor(country)] dat[, weekday:=weekdays(date)] dat[, week:=strftime(as.POSIXlt(date),format="%Y-%W")] setkey(dat, package, date, week, country) save(dat, file="CRANlogs/CRANlogs.RData") # for later analyses: load the saved data.table # load("CRANlogs/CRANlogs.RData") ## ====================================================================== ## Step 3: Plot results ## ====================================================================== # vector of pkgs to compare pkgs <- c("MARSS","dlm") # vector of plot colors clr <- seq(length(pkgs)) # downloads of selected pkgs by week com1 <- dat[J(pkgs), length(unique(ip_id)), by=c("week", "package")] # total downloads to date com1[, sum(V1), by=package] # cumulative downloads by week com1$C1 <- (com1[, cumsum(V1), by=package])$V1 # nicer form for plotting plotdat <- cast(com1,week ~ package, value="C1") # plot cumulative downloads over time matplot(plotdat, type="l", lty="solid", lwd=2, col=clr, ylab="Cumulative downloads", xlab="Week of 2013") legend(x="topleft", legend=colnames(plotdat)[-1], lty="solid", lwd=2, col=clr)