library(RcppArmadillo)
library(Rcpp)
sourceCpp("helpers.cpp")

## helper functions
gen.lengths <- function(particles, ntrials=3, prob=0.5) {
    ## a vector of random draws
    pmax(1, rnbinom(particles, ntrials, prob))
}
gen.dat <- function(nelem, rate=1) {
    ## a list of vectors, vector i has length nelem[i]
    ## each vector is filled with random draws
    lapply(nelem, rexp, rate=rate)
}

## naive method
appendL <- function(new.dat, new.lengths, dat, dat.lengths) {
    ## grow dat by appending to list element i
    ## memory will be reallocated at each call
    dat <- mapply( append, dat, new.dat )
    ## update lengths
    dat.lengths <- dat.lengths + new.lengths
    return(list(dat=dat, len=dat.lengths))
}

## dynamically append to preallocated matrix
## maintain a vector of the number of "filled" elements in each row
## emit error if overfilled
## R solution
appendR <- function(new.dat, new.lengths, dat, dat.lengths) {
    ## grow pre-allocated dat by inserting data in the correct place
    for (irow in 1:length(new.dat)) {
        ## insert one vector at a time
        ## col indices for where to insert new.dat
        cols.ii <- (dat.lengths[irow]+1):(dat.lengths[irow]+new.lengths[irow])
        dat[irow, cols.ii] = new.dat[[irow]]
    }
    ## update lengths
    dat.lengths <- dat.lengths + new.lengths
    return(list(dat=dat, len=dat.lengths))
}

## unit test
test.correct.append <- function(nrep, particles=1e3, max.cols=1e3, do.c=F) {
    ## list of empty vectors, fill with append
    dat.list <- lapply(1:particles, function(x) numeric())
    ## preallocated matrix, fill rows from left to right
    dat.r <- dat.c <- matrix(numeric(), nrow=particles, ncol=max.cols)
    ## length of each element/row
    N.list <- N.r <- N.c <- rep(0, particles)
    ## repeat process, "appending" as we go
    for (ii in 1:nrep) {
        N.new <- gen.lengths(particles)
        dat.new <- gen.dat(N.new)
        ## in R, list of vectors
        tmp <- appendL(dat.new, N.new, dat.list, N.list)
        ## unpack, update
        dat.list <- tmp$dat
        N.list <- tmp$len
        ## in R, preallocate
        tmp <- appendR(dat.new, N.new, dat.r, N.r)
        ## unpack, update
        dat.r <- tmp$dat
        N.r <- tmp$len
        ## as above for C, modify dat.c and N.c in place
        appendRcpp(dat.new, N.new, dat.c, N.c)
    }
    ## pull pre-allocated data back into list
    dat.r.list <- apply(dat.r, 1, function(x) { x <- na.omit(x); attributes(x) <- NULL; x } )
    ## check that everything is  
    identical(dat.r, dat.c) && identical(N.r, N.c) &&
    identical(dat.list, dat.r.list) && identical(N.list, N.r)
}

## timing function, test each method
test.time.append <- function(nrep, particles=1e2, max.cols=1e3, append.fun, do.update=T, seed=2) {
    ## object to modify
    N.test <- rep(0, particles)
    dat.test <- matrix(numeric(), nrow=particles, ncol=max.cols)
    ## speed is affected by size, 
    ## so ensure that each run the same elements
    set.seed(seed)
    for (irep in 1:nrep) {
        ## generate draws
        N.new <- gen.lengths(particles)
        dat.new <- gen.dat(N.new)
        ## bind in using given method
        tmp <- append.fun(dat.new, N.new, dat.test, N.test)
        if(do.update) {
            ## skip update for C
            dat.test <- tmp$dat
            N.test <- tmp$len
        }
    }
}


## run unit test, verify both functions return identical results
is.identical <- test.correct.append(1e1, max.cols=1e3)
print(is.identical)

library(rbenchmark)
## test timings of each solution
test.nreps <- 10
test.ncols <- 1e3
timings = benchmark(
    r=test.time.append(test.nreps, max.cols=test.ncols, append.fun=appendR),
    c=test.time.append(test.nreps, max.cols=test.ncols, do.update=F, append.fun=appendRcpp),
    list=test.time.append(test.nreps, max.cols=test.ncols, append.fun=appendL),
    replications=10
)

## test fast solutions with larger data structures
test.nreps <- 1e2
test.ncols <- 1e4
timings.fast = benchmark(
    r=test.time.append(test.nreps, max.cols=test.ncols, append.fun=appendR),
    c=test.time.append(test.nreps, max.cols=test.ncols, do.update=F, append.fun=appendRcpp),
    replications=1e1
)