Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
123 views
in Technique[技术] by (71.8m points)

R: Errors encountered during "loops": x Input `name` can't be recycled to size 100

I am using the R programming language. I made an earlier post (R: Using "microbenchmark" and ggplot2 to plot runtimes) where I am learning how to use loops and functions to iterate procedures (7 procedures) in R for sample sizes. Once this is done, I want to produce a plot.

Based on the previous answer, I tried to write a few of these loops in R:

library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)

#simulate data

var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )

#put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)

#declare var_3 and response_variable as factors
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)

# configure run sizes
sizes <- c(100,200,300,400,500,600,700,800,900,1000)

# Procedure 1: :
proc1 <- function(size){
    assign(paste0("gower_dist_",size), daisy(f[1:size,-5],
                        metric = "gower"),envir = .GlobalEnv)
        
    assign(paste0("gower_mat_",size),as.matrix(get(paste0("gower_dist_",size),envir = .GlobalEnv)),
           envir = .GlobalEnv)
        
}     

proc1List <- lapply(sizes,function(x){
        b <- microbenchmark(proc1(x))
        b$obs <- x
        b
})

proc1summary <- do.call(rbind,(proc1List))

#procedure2


proc2 <- function(size){
    assign(paste0("lof_",size), lof(gower_dist, k=3),envir = .GlobalEnv)}

proc2List <- lapply(sizes,function(x){
        b <- microbenchmark(proc2(x))
        b$obs <- x
        b
})


proc2summary <- do.call(rbind,(proc2List))


#procedure3


proc3 <- function(size){
    assign(paste0("lof_",size), lof(gower_dist, k=5),envir = .GlobalEnv)}

proc3List <- lapply(sizes,function(x){
        b <- microbenchmark(proc3(x))
        b$obs <- x
        b
})


proc3summary <- do.call(rbind,(proc3List))

#procedure4

proc4 <- function(size){
    assign(paste0("tsne_obj_",size),Rtsne(gower_dist,  is_distance = TRUE),envir = .GlobalEnv)
        
    assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
    data.frame() %>%
    setNames(c("X", "Y")) %>%
    mutate(
           name = f$ID) ,envir = .GlobalEnv)}


proc4List <- lapply(sizes,function(x){
        b <- microbenchmark(proc4(x))
        b$obs <- x
        b
})


proc4summary <- do.call(rbind,(proc4List))


#procedure5

proc5 <- function(size){
    assign(paste0("tsne_obj_",size),Rtsne(gower_dist, perplexity = 10,  is_distance = TRUE),envir = .GlobalEnv)
        
    assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
    data.frame() %>%
    setNames(c("X", "Y")) %>%
    mutate(
           name = f$ID) ,envir = .GlobalEnv)}


proc5List <- lapply(sizes,function(x){
        b <- microbenchmark(proc5(x))
        b$obs <- x
        b
})


proc5summary <- do.call(rbind,(proc5List))


#procedure6

proc6 <- function(size){
    assign(paste0("plot_",size),ggplot(aes(x = X, y = Y), data = tsne_data) + geom_point(aes()),envir = .GlobalEnv)}
        

proc6List <- lapply(sizes,function(x){
        b <- microbenchmark(proc6(x))
        b$obs <- x
        b
})


proc6summary <- do.call(rbind,(proc6List))

#procedure 7

proc7 <- function(size) {

assign(paste0 ("tsne_obj_", size),  Rtsne(gower_dist,  is_distance = TRUE), envir = .GlobalEnv)

assign(paste0 ("tsne_data_", size),  tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(
    name = f$ID, 
    lof=lof,
    var1=f$var_1,
    var2=f$var_2,
    var3=f$var_3
    ), envir = .GlobalEnv)

assign(paste0 ("p1_", size),  ggplot(aes(x = X, y = Y, size=lof, key=name, var1=var1, 
  var2=var2, var3=var3), data = tsne_data) + 
  geom_point(shape=1, col="red") + theme_minimal(), envir = .GlobalEnv)


assign(paste0 ("plotly_", size),  
ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3")
 ), envir = .GlobalEnv)


}


proc7List <- lapply(sizes,function(x){
    b <- microbenchmark(proc7(x))
    b$obs <- x
    b
})



proc7summary <- do.call(rbind,(proc7List))


do.call(rbind,list(proc1summary,proc2summary,proc3summary, proc4summary, proc5summary, proc6summary, proc7summary)) %>% 
    group_by(expr,obs) %>%
    summarise(.,time_ms = mean(time) * .000001) -> proc_time 



ggplot(proc_time,aes(obs,time_ms,group = expr)) +
    geom_line(aes(group = expr),color = "grey80") + 
    geom_point(aes(color = expr))

However, for some of these procedures, when I call them through a list, I keep getting an error:

proc4List <- lapply(sizes,function(x){
     b <- microbenchmark(proc4(x))
     b$obs <- x
     b
 })

 Error: Problem with `mutate()` input `name`.
x Input `name` can't be recycled to size 100.
i Input `name` is `f$ID`.
i Input `name` must be size 100 or 1, not 1000.

I tried reading other stackoverflow posts (Input `typ` can't be recycled to size in R), but I could not understand why this "recycling error" keeps showing up. Is it because "size = 100" is too small? Is it because some of the variables have been named improperly?

Could someone please tell me what I am doing wrong?

Thanks

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

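The error occurs because `mutate(name = f$ID)` supplies all 1000 IDs while the t-SNE output being mutated has only 100 rows (as the message says, `name` "must be size 100 or 1, not 1000"), so every column added inside `mutate()` has to be restricted to the same `size` rows. In order to make procedures 4 - 7 work, we needed to make the adjustments listed in the conclusions section of Using microbenchmark and ggplot2 to plot runtimes: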

  1. Wrap the original procedure in a function that we can use as the unit of analysis for microbenchmark(), and include a size argument
  2. Modify the procedure to use size as a variable where necessary
  3. Modify the procedure to access objects from previous steps, based on the size argument
  4. Modify the procedure to write its outputs with assign() and size if these are needed for subsequent procedure steps

The modified code looks like this:

library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)

# Simulate 1000 observations: two numeric and two categorical variables.

var_1 <- rnorm(1000, mean = 1, sd = 4)
var_2 <- rnorm(1000, mean = 10, sd = 5)
var_3 <- sample(LETTERS[1:4], size = 1000, replace = TRUE,
                prob = c(0.1, 0.2, 0.65, 0.05))
var_4 <- sample(LETTERS[1:2], size = 1000, replace = TRUE,
                prob = c(0.4, 0.6))


# Combine the variables into the data frame "f"; ID is the row number.
f <- data.frame(var_1, var_2, var_3, var_4, ID = 1:1000)

# Store both categorical variables (var_3 and var_4) as factors.
f[c("var_3", "var_4")] <- lapply(f[c("var_3", "var_4")], as.factor)

# Sample sizes (number of leading rows of f) at which procedures are timed.
sizes <- c(10, 50, 100, 200, 500, 1000)

# Number of evaluations microbenchmark() performs per procedure and size.
time_ct <- 10

# Procedure 1: Gower dissimilarities for the first `size` rows of f.
#
# Column 5 of f (ID) is dropped before calling daisy(). The dissimilarity
# object is stored as gower_dist_<size> and its matrix form as
# gower_mat_<size> in the global environment; the value of the last
# assign() (the matrix) is returned invisibly.
proc1 <- function(size) {
  target_env <- globalenv()
  gower_dist <- daisy(f[1:size, -5], metric = "gower")
  assign(paste0("gower_dist_", size), gower_dist, envir = target_env)
  assign(paste0("gower_mat_", size), as.matrix(gower_dist), envir = target_env)
}

# Time proc1 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc1(x)`.
proc1List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc1(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})
proc1summary <- do.call(rbind, proc1List)

#Procedure 2

# Procedure 2: local outlier factor with k = 3, computed on the stored
# gower_dist_<size> object. The scores are returned invisibly and are not
# written to the global environment.
proc2 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = globalenv())
  invisible(lof(gower_dist, k = 3))
}
# Time proc2 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc2(x)`.
proc2List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc2(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})
proc2summary <- do.call(rbind, proc2List)

#Procedure 3

# Procedure 3: local outlier factor with k = 5, computed on the stored
# gower_dist_<size> object. The scores are saved as lof_<size> in the
# global environment and returned invisibly by assign().
proc3 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = globalenv())
  lof_scores <- lof(gower_dist, k = 5)
  assign(paste0("lof_", size), lof_scores, envir = globalenv())
}
# Time proc3 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc3(x)`.
proc3List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc3(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})
proc3summary <- do.call(rbind, proc3List)

# Procedure 4: t-SNE on the stored gower_dist_<size> object, treated as a
# distance input. Perplexity is capped at (size - 1) / 3 so that small
# samples do not ask for more than the default-sized 30.
# The embedding is returned invisibly as a data frame with columns X, Y
# and name (1..size); nothing is written to the global environment.
proc4 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = globalenv())
  tsne_obj <- Rtsne(
    gower_dist,
    perplexity = min(30, (size - 1) / 3),
    is_distance = TRUE
  )
  embedding <- setNames(data.frame(tsne_obj$Y), c("X", "Y"))
  invisible(mutate(embedding, name = 1:size))
}

# Time proc4 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc4(x)`.
proc4List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc4(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

proc4summary <- do.call(rbind, proc4List)

# Procedure 5: t-SNE on the stored gower_dist_<size> object with the
# perplexity limited to the smaller of 10 and (size - 1) / 3.
# The embedding (columns X, Y and name = 1..size) is saved as
# tsne_data_<size> in the global environment and returned invisibly by
# assign().
proc5 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = globalenv())
  tsne_obj <- Rtsne(
    gower_dist,
    perplexity = min(10, (size - 1) / 3),
    is_distance = TRUE
  )
  embedding <- setNames(data.frame(tsne_obj$Y), c("X", "Y"))
  embedding <- mutate(embedding, name = 1:size)
  assign(paste0("tsne_data_", size), embedding, envir = globalenv())
}

# Time proc5 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc5(x)`.
proc5List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc5(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

proc5summary <- do.call(rbind, proc5List)

# Procedure 6: build a scatter plot of the stored tsne_data_<size>
# embedding (X against Y). The ggplot object is only constructed and
# returned invisibly; it is not printed or saved.
proc6 <- function(size) {
  tsne_data <- get(paste0("tsne_data_", size), envir = globalenv())
  invisible(ggplot(tsne_data, aes(x = X, y = Y)) + geom_point(aes()))
}

# Time proc6 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc6(x)`.
proc6List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc6(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

proc6summary <- do.call(rbind, proc6List)

# Procedure 7: t-SNE embedding enriched with outlier scores and the
# original variables, rendered as an interactive plotly chart.
#
# Reads gower_dist_<size> and lof_<size> from the global environment, so
# the procedures that create them must have run for this size first.
# Every added column is cut to the first `size` rows so it matches the
# number of rows in the embedding. Returns the ggplotly() widget.
proc7 <- function(size) {
  rows <- 1:size
  gower_dist <- get(paste0("gower_dist_", size), envir = globalenv())
  tsne_obj <- Rtsne(
    gower_dist,
    perplexity = min(30, (size - 1) / 3),
    is_distance = TRUE
  )

  tsne_data <- data.frame(tsne_obj$Y) %>%
    setNames(c("X", "Y")) %>%
    mutate(
      name = rows,
      lof = get(paste0("lof_", size), envir = globalenv()),
      var1 = f$var_1[rows],
      var2 = f$var_2[rows],
      var3 = f$var_3[rows]
    )

  # The extra aesthetics (key, var1, var2, var3) are carried along so they
  # can be listed in the tooltip below.
  point_mapping <- aes(
    x = X, y = Y, size = lof, key = name,
    var1 = var1, var2 = var2, var3 = var3
  )
  p1 <- ggplot(tsne_data, point_mapping) +
    geom_point(shape = 1, col = "red") +
    theme_minimal()

  ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3"))
}

# Time proc7 at every sample size, tag each timing table with its size in
# the `obs` column, then stack the tables into one. The lambda argument
# stays named `x` so the benchmarked expression remains `proc7(x)`.
proc7List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc7(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

proc7summary <- do.call(rbind, proc7List)

# Stack the timing tables of all seven procedures and average the raw
# timings per benchmarked expression (`expr`) and sample size (`obs`).
# microbenchmark reports `time` in nanoseconds, so multiplying by 1e-6
# converts the mean to milliseconds.
# `.groups = "drop"` returns an ungrouped table instead of one that is
# still grouped by `expr`, and silences summarise()'s regrouping message.
proc_time <- do.call(
  rbind,
  list(
    proc1summary, proc2summary, proc3summary, proc4summary,
    proc5summary, proc6summary, proc7summary
  )
) %>%
  group_by(expr, obs) %>%
  summarise(time_ms = mean(time) * 1e-6, .groups = "drop")

head(proc_time)

# One grey line and one set of coloured points per benchmarked expression;
# geom_line() inherits the `group` aesthetic from the plot-level mapping.
ggplot(proc_time, aes(obs, time_ms, group = expr)) +
  geom_line(color = "grey80") +
  geom_point(aes(color = expr))

...and the output:

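[Image: plot of mean run time in milliseconds (time_ms) against number of observations (obs), with one grey line and one coloured series of points per benchmarked procedure]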

Notes

Since some of these procedures take a long time to run (relatively speaking), we reduced the number of iterations in microbenchmark() from the default of 100 to 10, which is more than sufficient to demonstrate that the code works as intended. Since this was set as a parameter at the top of the code, one can increase this number to increase the number of times each benchmark is executed.


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...