3
\$\begingroup\$

My goal is to apply a function func1 to each row of the matrix input and then return a new one resulting from the transformation.

The code works but when the data frame contains more than 1 million rows, it become extremely slow. How can I optimize my code? I start learning programming and I am not familiar with strategies to speed up R code.

The functions performs 2 main steps:

  1. Find the locations of all neighboring cells that are located in the extent PR from a focal cell, extract raster's values at these locations and calculate probability matrix
  2. Find the maximum value in the matrix and the new cell corresponding with the maximum value.

Here's the data frame and raster:

library(dplyr)
library(raster)
library(psych)
    set.seed(1234) 
    n = 10000
    input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))

    r <- raster(extent(0, 10, 0, 10), res = 1)
    values(r) <- sample(1:1000, size = 10*10, replace = T)
    ## plot(r)

Here's my code to apply the function to each row in the matrix:

system.time(
  test <- input %>% 
    split(1:nrow(input)) %>% 
    map(~ func1(.x, 2, 2, "test_1")) %>% 
    do.call("rbind", .))

Here's the function:

func1 <- function(dataC, PR, DB, MT){

    ## Retrieve the coordinates x and y of the current cell
    c1 <- dataC[[1]]
    c2 <- dataC[[2]]

    ## Retrieve the coordinates x and y of the previous cell
    c3 <- dataC[[3]]
    c4 <- dataC[[4]]

    ## Initialize the coordinates x and y of the new cell
    newc1 <- -999
    newc2 <- -999

    if(MT=="test_1"){

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
      V1 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
      V2 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
      V3 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
      V4 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      V5 <- 0 ## cell at middle corner

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
      V6 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
      V7 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB 

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
      V8 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
      V9 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB

    } else if(MT=="test_2"){

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
      V1 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
      V2 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
      V3 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
      V4 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      V5 <- 0 ## cells at middle corner

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
      V6 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
      V7 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB 

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
      V8 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB

      ## Extract the raster values with coordinates in matC
      matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
      V9 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB

    }

    ## Build the matrix of cell selection
    tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
    mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5, 
                             (1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)


    while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){

      ## Test if the new cell is the previous cell
      if(c3 == newc1 && c4 == newc2){
        mat_V[choiceC[1], choiceC[2]] <- NaN
        ## print(mat_V)
      }

      ## Find the maximum value in the matrix
      choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
      ## print(choiceC)
      ## If there are several maximum values
      if(nrow(choiceC) > 1){
        choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
      }

      ## Find the new cell relative to the current cell 
      if(choiceC[1]==1 & choiceC[2]==1){ ## cell at the upper-left corner

        newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)

      } else if(choiceC[1]==1 & choiceC[2]==2){ ## cell at the upper-middle corner

        newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)

      } else if(choiceC[1]==1 & choiceC[2]==3){ ## cell at the upper-right corner

        newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)

      } else if(choiceC[1]==2 & choiceC[2]==1){ ## cell at the left corner

        newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)

      } else if(choiceC[1]==2 & choiceC[2]==3){ ## cell at the right corner

        newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)

      } else if(choiceC[1]==3 & choiceC[2]==1){ ## cell at the bottom-left corner

        newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)

      } else if(choiceC[1]==3 & choiceC[2]==2){ ## cell at the bottom-middle corner

        newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)

      } else if(choiceC[1]==3 & choiceC[2]==3){ ## cell at the bottom-right corner

        newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
      }

      newc1 <- newC[[1]]
      newc2 <- newC[[2]]

    }

    return(newC)

  } 

Here's the elapsed time when n = 10000. Ideally, I would like to reduce the time required at < 1 min.

user  system elapsed 
108.96    0.01  109.81 
\$\endgroup\$
2
  • \$\begingroup\$ do the r need to be raster? can no t we use simple matrix? \$\endgroup\$
    – minem
    Commented Nov 26, 2018 at 12:28
  • \$\begingroup\$ Yes, r can be a matrix. \$\endgroup\$
    – Pierre
    Commented Nov 26, 2018 at 15:18

1 Answer 1

2
\$\begingroup\$

Did dome upgrades, but only for 'test_1' case, you can update 'test2' case similarly. For me this function run in 13.54 sek vs 26.16 sek for your original code.

func1 <- function(dataC, PR, DB, MT){

  ## Retrieve the coordinates x and y of the current cell
  c1 <- dataC[[1]]
  c2 <- dataC[[2]]

  ## Retrieve the coordinates x and y of the previous cell
  c3 <- dataC[[3]]
  c4 <- dataC[[4]]

  ## Initialize the coordinates x and y of the new cell
  newc1 <- -999
  newc2 <- -999

  a1 <- c((c1 - PR), (c1 - 1))
  a2 <- c((c2 - PR), (c2 - 1))
  a3 <- c((c2 - 1), (c2 + 1))
  a4 <- c((c2 + 1), (c2 + PR))
  a5 <- c((c1 - 1), (c1 + 1))
  a6 <- c((c1 + 1), (c1 + PR))


  xx <- c(a1, a2, a3, a4, a5, a6)
  xx <- seq(min(xx), max(xx))
  gg <- expand.grid(xx, xx, KEEP.OUT.ATTRS = F)
  gg <- as.matrix(gg)
  gg1 <- gg[, 1]
  gg2 <- gg[, 2]

  ff2 <- function(matC) {
    y1 <- raster::extract(r, matC)
    mean(y1, na.rm = T)
  }

  cgrid <- function(x, y) {
    gg[gg1 >= x[1] & gg1 <= x[2] & gg2 >= y[1] & gg2 <= y[2], ]
  }

  if (MT == "test_1") {
    ## cells at upper-left corner
    V1 <- ff2(cgrid(x = a1, y = a2)) * sqrt(2) * DB
    ## cells at upper-middle corner
    V2 <- ff2(cgrid(x = a1, y = a3)) * DB
    ## cells at upper-right corner
    V3 <- ff2(cgrid(x = a1, y = a4)) * sqrt(2) * DB
    ## cells at left corner
    V4 <- ff2(cgrid(x = a5, y = a2)) * DB
    V5 <- 0 ## cell at middle corner
    ## cells at right corner
    V6 <- ff2(cgrid(x = a5, y = a4)) * DB
    ## cells at bottom-left corner
    V7 <- ff2(cgrid(x = a6, y = a2)) * sqrt(2) * DB 
    ## cells at bottom-middle corner
    V8 <- ff2(cgrid(x = a6, y = a3)) * DB
    ## cells at bottom-right corner
    V9 <- ff2(cgrid(x = a6, y = a4) ) * sqrt(2) * DB
  }

  ## Build the matrix of cell selection
  V <- c(V1, V2, V3, V4, V5, V6, V7, V8, V9)
  tot <- sum(1/V[-5], na.rm = TRUE)
  mat_V <- matrix((1/V)/tot, nrow = 3, ncol = 3, byrow = TRUE)
  mat_V[5] <- V5

  while ((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)) {

    ## Test if the new cell is the previous cell
    if (c3 == newc1 && c4 == newc2) {
      mat_V[choiceC[1], choiceC[2]] <- NaN
      ## print(mat_V)
    }

    ## Find the maximum value in the matrix
    choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)

    ## If there are several maximum values
    if (nrow(choiceC) > 1) choiceC <- choiceC[sample.int(nrow(choiceC), 1L), ]

    ## Find the new cell relative to the current cell 
    newC <- c(x = c1 + (choiceC[1] - 2), y = c2 + (choiceC[2] - 2))
    newC <- matrix(newC, ncol = 2)

    newc1 <- newC[[1]]
    newc2 <- newC[[2]]

  }
  return(newC)
} 
\$\endgroup\$
1
  • \$\begingroup\$ Thank you very much ! The function runs for 99.16 s vs 110 s from my code. \$\endgroup\$
    – Pierre
    Commented Nov 26, 2018 at 15:22

Not the answer you're looking for? Browse other questions tagged or ask your own question.