proxy_stars vs stars using st_apply #349

alexyshr opened this issue Nov 8, 2020 · 0 comments

alexyshr commented Nov 8, 2020

Hi!. Here playing again with 5D (five dimensions) stars object! (from #348)

It is possible to use st_apply with the non-proxy stars object and the personalized function theFunction.

  1. How can I improve theFunction to work with the stars_proxy version of the object?
  2. How to remove NA plots when plotting the object st_functionNoProxy?

Purpose of the function theFunction: Look for Monday values (single attribute Temperature) of the FIRST level dimension and FIRST number dimension; for other days returns NA.

Data can be download here!

#> Loading required package: abind
#> Loading required package: sf
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>     date, intersect, setdiff, union
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
path= "./"
ncname = "download5Dcolombia"
ncfile5d = paste0(path, ncname, ".nc")
(st_col5d = read_stars(ncfile5d, proxy=TRUE))
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: Recode from UTF-8 to CP_ACP failed with
#> the error: "Invalid argument".
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: The dataset has several variables
#> that could be identified as vector fields, but not all share the same primary
#> dimension. Consequently they will be ignored.
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: dimension #1 (number) is not a Time
#> dimension.
#> stars_proxy object with 1 attribute in file:
#> $
#> [1] "[...]/"
#> dimension(s):
#>        from  to         offset   delta  refsys point
#> x         1  25         -79.35     0.5      NA    NA
#> y         1  35          12.75    -0.5      NA    NA
#> level     1  37             NA      NA udunits    NA
#> number    1  10              0       1      NA    NA
#> time      1 480 2020-01-01 UTC 3 hours POSIXct    NA
#>                                               values x/y
#> x                                               NULL [x]
#> y                                               NULL [y]
#> level  [1,2) [millibars],...,[1000,1025) [millibars]    
#> number                                          NULL    
#> time                                            NULL

#Read timestamp values
st_time = st_col5d %>% slice(x, 1) %>% slice(y, 1) %>%
  slice(level, 1) %>% slice(number, 1) %>% st_as_stars()
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: Recode from UTF-8 to CP_ACP failed with
#> the error: "Invalid argument".
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: The dataset has several variables
#> that could be identified as vector fields, but not all share the same primary
#> dimension. Consequently they will be ignored.
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: dimension #1 (number) is not a Time
#> dimension.
timePOSIXlt = as.POSIXlt(seq(from = as_datetime(c(0), origin=st_dimensions(st_time)$time$offset),
                   by=st_dimensions(st_time)$time$delta, length.out = st_dimensions(st_time)$time$to))

(st_col5dNoProxy = st_col5d %>% slice(level, 1:2) %>%
                        slice(number, 1:2) %>% st_as_stars())
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: Recode from UTF-8 to CP_ACP failed with
#> the error: "Invalid argument".
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: The dataset has several variables
#> that could be identified as vector fields, but not all share the same primary
#> dimension. Consequently they will be ignored.
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: dimension #1 (number) is not a Time
#> dimension.
#> stars object with 5 dimensions and 1 attribute
#> attribute(s), summary of first 1e+05 cells:
#> [K]
#>  Min.   :237.1            
#>  1st Qu.:248.2            
#>  Median :258.9            
#>  Mean   :258.0            
#>  3rd Qu.:267.9            
#>  Max.   :275.3            
#> dimension(s):
#>        from  to         offset   delta  refsys point
#> x         1  25         -79.35     0.5      NA    NA
#> y         1  35          12.75    -0.5      NA    NA
#> level     1   2             NA      NA udunits    NA
#> number    1   2              0       1      NA    NA
#> time      1 480 2020-01-01 UTC 3 hours POSIXct    NA
#>                                      values x/y
#> x                                      NULL [x]
#> y                                      NULL [y]
#> level  [1,2) [millibars], [2,3) [millibars]    
#> number                                 NULL    
#> time                                   NULL

#theFunction description:
#Filtering temperatures registered on "Mondays"(theDay = 1),
#belonging to the first LEVEL (dimension), and
#belonging to the first NUMBER (dimension).
#The function returns values only for Mondays, for other days it returns NA

theFunction = function(theStars, timePlt, theDay){
  #theDay values:
  #Dom: 0
  #Lun: 1  <<<< Monday
  #Mar: 2
  #Mie: 3
  #Jue: 4
  #Vie: 5
  #Sat: 6
  #Negative or > 6: all days
  theValues = rep(NA, length.out=length(timePlt))

  if (theDay == 1){
    mondayIndexes = which(timePlt$wday == 1)
    arraySliced = theStars[1,1,mondayIndexes] #First Index is LEVEL
                                              #Second Index is NUMBER
                                              #Third Index is TIME
    theValues[mondayIndexes]= arraySliced[[1]]#The only attribute is Temperature

#With no proxy stars work perfect!
  st_functionNoProxy <- st_col5dNoProxy %>%
    st_apply(MARGIN = c("x", "y"), FUN = theFunction, timePlt = timePOSIXlt, theDay = 1) %>%
    st_set_dimensions("theFunction", timePOSIXlt$wday)
  #How to remove NA plots?

#With stars_proxy the function DOES NOT receive extra parameters timePlt and theDay
st_functionProxy <- st_col5d %>%
    st_apply(MARGIN = c("x", "y"), FUN = theFunction, timePlt = timePOSIXlt, theDay = 1) %>%
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: Recode from UTF-8 to CP_ACP failed with
#> the error: "Invalid argument".
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: The dataset has several variables
#> that could be identified as vector fields, but not all share the same primary
#> dimension. Consequently they will be ignored.
#> Warning in CPL_read_gdal(as.character(x), as.character(options),
#> as.character(driver), : GDAL Message 1: dimension #1 (number) is not a Time
#> dimension.
#> Error in FUN(array(newX[, i],,, ...): argument "timePlt" is missing, with no default
