Introduction

This R Markdown document tests the utilities developed to compute the layer proportions for each selected cells of a three dimensional grid. The $N$ layers are defined by the elevation of their top limit.

knitr::opts_chunk$set(echo = TRUE)
rm(list=ls())
library(gstlearn)

## Warning: le package 'gstlearn' a été compilé avec la version R 4.3.0

## 
## Attachement du package : 'gstlearn'

## Les objets suivants sont masqués depuis 'package:base':
## 
##     message, toString

OptCst_defineByKey("ASP",0)

## NULL

library(ggplot2)
library(ggpubr)
set.seed(43243)
opers = EStatOption_fromKeys(c("NUM","MINI","MAXI","MEAN","STDV"))

Parameters

# The 3D grid is defined on  [0,1]^3 
# 2D limits of the 3D grid 
limits <- list(XP = 1.0*c(0, 1, 1, 0, 0),
               YP = 1.0*c(0, 0, 1, 1, 0))
p_lim   = PolyElem(limits$XP, limits$YP)
pol_lim = Polygons()
err     = pol_lim$addPolyElem(p_lim)

flag.rotation = TRUE
# ----------------------------------------------
# ----------------------------------------------
if(flag.rotation) {
# 3D grid
nx = c(10, 13, 7)
dx = 1/nx
x0 = dx/2
# 2D grid
nx_2d = c(256, 256)
dx_2d = sqrt(2.0) / nx_2d
x0_2d = c(1/2, -1/2)
angles_2d = c(45, 0) # in degrees
} else { # no rotation
# 3D grid
nx = c(10, 10, 10)
dx = 1/nx
x0 = dx/2
# 2D grid
nx_2d = nx[1:2]
dx_2d = dx[1:2]
x0_2d = dx_2d/2
angles_2d = c(0, 0) # in degrees
}

grid = DbGrid_create(nx = nx, dx = dx, x0 = x0)
surfaces = DbGrid_create(nx = nx_2d, dx = dx_2d, x0 = x0_2d, angles = angles_2d)

# plots
p = ggDefaultGeographic() + 
    plot.grid(grid, name_raster = "rank", useSel = TRUE, show.legend.raster = FALSE) +
    plot.polygon(poly = pol_lim, color = "orange", fill = NA) +
    plot.decoration(xlab = "Easting", ylab = "Northing", title = paste0("3D grid and limits"))
ggPrint(p)

p = ggDefaultGeographic() + 
    plot.grid(surfaces, name_raster = "rank", useSel = TRUE, show.legend.raster = FALSE) +
    plot.polygon(poly = pol_lim, color = "orange", fill = NA) +
    plot.decoration(xlab = "Easting", ylab = "Northing", title = paste0("2D grid and limits"))
ggPrint(p)

# ----------------------
# selection of some grid cells
# ----------------------
sel  = (grid["x1"] >= 0.20)&(grid["x1"] <= 0.70) & 
       (grid["x3"] >= 0.10)&(grid["x3"] <= 0.70)
sel  = as.numeric(sel)
err  = grid$setColumn(tab = sel, name = "sel")
err  = grid$setLocator(name = "sel", ELoc_SEL(), cleanSameLocator = TRUE)

# ----------------------
# plot of a slice
# ----------------------
var_nm = "rank"
posx = 1; xlab = "Ox"
posy = 3; ylab = "Oz"
k = 3
corner = rep(k - 1, 3); 
corner[posx] = 0; 
corner[posy] = 0
slice = DbGrid_create(nx = c(nx[posx], nx[posy]), dx = c(dx[posx], dx[posy]), x0 = c(x0[posx], x0[posy]))
val = grid$getOneSlice(name = var_nm, posx = posx - 1, posy = posy - 1 , corner = c(0,0,k-1), 
                       useSel = TRUE)
err   = slice$setColumn(tab = val, name = paste0("K = ", k))
p = ggDefaultGeographic() + 
    plot.grid(slice, name_raster = paste0("K = ", k), useSel = TRUE, legend.name.raster = var_nm,
            show.legend.raster = TRUE) +
    plot.polygon(poly = pol_lim, color = "orange", fill = NA) +
    plot.decoration(xlab = xlab, ylab = ylab, 
                  title = paste0("Slice K = ", k))
ggPrint(p)

# -----------------------------------
# simulation of the surfaces
# -----------------------------------
N = 3
h_mean = c(0.5, 0.25, 0.25)
nbsim = c(3, 4, 3)

# N = 2
# h_mean = c(0.75, 0.25)
# nbsim = c(3, 4)

model = Model_createFromParam(ECov_GAUSSIAN(), range=0.5, sill=1.0)
for (i in 1:N) {
  err = simtub(dbin = NULL, dbout = surfaces, model = model, nbsimu = nbsim[i], namconv = NamingConvention(paste0("Y_", i)))
}

# conversion into uniform thickness
for (i in 1:N) {
  val = matrix(surfaces$getColumns(names = paste0("Y_", i , ".", 1:nbsim[i]), useSel = FALSE),
               nrow = surfaces$getSampleNumber(useSel = FALSE), ncol = nbsim[i])
  val = h_mean[i] * pnorm(val)
  for (k in 1:nbsim[i]) {
    err = surfaces$setColumn(tab = val[, k], name = paste0("Z_", i , ".", k), useSel = FALSE)
  }
}

# plot of a variable
var_nm = "Z_1.3"
p = ggDefaultGeographic() + 
    plot.grid(surfaces, name_raster = var_nm, useSel = TRUE, legend.name.raster = var_nm,
            show.legend.raster = TRUE) +
    plot.polygon(poly = pol_lim, color = "orange", fill = NA) +
    plot.decoration(xlab = "Easting", ylab = "Northing", 
                  title = paste0("2D Surface: variable = ", var_nm))
ggPrint(p)

Computing the proportions

The simulated variables are the thickness $Z_i^{(k)}(s)$ .

Sample selection

For a block selected by $(i, j, k)$ the samples $(x,y,z)$ of the surfaces are selected by:

$|x - (x_0[1]+ dx[1] (i - 1)| dx[1] /2 $
$|y - (x_0[2]+ dx[2] (j - 1)| dx[2] /2 $

# compute the rank from ijk of the values of an array defined by its dimensions nn
# This is a vector version of ijk2rank and rank2ijk for any number of dimensions
#  rank = i + nn[1] (j - 1) + nn[1]*nn[2]*(k - 1) + ...
rank2ijkVec <- function(rank, nn) {
    stopifnot((range(rank)[1] > 0)&(range(rank)[2] <= prod(nn)))
    N = length(nn)
    IJK = matrix(NaN, nrow = length(rank), ncol = N)
    rr = rank - 1
    for (i in 1:N) {
      IJK[,i] = 1 + rr %% nn[i]
      if(i < N) {rr = (rr - (IJK[,i] - 1)) %/% nn[i]}
    }
    if(length(rank) == 1) {IJK = as.numeric(IJK)}
    IJK
  }
  
# convert the ijk corresponding to the rank of an array defined by its dimensions nn
ijk2rankVec <- function(ijk, nn) {
    if(!is.matrix(ijk)){ijk = matrix(ijk, nrow = 1, ncol = length(ijk))}
    stopifnot(dim(ijk)[2] == length(nn))
    as.numeric(1 + (ijk-1) %*% cumprod(c(1, nn[-length(nn)])))
  }

# test on a 4d array  
nn = c(5, 13, 7, 31) 
rank = 1:prod(nn)
ijk  = rank2ijkVec(rank = rank, nn = nn)
stopifnot(max(abs(rank - ijk2rankVec(rank2ijkVec(rank,nn),nn))) == 0)
stopifnot(max(abs(ijk - rank2ijkVec(ijk2rankVec(ijk,nn),nn))) == 0)

# test on a cell
cell = c(3, 4, 5)
rank = ijk2rankVec(ijk = cell, nn = grid$getNXs())
stopifnot(rank == ijk2rankVec(ijk = rank2ijkVec(rank = rank, nn = grid$getNXs()), nn = grid$getNXs()))
stopifnot(max(abs(cell - rank2ijkVec(rank = ijk2rankVec(ijk = cell, nn = grid$getNXs()), nn = grid$getNXs()))) == 0)

# select the samples in dbin belonging to the grid cell and return the number of selected samples
# the selection works only with "not rotated" grid 
select_samples <- function(dbin, rank, grid, nameSel = "inBlock") {
  stopifnot(max(abs(grid$getAngles())) == 0) # no rotation
  cell = rank2ijkVec(rank = rank, nn = grid$getNXs())
  x0  = grid$getX0s()
  dx  = grid$getDXs()
  xCell = x0 + dx * (cell - 1)
  u1  = dbin$getColumn(name = "x1", useSel = FALSE)
  u2  = dbin$getColumn(name = "x2", useSel = FALSE)
  sel = as.numeric( (abs(u1 - xCell[1]) <= dx[1]/2)&(abs(u2 - xCell[2]) <= dx[2]/2))
  err = dbin$deleteColumn(name = nameSel)
  err = dbin$setColumn(tab = sel, name = nameSel, useSel=FALSE)
  err = dbin$setLocator(name = nameSel, locatorType = ELoc_SEL(), cleanSameLocator = TRUE)
  dbin$getSampleNumber(useSel = TRUE)
}

err = surfaces$deleteColumn("inBlock")
err = surfaces$deleteColumn("sel_*")
print(paste0(">>> number of selected samples = ", select_samples(surfaces, rank = ijk2rankVec(ijk = cell, nn = grid$getNXs()), grid)))

## [1] ">>> number of selected samples = 238"

# plot of selected samples of the surface grid

err = surfaces$clearSelection()
err = surfaces$setLocator(name = "inBlock", locatorType = ELoc_SEL(), cleanSameLocator = TRUE)
var_nm = "Z_1.3"
p = ggDefaultGeographic() + 
    plot.grid(surfaces, name_raster = var_nm, useSel = TRUE, legend.name.raster = var_nm,
            show.legend.raster = TRUE) +
    plot.polygon(poly = pol_lim, color = "orange", fill = NA) +
    plot.decoration(xlab = "Easting", ylab = "Northing", 
                  title = paste0("2D surface: variable = ", var_nm))
ggPrint(p)

u1 = surfaces$getColumn(name = "x1", useSel = TRUE)
u2 = surfaces$getColumn(name = "x2", useSel = TRUE)

#
X0 = grid$getX0s()
DX = grid$getDXs()
ll = list( x = c(
  X0[1] + DX[1] * (cell[1] - 1  - 1/2), 
  X0[1] + DX[1] * (cell[1] - 1  + 1/2  ), 
  X0[1] + DX[1] * (cell[1] - 1  + 1/2), 
  X0[1] + DX[1] * (cell[1] - 1  - 1/2), 
  X0[1] + DX[1] * (cell[1] - 1  - 1/2)),
  y = c(
  X0[2] + DX[2] * (cell[2] - 1  - 1/2), 
  X0[2] + DX[2] * (cell[2] - 1  - 1/2), 
  X0[2] + DX[2] * (cell[2] - 1  + 1/2), 
  X0[2] + DX[2] * (cell[2] - 1  + 1/2), 
  X0[2] + DX[2] * (cell[2] - 1  - 1/2)))
plot(ll$x, ll$y, col = "orange", type = "l")
abline(v = X0[1] + DX[1] * (cell[1] - c(-1,1)/2), col = "gray", lty = 2)
abline(h = X0[2] + DX[2] * (cell[2] - 1  - c(-1,1)/2), col = "gray", lty = 2)
points(u1, u2, col = "red", pch = 19)

Computation of the multivariate simulation

flag.match = FALSE
ranks = grid$getColumn(name = "rank", useSel = TRUE)
# processing some cells
cell_selected = sort(sample(ranks, size = 5, replace = FALSE))

# --------------------------------------------------------------------------
# combining the univariate simulations into multivariate simulations
# --------------------------------------------------------------------------
if (flag.match) {
  K = min(nbsim)
  idx = matrix(NaN, nrow = K, ncol = N)
  for (k in 1:K) {idx[k,] = k}
} else {
  K = prod(nbsim)
  idx = rank2ijkVec(rank = 1:K, nn = nbsim)
} # flag.match

P  = N + 1
print(paste0(">>> computing ", P, " proportions over ", K, " simulations"))

## [1] ">>> computing 4 proportions over 36 simulations"

for (r in cell_selected) {
  cell = rank2ijkVec(rank = r, nn = grid$getNXs())
  ns = select_samples(surfaces, r, grid)
  # mono-variate simulations of the N variables for the selected samples
  H = list()
  for (n in 1:N) {
    H[[1+length(H)]] <- matrix(surfaces$getColumns(names = paste0("Z_", n, ".*"), useSel = TRUE),
               nrow = surfaces$getSampleNumber(useSel = TRUE),
               ncol = nbsim[n]
              )
  }
  z_base = (cell[3]-1)/grid$getNXs()[3]
  z_top  = (cell[3])/grid$getNXs()[3]
  h_max  = z_top - z_base
  
  # code = 0 -Inf           < z <= H1
  # code = 1   H1           < z <= H1 + H2
  # code = 2   H1 + H2      < z <= H1 + H2 + H3
  # code = 3   H1 + H2 + H3 < z <= + Inf
  res = array(0.0, c(ns, K, P))

    # bottom to top
  for (i in 1:(P-1)) {
    for (j in 1:i) {res[,,i] = res[,,i] + H[[j]][,idx[,j]]}
    res[,,i] = pmin(pmax(res[,,i] - z_base,0), h_max)
  }
  res[,,P] = h_max

  # bottom to top
  for (i in P:2){
    res[,,i] = res[,,i] - res[,,i-1]
  }
  res = res / h_max
  
  # -----------------------------------------------------------------
  # up-scaling (averaging the ns samples) to defined a P x K matrix
  # -----------------------------------------------------------------
  up_res = apply(X = res, MARGIN = c(2,3), FUN = mean)
  
  # -----------------------------------------------------------------
  # statistics on the K simulation up-scaling (averaging the ns samples) to defined a P x K matrix
  # -----------------------------------------------------------------
  count <- function(u){length(u) - sum(is.na(u))}
  opers = c("count", "min", "max", "median", "mean", "sd")
  tab = matrix(NaN, nrow = P, ncol = length(opers))
  for (j in seq_along(opers)) {
    tab[, j] = apply(X = up_res, MARGIN = 2, FUN = opers[j])
  }
  colnames(tab) <- opers
  rownames(tab) <- paste0("Code = ", 0:(P-1))

  print(paste0("== Cell #", r, "/", grid$getSampleNumber(useSel=FALSE)," (regrouping ", ns, " samples)"))
  
  titre = paste0("Statistics on ", ns, " samples", 
                " for cell ", cell[1], "/", cell[2], "/", cell[3], 
                " over the ", K, " simulations")
  print(knitr::kable(tab, digits = 3, caption = titre))
  
} # loop over the cells of the grid

## [1] "== Cell #213/910 (regrouping 250 samples)"
## 
## 
## Table: Statistics on 250 samples for cell 3/9/2 over the 36 simulations
## 
## |         | count| min|   max| median|  mean|    sd|
## |:--------|-----:|---:|-----:|------:|-----:|-----:|
## |Code = 0 |    36|   0| 0.911|  0.009| 0.307| 0.433|
## |Code = 1 |    36|   0| 0.696|  0.077| 0.140| 0.191|
## |Code = 2 |    36|   0| 0.739|  0.080| 0.199| 0.251|
## |Code = 3 |    36|   0| 1.000|  0.229| 0.355| 0.373|
## [1] "== Cell #294/910 (regrouping 247 samples)"
## 
## 
## Table: Statistics on 247 samples for cell 4/4/3 over the 36 simulations
## 
## |         | count|   min|   max| median|  mean|    sd|
## |:--------|-----:|-----:|-----:|------:|-----:|-----:|
## |Code = 0 |    36| 0.003| 0.630|  0.563| 0.399| 0.285|
## |Code = 1 |    36| 0.368| 0.997|  0.437| 0.541| 0.245|
## |Code = 2 |    36| 0.000| 0.588|  0.000| 0.060| 0.165|
## |Code = 3 |    36| 0.000| 0.000|  0.000| 0.000| 0.000|
## [1] "== Cell #346/910 (regrouping 258 samples)"
## 
## 
## Table: Statistics on 258 samples for cell 6/9/3 over the 36 simulations
## 
## |         | count|   min|   max| median|  mean|    sd|
## |:--------|-----:|-----:|-----:|------:|-----:|-----:|
## |Code = 0 |    36| 0.000| 0.932|  0.052| 0.328| 0.434|
## |Code = 1 |    36| 0.012| 0.948|  0.238| 0.386| 0.358|
## |Code = 2 |    36| 0.000| 0.980|  0.105| 0.254| 0.301|
## |Code = 3 |    36| 0.000| 0.459|  0.000| 0.031| 0.092|
## [1] "== Cell #423/910 (regrouping 238 samples)"
## 
## 
## Table: Statistics on 238 samples for cell 3/4/4 over the 36 simulations
## 
## |         | count|   min|   max| median|  mean|    sd|
## |:--------|-----:|-----:|-----:|------:|-----:|-----:|
## |Code = 0 |    36| 0.000| 0.000|  0.000| 0.000| 0.000|
## |Code = 1 |    36| 0.036| 0.934|  0.414| 0.444| 0.268|
## |Code = 2 |    36| 0.066| 0.951|  0.586| 0.549| 0.260|
## |Code = 3 |    36| 0.000| 0.138|  0.000| 0.006| 0.024|
## [1] "== Cell #507/910 (regrouping 260 samples)"
## 
## 
## Table: Statistics on 260 samples for cell 7/12/4 over the 36 simulations
## 
## |         | count|   min|   max| median|  mean|    sd|
## |:--------|-----:|-----:|-----:|------:|-----:|-----:|
## |Code = 0 |    36| 0.000| 0.448|  0.157| 0.202| 0.188|
## |Code = 1 |    36| 0.457| 0.998|  0.746| 0.719| 0.175|
## |Code = 2 |    36| 0.000| 0.543|  0.001| 0.079| 0.158|
## |Code = 3 |    36| 0.000| 0.002|  0.000| 0.000| 0.000|

Using convenient post-simulation processing function

err = surfaces$clearSelection()
err = simuPostPropByLayer(dbin = surfaces, dbout = grid, names = paste0("Z_", 1:length(nbsim), "*"), 
                          flag_match=FALSE, 
                          upscale = EPostUpscale_fromKey("MEAN"),
                          stats = EPostStat_fromKeys(c("MINI","MAXI","MED","MEAN","STD")),
                          verbose=TRUE, check_targets = cell_selected, check_level=0
                          )

## 
## Simulation Post-Processing
## --------------------------
## Multiplicity order for all variables
## - Variable 1 (Z_1*) = 3
## - Variable 2 (Z_2*) = 4
## - Variable 3 (Z_3*) = 3
## Number of Iterations: 36 (using the 'product' criterion)
## Number of Statistics: 5
## Number of Transform Variables: 4
## Number of Output Variables: 20
## 
## == Cell #213/910 (regrouping 250 samples)
##               Minimum    Maximum     Median       Mean       Std.
##      Var 1      0.000      0.911      0.009      0.307      0.433
##      Var 2      0.000      0.696      0.077      0.140      0.191
##      Var 3      0.000      0.739      0.080      0.199      0.251
##      Var 4      0.000      1.000      0.229      0.355      0.373
## 
## == Cell #294/910 (regrouping 247 samples)
##               Minimum    Maximum     Median       Mean       Std.
##      Var 1      0.003      0.630      0.563      0.399      0.285
##      Var 2      0.368      0.997      0.437      0.541      0.245
##      Var 3      0.000      0.588      0.000      0.060      0.165
##      Var 4      0.000      0.000      0.000      0.000      0.000
## 
## == Cell #346/910 (regrouping 258 samples)
##               Minimum    Maximum     Median       Mean       Std.
##      Var 1      0.000      0.932      0.052      0.328      0.434
##      Var 2      0.012      0.948      0.238      0.386      0.358
##      Var 3      0.000      0.980      0.105      0.254      0.301
##      Var 4      0.000      0.459      0.000      0.031      0.092
## 
## == Cell #423/910 (regrouping 238 samples)
##               Minimum    Maximum     Median       Mean       Std.
##      Var 1      0.000      0.000      0.000      0.000      0.000
##      Var 2      0.036      0.934      0.414      0.444      0.268
##      Var 3      0.066      0.951      0.586      0.549      0.260
##      Var 4      0.000      0.138      0.000      0.006      0.024
## 
## == Cell #507/910 (regrouping 260 samples)
##               Minimum    Maximum     Median       Mean       Std.
##      Var 1      0.000      0.448      0.157      0.202      0.188
##      Var 2      0.457      0.998      0.746      0.719      0.175
##      Var 3      0.000      0.543      0.001      0.079      0.158
##      Var 4      0.000      0.002      0.000      0.000      0.000

Testing the standard deviation

U = runif(10)

# standard deviation on the samples
if(abs(sd(U) - VectorHelper_stdv(U, FALSE)) > 1e-15) {
  u = round(sd(U), digits = 6)
  v = round(VectorHelper_stdv(U, FALSE), digits = 6)
  print(paste0("sd(U) = ", u, " vs. VectorHelper_stdv(U, FALSE) = ", v))
} else {
  print(paste0("sd(U) == VectorHelper_stdv(U, FALSE)"))
}

## [1] "sd(U) == VectorHelper_stdv(U, FALSE)"

# standard deviation on the population
if(abs(sd(U) - VectorHelper_stdv(U, TRUE)) > 1e-15) {
  u = round(sd(U), digits = 6)
  v = round(VectorHelper_stdv(U, TRUE), digits = 6)
  print(paste0("sd(U) = ", u, " vs. VectorHelper_stdv(U, TRUE) = ", v))
} else {
  print(paste0("sd(U) == VectorHelper_stdv(U, TRUE)"))
}

## [1] "sd(U) = 0.224831 vs. VectorHelper_stdv(U, TRUE) = 0.213293"

# formulae
local_mean <- function(u) {sum(u)/length(u)}
local_varP <- function(u) {sum(u^2)/length(u) - local_mean(u)^2}
local_var  <- function(u) {m = local_mean(u); sum((u-m)^2)/(length(u) - 1)}
local_var2 <- function(u) {(sum(u^2) - length(u) * mean(u)^2)/(length(u) - 1)}

stopifnot(abs(sd(U) - sqrt(local_var(U))) < 1.e-15)
stopifnot(abs(sd(U) - sqrt(local_var2(U))) < 1.e-15)

Simulation Post-Processing for calculating Layer Proportions

Xavier Freulon

2023-07-24