Loading libraries

rm(list = ls())
library("gstlearn")
## 
## Attaching package: 'gstlearn'
## The following objects are masked from 'package:base':
## 
##     message, toString
library("tidyr") # essentially, for drop_na function 

1. Neighborhood

Setting global variables

verbose = TRUE 
flagGraphic = TRUE 

# space dimension 
ndim = 2
defineDefaultSpace(ESpaceType_RN(),ndim)
## NULL

Setting seed for the random number generator

law_set_random_seed(5584)
## NULL
set.seed(13155)

A Poisson data set will be generated and various neighborhoods will be explored around a specific node of a regular grid

dxref = 0.1
grid = DbGrid_create(nx = c(10,10),
                     dx=c(dxref, dxref))
xlim = grid$getExtrema(0)
ylim = grid$getExtrema(1)

1.1 Random Point data set

Setting parameters for the dbgrid hosting the data

coormin = grid$getCoorMinimum()
coormax = grid$getCoorMaximum()
nech = 100
data = Db_createFromBox(nech, coormin, coormax)

Plotting the data (if needed for more control over the graph parameters, use ggplot instead)

ggDefaultGeographic()+
  plot.point(data, flagCst=True)+
  plot.decoration(xlab="Easting", ylab="Northing")

1.1.1 Standard neighborhood

Defining a standard moving neighborhood

nmini = 1
nmaxi = 15
radius = 0.3
nsect = 8
nsmax = 3
neigh = NeighMoving_create(flag_xvalid = FALSE, 
                           nmaxi = nmaxi,
                           radius = radius, 
                           nmini = nmini, 
                           nsect = nsect, 
                           nsmax = nsmax)
neigh
## 
## Moving Neighborhood
## ===================
## Minimum number of samples           = 1
## Maximum number of samples           = 15
## Number of angular sectors           = 8
## Maximum number of points per sector = 3
## Maximum horizontal distance         = 0.3

Attaching the neighborhood to the grid and selecting the neighbors retained

node = 55
neigh$attach(data, grid)
## [1] 0
ranks = VectorInt()
neigh$select(node, ranks)
## NULL
dataSel = data$clone()
dum = dataSel$addSelectionByRanks(ranks)

Plotting the result

ggDefaultGeographic()+
  plot.point(data,
             color = "red")+
  plot.point(dataSel,
             color = "blue") + 
  plot.neigh(neigh = neigh,
             grid = grid, 
             node = node, 
             flagCell = FALSE)+
  plot.decoration(title="Standard Neighborhood", 
                  xlab="Easting",
                  ylab="Northing")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

1.1.2 Defining variable block extensions

In the following chunk, variables are generated in the grid containing the cell extension. This cell extension replaces the constant value of the mesh. The new values define blocks around each sample point that will be represented below

# defining the parameters 
nech = grid$getSampleNumber()
mini = 0.5
maxi = 2.5
blx = numeric()
bly = numeric()

# adding the random values 
for (i in 1:nech){
  blx = append(blx, runif(n = 1, min = mini, max = maxi))
  bly = append(bly, runif(n = 1, min = mini, max = maxi))
}
blx = dxref*blx
bly = dxref*bly

# incorporating the columns into the data base : 
dum = grid$addColumns(blx, "X-ext", ELoc_BLEX(), 0)
dum = grid$addColumns(bly, "Y-ext", ELoc_BLEX(), 1)

Displaying each block and its cell extension Until the next gstlearn version release, this needs to be done manually thanks to ggplot

df = data[]
colnames(df) = c("rank", "x1", "x2")
extensions = data.frame(x = NA, y = NA, node = NA)
for (node in 1:nech){
  edges = data.frame(grid$getCellEdges(node-1))
  edges = cbind(edges,NA)
  colnames(edges) = c("x","y", "node")
  extensions = rbind(extensions, edges[1:4,])
  extensions$node[(4*node-2):(4*node+1)] = node
}
extensions = drop_na(extensions)

p = ggplot()+
  geom_point(data=df,
             aes(x = x1, y = x2),
             color = "black",
             size = 1)+
  geom_polygon(data=extensions,
               aes(x=x, y=y, group=node),
               color = "black",
               linewidth = 0.2,
               fill = NA)
p

Choosing a specific cell again and determining the standard block neighborhood

node = 56
neigh$attach(data, grid)
## [1] 0
ranks = VectorInt()
neigh$select(node, ranks)
## NULL
dataSel = data$clone()
dum = dataSel$addSelectionByRanks(ranks)

Plotting (TODO: this currently generates a warning message)

ggDefaultGeographic()+
  plot.point(data,
             color = "red")+
  plot.point(dataSel,
             color = 'blue')+
  plot.neigh(neigh = neigh,
             grid = grid,
             node = node,
             flagCell=TRUE)+
  plot.decoration(title="Standard Neighborhood")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

Using the cell neighborhood to force the selection of all samples belonging to the block

nmini = 1
neigh = NeighCell_create(FALSE, nmini)
neigh
## 
## Cell Neighborhood
## =================
## Reject samples which do not belong to target Block

Attaching the neighborhood to the grid and selecting the neighbors retained

node = 56
neigh$attach(data, grid)
## [1] 0
ranks = VectorInt()
neigh$select(node, ranks)
## NULL
dataSel = data$clone()
dum = dataSel$addSelectionByRanks(ranks)

Plotting the result

 ggDefaultGeographic()+
    plot.point(data,
             color = "red")+
   plot.point(dataSel,
              color='blue')+
   plot.neigh(neigh,
              grid, 
              node,
              flagCell=TRUE)+
   plot.decoration(title="Standard Neighborhood")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

1.2 Data gathered along lines

This part is dedicated to data organized along lines, under the assumption that the typical distance between two consecutive flight lines is much larger than the typical distance between two observation along a line. To illustrate this case, data will be generated at the nodes of a regular grid with dx>>dy. The file is converted into a point file to make the rest of the procedure more flexible

Creating the grid and the point file containing the data

grid = DbGrid_create(nx=c(20,100), dx=c(10,1))
iuid = grid$addColumns(VectorHelper_simulateUniform(grid$getSampleNumber()),"z", ELoc_Z())
point = Db_createReduce(grid)

ggDefaultGeographic()+
  plot.point(grid,size=0.3)