Loading libraries

rm(list = ls())
library("gstlearn")
## 
## Attaching package: 'gstlearn'
## The following objects are masked from 'package:base':
## 
##     message, toString
library("tidyr") # essentially, for drop_na function 

1. Neighborhood

Setting global variables

verbose = TRUE 
flagGraphic = TRUE 

# space dimension 
ndim = 2
defineDefaultSpace(ESpaceType_RN(),ndim)
## NULL

Setting seed for the random number generator

law_set_random_seed(5584)
## NULL
set.seed(13155)

A Poisson data set will be generated and various neighborhoods will be explored around a specific node of a regular grid

dxref = 0.1
grid = DbGrid_create(nx = c(10,10),
                     dx=c(dxref, dxref))
xlim = grid$getExtrema(0)
ylim = grid$getExtrema(1)

1.1 Random Point data set

Setting parameters for the dbgrid hosting the data

coormin = grid$getCoorMinimum()
coormax = grid$getCoorMaximum()
nech = 100
data = Db_createFromBox(nech, coormin, coormax)

Plotting the data (if needed for more control over the graph parameters, use ggplot instead)

ggDefaultGeographic()+
  plot.point(data, flagCst=True)+
  plot.decoration(xlab="Easting", ylab="Northing")

1.1.1 Standard neighborhood

Defining a standard moving neighborhood

nmini = 1
nmaxi = 15
radius = 0.3
nsect = 8
nsmax = 3
neigh = NeighMoving_create(flag_xvalid = FALSE, 
                           nmaxi = nmaxi,
                           radius = radius, 
                           nmini = nmini, 
                           nsect = nsect, 
                           nsmax = nsmax)
neigh
## 
## Moving Neighborhood
## ===================
## Minimum number of samples           = 1
## Maximum number of samples           = 15
## Number of angular sectors           = 8
## Maximum number of points per sector = 3
## Maximum horizontal distance         = 0.3

Attaching the neighborhood to the grid and selecting the neighbors retained

node = 55
neigh$attach(data, grid)
## [1] 0
ranks = VectorInt()
neigh$select(node, ranks)
## NULL
dataSel = data$clone()
dum = dataSel$addSelectionByRanks(ranks)

Plotting the result

ggDefaultGeographic()+
  plot.point(data,
             color = "red")+
  plot.point(dataSel,
             color = "blue") + 
  plot.neigh(neigh = neigh,
             grid = grid, 
             node = node, 
             flagCell = FALSE)+
  plot.decoration(title="Standard Neighborhood", 
                  xlab="Easting",
                  ylab="Northing")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

1.1.2 Defining variable block extensions

In the following chunk, variables are generated in the grid containing the cell extension. This cell extension replaces the constant value of the mesh. The new values define blocks around each sample point that will be represented below

# defining the parameters 
nech = grid$getSampleNumber()
mini = 0.5
maxi = 2.5
blx = numeric()
bly = numeric()

# adding the random values 
for (i in 1:nech){
  blx = append(blx, runif(n = 1, min = mini, max = maxi))
  bly = append(bly, runif(n = 1, min = mini, max = maxi))
}
blx = dxref*blx
bly = dxref*bly

# incorporating the columns into the data base : 
dum = grid$addColumns(blx, "X-ext", ELoc_BLEX(), 0)
dum = grid$addColumns(bly, "Y-ext", ELoc_BLEX(), 1)

Displaying each block and its cell extension Until the next gstlearn version release, this needs to be done manually thanks to ggplot

df = data[]
colnames(df) = c("rank", "x1", "x2")
extensions = data.frame(x = NA, y = NA, node = NA)
for (node in 1:nech){
  edges = data.frame(grid$getCellEdges(node-1))
  edges = cbind(edges,NA)
  colnames(edges) = c("x","y", "node")
  extensions = rbind(extensions, edges[1:4,])
  extensions$node[(4*node-2):(4*node+1)] = node
}
extensions = drop_na(extensions)

p = ggplot()+
  geom_point(data=df,
             aes(x = x1, y = x2),
             color = "black",
             size = 1)+
  geom_polygon(data=extensions,
               aes(x=x, y=y, group=node),
               color = "black",
               linewidth = 0.2,
               fill = NA)
p

Choosing a specific cell again and determining the standard block neighborhood

node = 56
neigh$attach(data, grid)
## [1] 0
ranks = VectorInt()
neigh$select(node, ranks)
## NULL
dataSel = data$clone()
dum = dataSel$addSelectionByRanks(ranks)

Plotting (TODO: this currently generates a warning message)

ggDefaultGeographic()+
  plot.point(data,
             color = "red")+
  plot.point(dataSel,
             color = 'blue')+
  plot.neigh(neigh = neigh,
             grid = grid,
             node = node,
             flagCell=TRUE)+
  plot.decoration(title="Standard Neighborhood")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

Using the cell neighborhood to force the selection of all samples belonging to the block

nmini = 1
neigh = NeighCell_create(FALSE, nmini)
neigh
## 
## Cell Neighborhood
## =================
## Reject samples which do not belong to target Block

Attaching the neighborhood to the grid and selecting the neighbors retained

node = 56
neigh$attach(data, grid)
## [1] 0
ranks = VectorInt()
neigh$select(node, ranks)
## NULL
dataSel = data$clone()
dum = dataSel$addSelectionByRanks(ranks)

Plotting the result

 ggDefaultGeographic()+
    plot.point(data,
             color = "red")+
   plot.point(dataSel,
              color='blue')+
   plot.neigh(neigh,
              grid, 
              node,
              flagCell=TRUE)+
   plot.decoration(title="Standard Neighborhood")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

1.2 Data gathered along lines

This part is dedicated to data organized along lines, under the assumption that the typical distance between two consecutive flight lines is much larger than the typical distance between two observation along a line. To illustrate this case, data will be generated at the nodes of a regular grid with dx>>dy. The file is converted into a point file to make the rest of the procedure more flexible

Creating the grid and the point file containing the data

grid = DbGrid_create(nx=c(20,100), dx=c(10,1))
iuid = grid$addColumns(VectorHelper_simulateUniform(grid$getSampleNumber()),"z", ELoc_Z())
point = Db_createReduce(grid)

ggDefaultGeographic()+
  plot.point(grid,size=0.3)

Checking the neighborhood search for a target located close to one line, say at coordinates (98,50)

target = Db_createFromOnePoint(c(98,50))
# it's basically a db made from one single point 

Creating a standard neighborhood with 10 samples per neighborhood and a radius of 7

nmini = 1
nmaxi = 10
radius = 7 
neigh = NeighMoving_create(FALSE, nmaxi, radius, nmini)
neigh
## 
## Moving Neighborhood
## ===================
## Minimum number of samples           = 1
## Maximum number of samples           = 10
## Maximum horizontal distance         = 7

Attaching the neighborhood to the grid and selecting the neighbors retained

neigh$attach(point, target)
## [1] 0
ranks = VectorInt()
neigh$select(0, ranks)
## NULL
pointSel = point$clone()
dum = pointSel$addSelectionByRanks(ranks)

plotting the result

ggDefaultGeographic()+
  plot.point(point,
             color = "black")+
  plot.point(pointSel,
             color = "blue")+
  plot.neigh(neigh = neigh,
             grid = target,
             node = 0)+
  plot.decoration("Standard neighborhood")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

All samples are gathered on the closest line. This may be a problem when performing kriging on a first order random function. The linear system is singular if all samples are located on a straight line

nmini = 1
nmaxi = 10
radius = 7
nsect = 8
nsmax = 3
neigh = NeighMoving_create(FALSE, nmaxi, radius, nmini, nsect, nsmax)
neigh
## 
## Moving Neighborhood
## ===================
## Minimum number of samples           = 1
## Maximum number of samples           = 10
## Number of angular sectors           = 8
## Maximum number of points per sector = 3
## Maximum horizontal distance         = 7

Attaching the neighborhood to the grid and selecting the neighbors retained

neigh$attach(point, target)
## [1] 0
ranks = VectorInt()
neigh$select(0, ranks)
## NULL
pointSel = point$clone()
dum = pointSel$addSelectionByRanks(ranks)

Plotting the result

ggDefaultGeographic()+
  plot.point(point,
             size = 0.3,
             color = "black")+
  plot.point(pointSel,
             color = "blue")+
  plot.neigh(neigh = neigh,
             grid = target,
             node = 0)+
  plot.decoration("Standard Neighborhood")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

Despite the use of angular sectors, the samples selected in the neighborhood still belong to a single line. The obvious reason is the size of the neighborhood radius which must be enlarged.

nmini = 1
nmaxi = 10
radius = 20
nsect = 8
nsmax = 3
neigh = NeighMoving_create(FALSE, nmaxi, radius, nmini, nsect, nsmax)
neigh
## 
## Moving Neighborhood
## ===================
## Minimum number of samples           = 1
## Maximum number of samples           = 10
## Number of angular sectors           = 8
## Maximum number of points per sector = 3
## Maximum horizontal distance         = 20
neigh$attach(point, target)
## [1] 0
ranks = VectorInt()
neigh$select(0, ranks)
## NULL
pointSel = point$clone()
dum = pointSel$addSelectionByRanks(ranks)

Plotting the result

ggDefaultGeographic()+
  plot.point(point,
             color = "red",
             size = 0.3)+
  plot.point(pointSel,
             color = "blue",
             size = 0.3)+
  plot.neigh(neigh = neigh, 
             grid = target,
             node = 0)+
  plot.decoration("Standard Nneighborhood with sectors")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length

The rule now is to consider each sector in turn and select, within this sector, the sample(s) closest to the target (there are 10 maximum numbers of sample for 8 sectors so some sectors contain more than one point). Then, by increasing the number of angular sectors, it is possible to achieve a fairer distribution of the retained samples between the lines captured by the neighborhood

Preparing the neighborhood

nmini = 1
nmaxi = 20 
radius = 20
nsect = 20
nsmax = 1
neigh = NeighMoving_create(flag_xvalid = FALSE,
                           nmaxi = nmaxi,
                           radius = radius,
                           nmini = nmini,
                           nsect = nsect,
                           nsmax = nsmax)
neigh
## 
## Moving Neighborhood
## ===================
## Minimum number of samples           = 1
## Maximum number of samples           = 20
## Number of angular sectors           = 20
## Maximum number of points per sector = 1
## Maximum horizontal distance         = 20

Attaching the points selected to the neighborhood

neigh$attach(point, target)
## [1] 0
ranks = VectorInt()
neigh$select(0, ranks)
## NULL
pointSel = point$clone()
dum = pointSel$addSelectionByRanks(ranks)   #adding selection on these samples 

Plotting the result

ggDefaultGeographic()+
  plot.point(point, 
             color = "red",
             size = 0.3)+
  plot.point(pointSel,
             color = "blue",
             size = 0.3)+
  plot.neigh(neigh = neigh,
             grid = target,
             node = 0)+
  plot.decoration(title = "Standard neighborhood with more sectors")
## Warning in names(guides$guides)[to_change] <- paste0(names(guides$guides), :
## number of items to replace is not a multiple of replacement length