Preamble

In this preamble, we load the gstlearn library, clean the workspace.

rm(list=ls())
library(gstlearn)
library(ggplot2)
library(ggpubr)
library(ggnewscale)

flagInternetAvailable=TRUE

Getting the Data Bases from the repository.

The input Data Base (called temperatures) contains the target variable (January_temp). Note that this data base also contains the elevation variable (called Elevation) which is assigned the locator f (for external drift). We finally add a selection (called sel) which only consider the samples where the temperature is actually calculated.

fileNF = "Scotland_Temperatures.NF"
if(flagInternetAvailable){
  download.file(paste0("https://soft.minesparis.psl.eu/gstlearn/data/Scotland/",fileNF), fileNF, quiet=TRUE)
}
temperatures = Db_createFromNF(fileNF)

temperatures$setLocator("Elevation", ELoc_F())
## NULL
iuid = temperatures$addSelection(as.numeric(! is.na(temperatures["J*"])))
temperatures$display()
## 
## Data Base Characteristics
## =========================
## 
## Data Base Summary
## -----------------
## File is organized as a set of isolated points
## Space dimension              = 2
## Number of Columns            = 6
## Total number of samples      = 236
## Number of active samples     = 151
## 
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = Longitude - Locator = x1
## Column = 2 - Name = Latitude - Locator = x2
## Column = 3 - Name = Elevation - Locator = f1
## Column = 4 - Name = January_temp - Locator = z1
## Column = 5 - Name = NewSel - Locator = sel
## NULL

The output file is a grid (called grid). It contains an active selection (inshore) as well as the elevation over the field (called Elevation). Note that this variable is assigned the locator f (external drift).

fileNF = "Scotland_Elevations.NF"
if(flagInternetAvailable){
  download.file(paste0("https://soft.minesparis.psl.eu/gstlearn/data/Scotland/",fileNF), fileNF, quiet=TRUE)
}
grid = DbGrid_createFromNF(fileNF)
grid$display()
## 
## Data Base Grid Characteristics
## ==============================
## 
## Data Base Summary
## -----------------
## File is organized as a regular grid
## Space dimension              = 2
## Number of Columns            = 4
## Total number of samples      = 11097
## Number of active samples     = 3092
## 
## Grid characteristics:
## ---------------------
## Origin :     65.000   535.000
## Mesh   :      4.938     4.963
## Number :         81       137
## 
## Variables
## ---------
## Column = 0 - Name = Longitude - Locator = x1
## Column = 1 - Name = Latitude - Locator = x2
## Column = 2 - Name = Elevation - Locator = f1
## Column = 3 - Name = inshore - Locator = sel
## NULL

Variography

Calculate the omni-directional variogram of the temperature for 18 lags of 25.

vparam = VarioParam_createOmniDirection(npas=18, dpas=25)
vario = Vario(vparam,temperatures)
err = vario$compute()
vario$display()
## 
## Variogram characteristics
## =========================
## Number of variable(s)       = 1
## Number of direction(s)      = 1
## Space dimension             = 2
## Variance-Covariance Matrix     1.020
## 
## Direction #1
## ------------
## Number of lags              = 18
## Direction coefficients      =      1.000     0.000
## Direction angles (degrees)  =      0.000     0.000
## Tolerance on direction      =     90.000 (degrees)
## Calculation lag             =     25.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          0    78.000     7.681     0.137
##          1   512.000    26.905     0.376
##          2   915.000    50.556     0.614
##          3  1135.000    74.820     0.855
##          4  1285.000   100.070     0.935
##          5  1190.000   124.927     1.050
##          6  1117.000   149.550     1.099
##          7  1004.000   175.060     1.218
##          8   924.000   199.603     1.175
##          9   769.000   224.493     1.502
##         10   594.000   249.105     1.377
##         11   438.000   274.605     1.316
##         12   363.000   298.685     1.071
##         13   236.000   323.920     1.190
##         14   153.000   349.725     1.321
##         15   105.000   373.088     1.219
##         16    76.000   399.464     0.802
##         17    58.000   426.095     0.677
## NULL
p = ggDefault()
p = p + plot.varmod(vario)
p = p + plot.decoration(title="Variogram of Temperature (Raw)")
ggPrint(p)

We calculate the variogram (using the same calculation parameters) based on the residuals after a trend has been removed. This trend is considered as a linear combinaison of the external drift information.

vparam = VarioParam_createOmniDirection(npas=18, dpas=25)
vario = Vario(vparam,temperatures)
md = Model()
md$setDriftIRF(order=0, nfex=1)
## NULL
vario$compute(model=md)
## [1] 0
vario$display()
## 
## Variogram characteristics
## =========================
## Number of variable(s)       = 1
## Number of direction(s)      = 1
## Space dimension             = 2
## Variance-Covariance Matrix     0.363
## 
## Direction #1
## ------------
## Number of lags              = 18
## Direction coefficients      =      1.000     0.000
## Direction angles (degrees)  =      0.000     0.000
## Tolerance on direction      =     90.000 (degrees)
## Calculation lag             =     25.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          0    78.000     7.681     0.102
##          1   512.000    26.905     0.163
##          2   915.000    50.556     0.211
##          3  1135.000    74.820     0.264
##          4  1285.000   100.070     0.265
##          5  1190.000   124.927     0.321
##          6  1117.000   149.550     0.363
##          7  1004.000   175.060     0.444
##          8   924.000   199.603     0.466
##          9   769.000   224.493     0.547
##         10   594.000   249.105     0.561
##         11   438.000   274.605     0.582
##         12   363.000   298.685     0.472
##         13   236.000   323.920     0.565
##         14   153.000   349.725     0.424
##         15   105.000   373.088     0.345
##         16    76.000   399.464     0.410
##         17    58.000   426.095     0.411
## NULL
p = ggDefault()
p = p + plot.varmod(vario)
p = p + plot.decoration(title="Variogram of Temperature (Residuals)")
ggPrint(p)

Fit the variogram of residuals in a model having drifts. Some constraints have been added during the fitting step.

model = md
structs = ECov_fromKeys(c("NUGGET", "BESSEL_K"))

const = Constraints()
err = const$addItemFromParamId(EConsElem_SILL(),icov=0,type=EConsType_UPPER(),value=0.1)
err = const$addItemFromParamId(EConsElem_PARAM(),icov=1, type = EConsType_EQUAL(),value = 1)
err = const$addItemFromParamId(EConsElem_RANGE(),icov=1, type = EConsType_LOWER(),value = 100)
err = const$addItemFromParamId(EConsElem_RANGE(),icov=1, type = EConsType_UPPER(),value = 350)

err = md$fit(vario,structs,constraints=const)
p = ggDefault()
p = p + plot.varmod(vario, model)
p = p + plot.decoration(title="Variogram of Temperature (Residuals)")
ggPrint(p)

Identify the Drift

Derive the parameter of a Global Trend (composed of the Elevation as a drift function) using the SPDE formalism.

spde = SPDE(model,grid,temperatures,ESPDECalcMode_KRIGING())
coeffs = spde$getCoeffs()
cat("Trend coefficients:", coeffs)
## Trend coefficients: 3.970059 -0.006597714

Represent the scatter plot of the Temperature given the Elevation and add the Global Trend (caledlated beforehand)

p = ggplot()
p = p + plot.correlation(temperatures,namex="Elevation",namey="*temp", asPoint=TRUE)
p = p + geom_segment(aes(x = 0, y = coeffs[1], xend = 400, yend = coeffs[1] + 400 * coeffs[2]))
ggPrint(p)

Estimation

We perform the Estimation in the SPDE framework (considering the Elevation as the Global Trend)

err = krigingSPDE(temperatures, grid, model)
p = ggDefaultGeographic()
p = p + plot.grid(grid)
p = p + plot.decoration(title="Temperature (using Elevation as global Trend)")
ggPrint(p)

We also perform the Estimation by Kriging (using Elevation as External Drift). This estimation is performed in Unique Neighborhood.

neighU = NeighUnique_create()
err = kriging(temperatures, grid, model, neighU)
p = ggDefaultGeographic()
p = p + plot.grid(grid)
p = p + plot.decoration(title="Temperature (with Elevation as External Drift)")
ggPrint(p)

Comparing both estimates

p = ggplot()
p = p + plot.correlation(grid,namex="Kriging.January_temp.estim",
                         namey="KrigingSPDE.January_temp.estim", asPoint=TRUE)
p = p + plot.decoration(xlab="Kriging with External Drift", ylab="SPDE Kriging")
ggPrint(p)