In this preamble, we load the gstlearn library and clean the workspace.
rm(list=ls())
library(gstlearn)
library(ggplot2)
library(ggpubr)
library(ggnewscale)
flagInternetAvailable=TRUE
## Global plot option
plot.setDefaultGeographic(dims=c(8,8))
We load two data bases:
dat
containing point observations of two variables across Scotland: the elevation (Elevation
) and the temperature (January_temp
)target
containing a grid of points covering Scotland with a selection variable (inshore
) selecting the points that are on land, and a variable (Elevation
) giving the elevation at every point on land## Data points
fileNF = "Scotland_Temperatures.NF"
if(flagInternetAvailable){
download.file(paste0("https://soft.minesparis.psl.eu/gstlearn/data/Scotland/",fileNF), fileNF, quiet=TRUE)
}
dat = Db_createFromNF(fileNF)
## Target grid
fileNF = "Scotland_Elevations.NF"
if(flagInternetAvailable){
download.file(paste0("https://soft.minesparis.psl.eu/gstlearn/data/Scotland/",fileNF), fileNF, quiet=TRUE)
}
target = DbGrid_createFromNF(fileNF)
We also compute an experimental variogram on the observations and fit a model on it.
## Define and compute experimental variogram
varioparam = VarioParam_createOmniDirection(npas=40, dpas=10)
vario_raw2dir = Vario_create(varioparam, dat)
err = vario_raw2dir$compute()
## Fit model
fitmod = Model()
err = fitmod$fit(vario_raw2dir,
types=ECov_fromKeys(c("NUGGET", "SPHERICAL", "CUBIC")))
fitmod$display()
##
## Model characteristics
## =====================
## Space dimension = 2
## Number of variable(s) = 1
## Number of basic structure(s) = 1
## Number of drift function(s) = 0
## Number of drift equation(s) = 0
##
## Covariance Part
## ---------------
## Spherical
## - Sill = 1.155
## - Range = 135.133
## Total Sill = 1.155
## NULL
neighU = NeighUnique_create()
ndim = 2
defineDefaultSpace(ESpaceType_RN(), ndim)
## NULL
To generate unconditional simulations, we use the simtub
function. This function generates samples from a Gaussian random field with a covariance model defined in a Model
object, using the turning bands algorithm. We specify
dbout
)Model
object defining the model we want to simulate (argument model
)nbsimu
)nbtuba
)Optionally, we can specify a seed number for the simulation (to ensure reproducibility). The simtub
function adds the simulated samples directly to the target data base specified in dbout
(with a naming convention that can be set through the argument namconv
). Note that the samples generated by this function have the same mean as the one specified in the model object. If this mean has not specified been specified (through the setMeans
method), then zero-mean simulations are generated.
Let us generate a sample from the model fitmod
we fitted on the observations. First, we simulate the model with a single turning band.
err = simtub(dbout=target, model=fitmod,
nbsimu=1,
nbtuba=1, seed=12454,
namconv=NamingConvention("Simu1"))
target$display()
##
## Data Base Grid Characteristics
## ==============================
##
## Data Base Summary
## -----------------
## File is organized as a regular grid
## Space dimension = 2
## Number of Columns = 5
## Total number of samples = 11097
## Number of active samples = 3092
##
## Grid characteristics:
## ---------------------
## Origin : 65.000 535.000
## Mesh : 4.938 4.963
## Number : 81 137
##
## Variables
## ---------
## Column = 0 - Name = Longitude - Locator = x1
## Column = 1 - Name = Latitude - Locator = x2
## Column = 2 - Name = Elevation - Locator = f1
## Column = 3 - Name = inshore - Locator = sel
## Column = 4 - Name = Simu1 - Locator = z1
## NULL
p = ggDefaultGeographic()
p = p + plot.grid(target, nameRaster = "Simu1",
flagLegendRaster=TRUE,palette="Spectral",
legendNameRaster="Value")
p = p + plot.decoration(title="Simulation with 1 band")
ggPrint(p)
Let us now simulate the model using 10 turning bands.
err = simtub(dbout=target, model=fitmod,
nbsimu=1,
nbtuba=10, seed=12454,
namconv=NamingConvention("Simu10"))
p = ggDefaultGeographic()
p = p + plot.grid(target, nameRaster = "Simu10",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p = p + plot.decoration(title="Simulation with 1 band")
ggPrint(p)
Let us now simulate the model using 1000 turning bands.
err = simtub(dbout=target, model=fitmod,
nbsimu=1,
nbtuba=1000, seed=12454,
namconv=NamingConvention("Simu1000"))
p = ggDefaultGeographic()
p = p + plot.grid(target, nameRaster = "Simu1000",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p = p + plot.decoration(title="Simulation with 1 band")
ggPrint(p)
To perform conditional simulations, we use the same command as for unconditional simulations. We just need to supply two additional arguments: the data base containing the conditioning data (argument dbin
), and the type of neighborhood used when conditioning the simulations (since this is done using kriging).
Circling back to our example, let us consider the temperature observations in the data base dat
as conditioning points. Our aim is to generate simulations of the model fitmod
that honor these data.
We first must control that our data follow more or less a gaussian distribution.
p = ggplot()
p = p + plot.hist(dat,name="January_temp", bins=10)
ggPrint(p)
Then, we compute the mean of temperature observations and set it as the mean of the model, so that the future simulations of the model also share this mean.
## Compute mean temperature
mean_Temperature = dbStatisticsMono(dat, names=c("January_temp"),
opers=EStatOption_fromKeys(c("MEAN")))$getValue(0,0)
cat(paste("Mean of observed temperatures:", round(mean_Temperature,3)))
## Mean of observed temperatures: 2.815
## Add to model
err = fitmod$setMeans(mean_Temperature)
Then, to generate 10 conditional simulations using 1000 turning bands, we can simply run:
## Parameters
nbsimu = 10
nbtuba = 1000
seed = 13231
## Simulations
err = simtub(dbin=dat, dbout=target,
model=fitmod,
neigh=neighU,
nbsimu=nbsimu,
nbtuba=nbtuba, seed=seed)
Let us display a few simulation results.
p1 = ggDefaultGeographic()
p1 = p1 + plot.grid(target, nameRaster = "Simu*temp.1",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p1 = p1 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
p2 = ggDefaultGeographic()
p2 = p2 + plot.grid(target, nameRaster = "Simu*temp.2",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p2 = p2 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
p3 = ggDefaultGeographic()
p3 = p3 + plot.grid(target, nameRaster = "Simu*temp.3",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p3 = p3 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
p4 = ggDefaultGeographic()
p4 = p4 + plot.grid(target, nameRaster = "Simu*temp.4",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p4 = p4 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
ggarrange(p1,p2,p3,p4,nrow=2,ncol=2)
Let us now compute the mean of the simulations we just generated. To do so, we use the statistics
method of the Db
class.
target$statisticsBySample(names=c("Simu.January_temp*"),
opers=EStatOption_fromKeys(c("MEAN")))
## NULL
Let us compare the mean of the simulations with result from a simple kriging prediction of the temperature.
err = kriging(dat, target, model=fitmod,
neigh = neighU,
namconv=NamingConvention("KS"))
## Plot correlation plot
p = ggplot()
p = p + plot.correlation(target, "Stats.MEAN", "KS*estim", flagDiag=TRUE, bins=100)
p = p + plot.decoration(xlab="Mean of Simulations",
ylab="Simple Kriging Estimate",
title = "Correlation plot")
ggPrint(p)
In this section, we show how to simulate a model with external drifts. To do so, it suffices to call the simtub
function with a Model
object that includes external drifts.
Let us build such a model, to illustrate our point. We start by reloading the two data bases of the Preambule.
## Data points
fileNF = "Scotland_Temperatures.NF"
if(flagInternetAvailable){
download.file(paste0("https://soft.minesparis.psl.eu/gstlearn/data/Scotland/",fileNF), fileNF, quiet=TRUE)
}
dat = Db_createFromNF(fileNF)
## Target grid
fileNF = "Scotland_Elevations.NF"
if(flagInternetAvailable){
download.file(paste0("https://soft.minesparis.psl.eu/gstlearn/data/Scotland/",fileNF), fileNF, quiet=TRUE)
}
target = DbGrid_createFromNF(fileNF)
We will consider the temperature as our variable of interest, and the elevation as an external drift. Hence, we set the elevation variable to a f
locator in both data bases.
## Set `f` locator to elevation in `dat` data base
err=dat$setLocator("Elevation", ELoc_F())
## Set `f` locator to elevation in `target` data base
err=target$setLocator("Elevation", ELoc_F())
Finally, we create a model with external drift, which we fit on our data.
## Create with external drift
model_ED = Model()
err = model_ED$setDriftIRF(order=0,nfex=1)
## Create experimental variogram of residuals
vario_resED = Vario_create(varioparam, dat)
err = vario_resED$compute(model=model_ED)
## Fit model on experimental variogram
err = model_ED$fit(vario_resED,
types=ECov_fromKeys(c("SPHERICAL","CUBIC")))
model_ED$display()
##
## Model characteristics
## =====================
## Space dimension = 2
## Number of variable(s) = 1
## Number of basic structure(s) = 2
## Number of drift function(s) = 2
## Number of drift equation(s) = 2
##
## Covariance Part
## ---------------
## Spherical
## - Sill = 0.178
## - Range = 30.039
## Cubic
## - Sill = 0.420
## - Range = 451.351
## Total Sill = 0.598
##
## Drift Part
## ----------
## Universality_Condition
## External_Drift:0
## NULL
Let us plot the fitted model (solid line) together with the experimental variogram (dashed line).
ggplot() + plot.varmod(vario_resED, model_ED)
Now, to generate 10 conditional simulations from the model with external drift that we just create, we call the simtub
function with the same synthax as before.
err = simtub(dbin=dat, dbout=target, model=model_ED,
neigh=neighU,
nbsimu=nbsimu,
nbtuba=nbtuba, seed=seed)
Let us display a few simulation results.
p1 = ggDefaultGeographic()
p1 = p1 + plot.grid(target, nameRaster = "Simu*temp.1",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p1 = p1 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
p2 = ggDefaultGeographic()
p2 = p2 + plot.grid(target, nameRaster = "Simu*temp.2",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p2 = p2 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
p3 = ggDefaultGeographic()
p3 = p3 + plot.grid(target, nameRaster = "Simu*temp.3",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p3 = p3 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
p4 = ggDefaultGeographic()
p4 = p4 + plot.grid(target, nameRaster = "Simu*temp.4",
flagLegendRaster=TRUE,palette="Spectral",legendNameRaster="Value")
p4 = p4 + plot.point(dat,flagCst = T,pch=18,cex=0.5)
ggarrange(p1,p2,p3,p4,nrow=2,ncol=2)
Let us now compute the mean of the simulations we just generated, and compare it with a prediction by kriging with external drift.
target$statisticsBySample(names=c("Simu.January_temp*"),
opers=EStatOption_fromKeys(c("MEAN")))
## NULL
## Compute kriging
err = kriging(dat, target, model=model_ED,
neigh = neighU,
namconv=NamingConvention("KED"))
p = ggplot()
p = p + plot.correlation(target, "Stats.MEAN", "KED*estim", flagDiag=TRUE, bins=100)
p = p + plot.decoration(xlab="Mean of Simulations", ylab="Kriging with External Drift",title = "Correlation plot")
ggPrint(p)
As an application, we show how to compute maps probabilities of exceeding a threshold. For instance, to compute the probabilities of being positive, we start by turning the simulation results into binary variables taking the value 1 if the simulated value is positive and 0 otherwise. Then, the probability of being positive is computed as the mean of these binary variables.
## Turn simulation results into binary variable
target["Simu.January_temp*"] = target["Simu.January_temp*"] > 0
## Average binary variables
target$statisticsBySample(names=c("Simu.January_temp*"),
opers=EStatOption_fromKeys(c("MEAN")),
namconv=NamingConvention("Proba"))
## NULL
Let us plot the results.
p = ggDefaultGeographic()
p = p + plot.grid(target, "Proba.MEAN", flagLegendRaster=TRUE, legendNameRaster="Probability")
p = p + plot.decoration(title="Probability for positive Temperatures")
ggPrint(p)