import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc
import matplotlib.pyplot as plt
import numpy as np
import os

## Notebook display setup
gdoc.setNoScroll()

## Every gstlearn object created below lives in a 2-D space of type RN
ndim = 2
gl.defineDefaultSpace(gl.ESpaceType.RN, ndim)

## Global gstlearn constant NTCAR set to 15, and default dimensions for
## geographic plots (presumably the figure size - confirm in gstlearn.plot)
gl.OptCst.define(gl.ECst.NTCAR, 15)
gp.setDefaultGeographic(dims=[8, 8])

## Load observations
## (point data base read from the "Scotland_Temperatures.NF" file)
temp_nf = gdoc.loadData("Scotland", "Scotland_Temperatures.NF")
dat = gl.Db.createFromNF(temp_nf)
### Change variable name
### (the column matching "*temp" is renamed to "Temperature")
dat.setName("*temp", "Temperature")

## Load grid
## (grid data base read from the "Scotland_Elevations.NF" file; it is the
## output data base of every kriging call in this file)
elev_nf = gdoc.loadData("Scotland", "Scotland_Elevations.NF")
target = gl.DbGrid.createFromNF(elev_nf)

## Display the grid summary
target

Data Base Grid Characteristics
==============================

Data Base Summary
-----------------
File is organized as a regular grid
Space dimension              = 2
Number of Columns            = 4
Total number of samples      = 11097
Number of active samples     = 3092

Grid characteristics:
---------------------
Origin :          65.000        535.000
Mesh   :           4.938          4.963
Number :              81            137

Variables
---------
Column = 0 - Name = Longitude - Locator = x1
Column = 1 - Name = Latitude - Locator = x2
Column = 2 - Name = Elevation - Locator = f1
Column = 3 - Name = inshore - Locator = sel

## Map of the grid variable "Elevation", with the raster legend requested
ax = target.plot("Elevation", flagLegendRaster=True)

## Display the summary of the observation data base
dat

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 5
Total number of samples      = 236

Variables
---------
Column = 0 - Name = rank - Locator = NA
Column = 1 - Name = Longitude - Locator = x1
Column = 2 - Name = Latitude - Locator = x2
Column = 3 - Name = Elevation - Locator = NA
Column = 4 - Name = Temperature - Locator = z1

## Count the values of each variable, each variable being handled on its own
gl.dbStatisticsPrint(
    db=dat,
    names=["Elevation", "Temperature"],
    opers=gl.EStatOption.fromKeys(["NUM"]),
    flagIso=False,
    title="Number of observations",
)

Number of observations
----------------------
                     Number
Elevation               236
Temperature             151

## Mean, minimum and maximum of both observed variables
gl.dbStatisticsPrint(
    db=dat,
    names=["Elevation", "Temperature"],
    opers=gl.EStatOption.fromKeys(["MEAN", "MINI", "MAXI"]),
    flagIso=False,
    title="Statistics of observations",
)

Statistics of observations
--------------------------
                    Minimum         Maximum            Mean
Elevation             2.000         800.000         146.441
Temperature           0.600           5.200           2.815

## Mean, minimum and maximum of the elevation on the target grid
gl.dbStatisticsPrint(
    db=target,
    names=["Elevation"],
    opers=gl.EStatOption.fromKeys(["MEAN", "MINI", "MAXI"]),
    flagIso=False,
    title="Statistics of target",
)

Statistics of target
--------------------
                  Minimum         Maximum            Mean
Elevation           0.000        1270.000         241.152

## Same statistics with flagIso=True
## NOTE(review): flagIso=True presumably keeps only the samples where every
## listed variable is defined (the printed title calls them "filtered") -
## confirm against the gstlearn documentation.
gl.dbStatisticsPrint(
    db=dat,
    names=["Elevation", "Temperature"],
    opers=gl.EStatOption.fromKeys(["MEAN", "MINI", "MAXI"]),
    flagIso=True,
    title="Filtered statistics of observations",
)

Filtered statistics of observations
-----------------------------------
                    Minimum         Maximum            Mean
Elevation             3.000         387.000          87.974
Temperature           0.600           5.200           2.815

## Elevation raster with the observations drawn on top, symbol size driven
## by the temperature value
ax = target.plot(nameRaster="Elevation")
ax = dat.plot(
    nameSize="Temperature",
    color="yellow",
    sizmin=0.1,
    sizmax=20,
)

## Scatter plot of Temperature against Elevation with its regression line
ax = gp.correlation(
    dat,
    namex="Elevation",
    namey="Temperature",
    asPoint=True,
    regrLine=True,
)
ax.decoration(title="Correlation between Temperature and Elevation")

## Experimental variogram of the raw temperature in 2 directions
varioparam = gl.VarioParam.createMultiple(ndir=2, npas=30, dpas=10)
vario_raw2dir = gl.Vario(varioparam)
err = vario_raw2dir.compute(dat)

## Fit a model built from the listed basic structures, then set its drift
## with setDriftIRF(0, 0)
fitmod_raw = gl.Model()
err = fitmod_raw.fit(
    vario_raw2dir,
    types=[
        gl.ECov.NUGGET,
        gl.ECov.EXPONENTIAL,
        gl.ECov.CUBIC,
        gl.ECov.LINEAR,
    ],
)
fitmod_raw.setDriftIRF(0, 0)

## Overlay the experimental variogram and the fitted model
ax = gp.varmod(vario_raw2dir, fitmod_raw)
ax.decoration(
    title="Experimental and fitted variogram models - Raw temperature observations",
    xlabel="Distance (km)",
    ylabel="Variogram",
)

## Unique neighborhood, reused by the later kriging and cross-validation calls
uniqueNeigh = gl.NeighUnique.create()

## Ordinary kriging of the temperature on the grid; results are stored in
## `target` under the "OK" prefix
err = gl.kriging(
    dbin=dat,
    dbout=target,
    model=fitmod_raw,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="OK"),
)

## Map of the estimate with the observation locations in black
ax = target.plot(nameRaster="OK*estim")
ax = dat.plot(flagCst=True, color="black")
ax.decoration(title="Temperature - Ordinary Kriging")

## Summary statistics of the ordinary-kriging columns stored on the grid
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["OK.T*"],
    opers=opers,
    title="Statistics on the Ordinary Kriging:",
)

Statistics on the Ordinary Kriging:
-----------------------------------
                              Number         Minimum         Maximum            Mean        St. Dev.
OK.Temperature.estim            3092           0.588           5.153           2.826           0.976
OK.Temperature.stdev            3092           0.036           0.990           0.407           0.187

## Compute cross-validation
## (results are added to `dat` under the "CV_OK" prefix, locators untouched)
err = gl.xvalid(
    dat,
    model=fitmod_raw,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="CV_OK", flag_locator=False),
)

## Mean of the squared cross-validation errors, NaN entries being ignored
mse = np.nanmean(np.square(dat.getColumn("CV_OK*esterr*")))
print("Mean squared cross-validation error:", round(mse, 3))

## Same score for the standardized errors
mse = np.nanmean(np.square(dat.getColumn("CV_OK*stderr*")))
print("Mean squared standardized error:", round(mse, 3))

Mean squared cross-validation error: 0.281
Mean squared standardized error: 2.635

## Give the Z locator to both Temperature and Elevation, so that the
## variogram and kriging calls that follow work on the two variables
dat.setLocators(names=["Temperature", "Elevation"], locatorType=gl.ELoc.Z)
## Display the updated data base summary
dat

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 7
Total number of samples      = 236

Variables
---------
Column = 0 - Name = rank - Locator = NA
Column = 1 - Name = Longitude - Locator = x1
Column = 2 - Name = Latitude - Locator = x2
Column = 3 - Name = Elevation - Locator = z2
Column = 4 - Name = Temperature - Locator = z1
Column = 5 - Name = CV_OK.Temperature.esterr - Locator = NA
Column = 6 - Name = CV_OK.Temperature.stderr - Locator = NA

## Experimental simple and cross variograms of Temperature and Elevation
varioexp2var = gl.Vario.create(varioparam)
err = varioexp2var.compute(dat)

ax = gp.varmod(varioexp2var)

## Fit a bivariate model built from the listed basic structures
fitmod2var = gl.Model()
err = fitmod2var.fit(
    varioexp2var,
    types=[
        gl.ECov.NUGGET,
        gl.ECov.EXPONENTIAL,
        gl.ECov.CUBIC,
        gl.ECov.LINEAR,
    ],
)
fitmod2var.setDriftIRF(0, 0)

## Overlay experimental variograms and fitted model
ax = gp.varmod(varioexp2var, fitmod2var, lw=2)
gp.decoration(ax, title="Temperature (°C) and Elevation")

## Cokriging with the bivariate model; results are stored in `target`
## under the "COK" prefix
err = gl.kriging(
    dbin=dat,
    dbout=target,
    model=fitmod2var,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="COK"),
)

## Map of the temperature estimate with the observation locations in black
ax = target.plot(nameRaster="COK.Temp*estim")
ax = dat.plot(flagCst=True, color="black")
ax.decoration(title="Temperature - CoKriging")

## Summary statistics of the cokriging temperature columns
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["COK.T*"],
    opers=opers,
    title="Statistics on the CoKriging predictions",
)

Statistics on the CoKriging predictions
---------------------------------------
                               Number         Minimum         Maximum            Mean        St. Dev.
COK.Temperature.estim            3092           0.200           5.094           2.671           0.970
COK.Temperature.stdev            3092           0.231           0.948           0.448           0.109

## Scatter plot of the cokriging estimate against the ordinary-kriging
## estimate, with the first bisector drawn
ax = gp.correlation(
    target,
    namex="OK.T*estim",
    namey="COK.T*estim",
    bissLine=True,
    bins=100,
    cmin=1,
)
ax.decoration(xlabel="Ordinary Kriging", ylabel="Ordinary CoKriging")

## Side-by-side statistics of the two estimates.
## The title names the columns actually compared (OK and COK); it used to
## announce "Universal kriging", which is not one of them.
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["OK.T*estim", "COK.T*estim"],
    opers=opers,
    title="Comparison between Ordinary Kriging and CoKriging predictions",
)

Comparison between Ordinary and Universal kriging predictions
-------------------------------------------------------------
                               Number         Minimum         Maximum            Mean        St. Dev.
OK.Temperature.estim             3092           0.588           5.153           2.826           0.976
COK.Temperature.estim            3092           0.200           5.094           2.671           0.970

## Moving neighborhood used for the cokriging cross-validation
## NOTE(review): the other cross-validations in this file use `uniqueNeigh`;
## confirm that comparing their scores with this one is intended.
movingNeigh = gl.NeighMoving.create(radius=1000, nmaxi=10)

## Cross-validation with the bivariate model; results are added to `dat`
## under the "CV_COK" prefix, locators untouched
err = gl.xvalid(
    dat,
    model=fitmod2var,
    neigh=movingNeigh,
    namconv=gl.NamingConvention.create(prefix="CV_COK", flag_locator=False),
)

## Scores for the Temperature variable (NaN entries ignored)
mse = np.nanmean(np.square(dat.getColumn("CV_COK.Temperature*esterr*")))
print("Mean squared cross-validation error:", round(mse, 3))

mse = np.nanmean(np.square(dat.getColumn("CV_COK.Temperature*stderr*")))
print("Mean squared standardized error:", round(mse, 3))

Mean squared cross-validation error: 0.279
Mean squared standardized error: 1.227

## Same two scores, this time for the Elevation variable
mse = np.nanmean(np.square(dat.getColumn("CV_COK.Elevation*esterr*")))
print("Mean squared cross-validation error:", round(mse, 3))

mse = np.nanmean(np.square(dat.getColumn("CV_COK.Elevation*stderr*")))
print("Mean squared standardized error:", round(mse, 3))

Mean squared cross-validation error: 17849.434
Mean squared standardized error: 1.206

## Set `z` locator to temperature
dat.setLocator("Temperature",gl.ELoc.Z,cleanSameLocator=True)

## Linear regression of Temperature on Elevation, with a constant term
regr = gl.regression(dat, "Temperature", ["Elevation"], mode=0, flagCst=True)
regr.display()
## Keep the two coefficients: later in this file `b` is added as the
## intercept and `a` multiplies the grid elevation
b = regr.getCoeff(0)
a = regr.getCoeff(1)

Linear Regression
-----------------
- Calculated on 151 active values
- Constant term           = 3.61197
- Explanatory Variable #1 = -0.0090641
- Initial variance        = 1.01979
- Variance of residuals   = 0.363298

## Store the regression residuals in `dat` under the "RegRes" prefix,
## locators untouched
err = gl.dbRegression(
    dat,
    "Temperature",
    ["Elevation"],
    flagCst=True,
    namconv=gl.NamingConvention.create("RegRes", flag_locator=False),
)

## Summary statistics of the residuals
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    dat,
    names=["RegRes*"],
    opers=opers,
    title="Statistics on the residuals",
)

Statistics on the residuals
---------------------------
                            Number         Minimum         Maximum            Mean        St. Dev.
RegRes.Temperature             151          -1.359           1.795           0.000           0.603

## Scatter plot of the residuals against the elevation, with regression line
ax = gp.correlation(
    dat,
    namex="Elevation",
    namey="RegRes*",
    regrLine=True,
    asPoint=True,
)

## The residuals become the only `z` variable
dat.setLocator("RegRes*", gl.ELoc.Z, cleanSameLocator=True)

## Compute experimental variogram
varioexpR = gl.Vario(varioparam)
err = varioexpR.compute(dat)

## Fit model
fitmodR = gl.Model()
err = fitmodR.fit(
    varioexpR,
    types=[gl.ECov.NUGGET, gl.ECov.SPHERICAL, gl.ECov.LINEAR],
)
fitmodR.setDriftIRF(0, 0)

## Overlay the experimental variogram and the fitted model
ax = gp.varmod(varioexpR, fitmodR)
ax.decoration(
    title="Experimental and fitted variogram models - Temperature Residual",
    xlabel="Distance (km)",
    ylabel="Variogram",
)

## Compute kriging
## (kriging of the residuals; results stored in `target` under "ROK")
err = gl.kriging(
    dbin=dat,
    dbout=target,
    model=fitmodR,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="ROK"),
)

## Map of the kriged residuals with the observation locations in black
ax = target.plot("ROK*estim")
ax = dat.plot(flagCst=True, color="black")
ax.decoration(title="Temperature residuals - Ordinary Kriging")

## Compute temperature predictor
## (regression line evaluated on the grid elevation, plus kriged residual)
ROK_estim = b + a * target["Elevation"] + target["ROK*estim"]

## Add it to data base
uid = target.addColumns(ROK_estim, "KR.Temperature.estim")

## Map of the resulting temperature predictor
ax = target.plot("KR.T*estim")
ax = dat.plot(flagCst=True, color="black")
ax.decoration(title="Temperature - Ordinary Kriging of the residuals")

## Scatter plot of the residual-kriging predictor (KR) against the
## ordinary-kriging estimate, on identical axes, with the first bisector.
## The y label names the KR predictor; it used to read "Ordinary CoKriging"
## although the y variable is "KR.T*estim".
ax = gp.correlation(
    target,
    namex="OK.T*estim",
    namey="KR.T*estim",
    bissLine=True,
    bins=100,
    flagSameAxes=True,
    cmin=1,
)
ax.decoration(
    xlabel="Ordinary Kriging",
    ylabel="Ordinary Kriging of the residuals",
)

## Side-by-side statistics of the two predictors
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["OK.T*estim*", "KR.T*estim"],
    opers=opers,
    title="Comparison between Ordinary and Residual kriging predictions",
)

Comparison between Ordinary and Residual kriging predictions
------------------------------------------------------------
                              Number         Minimum         Maximum            Mean        St. Dev.
OK.Temperature.estim            3092           0.588           5.153           2.826           0.976
KR.Temperature.estim            3092          -8.097           5.108           1.445           1.906

## Set `z` locator to temperature
dat.setLocator("Temperature",gl.ELoc.Z,cleanSameLocator=True)

## Create model and set polynomial drift of degree 1
polDriftModel = gl.Model.create()
err = polDriftModel.setDriftIRF(order=1)

## Compute variogram of residuals
## (the model carrying the drift is passed to compute)
vario_res2dir = gl.Vario(varioparam)
err = vario_res2dir.compute(dat,model=polDriftModel)

## Recompute the raw-temperature variogram for display.
## NOTE(review): this rebinds `varioparam` and `vario_raw2dir` using the same
## parameters as their first definition in this file - confirm whether the
## recomputation is needed or the earlier objects could be reused.
varioparam = gl.VarioParam.createMultiple(ndir=2, npas=30, dpas=10)
vario_raw2dir = gl.Vario(varioparam)
err = vario_raw2dir.compute(dat)

ax = gp.varmod(vario_raw2dir)

## Fit the covariance part of the drift model on the residual variogram
err = polDriftModel.fit(vario_res2dir,types=[gl.ECov.NUGGET, gl.ECov.EXPONENTIAL, gl.ECov.CUBIC])

ax = gp.varmod(vario_res2dir, polDriftModel)
ax.decoration(title="Experimental and fitted variogram models - Residuals", 
              xlabel = "Distance (km)", ylabel = "Variogram")

## Regression of Temperature driven by the model (mode=2), then print it
regResults = gl.regression(dat, nameResp="Temperature", mode=2, model=polDriftModel)
regResults.display()

Linear Regression
-----------------
- Calculated on 151 active values
- Explanatory Variable #1 = 3.52136
- Explanatory Variable #2 = -0.00746599
- Explanatory Variable #3 = 0.00197753
- Initial variance        = 1.01979
- Variance of residuals   = 0.735557

## Universal kriging with the polynomial-drift model; results are stored in
## `target` under the "UK" prefix
err = gl.kriging(
    dbin=dat,
    dbout=target,
    model=polDriftModel,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="UK"),
)

## Map of the estimate with the observation locations in black
ax = target.plot(nameRaster="UK*estim")
ax = dat.plot(flagCst=True, color="black")
ax.decoration(title="Temperature - Universal Kriging")

## Summary statistics of the universal-kriging columns
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["UK.T*"],
    opers=opers,
    title="Statistics on the Universal Kriging:",
)

Statistics on the Universal Kriging:
------------------------------------
                              Number         Minimum         Maximum            Mean        St. Dev.
UK.Temperature.estim            3092           0.613           5.051           2.841           0.923
UK.Temperature.stdev            3092           0.083           0.919           0.555           0.138

## Scatter plot of the universal-kriging estimate against the
## ordinary-kriging estimate, with the first bisector drawn
ax = gp.correlation(
    target,
    namex="OK*estim",
    namey="UK*estim",
    bissLine=True,
    bins=100,
    cmin=1,
)
ax.decoration(xlabel="Ordinary Kriging", ylabel="Universal Kriging")

## Side-by-side statistics (reuses the `opers` defined in an earlier cell)
gl.dbStatisticsPrint(
    target,
    names=["OK.T*estim", "UK.T*estim"],
    opers=opers,
    title="Comparison between Ordinary and Universal kriging predictions:",
)

Comparison between Ordinary and Universal kriging predictions:
--------------------------------------------------------------
                              Number         Minimum         Maximum            Mean        St. Dev.
OK.Temperature.estim            3092           0.588           5.153           2.826           0.976
UK.Temperature.estim            3092           0.613           5.051           2.841           0.923

## Cross-validation with the polynomial-drift model; results are added to
## `dat` under the "CV_UK" prefix, locators untouched
err = gl.xvalid(
    dat,
    model=polDriftModel,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="CV_UK", flag_locator=False),
)

## Mean of the squared cross-validation errors, NaN entries being ignored
mse = np.nanmean(np.square(dat.getColumn("CV_UK*esterr*")))
print("Mean squared cross-validation error:", round(mse, 3))

## Same score for the standardized errors
mse = np.nanmean(np.square(dat.getColumn("CV_UK*stderr*")))
print("Mean squared standardized error:", round(mse, 3))

Mean squared cross-validation error: 0.251
Mean squared standardized error: 0.855

## Set `z` locator to temperature
dat.setLocator("Temperature", gl.ELoc.Z, cleanSameLocator=True)

## Set `f` locator to elevation
dat.setLocator("Elevation", gl.ELoc.F, cleanSameLocator=True)

## Model whose drift is set with order 0 and one external drift function
EDmodel = gl.Model()
EDmodel.setDriftIRF(order=0, nfex=1)

## Experimental variogram computed with that drift model
varioKED = gl.Vario(varioparam)
err = varioKED.compute(dat, model=EDmodel)

## Raw variogram (dotted) and external-drift variogram (dashed) together
ax = gp.varmod(vario_raw2dir, linestyle="dotted")
ax = gp.varmod(varioKED, linestyle="dashed")
ax.decoration(
    title="Temperature (°C)",
    xlabel="Distance (km)",
    ylabel="Variogram",
)

## Fit the covariance part of the model on the external-drift variogram
err = EDmodel.fit(
    varioKED,
    types=[gl.ECov.NUGGET, gl.ECov.CUBIC, gl.ECov.GAUSSIAN],
)

ax = gp.varmod(varioKED, EDmodel)
ax.decoration(title="Experimental and fitted variogram models - Residuals")

## Kriging with external drift; results are stored in `target` under the
## "KED" prefix
err = gl.kriging(
    dbin=dat,
    dbout=target,
    model=EDmodel,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="KED"),
)

## Map of the estimate with the observation locations in black
ax = target.plot("KED.T*estim")
ax = dat.plot(flagCst=True, color="black")
ax.decoration(title="Temperature - Kriging with external drift")

## Summary statistics of the KED columns
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["KED.T*"],
    opers=opers,
    title="Statistics on the Kriging with External Drift predictions",
)

Statistics on the Kriging with External Drift predictions
---------------------------------------------------------
                               Number         Minimum         Maximum            Mean        St. Dev.
KED.Temperature.estim            3092          -6.004           4.773           1.778           1.540
KED.Temperature.stdev            3092           0.312           0.615           0.396           0.051

## Scatter plot of the KED estimate against the ordinary-kriging estimate,
## on identical axes, with the first bisector drawn
ax = gp.correlation(
    target,
    namex="OK.T*estim",
    namey="KED.T*estim",
    bissLine=True,
    bins=100,
    flagSameAxes=True,
    cmin=1,
)
ax.decoration(xlabel="Ordinary Kriging", ylabel="Kriging with External Drift")

## Cross-validation with the external-drift model; results are added to
## `dat` under the "CV_KED" prefix, locators untouched
err = gl.xvalid(
    dat,
    model=EDmodel,
    neigh=uniqueNeigh,
    namconv=gl.NamingConvention.create(prefix="CV_KED", flag_locator=False),
)

## Mean of the squared cross-validation errors, NaN entries being ignored
mse = np.nanmean(np.square(dat.getColumn("CV_KED*esterr*")))
print("Mean squared cross-validation error:", round(mse, 3))

## Same score for the standardized errors
mse = np.nanmean(np.square(dat.getColumn("CV_KED*stderr*")))
print("Mean squared standardized error:", round(mse, 3))

Mean squared cross-validation error: 0.172
Mean squared standardized error: 1.143

## Count, minimum, maximum, mean and standard deviation of the temperature
## cross-validation errors of every method.
## The title says "Statistics": the table holds these summaries of the raw
## errors, not mean-squared errors as the previous title announced.
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    dat,
    names=["CV*.Temperature.esterr*"],
    opers=opers,
    title="Statistics of the cross-validation errors",
)

Mean-squared cross-validation errors
------------------------------------
                                   Number         Minimum         Maximum            Mean        St. Dev.
CV_OK.Temperature.esterr              151          -1.411           1.644          -0.017           0.530
CV_COK.Temperature.esterr             151          -1.759           1.648          -0.105           0.517
CV_UK.Temperature.esterr              151          -1.713           1.477          -0.003           0.501
CV_KED.Temperature.esterr             151          -1.577           1.001          -0.009           0.414

## Summary statistics of every grid column matching "*.Temperature.estim"
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["*.Temperature.estim"],
    opers=opers,
    title="Statistics of the predictors",
)

Statistics of the predictors
----------------------------
                                      Number         Minimum         Maximum            Mean        St. Dev.
OK.Temperature.estim                    3092           0.588           5.153           2.826           0.976
COK.Temperature.estim                   3092           0.200           5.094           2.671           0.970
ROK.RegRes.Temperature.estim            3092          -0.771           1.586           0.019           0.455
KR.Temperature.estim                    3092          -8.097           5.108           1.445           1.906
UK.Temperature.estim                    3092           0.613           5.051           2.841           0.923
KED.Temperature.estim                   3092          -6.004           4.773           1.778           1.540

## Summary statistics of every grid column matching "*.Temperature.stdev"
opers = gl.EStatOption.fromKeys(["NUM", "MINI", "MAXI", "MEAN", "STDV"])
gl.dbStatisticsPrint(
    target,
    names=["*.Temperature.stdev"],
    opers=opers,
    title="Statistics of the standard-deviation of each predictors",
)

Statistics of the standard-deviation of each predictors
-------------------------------------------------------
                                      Number         Minimum         Maximum            Mean        St. Dev.
OK.Temperature.stdev                    3092           0.036           0.990           0.407           0.187
COK.Temperature.stdev                   3092           0.231           0.948           0.448           0.109
ROK.RegRes.Temperature.stdev            3092           0.304           0.504           0.362           0.031
UK.Temperature.stdev                    3092           0.083           0.919           0.555           0.138
KED.Temperature.stdev                   3092           0.312           0.615           0.396           0.051

Multivariate

Table of Contents

Preamble

Exploratory data analysis

Baseline univariate model

Model fitting

Ordinary kriging

Cross-validation

Multivariate Models and Cokriging

Fitting a bivariate model

Cokriging predictions

Cross-validation

Working with residuals

Computing and fitting the residuals

Ordinary kriging of the residuals

Models with auxiliary variables

Models with polynomial trends (intrinsic model)

Fitting a model with a polynomial trend

Universal Kriging

Cross-validation

Models with External Drifts

Fitting a model with external drifts

Kriging with external drifts

Cross-validation

Comparing the various kriging predictions