Global variables

verbose = TRUE
graphics = TRUE
err = OptCst_define(ECst_NTCOL(),6)

Reading data

The data are stored in a CSV format in the file called Pollution.dat

At this stage: - it is not possible to pass the list of variable names c(“X”,“Y”). - to pass the FLAGS for DbStringFormat (they are not an ENUM)

dlfile = "https://soft.minesparis.psl.eu/gstlearn/data/Pollution/Pollution.dat"
filepath = "Pollution.dat"
download.file(dlfile, filepath, quiet=TRUE)
mydb = Db_createFromCSV(filepath,CSVformat())
err = mydb$setLocator("X",ELoc_X(),0)
err = mydb$setLocator("Y",ELoc_X(),1)
err = mydb$setLocator("Zn",ELoc_Z())
if (verbose)
{
  dbfmt = DbStringFormat()
  dbfmt$setFlags(flag_extend = TRUE)
  mydb$display(dbfmt)
}
## 
## Data Base Characteristics
## =========================
## 
## Data Base Summary
## -----------------
## File is organized as a set of isolated points
## Space dimension              = 2
## Number of Columns            = 5
## Total number of samples      = 102
## 
## Data Base Extension
## -------------------
## Coor #1 - Min =    109.850 - Max =    143.010 - Ext = 33.16
## Coor #2 - Min =    483.660 - Max =    513.040 - Ext = 29.38
## 
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = X - Locator = x1
## Column = 2 - Name = Y - Locator = x2
## Column = 3 - Name = Zn - Locator = z1
## Column = 4 - Name = Pb - Locator = p1
## NULL

Accessing to the variable names

cat("List of all variable names =",mydb$getAllNames())
## List of all variable names = rank X Y Zn Pb

Extracting the vector containing the Zn variable in order to perform a selection

tabZn = mydb$getColumn("Zn")
selZn = as.numeric(tabZn < 20)
mydb$addSelection(selZn,"sel")
## [1] 5
mydb$setLocator('Pb',ELoc_Z())
## NULL
if (verbose)
    mydb$display()
## 
## Data Base Characteristics
## =========================
## 
## Data Base Summary
## -----------------
## File is organized as a set of isolated points
## Space dimension              = 2
## Number of Columns            = 6
## Total number of samples      = 102
## Number of active samples     = 99
## 
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = X - Locator = x1
## Column = 2 - Name = Y - Locator = x2
## Column = 3 - Name = Zn - Locator = NA
## Column = 4 - Name = Pb - Locator = z1
## Column = 5 - Name = sel - Locator = sel
## NULL

Display my Data (with samples represented by color and size)

if (graphics)
    ggplot() + plot(mydb,nameColor="Pb") + plot.decoration(title="Data Set")
## Warning: Using size for a discrete variable is not advised.

Variograms

We first define the geometry of the variogram calculations

myVarioParamOmni = VarioParam()
mydir = DirParam_create(npas=10,dpas=1.)
myVarioParamOmni$addDir(mydir)
## NULL

We use the variogram definition in order to calculate the variogram cloud.

dbcloud = db_vcloud(db=mydb, varioparam=myVarioParamOmni)

We recall that the Variogram cloud is calculated by filling an underlying grid where each cell is painted according to the number of pairs at the given distance and given variability. Representing the variogram cloud.

if (graphics)
    ggplot() + plot(dbcloud,nameRaster="Cloud*") + plot.decoration(title="Variogram Cloud")

Calculating the experimental omni-directional variogram

myVarioOmni = Vario(myVarioParamOmni)
err = myVarioOmni$compute(mydb, ECalcVario_VARIOGRAM())
if (verbose)
    myVarioOmni$display()
## 
## Variogram characteristics
## =========================
## Number of variable(s)       = 1
## Number of direction(s)      = 1
## Space dimension             = 2
## Variance-Covariance Matrix     2.881
## 
## Direction #1
## ------------
## Number of lags              = 10
## Direction coefficients      =      1.000     0.000
## Direction angles (degrees)  =      0.000     0.000
## Tolerance on direction      =     90.000 (degrees)
## Calculation lag             =      1.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          0     3.000     0.389     0.462
##          1   123.000     1.081     1.495
##          2   183.000     2.038     1.620
##          3   205.000     3.006     2.526
##          4   231.000     4.013     2.240
##          5   229.000     5.036     2.524
##          6   198.000     5.962     2.396
##          7   187.000     7.000     2.708
##          8   204.000     7.996     2.772
##          9   184.000     8.990     2.868
## NULL

The variogram is represented graphically for a quick check

if (graphics)
    ggplot() + plot.varmod(myVarioOmni) + 
    plot.decoration(title="Omni-directional Variogram for Pb")

Calculate a variogram in several directions

myvarioParam = VarioParam()
mydirs = DirParam_createMultiple(ndir=4, npas=10, dpas=1.)
myvarioParam$addMultiDirs(mydirs)
## NULL
myvario = Vario(myvarioParam)
myvario$compute(mydb, ECalcVario_VARIOGRAM())
## [1] 0
if (verbose)
    myvario$display()
## 
## Variogram characteristics
## =========================
## Number of variable(s)       = 1
## Number of direction(s)      = 4
## Space dimension             = 2
## Variance-Covariance Matrix     2.881
## 
## Direction #1
## ------------
## Number of lags              = 10
## Direction coefficients      =      1.000     0.000
## Direction angles (degrees)  =      0.000     0.000
## Tolerance on direction      =     22.500 (degrees)
## Calculation lag             =      1.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          0     1.000     0.410     0.180
##          1    29.000     1.094     1.634
##          2    47.000     2.079     1.415
##          3    53.000     3.003     2.824
##          4    63.000     3.999     2.348
##          5    66.000     5.035     2.319
##          6    60.000     5.978     3.115
##          7    52.000     7.045     2.746
##          8    52.000     8.020     3.927
##          9    37.000     8.980     2.554
## 
## Direction #2
## ------------
## Number of lags              = 10
## Direction coefficients      =      0.707     0.707
## Direction angles (degrees)  =     45.000     0.000
## Tolerance on direction      =     22.500 (degrees)
## Calculation lag             =      1.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          0     1.000     0.344     0.080
##          1    31.000     1.051     1.113
##          2    50.000     1.960     1.890
##          3    62.000     2.999     2.443
##          4    58.000     4.014     2.701
##          5    51.000     5.016     2.702
##          6    36.000     5.999     1.833
##          7    37.000     7.015     2.130
##          8    50.000     7.997     2.060
##          9    53.000     8.995     2.381
## 
## Direction #3
## ------------
## Number of lags              = 10
## Direction coefficients      =      0.000     1.000
## Direction angles (degrees)  =     90.000     0.000
## Tolerance on direction      =     22.500 (degrees)
## Calculation lag             =      1.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          1    32.000     1.149     1.631
##          2    39.000     2.080     1.670
##          3    39.000     2.979     2.511
##          4    48.000     4.012     2.120
##          5    51.000     5.029     3.055
##          6    47.000     5.939     2.856
##          7    49.000     6.965     2.386
##          8    42.000     7.952     2.708
##          9    41.000     9.018     2.320
## 
## Direction #4
## ------------
## Number of lags              = 10
## Direction coefficients      =     -0.707     0.707
## Direction angles (degrees)  =    135.000     0.000
## Tolerance on direction      =     22.500 (degrees)
## Calculation lag             =      1.000
## Tolerance on distance       =     50.000 (Percent of the lag value)
## 
## For variable 1
##       Rank    Npairs  Distance     Value
##          0     1.000     0.411     1.125
##          1    31.000     1.028     1.606
##          2    47.000     2.044     1.496
##          3    51.000     3.040     2.330
##          4    62.000     4.028     1.791
##          5    61.000     5.058     2.155
##          6    55.000     5.939     1.587
##          7    49.000     6.975     3.425
##          8    60.000     8.004     2.408
##          9    53.000     8.972     3.996
## NULL
if (graphics)
    ggplot() + plot.varmod(myvario) + 
    plot.decoration(title="Multi-Directional Variogram of Pb")

Calculating the Variogram Map

myvmap = db_vmap(db=mydb,calcul_type=ECalcVario_VARIOGRAM(),nxx=c(20,20))
if (verbose)
    myvmap$display()
## 
## Data Base Grid Characteristics
## ==============================
## 
## Data Base Summary
## -----------------
## File is organized as a regular grid
## Space dimension              = 2
## Number of Columns            = 5
## Total number of samples      = 1681
## 
## Grid characteristics:
## ---------------------
## Origin :    -33.160   -29.380
## Mesh   :      1.658     1.469
## Number :         41        41
## 
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = x1 - Locator = x1
## Column = 2 - Name = x2 - Locator = x2
## Column = 3 - Name = VMAP.Pb.Var - Locator = z1
## Column = 4 - Name = VMAP.Pb.Nb - Locator = NA
## NULL
if (graphics)
    ggplot() + plot(myvmap, nameRaster="*Var") + 
    plot.decoration(title="Variogram Map")

Model

Fitting a Model. We call the Automatic Fitting procedure providing the list of covariance functions to be tested.

mymodel = Model_createFromDb(mydb)
err = mymodel$fit(vario=myvario,types=ECov_fromKeys(c("EXPONENTIAL","SPHERICAL")))

Visualizing the resulting model, overlaid on the experimental variogram

if (graphics)
    ggplot() + plot.varmod(myvario,mymodel) + plot.decoration(title="Model for Pb")