Global variables
verbose = TRUE
graphics = TRUE
err = OptCst_define(ECst_NTCOL(),6)
The data are stored in a CSV format in the file called Pollution.dat
At this stage: - it is not possible to pass the list of variable names c(“X”,“Y”). - to pass the FLAGS for DbStringFormat (they are not an ENUM)
dlfile = "https://soft.minesparis.psl.eu/gstlearn/data/Pollution/Pollution.dat"
filepath = "Pollution.dat"
download.file(dlfile, filepath, quiet=TRUE)
mydb = Db_createFromCSV(filepath,CSVformat())
err = mydb$setLocator("X",ELoc_X(),0)
err = mydb$setLocator("Y",ELoc_X(),1)
err = mydb$setLocator("Zn",ELoc_Z())
if (verbose)
{
dbfmt = DbStringFormat()
dbfmt$setFlags(flag_extend = TRUE)
mydb$display(dbfmt)
}
##
## Data Base Characteristics
## =========================
##
## Data Base Summary
## -----------------
## File is organized as a set of isolated points
## Space dimension = 2
## Number of Columns = 5
## Total number of samples = 102
##
## Data Base Extension
## -------------------
## Coor #1 - Min = 109.850 - Max = 143.010 - Ext = 33.16
## Coor #2 - Min = 483.660 - Max = 513.040 - Ext = 29.38
##
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = X - Locator = x1
## Column = 2 - Name = Y - Locator = x2
## Column = 3 - Name = Zn - Locator = z1
## Column = 4 - Name = Pb - Locator = p1
## NULL
Accessing to the variable names
cat("List of all variable names =",mydb$getAllNames())
## List of all variable names = rank X Y Zn Pb
Extracting the vector containing the Zn variable in order to perform a selection
tabZn = mydb$getColumn("Zn")
selZn = as.numeric(tabZn < 20)
mydb$addSelection(selZn,"sel")
## [1] 5
mydb$setLocator('Pb',ELoc_Z())
## NULL
if (verbose)
mydb$display()
##
## Data Base Characteristics
## =========================
##
## Data Base Summary
## -----------------
## File is organized as a set of isolated points
## Space dimension = 2
## Number of Columns = 6
## Total number of samples = 102
## Number of active samples = 99
##
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = X - Locator = x1
## Column = 2 - Name = Y - Locator = x2
## Column = 3 - Name = Zn - Locator = NA
## Column = 4 - Name = Pb - Locator = z1
## Column = 5 - Name = sel - Locator = sel
## NULL
Display my Data (with samples represented by color and size)
if (graphics)
ggplot() + plot(mydb,nameColor="Pb") + plot.decoration(title="Data Set")
## Warning: Using size for a discrete variable is not advised.
We first define the geometry of the variogram calculations
myVarioParamOmni = VarioParam()
mydir = DirParam_create(npas=10,dpas=1.)
myVarioParamOmni$addDir(mydir)
## NULL
We use the variogram definition in order to calculate the variogram cloud.
dbcloud = db_vcloud(db=mydb, varioparam=myVarioParamOmni)
We recall that the Variogram cloud is calculated by filling an underlying grid where each cell is painted according to the number of pairs at the given distance and given variability. Representing the variogram cloud.
if (graphics)
ggplot() + plot(dbcloud,nameRaster="Cloud*") + plot.decoration(title="Variogram Cloud")
Calculating the experimental omni-directional variogram
myVarioOmni = Vario(myVarioParamOmni)
err = myVarioOmni$compute(mydb, ECalcVario_VARIOGRAM())
if (verbose)
myVarioOmni$display()
##
## Variogram characteristics
## =========================
## Number of variable(s) = 1
## Number of direction(s) = 1
## Space dimension = 2
## Variance-Covariance Matrix 2.881
##
## Direction #1
## ------------
## Number of lags = 10
## Direction coefficients = 1.000 0.000
## Direction angles (degrees) = 0.000 0.000
## Tolerance on direction = 90.000 (degrees)
## Calculation lag = 1.000
## Tolerance on distance = 50.000 (Percent of the lag value)
##
## For variable 1
## Rank Npairs Distance Value
## 0 3.000 0.389 0.462
## 1 123.000 1.081 1.495
## 2 183.000 2.038 1.620
## 3 205.000 3.006 2.526
## 4 231.000 4.013 2.240
## 5 229.000 5.036 2.524
## 6 198.000 5.962 2.396
## 7 187.000 7.000 2.708
## 8 204.000 7.996 2.772
## 9 184.000 8.990 2.868
## NULL
The variogram is represented graphically for a quick check
if (graphics)
ggplot() + plot.varmod(myVarioOmni) +
plot.decoration(title="Omni-directional Variogram for Pb")
Calculate a variogram in several directions
myvarioParam = VarioParam()
mydirs = DirParam_createMultiple(ndir=4, npas=10, dpas=1.)
myvarioParam$addMultiDirs(mydirs)
## NULL
myvario = Vario(myvarioParam)
myvario$compute(mydb, ECalcVario_VARIOGRAM())
## [1] 0
if (verbose)
myvario$display()
##
## Variogram characteristics
## =========================
## Number of variable(s) = 1
## Number of direction(s) = 4
## Space dimension = 2
## Variance-Covariance Matrix 2.881
##
## Direction #1
## ------------
## Number of lags = 10
## Direction coefficients = 1.000 0.000
## Direction angles (degrees) = 0.000 0.000
## Tolerance on direction = 22.500 (degrees)
## Calculation lag = 1.000
## Tolerance on distance = 50.000 (Percent of the lag value)
##
## For variable 1
## Rank Npairs Distance Value
## 0 1.000 0.410 0.180
## 1 29.000 1.094 1.634
## 2 47.000 2.079 1.415
## 3 53.000 3.003 2.824
## 4 63.000 3.999 2.348
## 5 66.000 5.035 2.319
## 6 60.000 5.978 3.115
## 7 52.000 7.045 2.746
## 8 52.000 8.020 3.927
## 9 37.000 8.980 2.554
##
## Direction #2
## ------------
## Number of lags = 10
## Direction coefficients = 0.707 0.707
## Direction angles (degrees) = 45.000 0.000
## Tolerance on direction = 22.500 (degrees)
## Calculation lag = 1.000
## Tolerance on distance = 50.000 (Percent of the lag value)
##
## For variable 1
## Rank Npairs Distance Value
## 0 1.000 0.344 0.080
## 1 31.000 1.051 1.113
## 2 50.000 1.960 1.890
## 3 62.000 2.999 2.443
## 4 58.000 4.014 2.701
## 5 51.000 5.016 2.702
## 6 36.000 5.999 1.833
## 7 37.000 7.015 2.130
## 8 50.000 7.997 2.060
## 9 53.000 8.995 2.381
##
## Direction #3
## ------------
## Number of lags = 10
## Direction coefficients = 0.000 1.000
## Direction angles (degrees) = 90.000 0.000
## Tolerance on direction = 22.500 (degrees)
## Calculation lag = 1.000
## Tolerance on distance = 50.000 (Percent of the lag value)
##
## For variable 1
## Rank Npairs Distance Value
## 1 32.000 1.149 1.631
## 2 39.000 2.080 1.670
## 3 39.000 2.979 2.511
## 4 48.000 4.012 2.120
## 5 51.000 5.029 3.055
## 6 47.000 5.939 2.856
## 7 49.000 6.965 2.386
## 8 42.000 7.952 2.708
## 9 41.000 9.018 2.320
##
## Direction #4
## ------------
## Number of lags = 10
## Direction coefficients = -0.707 0.707
## Direction angles (degrees) = 135.000 0.000
## Tolerance on direction = 22.500 (degrees)
## Calculation lag = 1.000
## Tolerance on distance = 50.000 (Percent of the lag value)
##
## For variable 1
## Rank Npairs Distance Value
## 0 1.000 0.411 1.125
## 1 31.000 1.028 1.606
## 2 47.000 2.044 1.496
## 3 51.000 3.040 2.330
## 4 62.000 4.028 1.791
## 5 61.000 5.058 2.155
## 6 55.000 5.939 1.587
## 7 49.000 6.975 3.425
## 8 60.000 8.004 2.408
## 9 53.000 8.972 3.996
## NULL
if (graphics)
ggplot() + plot.varmod(myvario) +
plot.decoration(title="Multi-Directional Variogram of Pb")
Calculating the Variogram Map
myvmap = db_vmap(db=mydb,calcul_type=ECalcVario_VARIOGRAM(),nxx=c(20,20))
if (verbose)
myvmap$display()
##
## Data Base Grid Characteristics
## ==============================
##
## Data Base Summary
## -----------------
## File is organized as a regular grid
## Space dimension = 2
## Number of Columns = 5
## Total number of samples = 1681
##
## Grid characteristics:
## ---------------------
## Origin : -33.160 -29.380
## Mesh : 1.658 1.469
## Number : 41 41
##
## Variables
## ---------
## Column = 0 - Name = rank - Locator = NA
## Column = 1 - Name = x1 - Locator = x1
## Column = 2 - Name = x2 - Locator = x2
## Column = 3 - Name = VMAP.Pb.Var - Locator = z1
## Column = 4 - Name = VMAP.Pb.Nb - Locator = NA
## NULL
if (graphics)
ggplot() + plot(myvmap, nameRaster="*Var") +
plot.decoration(title="Variogram Map")
Fitting a Model. We call the Automatic Fitting procedure providing the list of covariance functions to be tested.
mymodel = Model_createFromDb(mydb)
err = mymodel$fit(vario=myvario,types=ECov_fromKeys(c("EXPONENTIAL","SPHERICAL")))
Visualizing the resulting model, overlaid on the experimental variogram
if (graphics)
ggplot() + plot.varmod(myvario,mymodel) + plot.decoration(title="Model for Pb")