import numpy as np
import matplotlib.pyplot as plt
import gstlearn as gl
import gstlearn.plot as gp

# parameters for the simulation
m   = 1.
sig = 0.5

# initialization of the grid
grd = gl.DbGrid.create(x0=(0.0,0.0), dx=(0.01,0.01), nx=(100,100))

# simulation from a model
model = gl.Model.createFromParam(gl.ECov.EXPONENTIAL, range=0.2)
gl.simtub(dbin = None, dbout = grd, model = model, nbsimu = 1)
grd.setName("Simu", "Y")
grd["Z"] = m * np.exp(sig * grd["Y"].squeeze() - sig**2 / 2)

# Data set (10% of the grid)
data = gl.Db.createSamplingDb(grd, 0.1, flagAddSampleRank=0)
data.useSel = True

# plots
fig, (ax1,ax2) = plt.subplots(1,2, figsize=(15,6))
ax1.raster(grd,"Z")
ax1.decoration(title="Initial variable")
ax2.histogram(grd, name='Z',  bins = 25, color="orange")
ax2.decoration(xlabel = "Raw variable", title="Histogram of the initial variable")

fig, ax3 = plt.subplots(figsize=(6,6))
ax3.raster(grd,"Z")
ax3.gstpoint(data,nameSize="Z", color='yellow')
ax3.decoration(title="Random data subsample")

zcut = np.quantile(data['Z'], q = [0.3, 0.5, 0.7, 0.9])

print("\n{:^40}".format("Coupures sur la variable Z"))
print("{:^10}{:^10}{:^10}{:^10}".format(0.3, 0.5, 0.7, 0.9))
print("{:^10.3f}{:^10.3f}{:^10.3f}{:^10.3f}".format(*zcut),'\n')

mylimits = gl.Limits(zcut, True) #defines limits based on the cutoff values 
# True for addFromZero, so that we add the interval [0, z_1[
mylimits.display()

       Coupures sur la variable Z       
   0.3       0.5       0.7       0.9    
  0.712     0.920     1.241     1.778    

Bound( 1 ) : [ 0 ; 0.712209 [
Bound( 2 ) : [ 0.712209 ; 0.920089 [
Bound( 3 ) : [ 0.920089 ; 1.24076 [
Bound( 4 ) : [ 1.24076 ; 1.77795 [

# Compute the indicators (4 new variables)
iud_Indic = mylimits.toIndicator(data, name='Z', OptionIndicator=1) 
# Compute the discretized version of the variable
iud_Mean  = mylimits.toIndicator(data, name='Z', OptionIndicator=0) 

# statistics on the indicators
w = gl.dbStatisticsMono(data, ["Indicator.Z.Class*"], [gl.EStatOption.MEAN]).getValues()
w = list(w) + [1 - np.sum(w)]
print("Proportions = ", np.round(w,3))
Nclass = len(w) # total number of indicators, including the fifth one which is not computed

Proportions =  [0.3 0.2 0.2 0.2 0.1]

print(data)
ax = data.plot(nameColor="*Mean")
ax.decoration(title="Discretized variable")

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 10
Total number of samples      = 1000

Variables
---------
Column = 0 - Name = rank - Locator = NA
Column = 1 - Name = x1 - Locator = x1
Column = 2 - Name = x2 - Locator = x2
Column = 3 - Name = Y - Locator = NA
Column = 4 - Name = Z - Locator = NA
Column = 5 - Name = Indicator.Z.Class.1 - Locator = NA
Column = 6 - Name = Indicator.Z.Class.2 - Locator = NA
Column = 7 - Name = Indicator.Z.Class.3 - Locator = NA
Column = 8 - Name = Indicator.Z.Class.4 - Locator = NA
Column = 9 - Name = Indicator.Z.Mean - Locator = z1

# Locate Z
data.clearLocators(gl.ELoc.Z)
data.setLocator("Z", gl.ELoc.Z)

# Variogram parameters (omnidirectional)
varioParam = gl.VarioParam.createOmniDirection(npas=10, dpas=0.05)
# Compute variogram
var_Z = gl.Vario.computeFromDb(varioParam, data)

# fit model
mod_Z = gl.Model()
opt=gl.Option_AutoFit()
opt.setWmode(2) # weighted proportional to the number of pairs and inverse proportional to the distance
mod_Z.fit(var_Z, [gl.ECov.NUGGET, gl.ECov.EXPONENTIAL, gl.ECov.EXPONENTIAL], mauto = opt)

# plot
ax = gp.varmod(var_Z, mod_Z, flagLegend=True)
ax.decoration(title = "Variogram of the data sample")

mod_Z.display()

Model characteristics
=====================
Space dimension              = 2
Number of variable(s)        = 1
Number of basic structure(s) = 2
Number of drift function(s)  = 0
Number of drift equation(s)  = 0

Covariance Part
---------------
Exponential
- Sill         =      0.152
- Range        =      0.091
- Theo. Range  =      0.030
Exponential
- Sill         =      0.207
- Range        =      0.627
- Theo. Range  =      0.209
Total Sill     =      0.359
Known Mean(s)     0.000

data.clearLocators(gl.ELoc.Z)
data.setLocator("Indicator.Z.Mean", gl.ELoc.Z)

var_Z = gl.Vario.computeFromDb(varioParam, data)

mod_Z = gl.Model()
mod_Z.fit(var_Z, [gl.ECov.NUGGET, gl.ECov.EXPONENTIAL, gl.ECov.EXPONENTIAL], mauto = opt)

ax = gp.varmod(var_Z, mod_Z, flagLegend=True)
ax.decoration(title = "Variogram of the discretized variable")

mod_Z.display()

Model characteristics
=====================
Space dimension              = 2
Number of variable(s)        = 1
Number of basic structure(s) = 3
Number of drift function(s)  = 0
Number of drift equation(s)  = 0

Covariance Part
---------------
Nugget Effect
- Sill         =      0.016
Exponential
- Sill         =      0.054
- Range        =      0.074
- Theo. Range  =      0.025
Exponential
- Sill         =      0.079
- Range        =      0.641
- Theo. Range  =      0.214
Total Sill     =      0.149
Known Mean(s)     0.000

data.clearLocators(gl.ELoc.Z)
data.setLocator("Indicator.Z.Class*", gl.ELoc.Z)

var_Z = gl.Vario.computeFromDb(varioParam, data)

mod_Z = gl.Model()
mod_Z.fit(var_Z, [gl.ECov.NUGGET, gl.ECov.EXPONENTIAL, gl.ECov.EXPONENTIAL], mauto = opt)

ax = gp.varmod(var_Z, mod_Z)
gp.decoration(ax,title = "Simple and cross variograms of the indicators of Z", fontsize=20)

data.clearLocators(gl.ELoc.Z)
data.setLocator("Indicator.Z.Class*", gl.ELoc.Z)

maf_I = gl.PCA()
maf_I.maf_compute_interval(data, hmin=0.045, hmax=0.055)
maf_I.display()

#extract matrices of conversion between factors and indicators
#Mz2f = np.reshape(maf_I.getZ2Fs(),(Nclass-1,Nclass-1)).T
#Mf2z = np.reshape(maf_I.getF2Zs(),(Nclass-1,Nclass-1)).T

PCA Contents
------------
Means
               [,  0]
     [  0,]     0.300
     [  1,]     0.200
     [  2,]     0.200
     [  3,]     0.200
Covariance Matrix
               [,  0]    [,  1]    [,  2]    [,  3]
     [  0,]     0.210    -0.060    -0.060    -0.060
     [  1,]    -0.060     0.160    -0.040    -0.040
     [  2,]    -0.060    -0.040     0.160    -0.040
     [  3,]    -0.060    -0.040    -0.040     0.160
Variogram Matrix at lag h
               [,  0]    [,  1]    [,  2]    [,  3]
     [  0,]     0.137    -0.055    -0.043    -0.034
     [  1,]    -0.055     0.144    -0.044    -0.033
     [  2,]    -0.043    -0.044     0.155    -0.048
     [  3,]    -0.034    -0.033    -0.048     0.148
Matrix MZ2F to transform standardized Variables Z into Factors F
               [,  0]    [,  1]    [,  2]    [,  3]
     [  0,]     0.822     1.539     0.682     3.132
     [  1,]     1.307     0.292     2.998     2.050
     [  2,]    -0.586     2.663     2.276     1.539
     [  3,]     2.387     2.536     1.344     1.025
Y = (Z - m) * MZ2F)
Matrix MF2Z to back-transform Factors F into standardized Variables Z
               [,  0]    [,  1]    [,  2]    [,  3]
     [  0,]    -0.014     0.088    -0.291     0.304
     [  1,]    -0.006    -0.254     0.221     0.195
     [  2,]    -0.254     0.294     0.150    -0.037
     [  3,]     0.381     0.038    -0.065    -0.168
Z = m + Y * MF2Z

maf_I.mafOfIndex()

array([ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ,
       -0.2989824 ,  1.1754654 , -3.55711089,  3.87611926, -2.09200035,
       -0.39522446, -3.02283878,  2.90560547,  2.5865877 , -3.75303541,
       -2.26694779,  3.73995343,  1.93405171, -0.39606933, -3.75502824,
        2.4777356 ,  0.76783218, -0.50985692, -1.79568474, -4.35778781])

# Calcul des maf comme fonction de l'index
maf = np.reshape(maf_I.mafOfIndex(), (Nclass, Nclass)).T

## Check that the factors are orthogonal
tab = np.dot(np.dot(maf.T, np.diag(w)),maf).round(10)
print("Correlation between factors MAF")
print(tab)

## plot MAF as a function of the index
fig, ax = plt.subplots(figsize = (10,6))
for k in range(Nclass-1):
    ax.plot(range(Nclass), np.sign(maf[0, k+1]) * maf[:,k+1], marker='.', label="MAF-"+str(k+1))
ax.set_xticks(range(Nclass), range(1, Nclass+1))
ax.hlines(0, 0, Nclass-1, color='black', linestyle='dashed', linewidth=0.7)
ax.set_xlabel("Indice des classes")
ax.set_ylabel("MAF")
l = ax.legend()

Correlation between factors MAF
[[ 1.          0.         -0.         -0.         -0.        ]
 [-0.          6.27627515  0.04800852  0.18515279 -0.15941253]
 [-0.          0.04800852  6.30949487  0.33603074 -0.34771972]
 [-0.          0.18515279  0.33603074  6.5286751   0.47064898]
 [-0.         -0.15941253 -0.34771972  0.47064898  4.55558437]]

maf_I.dbZ2F(data) # calculate MAF on the data points (new variables in data)

vario_maf = gl.Vario.computeFromDb(varioParam, data)

model_maf = gl.Model()
model_maf.fit(vario_maf, [gl.ECov.NUGGET, gl.ECov.EXPONENTIAL, gl.ECov.EXPONENTIAL], mauto = opt)

ax = gp.varmod(vario_maf, model_maf)
gp.decoration(ax, title = "Variograms of the MAFs", fontsize=20)

# models of the MAFs

fig, axs = plt.subplots(2,2, figsize=(11,7))

varios, models = [], []
for f in range(Nclass-1):
    vario_reduced = gl.Vario(vario_maf)
    vario_reduced.resetReduce(varcols = [f], dircols = [])
    model = gl.Model()
    opt=gl.Option_AutoFit()
    opt.setWmode(0)
    model.fit(vario_reduced, [gl.ECov.NUGGET, gl.ECov.EXPONENTIAL, gl.ECov.EXPONENTIAL], mauto=opt) 
    varios.append(vario_reduced)
    models.append(model)
    
    gp.varmod(vario_reduced, model, axsOld=axs.flat[f])
    axs.flat[f].set_title(f"Simple variogram of MAF{f+1}")

#define neighborhood
neigh = gl.NeighMoving.create(nmaxi=30, radius=0.5)

# Kriging MAF1
data.setLocator("F*1", gl.ELoc.Z, cleanSameLocator=True)
gl.kriging(data, grd, models[0], neigh)

# Kriging MAF2
data.setLocator("F*2", gl.ELoc.Z, cleanSameLocator=True)
err = gl.kriging(data, grd, models[1], neigh)

### outils de gstlearn : a ameliorer ? Ci-dessous permet de trouver l'estimation des indicatrices 
# à partir des MAFs:
# gridData.setLocator("Kriging.F*.estim", gl.ELoc.Z)
# maf_I.dbF2Z(gridData) # estimation of the indicators
### mais 2 manques:
 # avoir directement l'estimation d'une autre variable (ici S15), à partir des coefficient 
 # de la combinaison linéaire des indicatrices
 # ne prendre en compte que les premiers MAFs (eg dans le cas où les derniers MAFs ne sont pas structurés, 
 # c'est inutile de les kriger)
 # en attendant j'utilise la fonction définie ci-dessous

def vecF2myvar(maf, VecF, coefs=None, mean=None, SigmaF=None):
    """Compute values of a variable that is a linear combination of Z variables, from vectors of factors F.
    maf: object containing infos on the MAF computation (class PCA)
    VecF: table containing values of the factors, shape (Ndata*NF), where NF is the number of factors used 
          (if NF is lower than the total number of factors, then only the first ones are used, 
           the others are considered not structured spatially)
    coefs : coefficients of the variable along all variables Z
    mean : mean of the variable.
    SigmaF: table of the standard deviation of the estimation error of the factors, same shape as VecF. 
            If given, the standard deviation of the output variable are also computed and returned.
    
    eg: coefs = [1,0,0,0,0] is the first variable (indicator), with mean = 0 if not specified.
    """
    VecF = np.atleast_2d(VecF)
    Ndata, NF = VecF.shape
    Nvar = maf.getNVar()
    if NF < Nvar:
        print("The number of factors is less than the number of variables. The other factors will be neglected.")
    
    Mf2z      = maf.getF2Zs().toTL()
    sigma     = np.array(maf.getSigmas())
    means     = np.array(maf.getMeans())
    mean_N    = 1 - np.sum(means)
    coefs     = np.atleast_2d(coefs).T # one column = coefs for 1 variable
    
    if len(coefs) > Nvar + 1:
        raise ValueError(f"Too many coefficients were given ({len(coefs)}), coefs should be of length {Nvar} or {Nvar+1}.")
        
    if mean is None and len(coefs) == Nvar + 1:
        all_means = np.array(list(means) + [mean_N])
        mean = np.sum(coefs.T*all_means, axis=1)
    elif mean is None:
        raise ValueError("The mean of the variable should be given when only the N-1 coefficients are given.")
    
    if len(coefs) == Nvar:
        c_N = 1/mean_N*(mean - np.sum(coefs.T*means, axis=1))
        coefs = np.append(coefs, np.atleast_2d(c_N).T, axis=1) # add c_N
    if len(coefs) != Nvar + 1:
        print("Wrong len for coefs : ", len(coefs))
    
    Vweights = (sigma*(coefs[:-1] - coefs[-1]).T).T # w_i = (c_i - c_N)*sigma_i
    M = np.dot(Mf2z[:NF,:], Vweights)
    VecMyvar = np.dot(VecF, M) + mean
    
    if SigmaF is not None:
        VecSigmaMyvar = np.sqrt(np.dot(SigmaF**2, M**2))
        return np.squeeze(VecMyvar), VecSigmaMyvar.squeeze()
    else:
        return VecMyvar.squeeze()

# extract values of the first two MAFs (the other ones have only nugget effect)
grd.useSel=True
VecMAF   =  grd["Kriging.F.*.estim"]
SigmaMAF =  grd["Kriging.F.*.stdev"]

# estimation of the indicators
INDestim, INDstdev = vecF2myvar(maf_I, VecMAF, coefs=np.eye(5), SigmaF=SigmaMAF)
for i_indicator in range(5):
    grd[f"Kriging.Indicator{i_indicator+1}.estim"] = INDestim[:,i_indicator]
    
# estimation of S15
mean_per_class = np.unique(data["*Mean"])
Zestim, Zstdev = vecF2myvar(maf_I, VecMAF, coefs=mean_per_class, SigmaF=SigmaMAF, mean=np.mean(data['Z']))

grd["Kriging.Z.estim"] = Zestim # Z*
grd["Kriging.Z.stdev"] = Zstdev # standard deviation of the estimation error Var(Z*-Z) = S

The number of factors is less than the number of variables. The other factors will be neglected.
The number of factors is less than the number of variables. The other factors will be neglected.

fig, axs = plt.subplots(1,2, figsize=(10,5))
axs[0].raster(grd,"Kriging.F*1.estim")
axs[0].decoration(title='Estimation of MAF1')
axs[1].raster(grd,"Kriging.F*2.estim")
axs[1].decoration(title='Estimation of MAF2')

fig, axs = plt.subplots(1,3, figsize=(15,5))
axs[0].raster(grd,"Z")
axs[0].decoration(title='Initial Variable Z')
axs[1].raster(grd,"Kriging.Z.estim")
axs[1].decoration(title='Estimation of Z with DDK')
axs[2].raster(grd,"Kriging.Z.stdev")
axs[2].decoration(title='Standard deviation of kriging')

Discrete Disjunctive Kriging¶

Simulation of a reference data set¶

Elementary statistics¶

Discretization of the variable on cutoff values¶

Variography (omnidirectional)¶

Variogram of the raw variable Z¶

Variogram of the discretized variable¶

Variograms of the Indicator variables¶

MAF : Min/Max Autocorrelation Factors¶

Computing MAFs¶

Visualize MAF and their variograms¶

Kriging MAFs¶

Define individual models for the MAFs¶