In [1]:
import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc
import numpy as np
import matplotlib.pyplot as plt
gdoc.setNoScroll()  # disable output scrolling in the rendered notebook
Constraints on drifts¶
This tutorial has two essential tasks:
- cross-check the calculation of the Log-Likelihood (by hand or with gstlearn)
- design the definition pattern for the linkage of the drift functions in the multivariable framework
Parameters¶
In [2]:
# Data
np.random.seed(123)  # fixed seed for reproducibility
ndat = 100           # number of data samples
ndim = 2             # space dimension
# Model
rangev = 0.2         # range of the Matern covariance
sill = 1.            # sill (variance) of the Matern structure
nugget = 0.1         # nugget effect variance
In [3]:
def estimCoeff(Z, Covmat, drift, A=None, c=None):
    """Generalized least-squares estimate of the drift coefficients.

    Minimizes (Z - drift @ beta)' Covmat^-1 (Z - drift @ beta), optionally
    subject to the linear equality constraints A @ beta = c.

    Parameters
    ----------
    Z : ndarray, shape (n,)
        Data vector.
    Covmat : ndarray, shape (n, n)
        Covariance matrix of the data.
    drift : ndarray, shape (n, p)
        Drift (design) matrix.
    A : array-like, shape (k, p), optional
        Constraint matrix; only used when `c` is also provided.
    c : array-like, shape (k,), optional
        Right-hand side of the constraints A @ beta = c.

    Returns
    -------
    ndarray of shape (p,) with the estimated coefficients, or np.nan when
    the constraint dimensions are inconsistent (kept for backward
    compatibility with existing callers).
    """
    if A is not None and c is not None:
        A = np.asarray(A)  # accept lists of lists as well as ndarrays
        # Dimension check of the constraint system A @ beta = c
        if A.shape[0] != len(c) or A.shape[1] != drift.shape[1]:
            return np.nan
    invcovmat = np.linalg.inv(Covmat)
    # GLS normal equations: beta = (X' C^-1 X)^-1 X' C^-1 Z
    invu = np.linalg.inv(drift.T @ invcovmat @ drift)
    estimatedCoeffs = invu @ drift.T @ invcovmat @ Z
    if A is None or c is None:
        # No (complete) constraint specification: plain GLS estimate.
        # Note: if only one of A/c is given, constraints are silently ignored.
        return estimatedCoeffs
    # Restricted least squares: project the unconstrained estimate
    # onto the affine set {beta : A @ beta = c}.
    temp = invu @ A.T @ np.linalg.inv(A @ invu @ A.T)
    return estimatedCoeffs - temp @ A @ estimatedCoeffs + temp @ c
def computeLogLikelihoodByHand(Z, Covmat, drift, coeffs=None, A=None, c=None):
    """Gaussian log-likelihood of Z for covariance Covmat and a linear drift.

    When `coeffs` is None, the drift coefficients are first estimated by
    (possibly constrained) generalized least squares through estimCoeff.
    Returns -0.5 * (quadratic form + log det(Covmat) + n log(2 pi)).
    """
    if coeffs is None:
        coeffs = estimCoeff(Z, Covmat, drift, A, c)
    residual = Z - coeffs @ drift.T
    # Cholesky factor L with Covmat = L L'; whitening the residual turns
    # the quadratic form r' Covmat^-1 r into a plain sum of squares.
    chol = np.linalg.cholesky(Covmat)
    whitened = np.linalg.solve(chol, residual)
    quadratic = whitened.T @ whitened
    # log det(Covmat) = 2 * sum(log diag(L))
    log_det = 2. * np.sum(np.log(np.diag(chol)))
    n = len(Z)
    return -0.5 * (quadratic + log_det + n * np.log(2. * np.pi))
def printCoeffs(title, coeffs, ndec=6):
    """Print `coeffs` rounded to `ndec` decimals, prefixed by `title`."""
    rounded = np.round(coeffs, ndec)
    print(title + " : " + str(rounded))
Monovariate case¶
Model¶
In [4]:
# Monovariate model: Matern structure (third parameter = 1) plus a nugget effect
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sill=sill)
model.addCovFromParam(gl.ECov.NUGGET,sill=nugget)
model
Out[4]:
Model characteristics ===================== Space dimension = 2 Number of variable(s) = 1 Number of basic structure(s) = 2 Number of drift function(s) = 0 Number of drift equation(s) = 0 Covariance Part --------------- Matern (Third Parameter = 1) - Sill = 1.000 - Range = 0.200 - Theo. Range = 0.058 Nugget Effect - Sill = 0.100 Total Sill = 1.100 Known Mean(s) 0.000
Data¶
In [5]:
# Randomly located data points; copy the first coordinate into a "drift" column
dat = gl.Db.createFillRandom(ndat, ndim, 0)
dat["drift"] = dat["x-1"]
# Non-conditional simulation of the model at the data locations (turning bands)
gl.simtub(None,dat,model)
dat
Out[5]:
Data Base Characteristics ========================= Data Base Summary ----------------- File is organized as a set of isolated points Space dimension = 2 Number of Columns = 5 Total number of samples = 100 Variables --------- Column = 0 - Name = rank - Locator = NA Column = 1 - Name = x-1 - Locator = x1 Column = 2 - Name = x-2 - Locator = x2 Column = 3 - Name = drift - Locator = NA Column = 4 - Name = Simu - Locator = z1
In [6]:
# Shift the simulated variable by a known constant mean (0.5)
truecoeffs = [0.5]
dat["Simu"] = truecoeffs[0] + dat["Simu"]
#dat.setLocator("drift",gl.ELoc.F)
dat
Out[6]:
Data Base Characteristics ========================= Data Base Summary ----------------- File is organized as a set of isolated points Space dimension = 2 Number of Columns = 5 Total number of samples = 100 Variables --------- Column = 0 - Name = rank - Locator = NA Column = 1 - Name = x-1 - Locator = x1 Column = 2 - Name = x-2 - Locator = x2 Column = 3 - Name = drift - Locator = NA Column = 4 - Name = Simu - Locator = z1
In [7]:
# Attach a constant drift (order-0 IRF: the universality condition)
model.setDriftIRF(0,0)
model
Out[7]:
Model characteristics ===================== Space dimension = 2 Number of variable(s) = 1 Number of basic structure(s) = 2 Number of drift function(s) = 1 Number of drift equation(s) = 1 Covariance Part --------------- Matern (Third Parameter = 1) - Sill = 1.000 - Range = 0.200 - Theo. Range = 0.058 Nugget Effect - Sill = 0.100 Total Sill = 1.100 Drift Part ---------- Universality_Condition
In [8]:
# Extract the drift matrix X and the data-data covariance matrix as numpy arrays
X = model.evalDriftMat(dat).toTL()
Covmat = model.evalCovMatSym(dat).toTL()
In [9]:
# Constrain the (single) mean coefficient to be exactly 0.3: A * beta = c
A = np.array([1]).reshape(1,1)
c = [0.3]
coeffs = estimCoeff(dat["Simu"],Covmat,X,A,c)
printCoeffs("a=0.3", coeffs)
a=0.3 : [0.3]
The unconstrained case can also be reproduced using the Log-Likelihood principle:
- calculated by hand:
In [10]:
# Unconstrained log-likelihood computed with the manual implementation
print(f"Computed manually : " + str(np.round(computeLogLikelihoodByHand(dat["Simu"],Covmat,X),6)))
Computed manually : -120.377463
- calculated within gstlearn
In [11]:
# Same computation performed internally by gstlearn (verbose=True prints details)
likelihoodG = model.computeLogLikelihood(dat, True)
Likelihood calculation: - Number of active samples = 100 - Number of variables = 1 - Length of Information Vector = 100 - Number of drift conditions = 1 Optimal Drift coefficients = 0.728 Log-Determinant = -33.879144 Quadratic term = 90.846364 Log-likelihood = -120.377463
- using the Vecchia approximation
In [12]:
# Log-likelihood using the Vecchia approximation with 4 neighbors (verbose)
likelihoodV = gl.logLikelihoodVecchia(dat, model, 4, True)
Lower Vecchia Matrix -------------------- - Number of rows = 100 - Number of columns = 100 - Sparse Format [, 0] [, 1] [, 2] [, 3] [, 4] [, 5] [, 6] [ 0,] 1.000 . . . . . . [ 1,] -0.011 1.000 . . . . . [ 2,] -0.274 0.000 1.000 . . . . [ 3,] 0.000 -0.001 0.000 1.000 . . . [ 4,] -0.171 -0.002 -0.001 0.000 1.000 . . [ 5,] -0.028 -0.142 0.003 . -0.022 1.000 . [ 6,] 0.000 -0.026 . 0.000 . 0.001 1.000 (Ncols=7[from 100],Nrows=7[from 100]) Diagonal of Vecchia Matrix [, 0] [, 1] [, 2] [, 3] [, 4] [, 5] [, 6] [ 0,] 0.909 0.909 0.983 0.909 0.937 0.929 0.910 [ 7,] 0.909 0.910 1.190 1.099 0.930 1.022 0.951 [ 14,] 0.995 0.952 0.959 2.441 2.026 1.933 1.013 [ 21,] 0.910 0.917 1.359 0.941 0.911 1.139 0.911 [ 28,] 0.933 0.961 1.126 1.361 1.934 2.438 1.028 [ 35,] 1.817 1.363 1.309 3.203 0.931 1.230 1.736 [ 42,] 0.984 0.910 1.567 0.946 1.246 2.879 1.127 [ 49,] 1.381 1.987 1.332 1.233 0.996 1.946 1.331 [ 56,] 3.414 1.258 1.157 1.210 1.351 1.993 1.212 [ 63,] 1.573 2.322 0.971 1.060 2.079 1.220 1.334 [ 70,] 0.936 1.189 1.639 1.910 1.943 1.915 1.685 [ 77,] 2.417 1.301 4.589 1.714 1.137 4.998 1.026 [ 84,] 1.148 1.636 1.558 1.415 1.014 0.996 3.764 [ 91,] 1.897 2.804 1.283 3.324 2.089 2.169 1.425 [ 98,] 1.296 2.566 Optimal Drift coefficients = 0.725 Log-Determinant = -33.200255 Quadratic term = 89.332909 Log-likelihood = -119.960180
Bivariate¶
In [13]:
# Bivariate Matern model with a full sill matrix (correlation r between variables)
s1 = 0.4
s2 = 2.0
r = 0.8
sills = np.array([[s1**2,r*s1*s2],[r*s1*s2,s2**2]])
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sills=sills)
In [14]:
# New data set with 200 samples; simulate the bivariate model at the data points
ndat=200
dat = gl.Db.createFillRandom(ndat, ndim, 0,2)
dat["drift"] = dat["x-1"]
gl.simtub(None,dat,model)
dat
Out[14]:
Data Base Characteristics ========================= Data Base Summary ----------------- File is organized as a set of isolated points Space dimension = 2 Number of Columns = 8 Total number of samples = 200 Variables --------- Column = 0 - Name = rank - Locator = NA Column = 1 - Name = x-1 - Locator = x1 Column = 2 - Name = x-2 - Locator = x2 Column = 3 - Name = f-1 - Locator = f1 Column = 4 - Name = f-2 - Locator = f2 Column = 5 - Name = drift - Locator = NA Column = 6 - Name = Simu.1 - Locator = z1 Column = 7 - Name = Simu.2 - Locator = z2
In [15]:
# Scatter plot of the two simulated variables (correlated by construction)
ax = plt.scatter(dat["Simu.1"],dat["Simu.2"])
In [16]:
# Known drift coefficients (intercept, slope) for each variable
truecoeffs1 = [0.5, 3]
truecoeffs2 = [1.5,-2]
# setDriftIRF(0,1): constant drift plus one external drift function — see gstlearn docs
model.setDriftIRF(0,1)
dat["Simu.1"] = truecoeffs1[0] + truecoeffs1[1] * dat["drift"] + dat["Simu.1"]
dat["Simu.2"] = truecoeffs2[0] + truecoeffs2[1] * dat["drift"] + dat["Simu.2"]
dat.setLocator("drift",gl.ELoc.F)
No constraint¶
In [17]:
Covmat = model.evalCovMatSym(dat).toTL()
X = model.evalDriftMat(dat).toTL()
# Stack the two variables into a single vector (variable-major order)
Z = dat["Simu*"]
Z = Z.T.reshape(-1)
In [18]:
# Unconstrained GLS estimate; overlay the estimated drift lines on the data
coeffs = estimCoeff(Z,Covmat,X)
fig,ax = gp.init(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])
gp.close()
In [19]:
# Display the four estimated coefficients (a0, a1, b0, b1)
printCoeffs("No Constraint",coeffs)
No Constraint : [ 0.591383 3.049498 1.968406 -2.140452]
This option can be emulated in gstlearn
In [20]:
# gstlearn reproduces the unconstrained coefficients and the log-likelihood
likelihood = model.computeLogLikelihood(dat, True)
Likelihood calculation: - Number of active samples = 200 - Number of variables = 2 - Length of Information Vector = 400 - Number of drift conditions = 4 Optimal Drift coefficients = 0.591 3.049 1.968 -2.140 Log-Determinant = -676.517039 Quadratic term = 484.076013 Log-likelihood = -271.354901
Means of both variables are imposed¶
In [21]:
# Impose the intercepts: a0 = 0.5 (variable 1) and b0 = 1.5 (variable 2)
A = np.array([[1,0,0,0],[0,0,1,0]])
c = [0.5,1.5]
coeffs=estimCoeff(Z,Covmat,X,A,c)
In [22]:
# Overlay the constrained drift lines on the data
fig,ax = gp.init(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])
gp.close()
In [23]:
# The intercepts match the imposed values exactly
printCoeffs("a0=0.5 and b0=1.5", coeffs)
a0=0.5 and b0=1.5 : [ 0.5 3.177797 1.5 -1.482825]
Same coefficients for mean and drift coefficients¶
In [24]:
# Force both variables to share intercept and slope: a0=b0 and a1=b1
A = np.array([[1,0,-1,0],[0,1,0,-1]])
c = [0,0]
coeffs = estimCoeff(Z,Covmat,X,A,c)
In [25]:
# Both panels now display the same drift line
fig,ax = gp.init(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])
gp.close()
In [26]:
# Coefficients are duplicated across the two variables, as required
printCoeffs("a0=b0 and a1=b1", coeffs)
a0=b0 and a1=b1 : [0.361879 3.91449 0.361879 3.91449 ]
This can be emulated with the current flagLinked option
In [27]:
# flagLinked ties drift coefficients across variables, matching the previous constraint
model.setFlagLinked(True)
likelihood = model.computeLogLikelihood(dat, True)
Likelihood calculation: - Number of active samples = 200 - Number of variables = 2 - Length of Information Vector = 400 - Number of drift conditions = 2 Optimal Drift coefficients = 0.362 3.914 Log-Determinant = -676.517039 Quadratic term = 528.711994 Log-likelihood = -293.672891
Means are equal¶
In [28]:
# Single constraint: equal intercepts (a0 = b0); slopes remain free
A = np.array([[1,0,-1,0]])
c = [0]
coeffs = estimCoeff(Z,Covmat,X,A,c)
In [29]:
# Drift lines share the intercept but have different slopes
fig,ax = gp.init(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])
gp.close()
In [30]:
# Intercepts coincide; slopes differ
printCoeffs("a0=b0", coeffs)
a0=b0 : [0.361879 3.371714 0.361879 0.11506 ]
Means are linked, coefficient of drift on the first variable is imposed¶
In [31]:
# Equal intercepts (a0 = b0) and first-variable slope fixed: a1 = 1
A = np.array([[1,0,-1,0],[0,1,0,0]])
c = [0,1]
coeffs = estimCoeff(Z,Covmat,X,A,c)
In [32]:
# Visual check of the mixed constraint
fig,ax = gp.init(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])
gp.close()
In [33]:
# a1 is pinned to 1 and the intercepts are equal
printCoeffs("a0=b0 and a1=1", coeffs)
a0=b0 and a1=1 : [ 1.245068 1. 1.245068 -5.651889]
Multivariate¶
We now consider the case of 3 variables in order to run a more specific test.
In [34]:
# Trivariate model: random positive-definite sill matrix, single Matern structure
sills = gl.MatrixSymmetric.createRandomDefinitePositive(3)
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sills=sills)
In [35]:
# Display the trivariate model characteristics
model
Out[35]:
Model characteristics ===================== Space dimension = 2 Number of variable(s) = 3 Number of basic structure(s) = 1 Number of drift function(s) = 0 Number of drift equation(s) = 0 Covariance Part --------------- Matern (Third Parameter = 1) - Sill matrix: [, 0] [, 1] [, 2] [ 0,] 5.167 -3.000 1.843 [ 1,] -3.000 3.831 2.299 [ 2,] 1.843 2.299 6.110 - Range = 0.200 - Theo. Range = 0.058 Total Sill [, 0] [, 1] [, 2] [ 0,] 5.167 -3.000 1.843 [ 1,] -3.000 3.831 2.299 [ 2,] 1.843 2.299 6.110 Known Mean(s) 0.000 0.000 0.000
In [36]:
# New data set; simulate the three correlated variables at the data points
ndat=200
dat = gl.Db.createFillRandom(ndat, ndim, 0,2)
dat["drift"] = dat["x-1"]
gl.simtub(None,dat,model)
dat
Out[36]:
Data Base Characteristics ========================= Data Base Summary ----------------- File is organized as a set of isolated points Space dimension = 2 Number of Columns = 9 Total number of samples = 200 Variables --------- Column = 0 - Name = rank - Locator = NA Column = 1 - Name = x-1 - Locator = x1 Column = 2 - Name = x-2 - Locator = x2 Column = 3 - Name = f-1 - Locator = f1 Column = 4 - Name = f-2 - Locator = f2 Column = 5 - Name = drift - Locator = NA Column = 6 - Name = Simu.1 - Locator = z1 Column = 7 - Name = Simu.2 - Locator = z2 Column = 8 - Name = Simu.3 - Locator = z3
In [37]:
# Known (intercept, slope) per variable; add intercept + linear drift to each simulation
truecoeffs1 = [ 0.5, 3]
truecoeffs2 = [ 1.5,-2]
truecoeffs3 = [-0.5,-2]
model.setDriftIRF(0,1)
dat["Simu.1"] = truecoeffs1[0] + truecoeffs1[1] * dat["drift"] + dat["Simu.1"]
dat["Simu.2"] = truecoeffs2[0] + truecoeffs2[1] * dat["drift"] + dat["Simu.2"]
dat["Simu.3"] = truecoeffs3[0] + truecoeffs3[1] * dat["drift"] + dat["Simu.3"]
dat.setLocator("drift",gl.ELoc.F)
In [38]:
Covmat = model.evalCovMatSym(dat).toTL()
X = model.evalDriftMat(dat).toTL()
# Stack the three variables into a single vector (variable-major order)
Z = dat["Simu*"]
Z = Z.T.reshape(-1)
In [39]:
# Unconstrained GLS estimate for the three variables, one panel per variable
coeffs = estimCoeff(Z,Covmat,X)
fig,ax = gp.init(1,3, figsize=[15,4])
print(coeffs)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])
# Fix: the third panel must display the third variable (was "Simu.2", a copy-paste slip)
ax[2].scatter(dat["x-1"],dat["Simu.3"])
ax[2].plot([0,1],[coeffs[4],coeffs[4]+coeffs[5]])
gp.close()
[ 0.89610178 2.34510063 1.71794026 -1.729727 -0.00333606 -1.53702255]
Means of all variables are equal¶
This test is meant to check the way to constrain the mean value of all (three) variables to be equal: this is done by setting constraints to the drift coefficients of the variables taken two by two.
In [40]:
# Chain of pairwise constraints forcing all three intercepts to be equal:
# a0 - b0 = 0 and b0 - c0 = 0
A = np.array([[1,0,-1,0,0,0],[0,0,1,0,-1,0]])
c = [0, 0]
coeffs = estimCoeff(Z,Covmat,X,A,c)
In [41]:
# The three intercepts (positions 0, 2, 4) are now identical
coeffs
Out[41]:
array([ 2.05802776, 0.71379393, 2.05802776, -2.20719891, 2.05802776, -4.43111105])
In [ ]: