import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc
import numpy as np
import matplotlib.pyplot as plt

# Notebook display helper from gstlearn.document
# (presumably disables scrolling of cell outputs -- confirm).
gdoc.setNoScroll()

# Data
# Seed NumPy's global random generator.
np.random.seed(123)
ndat = 100  # number of samples passed to gl.Db.createFillRandom below
ndim = 2    # space dimension passed to gl.Db.createFillRandom below

# Model
rangev = 0.2  # range of the Matern structure
sill = 1.     # sill of the Matern structure
nugget = 0.1  # sill of the nugget effect

# Z : data vector
# Covmat : covariance matrix
# drift : drift matrix
# A and c encode the constraints on the coefficient vector beta,
# in the form A * beta = c

def estimCoeff(Z,Covmat,drift,A=None,c=None):
    """Estimate the drift coefficients by generalized least squares.

    Without constraints the estimate is
    ``beta = (X' C^-1 X)^-1 X' C^-1 Z`` where ``X`` is ``drift`` and ``C`` is
    ``Covmat``. When both ``A`` and ``c`` are given, the estimate is corrected
    so that it satisfies the linear constraints ``A @ beta = c``:
    ``beta_c = beta - U A' (A U A')^-1 (A beta - c)`` with
    ``U = (X' C^-1 X)^-1``.

    Parameters
    ----------
    Z : array of shape (n,)
        Data vector.
    Covmat : array of shape (n, n)
        Covariance matrix of the data.
    drift : array of shape (n, p)
        Drift (design) matrix.
    A : array of shape (k, p), optional
        Constraint matrix.
    c : sequence of length k, optional
        Right-hand side of the constraints.

    Returns
    -------
    Array of the p estimated coefficients, or ``np.nan`` when the shapes of
    ``A`` and ``c`` are inconsistent with each other or with ``drift``.
    """
    constrained = A is not None and c is not None
    if constrained and (A.shape[0] != len(c) or A.shape[1] != drift.shape[1]):
        return np.nan

    # Use the drift matrix received as argument (not a module-level global).
    invcovmat = np.linalg.inv(Covmat)
    invu = np.linalg.inv(drift.T @ invcovmat @ drift)
    estimatedCoeffs = invu @ drift.T @ invcovmat @ Z

    if not constrained:
        return estimatedCoeffs

    # Correct the unconstrained estimate so that A @ beta = c holds.
    temp = invu @ A.T @ np.linalg.inv(A @ invu @ A.T)
    return estimatedCoeffs - temp @ A @ estimatedCoeffs + temp @ c

def computeLogLikelihood(Z,Covmat,drift,coeffs=None,A=None,c=None):
    """Return the Gaussian log-likelihood of Z given a covariance and a drift.

    Parameters
    ----------
    Z : array of shape (n,)
        Data vector.
    Covmat : array of shape (n, n)
        Covariance matrix of the data (must admit a Cholesky factorization).
    drift : array of shape (n, p)
        Drift (design) matrix.
    coeffs : array of shape (p,), optional
        Drift coefficients. When omitted they are estimated with
        ``estimCoeff`` (under the constraints ``A @ beta = c`` if given).
    A, c : optional
        Linear constraints forwarded to ``estimCoeff``.

    Returns
    -------
    ``-0.5 * (quad + logdet + n * log(2 * pi))`` where ``quad`` is the
    quadratic form of the centered data and ``logdet`` the log-determinant
    of ``Covmat``.
    """
    if coeffs is None:
        # The drift matrix must be forwarded; A and c follow it.
        coeffs = estimCoeff(Z,Covmat,drift,A,c)
    Zc = Z - coeffs @ drift.T
    cholcovmat = np.linalg.cholesky(Covmat)
    # With Covmat = L L', quad = |L^-1 Zc|^2 and logdet = 2 * sum(log(diag(L))).
    Zcstd = np.linalg.solve(cholcovmat,Zc)
    quad = Zcstd.T @ Zcstd
    logdet = 2. * np.sum(np.log(np.diag(cholcovmat)))
    return -0.5 * (quad + logdet + len(Z) * np.log(2. * np.pi))

# Build a Matern covariance (third parameter 1) with the range and sill set
# above, then add a nugget effect. The trailing expression displays the model.
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sill=sill)
model.addCovFromParam(gl.ECov.NUGGET,sill=nugget)
model

Model characteristics
=====================
Space dimension              = 2
Number of variable(s)        = 1
Number of basic structure(s) = 2
Number of drift function(s)  = 0
Number of drift equation(s)  = 0

Covariance Part
---------------
Matern (Third Parameter = 1)
- Sill         =      1.000
- Range        =      0.200
- Theo. Range  =      0.058
Nugget Effect
- Sill         =      0.100
Total Sill     =      1.100
Known Mean(s)     0.000

# Random data base of ndat points in dimension ndim
# (third argument presumably the number of variables -- confirm).
dat = gl.Db.createFillRandom(ndat, ndim, 0)
# Copy the first coordinate into a column named "drift".
dat["drift"] = dat["x-1"]
# Simulate the model on the data base; per the printed summary this adds a
# "Simu" column with locator z1 (presumably a non-conditional turning-bands
# simulation -- confirm).
gl.simtub(None,dat,model)
dat

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 5
Total number of samples      = 100

Variables
---------
Column = 0 - Name = rank - Locator = NA
Column = 1 - Name = x-1 - Locator = x1
Column = 2 - Name = x-2 - Locator = x2
Column = 3 - Name = drift - Locator = NA
Column = 4 - Name = Simu - Locator = z1

# True drift coefficient: a constant mean added to the simulated values.
truecoeffs = [0.5]
dat["Simu"] = truecoeffs[0] + dat["Simu"]
#dat.setLocator("drift",gl.ELoc.F)
dat

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 5
Total number of samples      = 100

Variables
---------
Column = 0 - Name = rank - Locator = NA
Column = 1 - Name = x-1 - Locator = x1
Column = 2 - Name = x-2 - Locator = x2
Column = 3 - Name = drift - Locator = NA
Column = 4 - Name = Simu - Locator = z1

# Attach a drift to the model; per the printed model this yields a single
# drift function (the universality condition).
model.setDriftIRF(0,0)
model

Model characteristics
=====================
Space dimension              = 2
Number of variable(s)        = 1
Number of basic structure(s) = 2
Number of drift function(s)  = 1
Number of drift equation(s)  = 1

Covariance Part
---------------
Matern (Third Parameter = 1)
- Sill         =      1.000
- Range        =      0.200
- Theo. Range  =      0.058
Nugget Effect
- Sill         =      0.100
Total Sill     =      1.100

Drift Part
----------
Universality_Condition

# Drift matrix and covariance matrix evaluated on the data
# (toTL() presumably converts to a NumPy array -- confirm).
X = model.evalDriftMatrix(dat).toTL()
Covmat = model.evalCovMatrixSymmetric(dat).toTL()

# Single constraint A * beta = c, i.e. beta = 0.3.
A = np.array([1]).reshape(1,1)
c = [0.3]

# Constrained estimate of the drift coefficient.
estimCoeff(dat["Simu"],Covmat,X,A,c)

array([0.3])

# Unconstrained estimate of the drift coefficient.
estimCoeff(dat["Simu"],Covmat,X)

array([0.72774768])

# Log-likelihood computed by gstlearn (second argument presumably turns on the
# verbose report -- confirm).
likelihood = model.computeLogLikelihood(dat, True)

# Same quantity computed with the NumPy implementation, for comparison.
manual_loglik = computeLogLikelihood(dat["Simu"],Covmat,X)
print(f"Compute manual loglikelihood = {np.round(manual_loglik,6)}")

Likelihood calculation:
- Number of active samples     = 100
- Number of variables          = 1
- Length of Information Vector = 100
- Number of drift conditions = 1
Optimal Drift coefficients = 
     0.728
Log-Determinant = -33.879144
Quadratic term = 90.846364
Log-likelihood = -120.377463
Compute manual loglikelihood = -120.377463

# Bivariate sill matrix built from two standard deviations (s1, s2) and a
# correlation r: [[s1^2, r*s1*s2], [r*s1*s2, s2^2]].
s1=0.4
s2=2
r = 0.8
sills = np.array([s1**2,r*s1*s2,r*s1*s2,s2**2]).reshape(2,2)
# Matern model with the sill matrix passed as a flat vector.
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sills=sills.reshape(-1))

ndat=200
# New random data base; per the printed summary the extra argument 2 creates
# two columns f-1 and f-2 with locators f1 and f2.
dat = gl.Db.createFillRandom(ndat, ndim, 0,2)
# Copy the first coordinate into a column named "drift".
dat["drift"] = dat["x-1"]
# Simulate the bivariate model; per the printed summary this adds the columns
# Simu.1 and Simu.2.
gl.simtub(None,dat,model)
dat

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 8
Total number of samples      = 200

Variables
---------
Column = 0 - Name = rank - Locator = NA
Column = 1 - Name = x-1 - Locator = x1
Column = 2 - Name = x-2 - Locator = x2
Column = 3 - Name = f-1 - Locator = f1
Column = 4 - Name = f-2 - Locator = f2
Column = 5 - Name = drift - Locator = NA
Column = 6 - Name = Simu.1 - Locator = z1
Column = 7 - Name = Simu.2 - Locator = z2

# Cross-plot of the two simulated variables.
ax = plt.scatter(dat["Simu.1"],dat["Simu.2"])

# True (intercept, slope) for each variable.
truecoeffs1 = [0.5,3]
truecoeffs2 = [1.5,-2]
# Attach the drift to the model; the likelihood report printed further down
# lists 4 drift coefficients for the 2 variables.
model.setDriftIRF(0,1)
# Add a linear trend in the "drift" column to each simulated variable.
dat["Simu.1"] =  truecoeffs1[0] + truecoeffs1[1] * dat["drift"] + dat["Simu.1"]
dat["Simu.2"] =  truecoeffs2[0] + truecoeffs2[1] * dat["drift"] + dat["Simu.2"]
# Give the "drift" column the F locator.
dat.setLocator("drift",gl.ELoc.F)

# Covariance and drift matrices evaluated on the data.
Covmat = model.evalCovMatrixSymmetric(dat).toTL()
X = model.evalDriftMatrix(dat).toTL()

# Stack the simulated variables into a single flat vector
# (assumes dat["Simu*"] is (ndat, 2), so variable 1 comes first -- confirm).
Z = dat["Simu*"]
Z=Z.T.reshape(-1)

# Unconstrained estimate; coeffs[0:2] are plotted as intercept/slope of the
# first variable and coeffs[2:4] as those of the second one.
coeffs = estimCoeff(Z,Covmat,X)
fig,ax = plt.subplots(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])

[<matplotlib.lines.Line2D at 0x7f5efc28edd0>]

# Show the unconstrained coefficient estimates.
print("No Constraint = ",coeffs)

No Constraint =  [ 0.59138302  3.04949823  1.96840598 -2.14045187]

# Log-likelihood computed by gstlearn for the bivariate model
# (second argument presumably turns on the verbose report -- confirm).
likelihood = model.computeLogLikelihood(dat, True)

Likelihood calculation:
- Number of active samples     = 200
- Number of variables          = 2
- Length of Information Vector = 400
- Number of drift conditions = 4
Optimal Drift coefficients = 
     0.591     3.049     1.968    -2.140
Log-Determinant = -676.517039
Quadratic term = 484.076013
Log-likelihood = -271.354901

# Constraints: coeffs[0] = 0.5 and coeffs[2] = 1.5 (both intercepts imposed).
A = np.array([[1,0,0,0],[0,0,1,0]])
c = [0.5,1.5]
coeffs=estimCoeff(Z,Covmat,X,A,c)

# Plot each variable against x-1 with its fitted line between x=0 and x=1.
fig,ax = plt.subplots(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])

[<matplotlib.lines.Line2D at 0x7f5eb1c16bd0>]

# Show the estimates obtained with both intercepts imposed.
print("a0=0.5 and b0=1.5) = ",coeffs)

a0=0.5 and b0=1.5) =  [ 0.5         3.17779706  1.5        -1.48282494]

# Constraints: coeffs[0] = coeffs[2] and coeffs[1] = coeffs[3]
# (same intercept and same slope for both variables).
A = np.array([[1,0,-1,0],[0,1,0,-1]])
c = [0,0]
coeffs = estimCoeff(Z,Covmat,X,A,c)

# Plot each variable against x-1 with its fitted line between x=0 and x=1.
fig,ax = plt.subplots(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])

[<matplotlib.lines.Line2D at 0x7f5eb1b055d0>]

# Show the estimates obtained with identical coefficients for both variables.
print("a0=b0 and a1=b1",coeffs)

a0=b0 and a1=b1 [0.3618792  3.91448991 0.3618792  3.91448991]

# Link the drift coefficients across variables in the gstlearn model; the
# report printed below then lists 2 drift coefficients, matching the
# constrained NumPy estimate above.
model.setFlagLinked(True)
likelihood = model.computeLogLikelihood(dat, True)

Likelihood calculation:
- Number of active samples     = 200
- Number of variables          = 2
- Length of Information Vector = 400
- Number of drift conditions = 2
Optimal Drift coefficients = 
     0.362     3.914
Log-Determinant = -676.517039
Quadratic term = 528.711994
Log-likelihood = -293.672891

# Constraint: coeffs[0] = coeffs[2] (equal intercepts, slopes left free).
A = np.array([[1,0,-1,0]])
c = [0]
coeffs = estimCoeff(Z,Covmat,X,A,c)

# Plot each variable against x-1 with its fitted line between x=0 and x=1.
fig,ax = plt.subplots(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])

[<matplotlib.lines.Line2D at 0x7f5eb1b6b550>]

# Show the estimates obtained with equal intercepts.
print("a0 = b0",coeffs)

a0 = b0 [0.3618792  3.37171422 0.3618792  0.11506005]

# Constraints: coeffs[0] = coeffs[2] (equal intercepts) and coeffs[1] = 1
# (slope of the first variable imposed).
A = np.array([[1,0,-1,0],[0,1,0,0]])
c = [0,1]
coeffs = estimCoeff(Z,Covmat,X,A,c)

# Plot each variable against x-1 with its fitted line between x=0 and x=1.
fig,ax = plt.subplots(1,2)
ax[0].scatter(dat["x-1"],dat["Simu.1"])
ax[0].plot([0,1],[coeffs[0],coeffs[0]+coeffs[1]])
ax[1].scatter(dat["x-1"],dat["Simu.2"])
ax[1].plot([0,1],[coeffs[2],coeffs[2]+coeffs[3]])

[<matplotlib.lines.Line2D at 0x7f5eb1b1c590>]

# Show the estimates obtained with equal intercepts and the first slope imposed.
print("a0=b0 and a1=1 ",coeffs)

a0=b0 and a1=1  [ 1.24506848  1.          1.24506848 -5.65188878]

Constraints on drifts

Parameters

Monovariate case

Model

Data

Multivariate

No constraint

Means of both variables are imposed

Same coefficients for mean and drift coefficients

Means are equal

Means are linked, coefficient of drift on the first variable is imposed