Projection Pursuit Multivariate Transform¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc
import os

gdoc.setNoScroll()

Initialization¶

Simulation of a Reference data set on a Grid

In [2]:
grd = gl.DbGrid.create(x0=[0.0,0.0], dx=[0.01,0.01], nx=[100,100])

# Simulate two independent Gaussian Random variables
model1 = gl.Model.createFromParam(gl.ECov.GAUSSIAN, range=0.2, sill=1.0)
err = gl.simtub(None, dbout = grd, model = model1, nbsimu = 1)
grd.setName("Simu","Y1")

model2 = gl.Model.createFromParam(gl.ECov.EXPONENTIAL, range=0.1, sill=1.0)
err = gl.simtub(None, dbout = grd, model = model2, nbsimu = 1)
grd.setName("Simu","Y2")

# Non linear transform
grd["Z"] = grd["Y1"] * grd["Y1"] + 0.5 * grd["Y1"] + 0.2 * grd["Y2"]
In [3]:
grd["Y1.NS"] = gl.VH.normalScore(grd["Y1"])
grd["Z.NS"] = gl.VH.normalScore(grd["Z"])
In [4]:
grd.deleteColumns(["Y1","Y2","Z"])
grd.setName("Y1.NS","Y1")
grd.setName("Z.NS","Y2")
In [5]:
ax = grd.plot("Y1")
No description has been provided for this image
In [6]:
ax = gp.histogram(grd,name="Y1",bins=30)
No description has been provided for this image
In [7]:
ax = grd.plot("Y2")
No description has been provided for this image
In [8]:
ax = gp.histogram(grd,name="Y2", bins=30)
No description has been provided for this image
In [9]:
ax = gp.correlation(grd,namex="Y1",namey="Y2",diagLine=True, bins=50)
No description has been provided for this image

Lognormal transformation

In [10]:
m_1 = 1.0; sigma_1 = 0.25
m_2 = 0.5; sigma_2 = 0.5
grd["Z1"] = m_1 * np.exp(sigma_1 * grd["Y1"] - 1/2 * sigma_1 * sigma_1 )
grd["Z2"] = m_2 * np.exp(sigma_2 * grd["Y2"] - 1/2 * sigma_2 * sigma_2 )
In [11]:
ax = gp.correlation(grd, namex = "Z1", namey = "Z2", diagLine=True, bins=50)
No description has been provided for this image
In [12]:
gl.dbStatisticsMono(grd, ["Z*","Y*"],opers=gl.EStatOption.fromKeys(["MINI","MAXI","MEAN","STDV"]))
Out[12]:
      Minimum    Maximum       Mean   St. Dev.
Z1      0.382      2.456      1.000      0.254
Z2      0.069      2.833      0.500      0.266
Y1     -3.719      3.719      0.000      0.999
Y2     -3.719      3.719      0.000      0.999
In [13]:
ax = grd.plot("Z1")
No description has been provided for this image
In [14]:
ax = gp.histogram(grd, name="Z1", bins=30)
No description has been provided for this image
In [15]:
ax = grd.plot("Z2")
No description has been provided for this image
In [16]:
ax = gp.histogram(grd, name="Z2", bins=30)
No description has been provided for this image

Extraction of a data set¶

In [17]:
nump  = 500
data = gl.Db.createSamplingDb(grd, number=nump, names = ["x1", "x2", "Z1", "Z2"])
In [18]:
ax = grd.plot("Z1")
ax = data.plot(color="yellow")
No description has been provided for this image

Normal score

In [19]:
data["Z1.NS"] = gl.VH.normalScore(data["Z1"])
data["Z2.NS"] = gl.VH.normalScore(data["Z2"])
data.setName("Z1.NS","Y1")
data.setName("Z2.NS","Y2")
In [20]:
gl.dbStatisticsMono(data,["Z*","Y*"],opers=gl.EStatOption.fromKeys(["MINI","MAXI","MEAN","STDV"]))
Out[20]:
      Minimum    Maximum       Mean   St. Dev.
Z1      0.477      1.998      1.001      0.258
Z2      0.083      1.861      0.498      0.272
Y1     -2.879      2.879      0.000      0.989
Y2     -2.879      2.879      0.000      0.989
In [21]:
ax = gp.histogram(data, name="Z1", color="orange", bins=30)
No description has been provided for this image
In [22]:
ax = gp.histogram(data, name= "Z2", color="skyblue", bins=30)
No description has been provided for this image
In [23]:
ax = gp.correlation(data, namex="Z1", namey="Z2", diagLine=True, asPoint=True)
No description has been provided for this image
In [24]:
ax = gp.correlation(data, namex = "Y1", namey = "Y2", diagLine=True, asPoint=True)
No description has been provided for this image
In [25]:
probas = np.arange(1,101) / (100 + 1)
q1 = gl.VH.quantiles(data["Y1"], probas)
q2 = gl.VH.qnormVec(probas)
ax = gp.curve(q2, q1, marker='o')
ax.decoration(title="Q-Q Plot - First Initial Variable",
              xlabel = "Theoretical quantiles", ylabel = "Experimental quantiles")
No description has been provided for this image
In [26]:
probas = np.arange(1,101) / (100 + 1)
q1 = gl.VH.quantiles(data["Y2"], probas)
q2 = gl.VH.qnormVec(probas)
ax = gp.curve(q2, q1, marker='o')
ax.decoration(title="Q-Q Plot - Second Initial Variable",
              xlabel = "Theoretical quantiles", ylabel = "Experimental quantiles")
No description has been provided for this image
In [27]:
gl.dbStatisticsMono(data,["Y*"],opers=gl.EStatOption.fromKeys(["MINI","MAXI","MEAN","STDV"]))
Out[27]:
      Minimum    Maximum       Mean   St. Dev.
Y1     -2.879      2.879      0.000      0.989
Y2     -2.879      2.879      0.000      0.989

Implementing PPMT¶

Launching PPMT and fitting it on the vector of Input data

In [28]:
ppmt = gl.PPMT.create(ndir=10, flagPreprocessing = False, methodDir = gl.EDirGen.VDC, methodTrans = gl.EGaussInv.HMT)
err = ppmt.fit(data, ["Y*"], flagStoreInDb = True, niter = 100, namconv=gl.NamingConvention("U"))

Evolution of the Index

In [29]:
ln_index = ppmt.getSerieScore(True)
ax = gp.curve(ln_index, icas=2, marker='o')
No description has been provided for this image

Final results (non correlated variables)

In [30]:
gl.dbStatisticsMono(data,["U.*"],opers=gl.EStatOption.fromKeys(["MINI","MAXI","MEAN","STDV"]))
Out[30]:
        Minimum    Maximum       Mean   St. Dev.
U.Y1     -2.823      3.231      0.000      0.999
U.Y2     -2.824      3.277      0.001      0.997

Transformed Gaussian variables

In [31]:
probas = np.arange(1,101) / (100 + 1)
q1 = gl.VH.quantiles(data["U.Y1"], probas)
q2 = gl.VH.qnormVec(probas)
ax = gp.curve(q2, q1, marker='o')
ax.decoration(title = "Q-Q Plot - First variable of PPMT",
              xlabel = "Theoretical quantiles", ylabel = "Experimental quantiles")
No description has been provided for this image
In [32]:
probas = np.arange(1,101) / (100 + 1)
q1 = gl.VH.quantiles(data["U.Y2"],probas)
q2 = gl.VH.qnormVec(probas)
ax = gp.curve(q2, q1, marker='o')
ax.decoration(title = "Q-Q Plot - Second variable of PPMT",
              xlabel = "Theoretical quantiles", ylabel = "Experimental quantiles")
No description has been provided for this image
In [33]:
ax = gp.correlation(data, namex="U.Y1", namey="U.Y2", asPoint=True)
ax.decoration(title = "Final variable")
No description has been provided for this image
In [34]:
ax = gp.histogram(data, name="U.Y1", bins=30)
ax.decoration(title="First variable of PPMT")
No description has been provided for this image
In [35]:
ax = gp.histogram(data, name="U.Y2", bins=30)
ax.decoration(title="Second variable of PPMT")
No description has been provided for this image
In [36]:
err = ppmt.gaussianToRaw(data, ["U*"], namconv=gl.NamingConvention("V"))
In [37]:
ax = gp.correlation(data, namex="Y1", namey="V.U.Y1", asPoint=True)
ax.decoration(title = "First variable")
No description has been provided for this image
In [38]:
ax = gp.correlation(data, namex="Y2", namey="V.U.Y2", asPoint=True)
ax.decoration(title = "Second variable")
No description has been provided for this image