Projection Pursuit Multivariate Transform¶

InĀ [1]:
import numpy as np
import matplotlib.pyplot as plt
import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc

gdoc.setNoScroll()

Initialization¶

Simulation of a Reference data set on a Grid

InĀ [2]:
grd = gl.DbGrid.create(x0=[0.0, 0.0], dx=[0.01, 0.01], nx=[100, 100])

# Simulate two independent Gaussian Random variables
model1 = gl.Model.createFromParam(gl.ECov.GAUSSIAN, range=0.2, sill=1.0)
err = gl.simtub(None, dbout=grd, model=model1, nbsimu=1)
grd.setName("Simu", "Y1")

model2 = gl.Model.createFromParam(gl.ECov.EXPONENTIAL, range=0.1, sill=1.0)
err = gl.simtub(None, dbout=grd, model=model2, nbsimu=1)
grd.setName("Simu", "Y2")

# Non linear transform
grd["Z"] = grd["Y1"] * grd["Y1"] + 0.5 * grd["Y1"] + 0.2 * grd["Y2"]
InĀ [3]:
grd["Y1.NS"] = gl.VH.normalScore(grd["Y1"])
grd["Z.NS"] = gl.VH.normalScore(grd["Z"])
InĀ [4]:
grd.deleteColumns(["Y1", "Y2", "Z"])
grd.setName("Y1.NS", "Y1")
grd.setName("Z.NS", "Y2")
InĀ [5]:
res = gp.plot(grd, "Y1")
No description has been provided for this image
InĀ [6]:
res = gp.histogram(grd, name="Y1", bins=30)
No description has been provided for this image
InĀ [7]:
res = gp.plot(grd, "Y2")
No description has been provided for this image
InĀ [8]:
res = gp.histogram(grd, name="Y2", bins=30)
No description has been provided for this image
InĀ [9]:
res = gp.correlation(grd, namex="Y1", namey="Y2", diagLine=True, bins=50)
No description has been provided for this image

Lognormal transformation

InĀ [10]:
m_1 = 1.0
sigma_1 = 0.25
m_2 = 0.5
sigma_2 = 0.5
grd["Z1"] = m_1 * np.exp(sigma_1 * grd["Y1"] - 1 / 2 * sigma_1 * sigma_1)
grd["Z2"] = m_2 * np.exp(sigma_2 * grd["Y2"] - 1 / 2 * sigma_2 * sigma_2)
InĀ [11]:
res = gp.correlation(grd, namex="Z1", namey="Z2", diagLine=True, bins=50)
No description has been provided for this image
InĀ [12]:
gl.dbStatisticsMono(
    grd, ["Z*", "Y*"], opers=gl.EStatOption.fromKeys(["MINI", "MAXI", "MEAN", "STDV"])
)
Out[12]:
       Minimum     Maximum        Mean    St. Dev.
Z1       0.382       2.456       1.000       0.254
Z2       0.069       2.833       0.500       0.266
Y1      -3.719       3.719       0.000       0.999
Y2      -3.719       3.719       0.000       0.999
InĀ [13]:
res = gp.plot(grd, "Z1")
No description has been provided for this image
InĀ [14]:
res = gp.histogram(grd, name="Z1", bins=30)
No description has been provided for this image
InĀ [15]:
res = gp.plot(grd, "Z2")
No description has been provided for this image
InĀ [16]:
res = gp.histogram(grd, name="Z2", bins=30)
No description has been provided for this image

Extraction of a data set¶

InĀ [17]:
nump = 500
data = gl.Db.createSamplingDb(grd, number=nump, names=["x1", "x2", "Z1", "Z2"])
InĀ [18]:
gp.plot(grd, "Z1")
gp.symbol(data, c="yellow")
plt.show()
No description has been provided for this image

Normal score

InĀ [19]:
data["Z1.NS"] = gl.VH.normalScore(data["Z1"])
data["Z2.NS"] = gl.VH.normalScore(data["Z2"])
data.setName("Z1.NS", "Y1")
data.setName("Z2.NS", "Y2")
InĀ [20]:
gl.dbStatisticsMono(
    data, ["Z*", "Y*"], opers=gl.EStatOption.fromKeys(["MINI", "MAXI", "MEAN", "STDV"])
)
Out[20]:
       Minimum     Maximum        Mean    St. Dev.
Z1       0.477       1.998       1.001       0.258
Z2       0.083       1.861       0.498       0.272
Y1      -2.879       2.879       0.000       0.989
Y2      -2.879       2.879       0.000       0.989
InĀ [21]:
res = gp.histogram(data, name="Z1", color="orange", bins=30)
No description has been provided for this image
InĀ [22]:
res = gp.histogram(data, name="Z2", color="skyblue", bins=30)
No description has been provided for this image
InĀ [23]:
res = gp.correlation(data, namex="Z1", namey="Z2", diagLine=True, asPoint=True)
No description has been provided for this image
InĀ [24]:
res = gp.correlation(data, namex="Y1", namey="Y2", diagLine=True, asPoint=True)
No description has been provided for this image
InĀ [25]:
probas = np.arange(1, 101) / (100 + 1)
q1 = gl.VH.quantiles(data["Y1"], probas)
q2 = gl.VH.qnormVec(probas)
gp.curve(q2, q1, marker="o")
gp.decoration(
    title="Q-Q Plot - First Initial Variable",
    xlabel="Theoretical quantiles",
    ylabel="Experimental quantiles",
)
No description has been provided for this image
InĀ [26]:
probas = np.arange(1, 101) / (100 + 1)
q1 = gl.VH.quantiles(data["Y2"], probas)
q2 = gl.VH.qnormVec(probas)
gp.curve(q2, q1, marker="o")
gp.decoration(
    title="Q-Q Plot - Second Initial Variable",
    xlabel="Theoretical quantiles",
    ylabel="Experimental quantiles",
)
No description has been provided for this image
InĀ [27]:
gl.dbStatisticsMono(
    data, ["Y*"], opers=gl.EStatOption.fromKeys(["MINI", "MAXI", "MEAN", "STDV"])
)
Out[27]:
       Minimum     Maximum        Mean    St. Dev.
Y1      -2.879       2.879       0.000       0.989
Y2      -2.879       2.879       0.000       0.989

Implementing PPMT¶

Launching PPMT and fitting it on the vector of Input data

InĀ [28]:
ppmt = gl.PPMT.create(
    ndir=10,
    flagPreprocessing=False,
    methodDir=gl.EDirGen.VDC,
    methodTrans=gl.EGaussInv.HMT,
)
err = ppmt.fit(
    data, ["Y*"], flagStoreInDb=True, niter=100, namconv=gl.NamingConvention("U")
)

Evolution of the Index

InĀ [29]:
ln_index = ppmt.getSerieScore(True)
res = gp.curve(ln_index, icas=2, marker="o")
No description has been provided for this image

Final results (non correlated variables)

InĀ [30]:
gl.dbStatisticsMono(
    data, ["U.*"], opers=gl.EStatOption.fromKeys(["MINI", "MAXI", "MEAN", "STDV"])
)
Out[30]:
         Minimum     Maximum        Mean    St. Dev.
U.Y1      -2.823       3.231       0.000       0.999
U.Y2      -2.824       3.277       0.001       0.997

Transformed Gaussian variables

InĀ [31]:
probas = np.arange(1, 101) / (100 + 1)
q1 = gl.VH.quantiles(data["U.Y1"], probas)
q2 = gl.VH.qnormVec(probas)
gp.curve(q2, q1, marker="o")
gp.decoration(
    title="Q-Q Plot - First variable of PPMT",
    xlabel="Theoretical quantiles",
    ylabel="Experimental quantiles",
)
No description has been provided for this image
InĀ [32]:
probas = np.arange(1, 101) / (100 + 1)
q1 = gl.VH.quantiles(data["U.Y2"], probas)
q2 = gl.VH.qnormVec(probas)
gp.curve(q2, q1, marker="o")
gp.decoration(
    title="Q-Q Plot - Second variable of PPMT",
    xlabel="Theoretical quantiles",
    ylabel="Experimental quantiles",
)
No description has been provided for this image
InĀ [33]:
gp.correlation(data, namex="U.Y1", namey="U.Y2", asPoint=True)
gp.decoration(title="Final variable")
No description has been provided for this image
InĀ [34]:
gp.histogram(data, name="U.Y1", bins=30)
gp.decoration(title="First variable of PPMT")
No description has been provided for this image
InĀ [35]:
gp.histogram(data, name="U.Y2", bins=30)
gp.decoration(title="Second variable of PPMT")
No description has been provided for this image
InĀ [36]:
err = ppmt.gaussianToRaw(data, ["U*"], namconv=gl.NamingConvention("V"))
InĀ [37]:
gp.correlation(data, namex="Y1", namey="V.U.Y1", asPoint=True)
gp.decoration(title="First variable")
No description has been provided for this image
InĀ [38]:
gp.correlation(data, namex="Y2", namey="V.U.Y2", asPoint=True)
gp.decoration(title="Second variable")
No description has been provided for this image