This notebook presents an example for the PPMT algorithm.
We create a reference data set (lognormal distribution) based on a model that we simulate using the Turning Band algorithm.
grd = DbGrid_create(x0=c(0.0,0.0), dx=c(0.01,0.01), nx=c(100,100))
# Simulate two independent Gaussian Random variables
model1 = Model_createFromParam(ECov_GAUSSIAN(), range=0.2, sill=1.0)
err = simtub(NULL, dbout = grd, model = model1, nbsimu = 1)
err = grd$setName("Simu","Y1")
model2 = Model_createFromParam(ECov_EXPONENTIAL(), range=0.1, sill=1.0)
err = simtub(NULL, dbout = grd, model = model2, nbsimu = 1)
err = grd$setName("Simu","Y2")
# Non linear transform
grd["Z"] = grd["Y1"] * grd["Y1"] + 0.5 * grd["Y1"] + 0.2 * grd["Y2"]
Two transforms are applied to the simulated variables:
a normal score transform
a lognormal transform
# Normal transform
grd["Y1.NS"] = VectorHelper_normalScore(grd["Y1"])
grd["Z.NS"] = VectorHelper_normalScore(grd["Z"])
err = grd$deleteColumns(c("Y1","Y2","Z"))
err = grd$setName("Y1.NS","Y1")
err = grd$setName("Z.NS","Y2")
# histograms and correlation
ggplot() + plot.hist(grd, name = "Y1", color = "gray", fill = "orange")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot() + plot.hist(grd, name = "Y2", color = "gray", fill = "skyblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p = ggplot()
p = p + plot.correlation(grd, namex = "Y1", namey = "Y2", bins=100, flagDiag = TRUE)
p = p + plot.decoration(title = "Initial Gaussian variables (grid)")
ggPrint(p)
# maps
ggplot() + plot.grid(grd, "Y1")
ggplot() + plot.grid(grd, "Y2")
## Lognormal transform
m_1 = 1.0; sigma_1 = 0.25
m_2 = 0.5; sigma_2 = 0.5
grd["Z1"] = m_1 * exp(sigma_1 * grd["Y1"] - 1/2 * sigma_1 * sigma_1 )
grd["Z2"] = m_2 * exp(sigma_2 * grd["Y2"] - 1/2 * sigma_2 * sigma_2 )
dbStatisticsMono(grd,c("Z*","Y*"), opers=EStatOption_fromKeys(c("MINI","MAXI","MEAN","STDV")))$display()
## Minimum Maximum Mean St. Dev.
## Z1 0.382 2.456 1.000 0.254
## Z2 0.069 2.833 0.500 0.266
## Y1 -3.719 3.719 0.000 0.999
## Y2 -3.719 3.719 0.000 0.999
## NULL
# histograms and correlation
ggplot() + plot.hist(grd, name = "Z1", color = "gray", fill = "orange")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot() + plot.hist(grd, name = "Z2", color = "gray", fill = "skyblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p = ggplot()
p = p + plot.correlation(grd, namex = "Z1", namey = "Z2", bins=100, flagDiag = TRUE)
p = p + plot.decoration(title = "Initial lognormal variables (grid)")
ggPrint(p)
# maps
ggplot() + plot.grid(grd, "Z1")
ggplot() + plot.grid(grd, "Z2")
nump = 500
data = Db_createSamplingDb(grd, number=nump, names = c("x1", "x2", "Z1", "Z2"))
dbStatisticsMono(data,c("Z*"), opers=EStatOption_fromKeys(c("MINI","MAXI","MEAN","STDV")))$display()
## Minimum Maximum Mean St. Dev.
## Z1 0.477 1.998 1.001 0.258
## Z2 0.083 1.861 0.498 0.272
## NULL
# histograms and correlation
ggplot() + plot.hist(data, name = "Z1", color = "gray", fill = "orange")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot() + plot.hist(data, name = "Z2", color = "gray", fill = "skyblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p = ggplot()
p = p + plot.correlation(data, namex = "Z1", namey = "Z2", bins=100, flagDiag = TRUE)
p = p + plot.decoration(title = "Initial lognormal variables (data)")
ggPrint(p)
# maps
p = ggplot()
p = p + plot.grid(grd,"Z1")
p = p + plot.point(data, nameColor = "Z1")
p = p + plot.decoration(title = "Initial lognormal variable - Z1")
ggPrint(p)