%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}


import pandas as pd
import numpy as np
import gstlearn as gl
import gstlearn.plot as gp
import urllib.request

flagInternetAvailable = True ## Set to False if no internet


fileCSV='Scotland_Temperatures.csv'
if flagInternetAvailable:
    temp_csv, head = urllib.request.urlretrieve('https://soft.minesparis.psl.eu/gstlearn/data/Scotland/'+fileCSV,'./'+fileCSV)
else:
    temp_csv='./'+fileCSV


datcsv = pd.read_csv(temp_csv, na_values="MISS")
datcsv


data = gl.Db()
for field in datcsv.columns :
  data[field] = datcsv[field]
data.setLocators(["Longitude","Latitude"], gl.ELoc.X)
data.setLocator("January_temp", gl.ELoc.Z)
# Selection for defined values only
data["TempOK"] = ~np.isnan(data["January_temp"])
data.setLocator("TempOK", gl.ELoc.SEL)
data

Data Base Characteristics
=========================

Data Base Summary
-----------------
File is organized as a set of isolated points
Space dimension              = 2
Number of Columns            = 5
Maximum Number of UIDs       = 5
Total number of samples      = 236
Number of active samples     = 151

Variables
---------
Column = 0 - Name = Longitude - Locator = x1
Column = 1 - Name = Latitude - Locator = x2
Column = 2 - Name = Elevation - Locator = NA
Column = 3 - Name = January_temp - Locator = z1
Column = 4 - Name = TempOK - Locator = sel


gp.plot(data,sizmin=5,sizmax=50,flagLegendSymbol=True)


grid = gl.DbGrid.createCoveringDb(data, nx=[80,130], margin=[50,50])
grid

Data Base Grid Characteristics
==============================

Data Base Summary
-----------------
File is organized as a regular grid
Space dimension              = 2
Number of Columns            = 2
Maximum Number of UIDs       = 2
Total number of samples      = 10400

Grid characteristics:
---------------------
Origin :     28.200   480.400
Mesh   :      6.108     6.035
Number :         80       130

Variables
---------
Column = 0 - Name = x1 - Locator = x1
Column = 1 - Name = x2 - Locator = x2


gp.plot(grid)
gp.plot(data,sizmin=5,sizmax=50,flagLegendSymbol=True)


nmini = 1
nmaxi = 3
radius = 300 # Big radius
nsect = 1
neigh = gl.NeighMoving.create(False, nmaxi, radius, nmini, nsect)
neigh

Moving Neighborhood
===================
Minimum number of samples           = 1
Maximum number of samples           = 3
Maximum horizontal distance         = 300


node = 4511
neigh.attach(data, grid)
ranks = neigh.select(node)
ranks # Ranks 23, 75 and 143

array([ 23,  75, 143])


def show_neigh(data, grid, neigh, node, ranks):
  ax = data.plot(name_size="January_temp",sizmax=30, useSel=True)
  if (len(ranks) > 0):
    dataSel = data.clone()
    dataSel.addSelectionByRanks(ranks)
    ax = dataSel.plot(color='blue',name_size="January_temp",sizmax=30)
  ax.neigh(neigh, grid, node)
  ax.decoration("Standard Neighborhood")
    
show_neigh(data, grid, neigh, node, ranks)


nmini = 1
nmaxi = 3
radius = 30 # Smaller radius
nsect = 1
neigh = gl.NeighMoving.create(False, nmaxi, radius, nmini, nsect)
neigh

Moving Neighborhood
===================
Minimum number of samples           = 1
Maximum number of samples           = 3
Maximum horizontal distance         = 30


node = 4511
neigh.attach(data, grid)
ranks = neigh.select(node)
ranks # Only rank 143

array([143])


show_neigh(data, grid, neigh, node, ranks)


class BiTargetCheckMaxRadius(gl.ABiTargetCheck):
    def __init__(self, radius):
        super(BiTargetCheckMaxRadius,self).__init__()
        self.radius = radius
        
    def isOK(self,st1,st2):
        return st1.getDistance(st2) < self.radius
    
    def toString(self,strfmt):
        return "This is my Max Radius checker: {}".format(self.radius)


nmini = 1
nmaxi = 3
radius = 300 # Big radius
neigh2 = gl.NeighMoving.create(False, nmaxi=nmaxi, nmini=nmini,radius=radius)
# Prefer my own maximum radius checker!
btc = BiTargetCheckMaxRadius(30)
neigh2.addBiTargetCheck(btc)
neigh2.display()

Moving Neighborhood
===================
Minimum number of samples           = 1
Maximum number of samples           = 3
Maximum horizontal distance         = 300
This is my Max Radius checker: 30


node = 4511
neigh2.attach(data, grid)
ranks = neigh2.select(node)
ranks # Should return 143

array([143])


show_neigh(data, grid, neigh2, node, ranks)

	Longitude	Latitude	Elevation	January_temp
0	372.1	658.9	255	1.7
1	303.5	665.9	125	2.0
2	218.4	597.9	8	4.6
3	245.0	955.0	90	NaN
4	326.8	691.2	32	3.1
...	...	...	...	...
231	273.2	564.6	47	2.8
232	333.9	730.1	30	2.6
233	185.0	655.0	115	NaN
234	259.8	587.9	119	2.1
235	260.8	668.6	107	2.6

Extending the gstlearn¶

Initialization¶

Playing with NeighMoving¶

Extending the gstlearn¶