tvaLib
lib.tools_math Namespace Reference

## Classes

class  geneticAlg
Optimisation. More...

## Functions

def mat_divide (A, scalar)
Basic math. More...

def local_min_max (lists)

def gaus_mvgavg (lists, degree=5)
Filters on data sets. More...

def cat_mvgavg (cat_list, window=2, passes=1)

def cat_curvy_mvgavg (cat_list, curvy, window=2.0, passes=1)

def reduce_sequence (sequence)

def running_sum (a)

def remap_data (datapoints, old_map, new_map)
Constructors and generators. More...

def cantorID (obj)

def pdfToCDF (pdf_y)
Statistics. More...

def ksTest (df1, df2)

def getPercentileKeyFromList (items, percentile, sorting=False, sortingColumnIx=0)

def getPercintileBinFromList (data, percentile, sampleSize=0)

def getPercintileBinsFromList (data, bins=10, includeLeftEdge=False, sampleSize=0)

def sample (data, sampleSize=10000, method='random')

def combineMean (means, samples)

def combineVariance (variances, means, samples)

def combineStdDev (stddevs, means, samples)

def pdfStatsOnBinnedFreqData (data, centiles=[25, 75])

def setTheoryUnionNIndEvents (events, recursionDepthEstimation=10, smallValueLimit=0.0)
Probability theory. More...

## ◆ cantorID()

 def lib.tools_math.cantorID ( obj )
`Cantor unique ID for 2 objects. `

Definition at line 140 of file tools_math.py.

def cantorID(obj):
    ''' Return the Cantor pairing number uniquely identifying a pair of
    non-negative integers (obj[0], obj[1]). '''
    a, b = obj[0], obj[1]
    diagonal = a + b
    return int(diagonal * (diagonal + 1) * .5 + b)
def cantorID(obj)
Definition: tools_math.py:140

## ◆ cat_curvy_mvgavg()

 def lib.tools_math.cat_curvy_mvgavg ( cat_list, curvy, window = `2.0`, passes = `1` )
```Similar to cat_mvgavg(), however, the window is treated continuously
using curvy position.

Note:
=====
curvy should have the same dimension as cat_list```

Definition at line 84 of file tools_math.py.

def cat_curvy_mvgavg(cat_list, curvy, window=2.0, passes=1):
    ''' Like cat_mvgavg(), but the window is treated continuously using the
    curvilinear positions in curvy: for each point, the window spans the
    indices whose curvy value is nearest to curvy[point] +/- window, and the
    point is replaced by the most frequent category in that span.

    Note:
    =====
    curvy should have the same dimension as cat_list
    '''
    for _ in range(passes):
        result = deepcopy(cat_list)
        candidate_indices = range(len(curvy))
        for i in range(len(cat_list)):
            # Nearest indices to the lower/upper curvilinear bounds.
            lo = min(candidate_indices, key=lambda k: abs(curvy[k] - (curvy[i] - window)))
            hi = min(candidate_indices, key=lambda k: abs(curvy[k] - (curvy[i] + window))) + 1
            neighbourhood = cat_list[lo:hi]
            result[i] = max(set(neighbourhood), key=neighbourhood.count)
        cat_list = result
    return cat_list
def cat_curvy_mvgavg(cat_list, curvy, window=2.0, passes=1)
Definition: tools_math.py:84

## ◆ cat_mvgavg()

 def lib.tools_math.cat_mvgavg ( cat_list, window = `2`, passes = `1` )
```Return a list of categories/values smoothed according to a window.
window is the search radius on either side```

Definition at line 70 of file tools_math.py.

def cat_mvgavg(cat_list, window=2, passes=1):
    ''' Return a list of categories/values smoothed according to a window.
    window is the search radius on either side: each point is replaced by
    the most frequent value within window elements of it. passes repeats
    the smoothing on the previous result.
    '''
    for _ in range(passes):
        smoothed = deepcopy(cat_list)
        for point in range(len(cat_list)):
            lower_bound = max(0, point - window)
            # Slice upper bounds are exclusive, so cap at len(cat_list).
            # The original capped at len(cat_list)-1, which silently dropped
            # the final element from every window near the end of the list.
            upper_bound = min(len(cat_list), point + window + 1)
            window_values = cat_list[lower_bound:upper_bound]
            smoothed[point] = max(set(window_values), key=window_values.count)
        cat_list = smoothed
    return cat_list
def cat_mvgavg(cat_list, window=2, passes=1)
Definition: tools_math.py:70

## ◆ combineMean()

 def lib.tools_math.combineMean ( means, samples )
```Combine various means knowing their sample size. AKA Weighted average.
The order of each list must be consistent.```

Definition at line 231 of file tools_math.py.

def combineMean(means, samples):
    ''' Weighted average of several group means given each group's sample
    size. The i-th mean is paired with the i-th sample count; order must be
    consistent between the two lists. Returns 0 when the total sample size
    is zero. '''
    total = sum(samples)
    if not total:
        return 0
    weighted = sum(mean * n for mean, n in zip(means, samples))
    return weighted / float(total)
def combineMean(means, samples)
Definition: tools_math.py:231

## ◆ combineStdDev()

 def lib.tools_math.combineStdDev ( stddevs, means, samples )
```http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
The order of each list must be consistent.```

Definition at line 246 of file tools_math.py.

def combineStdDev(stddevs, means, samples):
    ''' Combine several group standard deviations given each group's mean
    and sample size (the three lists must be in consistent order).
    http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
    '''
    pooled_variance = combineVariance([m.pow(s, 2) for s in stddevs], means, samples)
    return m.sqrt(pooled_variance)
def combineStdDev(stddevs, means, samples)
Definition: tools_math.py:246
def combineVariance(variances, means, samples)
Definition: tools_math.py:238

## ◆ combineVariance()

 def lib.tools_math.combineVariance ( variances, means, samples )
```http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
The order of each list must be consistent.```

Definition at line 238 of file tools_math.py.

def combineVariance(variances, means, samples):
    ''' Combine several group variances given each group's mean and sample
    size (the three lists must be in consistent order). Returns 0 when the
    total sample size is zero.
    http://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
    '''
    combined_mean = combineMean(means, samples)
    try:
        within = sum(v * n for v, n in zip(variances, samples))
        between = sum(m.pow(mu - combined_mean, 2) * n for mu, n in zip(means, samples))
        return (within + between) / float(sum(samples))
    except ZeroDivisionError:
        return 0
def combineVariance(variances, means, samples)
Definition: tools_math.py:238
def combineMean(means, samples)
Definition: tools_math.py:231

## ◆ gaus_mvgavg()

 def lib.tools_math.gaus_mvgavg ( lists, degree = `5` )

Filters on data sets.

`Gaussian moving average smoothing. `

Definition at line 52 of file tools_math.py.

def gaus_mvgavg(lists, degree=5):
    ''' Gaussian moving average smoothing.

    Uses a window of 2*degree-1 samples; the returned list is shorter than
    the input by the window size. '''
    window = degree * 2 - 1
    # Unnormalised Gaussian kernel centred on the window.
    offsets = [i - degree + 1 for i in range(window)]
    kernel = np.array([1 / np.exp((4 * (off / float(window))) ** 2) for off in offsets])
    kernel_sum = sum(kernel)
    smoothed = [0.0] * (len(lists) - window)
    for j in range(len(smoothed)):
        smoothed[j] = sum(np.array(lists[j:j + window]) * kernel) / kernel_sum
    return smoothed
def gaus_mvgavg(lists, degree=5)
Filters on data sets.
Definition: tools_math.py:52

## ◆ getPercentileKeyFromList()

 def lib.tools_math.getPercentileKeyFromList ( items, percentile, sorting = `False`, sortingColumnIx = `0` )
`Find percentile of a list (optionally, of lists). `

Definition at line 163 of file tools_math.py.

def getPercentileKeyFromList(items, percentile, sorting=False, sortingColumnIx=0):
    ''' Return the item at the given percentile of a list (optionally a
    list of lists).

    Input:
    ======
    percentile:      fraction in [0,1] (not 0-100)
    sorting:         sort items first (by column sortingColumnIx when the
                     items are lists)
    sortingColumnIx: column to sort on when items are lists

    Returns False for an empty list.
    '''
    # Guard first: the original indexed items[0] before the emptiness
    # check, raising IndexError on an empty list when sorting was requested.
    if len(items) <= 0:
        return False
    if sorting:
        if isinstance(items[0], list):
            items = sorted(items, key=lambda row: row[sortingColumnIx])
        else:
            items = sorted(items)
    return items[min(int(round(percentile * (len(items) - 1))), len(items) - 1)]
def getPercentileKeyFromList(items, percentile, sorting=False, sortingColumnIx=0)
Definition: tools_math.py:163

## ◆ getPercintileBinFromList()

 def lib.tools_math.getPercintileBinFromList ( data, percentile, sampleSize = `0` )
```This function finds the percentile bin values for a very large list of floats.
Input data can be either a list or a n-dimension array.

Input:
======
percentile: float in range of [0,100]
sampleSize: if non zero, subsample this many observations from data```

Definition at line 172 of file tools_math.py.

def getPercintileBinFromList(data, percentile, sampleSize=0):
    ''' Find the percentile bin value for a very large list of floats.
    Input data can be either a list or an n-dimension array.

    Input:
    ======
    percentile: float in the range [0,100]
    sampleSize: if non-zero, randomly subsample this many observations
                from data first
    '''
    subset = sample(data, sampleSize=sampleSize, method='random') if sampleSize else data
    return np.percentile(subset, percentile)
def getPercintileBinFromList(data, percentile, sampleSize=0)
Definition: tools_math.py:172
def sample(data, sampleSize=10000, method='random')
Definition: tools_math.py:206

## ◆ getPercintileBinsFromList()

 def lib.tools_math.getPercintileBinsFromList ( data, bins = `10`, includeLeftEdge = `False`, sampleSize = `0` )
```This function finds the percentile bin values for a very large list of
floats. Input data can be either a list or a n-dimension array.
includeLeftEdge returns an additional edge bin at the begining.

Example:
========
>>> getPercintileBinsFromList([1,2,3,4,5,6,7,8,9])
[1.8, 2.6000000000000001, 3.3999999999999999, 4.2000000000000002, 5.0, 5.7999999999999998, 6.5999999999999996, 7.4000000000000004, 8.1999999999999993, 9]```

Definition at line 185 of file tools_math.py.

def getPercintileBinsFromList(data, bins=10, includeLeftEdge=False, sampleSize=0):
    ''' Find the percentile bin edge values for a very large list of floats.
    Input data can be either a list or an n-dimension array.
    includeLeftEdge prepends the 0th-percentile edge at the beginning.

    Example:
    ========
    >>> getPercintileBinsFromList([1,2,3,4,5,6,7,8,9])
    [1.8, 2.6000000000000001, 3.3999999999999999, 4.2000000000000002, 5.0, 5.7999999999999998, 6.5999999999999996, 7.4000000000000004, 8.1999999999999993, 9]
    '''
    step = 100.0 / bins
    edges = [np.percentile(data, 0)] if includeLeftEdge else []
    cumulative = 0
    for _ in range(bins):
        cumulative += step
        edges.append(getPercintileBinFromList(data, min(100, cumulative), sampleSize=sampleSize))
    return edges
def getPercintileBinFromList(data, percentile, sampleSize=0)
Definition: tools_math.py:172
def getPercintileBinsFromList(data, bins=10, includeLeftEdge=False, sampleSize=0)
Definition: tools_math.py:185

## ◆ ksTest()

 def lib.tools_math.ksTest ( df1, df2 )
```Kolmogorov-Smirnov test between two distribution functions.
Automatically checks for pdf or cdf. ```

Definition at line 156 of file tools_math.py.

def ksTest(df1, df2):
    ''' Kolmogorov-Smirnov statistic between two distribution functions.
    Automatically detects PDFs (values summing to ~1) and converts them to
    CDFs before comparing. '''
    if round(sum(df1), 2) == 1:
        df1 = pdfToCDF(df1)
    if round(sum(df2), 2) == 1:
        df2 = pdfToCDF(df2)
    return max(abs(a - b) for a, b in zip(df1, df2))
def ksTest(df1, df2)
Definition: tools_math.py:156
def pdfToCDF(pdf_y)
Statistics.
Definition: tools_math.py:149

## ◆ local_min_max()

 def lib.tools_math.local_min_max ( lists )
```Return local min/max
Suggested using gaus_mvgavg(lists) first
Returns: maxs,mins,nmax,nmin. ```

Definition at line 28 of file tools_math.py.

# NOTE(review): this generated listing elides source lines 33, 39, 41 and 44
# of tools_math.py — presumably the loop header and the min/max comparison
# conditionals — so the code shown below is incomplete and cannot run as-is.
# Consult tools_math.py directly for the full implementation; do not copy
# this fragment.
28 def local_min_max(lists):
29  ''' Return local min/max
30  Suggested using gaus_mvgavg(lists) first
31  Returns: maxs,mins,nmax,nmin. '''
32
34  maxima_num=0
35  minima_num=0
36  max_locations=[]
37  min_locations=[]
38  count=0
40  count+=1
42  maxima_num+=1
43  max_locations.append(count)
45  minima_num+=1
46  min_locations.append(count)
47  return max_locations, min_locations, maxima_num, minima_num
48
def local_min_max(lists)
Definition: tools_math.py:28

## ◆ mat_divide()

 def lib.tools_math.mat_divide ( A, scalar )

Basic math.

`Divide list by scalar. `

Definition at line 22 of file tools_math.py.

def mat_divide(A, scalar):
    ''' Divide every element of the 2-D list (matrix) A by a scalar.

    The original built S = [scalar]*len(A) and then evaluated
    zip(rA, rS) with rS a plain number, which raises
    "TypeError: zip argument #2 must support iteration" for any numeric
    scalar. Divide each row elementwise instead.
    '''
    return [[n / scalar for n in row] for row in A]
def mat_divide(A, scalar)
Basic math.
Definition: tools_math.py:22

## ◆ pdfStatsOnBinnedFreqData()

 def lib.tools_math.pdfStatsOnBinnedFreqData ( data, centiles = `[25, 75]` )
```For a list of bins with lists of data points, return mean, median,
std. dev. and percentile measures.

http://en.wikipedia.org/wiki/Percentile#Weighted_percentile```

Definition at line 254 of file tools_math.py.

def pdfStatsOnBinnedFreqData(data, centiles=(25, 75)):
    ''' For a list of bins, each holding a list of data points, return
    mean, median, low/high percentile and std. dev. measures, each list
    normalised to sum to 1 (i.e. shaped like a pdf over the bins).

    http://en.wikipedia.org/wiki/Percentile#Weighted_percentile

    Input:
    ======
    data:     list of lists of numbers (one inner list per bin)
    centiles: two-element sequence with the low and high percentile;
              the default is now an immutable tuple rather than the
              mutable list [25, 75] (same values, avoids the shared
              mutable-default pitfall; callers are unaffected)
    '''
    means = [np.mean(x) for x in data]
    mean_total = sum(means)
    mean = [x / mean_total for x in means]
    medians = [np.median(x) for x in data]
    median_total = sum(medians)
    median = [x / median_total for x in medians]
    # Weighted percentiles
    lows = [np.percentile(x, centiles[0]) for x in data]
    low_total = sum(lows)
    percent_low = [x / low_total for x in lows]
    highs = [np.percentile(x, centiles[1]) for x in data]
    high_total = sum(highs)
    percent_high = [x / high_total for x in highs]
    stddevs = [np.std(x) for x in data]
    # NOTE(review): std. dev. is normalised by the MEAN total, exactly as in
    # the original — presumably intentional (relative spread); confirm.
    stddev = [x / mean_total for x in stddevs]
    return mean, median, percent_low, percent_high, stddev
def pdfStatsOnBinnedFreqData (data, centiles=[25, 75])
Definition: tools_math.py:254

## ◆ pdfToCDF()

 def lib.tools_math.pdfToCDF ( pdf_y )

Statistics.

`Convert y values of a PDF distribution (i.e. sum(pdf_y)==1). `

Definition at line 149 of file tools_math.py.

def pdfToCDF(pdf_y):
    ''' Convert the y values of a PDF distribution (i.e. sum(pdf_y)==1)
    into the corresponding cumulative (CDF) values.

    Returns [] for empty input. The original initialised its accumulator
    via a NameError handler inside the loop, so an empty pdf_y raised
    NameError at the return statement; an explicit accumulator fixes that
    while preserving the same left-to-right summation.
    '''
    cdf = []
    total = 0
    for p in pdf_y:
        total = total + p
        cdf.append(total)
    return cdf
def pdfToCDF(pdf_y)
Statistics.
Definition: tools_math.py:149

## ◆ reduce_sequence()

 def lib.tools_math.reduce_sequence ( sequence )
```In a sequence of repeating integers, reduce the repetition by the
value of that series of repeating integers within the sequence.

Example:
========
>>>reduce_sequence([1,3,3,3,2,2,2,2])
[1,3,2,2]```

Definition at line 103 of file tools_math.py.

def reduce_sequence(sequence):
    ''' In a sequence of repeating integers, reduce the repetition by the
    value of that series of repeating integers within the sequence.

    Example:
    ========
    >>> reduce_sequence([1,3,3,3,2,2,2,2])
    [1, 3, 2, 2]

    Notes:
    ======
    - Always returns a list; the original returned filter(None, ...),
      which is a lazy iterator under Python 3 and would also have
      silently discarded legitimate 0 values.
    - The input list is modified in place (as in the original).
    '''
    for i in range(len(sequence)):
        if sequence[i] is not None:
            for j in range(sequence[i]):
                # j == 0 is the element itself; only blank out subsequent
                # in-range repeats of the same value.
                if j != 0 and i + j + 1 <= len(sequence) and sequence[i + j] == sequence[i]:
                    sequence[i + j] = None
    return [x for x in sequence if x is not None]
def reduce_sequence(sequence)
Definition: tools_math.py:103

## ◆ remap_data()

 def lib.tools_math.remap_data ( datapoints, old_map, new_map )

Constructors and generators.

Definition at line 133 of file tools_math.py.

def remap_data(datapoints, old_map, new_map):
    ''' Linearly rescale datapoints in place from the span of old_map
    (first to last element) onto the span of new_map, returning the same
    list.

    NOTE(review): assumes old_map[-1] != old_map[0]; a degenerate old
    range raises ZeroDivisionError — confirm callers guarantee this.
    '''
    scale = (new_map[-1] - new_map[0]) / (old_map[-1] - old_map[0])
    old_origin, new_origin = old_map[0], new_map[0]
    for idx, value in enumerate(datapoints):
        datapoints[idx] = new_origin + (value - old_origin) * scale
    return datapoints
def remap_data(datapoints, old_map, new_map)
Constructors and generators.
Definition: tools_math.py:133

## ◆ running_sum()

 def lib.tools_math.running_sum ( a )
```Cumulative sum of a list. Returns a generator. Use list(running_sum(a))
to obtain a list.```

Definition at line 120 of file tools_math.py.

def running_sum(a):
    ''' Yield the cumulative sum of the iterable a. Returns a generator;
    use list(running_sum(a)) to obtain a list. '''
    total = 0
    for value in a:
        total += value
        yield total
def running_sum(a)
Definition: tools_math.py:120

## ◆ sample()

 def lib.tools_math.sample ( data, sampleSize = `10000`, method = `'random'` )
```Use this function to sub-sample or over-sample a very large list
(data).

Supported sampling methods are:
===============================
random:        randomly chosen using a uniform distribution
interpolation: interpolated linearly (always returns the same values
for the same list and same sample size). Only supports
sub-sampling, by design.```

Definition at line 206 of file tools_math.py.

def sample(data, sampleSize=10000, method='random'):
    ''' Sub-sample or over-sample a very large list (data).

    Supported sampling methods are:
    ===============================
    random:        values drawn uniformly at random (with replacement),
                   so this can also over-sample
    interpolation: evenly spaced indices, interpolated linearly — always
                   returns the same values for the same list and sample
                   size. Only supports sub-sampling, by design (returns
                   data unchanged when sampleSize exceeds its length).

    Returns None for an unknown method.
    '''
    if method == 'random':
        top = len(data) - 1
        return [data[int(round(random() * top))] for _ in range(sampleSize)]
    if method == 'interpolation':
        if sampleSize > len(data):
            return data
        stride = len(data) / float(sampleSize)
        picks = np.arange(0, len(data) - 1, stride).round().astype(int).tolist()
        return [data[i] for i in picks]
    return None
def sample(data, sampleSize=10000, method='random')
Definition: tools_math.py:206

## ◆ setTheoryUnionNIndEvents()

 def lib.tools_math.setTheoryUnionNIndEvents ( events, recursionDepthEstimation = `10`, smallValueLimit = `0.0` )

Probability theory.

```Estimate the probability of union of N independent events using set
theory, the inclusion-exclusion principle, and Bonferroni inequalities
where events is a list of probabilities for each event
https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
https://en.wikipedia.org/wiki/Boole%27s_inequality#Bonferroni_inequalities

If recursionDepthEstimation=1, Boole's inequality/2 will be returned.

This calculation is exponentially expensive as the size of the list of
events increases. For sets larger than 5-10, it is suggested to
assume small values are mutually exclusive using smallValueLimit
to reduce the complexity of event intersections.

A hard limit on recursion (recursionDepthEstimation) also exists to avoid
complete lockup, but this will likely lead to an unpredictable answer.```
Definition at line 280 of file tools_math.py.

def setTheoryUnionNIndEvents(events, recursionDepthEstimation=10, smallValueLimit=0.0):
    ''' Estimate the probability of the union of N independent events using
    set theory, the inclusion-exclusion principle, and Bonferroni
    inequalities, where events is a list of per-event probabilities.
    https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
    https://en.wikipedia.org/wiki/Boole%27s_inequality#Bonferroni_inequalities

    If recursionDepthEstimation=1, Boole's inequality/2 will be returned.

    The calculation grows exponentially more expensive with the number of
    events. For sets larger than 5-10, it is suggested to assume small
    values (<= smallValueLimit) are mutually exclusive, which removes them
    from the intersection terms. A hard cap on depth
    (recursionDepthEstimation) also exists to avoid complete lockup, but
    will likely make the answer unpredictable.
    '''
    if type(events) is not list:
        return None
    # Split off the (approximately) mutually exclusive small events.
    smallEvents = []
    if smallValueLimit:
        smallEvents = [p for p in events if p <= smallValueLimit]
        events = [p for p in events if p > smallValueLimit]
    if sum(smallEvents) > 1:
        return 1
    if len(events) <= 1:
        return sum(events) + sum(smallEvents)

    estimate = 0
    upper_bound = 1 - sum(smallEvents)
    lower_bound = 0

    # Tighten the depth cap as the event count grows (cost control).
    for threshold, cap in ((20, 7), (60, 3), (250, 2)):
        if len(events) > threshold:
            recursionDepthEstimation = min(recursionDepthEstimation, cap)

    for depth in range(min(len(events), recursionDepthEstimation)):
        # Total probability of the intersection of depth+1 events across
        # every combination. E.g. for A, B, C, D at set size 2:
        #   A*B + A*C + A*D + B*C + B*D + C*D
        # and at set size 3: A*B*C + A*B*D + A*C*D + B*C*D.
        term = sum([reduce(lambda x, y: x * y, combo)
                    for combo in itertools_combinations(events, depth + 1)])
        if depth % 2 == 0:
            # Even steps add their term; running total is an upper bound.
            estimate += term
            upper_bound = min(upper_bound, estimate)
        else:
            # Odd steps subtract; running total is a lower bound.
            estimate -= term
            lower_bound = max(lower_bound, estimate)
    return (upper_bound + lower_bound) / 2.0 + sum(smallEvents)
def setTheoryUnionNIndEvents(events, recursionDepthEstimation=10, smallValueLimit=0.0)
Probability theory.
Definition: tools_math.py:280