9 from copy
import deepcopy
10 from inspect
import getfile
as inspect_getfile
11 from inspect
import currentframe
as inspect_currentframe
# Numpy is a hard dependency of this analysis library; fail fast with the
# library's error-code convention ([code, message]) if it cannot be imported.
try:
    import numpy as np
except ImportError:
    # Narrowed from `except Exception` so unrelated failures propagate.
    # Error code 0o101 (decimal 65) preserves the original Python-2 `0101`
    # octal literal; call-style raise works on both Python 2.6+ and Python 3
    # (the original `raise Exception, [...]` form is a SyntaxError on Py3).
    raise Exception([0o101, 'Numpy is not installed.'])
if __name__ == '__main__':
    # This module is a library; warn when it is executed directly.
    print('Analysis library loaded directly.')
    # Prepend the grandparent directory of this file to the import path so
    # that sibling packages resolve when run standalone.
    # NOTE(review): relies on `sys` and `os` being imported earlier in the
    # file (not visible in this chunk) — confirm.
    sys.path.insert(0,os.path.dirname(os.path.dirname(os.path.abspath(inspect_getfile(inspect_currentframe())))))
23 ''' Take observations and total sample and format a string like "10/100 (10%)" ''' 24 if(total == 0):
return 'None' 25 if(
type(obs)
is float): obs_ = round(obs,1)
27 return str(obs_)+
'/'+
str(total)+
' ('+
str(round(float(obs)/float(total)*100,1))+
'%)' 31 ''' The most basic analysis container contains individual values, e.g. one 32 value for each observation, or each site analysis. This could contain, 33 for example the mean speed at a site, or, the total flow, etc. ''' 45 self.
values[-1].addInstructions(*args)
54 for i
in range(len(self.
values)):
55 if(self.
values[i].value): return_.append(i)
58 ''' Return measure of all analyses ''' 62 measures.append(self.
values[i].value)
63 if(round_>=0
and isinstance(round_, int)):
64 measures = [round(x, round_)
for x
in measures]
67 ''' Return depths of all analyses ''' 71 depths.append(self.
values[i].depth)
72 if(round_>=0
and isinstance(round_, int)):
73 depths = [round(x, round_)
for x
in depths]
76 ''' Return measure of all sites ''' 84 if(self.
values[i].metadata.idx == s): localmeasure = tvaLib.Math.combineMean([localmeasure,localDepth], [self.
values[i].value, self.
values[i].depth])
85 if(localmeasure): measures.append(localmeasure)
86 if(round_>=0
and isinstance(round_, int)):
87 measures = [round(x, round_)
for x
in measures]
90 ''' Return list of site IDs ''' 93 if(NonZeroOnly
and i.value == 0):
continue 94 ids.append(i.metadata.idx)
95 ids = tvaLib.unique(ids)
103 ''' Return ranked id aggregated by sites ''' 105 return sorted(range(len(measures)), key=measures.__getitem__)
107 ''' Return ranked id aggregated by sites ''' 110 return [siteIds[x]
for x
in indeces]
112 ''' Return sorted measure aggregated by sites ''' 116 ''' Return ranked index of all analyses ''' 119 if(measures
and siteIndeces):
120 measures, siteIndeces = zip(*sorted(zip(measures, siteIndeces)))
121 if(style==
'verbose'):
return [self.
values[x].metadata.name
for x
in siteIndeces]
122 else:
return [x
for x
in siteIndeces]
124 ''' Return sorted measure of all analyses ''' 128 print '============================================' 129 print 'Ranking by '+label+
' aggregated by site' 132 print 'Ranking by '+label+
' for all analyses' 137 ''' This analysis contains a large number of observations. ''' 140 for i
in range(len(self.
values)):
142 if(self.
values[i].binData): return_.append(i)
145 ''' Return bins of all analyses ''' 149 bins.append(self.
values[i].binStops)
152 ''' Return frequencies of all analyses ''' 156 frequencies.append(self.
values[i].binData)
160 for i
in range(len(freq)):
161 if(i==0): sumFreq = freq[i]
162 else: sumFreq = [x+y
for x,y
in zip(sumFreq,freq[i])]
165 ''' This function returns the standard deviation on each bin from the 166 sample (freqSamples). ''' 169 if(
not data
or type(data)
is not list):
return False 170 mean = sum(data)/len(data)
173 squareOfResult.append(math.pow(obs-mean,2))
174 return_.append(math.sqrt(sum(squareOfResult)/len(squareOfResult)))
177 ''' This function returns the min/max outliers on each bin from the 178 sample (freqSamples). ''' 181 if(
not data
or type(data)
is not list):
return False 182 if(mode==
'min'): return_.append(min(data))
183 else: return_.append(max(data))
187 ''' Cluster distributions by k-test distances into nklusters groups 191 self: should be a correctly instantiated, non-empty list of 193 nklusters: number of cluster centroids (equivalent to k in k-means) 197 returns a one-dimensional array of corresponding cluster associated 198 with each frequency object contained by this object 199 Also overwrites any self.clusters 202 if(
not self.
values):
return False 203 if(sum([1
for x
in self.
values if x]) < nklusters): nklusters = sum([1
for x
in self.
values if x])
207 for distroNeedleIx
in range(len(self.
values)):
208 if(
not self.
values[distroNeedleIx].getCDF()):
209 meanKsDistances.append(
None)
212 for distroStackIx
in range(len(self.
values)):
213 if(distroNeedleIx == distroStackIx
or not self.
values[distroStackIx].getCDF()):
continue 214 ksDistances.append(tvaLib.Math.ksTest(self.
values[distroNeedleIx].getCDF(),self.
values[distroStackIx].getCDF()))
215 if(len(ksDistances) == 0): meanKsDistances.append(0)
216 else: meanKsDistances.append(sum(ksDistances)/float(len(ksDistances)))
217 self.
centroidIxs = sorted(range(len(meanKsDistances)), key=
lambda i: meanKsDistances[i])[meanKsDistances.count(
None):nklusters+meanKsDistances.count(
None)]
222 for distroNeedleIx
in range(len(self.
values)):
223 if(
not self.
values[distroNeedleIx].getCDF()):
228 for klusterStackIx
in range(nklusters):
229 if(distroNeedleIx == self.
centroidIxs[klusterStackIx]):
232 ksDistances.append(tvaLib.Math.ksTest(self.
values[distroNeedleIx].getCDF(),self.
values[self.
centroidIxs[klusterStackIx]].getCDF()))
233 if(isCentroid): self.
clusters.append(klusterStackIx)
234 else: self.
clusters.append(ksDistances.index(min(ksDistances)))
240 ''' Return profile bins. ''' 242 try:
return value.bins
246 def getMean(self, min_up_coverage=0, min_down_coverage=0):
248 for bIx
in range(len(self.
getBins())):
249 try: mean.append(tvaLib.Math.combineMean([value.binData[bIx]
for value
in self.
values if value.binData
and value.up_coverage > min_up_coverage
and value.down_coverage > min_down_coverage], [value.binWeights[bIx]
for value
in self.
values if value.binData
and value.up_coverage > min_up_coverage
and value.down_coverage > min_down_coverage]))
250 except ZeroDivisionError: mean.append(0)
253 def getStdDev(self, min_up_coverage=0, min_down_coverage=0):
255 for bIx
in range(len(self.
getBins())):
256 try: stdev.append(tvaLib.Math.combineStdDev([value.binStdDev[bIx]
for value
in self.
values if value.binData
and value.up_coverage > min_up_coverage
and value.down_coverage > min_down_coverage], [value.binData[bIx]
for value
in self.
values if value.binData], [value.binWeights[bIx]
for value
in self.
values if value.binData
and value.up_coverage > min_up_coverage
and value.down_coverage > min_down_coverage]))
257 except ZeroDivisionError: stdev.append(0)
277 ''' data is a list of observations ''' 278 if(
type(data)
is not list): data = [data]
279 if(
type(weights)
is not list): weights = [weights]
280 if(len(data) <= 0):
return False 285 ''' data is a simple list ''' 286 if(
type(data)
is not list): data = [data]
287 if(weights
and type(weights)
is not list): weights = [weights]
288 if(len(data) <= 0):
return False 289 if(weights
and len(weights) == len(data)):
292 for i
in range(len(data)):
293 sum_val += data[i]*weights[i]
294 len_val += weights[i]
295 mean_val = sum_val/float(len_val)
298 mean_val = sum(data)/float(len_val)
299 self.
value = tvaLib.Math.combineMean([self.
value, mean_val], [self.
depth, len_val])
300 self.
depth += len(data)
306 ''' To get hourly values, use getCurve()[hour], etc. ''' 310 def increment(self, data, startTimes, startTime, duration, framerate):
311 ''' data: a list of measures. 312 startTimes: a list of starttimes corresponding to each measure in 313 data (lists should be the same size) 314 startTime: start time of measure recording 315 duration: duration of measure recording 317 if(len(data) <= 0):
return False 319 for measure,start
in zip(data,startTimes):
320 timestampBin =
int((startTime.hour*60 + startTime.minute + start/60.0/framerate))
321 self.
values[timestampBin].append(measure)
322 for bin
in range(startTime.hour*60 + startTime.minute,
int(startTime.hour*60 + startTime.minute + duration/60.0)):
325 def getCurve(self, bins=24, type='rate', emptyVal=None, divideBy=1, divideByValueVariety=False, strictDepthCompleteness=False):
326 ''' 2 bins is am/pm, 24 bins is hourly, 96 bins is quarter hourly, etc. 328 divideByValueVariety might be handy if the measures are 331 If strictDepthCompleteness is set to True, only return values for 332 bins with a complete depth coverage (in other words the entire 333 period has had data collected over that period). ''' 334 if(divideByValueVariety): divideBy = len(list(set(tvaLib.flatten_list(self.
values))))
337 for bin
in range(bins):
338 if(strictDepthCompleteness
and []
in self.
values[(bin)*
int(1440/bins):(bin+1)*
int(1440/bins)]):
339 returnList.append(emptyVal)
340 returnDepth.append(0)
341 elif(self.
values[(bin)*
int(1440/bins):(bin+1)*
int(1440/bins)].count([]) == len(self.
values[(bin)*
int(1440/bins):(bin+1)*
int(1440/bins)])):
342 returnList.append(emptyVal)
343 returnDepth.append(0)
345 returnList.append(tvaLib.flatten_list(self.
values[(bin)*
int(1440/bins):(bin+1)*
int(1440/bins)]))
346 returnDepth.append(sum(self.
depths[(bin)*
int(1440/bins):(bin+1)*
int(1440/bins)]))
347 for i,depth
in zip(range(len(returnList)),returnDepth):
348 if(returnList[i]
and returnList[i] != emptyVal):
349 if(type==
'mean'): returnList[i] = sum(returnList[i])/float(len(returnList[i]))
350 elif(type==
'std'): returnList[i] = np.std(returnList[i])
352 if(depth): returnList[i] = len(returnList[i])/float(depth)*
int(1440/bins)/float(divideBy)
353 else: returnList[i] = emptyVal
358 def increment(self, data, startTime, duration, framerate, mps_kmh=3.6):
359 ''' data is a list of objects (trajectories). ''' 360 if(len(data) <= 0):
return False 363 timestampBin =
int((startTime.hour*60 + startTime.minute + datum.getFirstInstant()/60.0/framerate))
364 self.
values[timestampBin].append(sum(datum.velocities.positions[2])/float(len(datum.velocities.positions[2]))*mps_kmh*framerate)
368 ''' Pool together raw data. Usefull if the data is structured using a custom scheme. ''' 374 ''' Data is a list of observations ''' 375 self.
value.append(data)
379 def __init__(self, binSize=0.25, xstart=0, xend=10):
390 if(xstart > xend):
break 393 def increment(self, data, weights=None, method='byData', keepSource=False):
399 Data is a simple list of individual observations 400 method=='byMeanHisto': 401 Use this method if the histogram has been pre calculated ''' 402 if(len(data) <= 0):
return False 404 if(method==
'byMeanHisto'):
410 self.
depth = sum(histo)
412 if(len(histo) != len(self.
binData)):
return False 413 for histIx
in range(len(histo)):
415 self.
depth += sum(histo)
417 if(keepSource): self.
source += data
418 if(weights): result = list(np.histogram(data, bins=self.
binStops, weights=weights)[0])
419 else: result = list(np.histogram(data, bins=self.
binStops)[0])
420 for i
in range(len(result)):
422 self.
depth += len(data)
427 ''' This method is called to build/expand''' 429 if(data[0] < self.
binStops[-1]
or data[-1] > self.
binStops[-1]):
return False 433 if(
not sum(self.
binData)):
return False 437 if(
not sum(self.
binData)):
return False 442 if(
not frequency):
return False 444 for i
in range(len(frequency)):
445 return_.append(sum(frequency[0:i+1]))
450 if(isinstance(refData, Frequency)): refData = refData.getCDF()
451 if(
not thisData
or type(refData) != list
or len(refData) != len(thisData)):
return False 453 for i
in range(len(thisData)):
454 if(math.fabs(thisData[i] - refData[i]) > supremum): supremum = math.fabs(thisData[i] - refData[i])
def setBins(self, values):
    ''' Store the supplied bin stops on this object and report success. '''
    self.bins = values
    return True
472 ''' Increment data. ''' 473 if(len(data) <= 0):
return False 482 if(len(data) != len(self.
binData)
or len(data) != len(weights)):
return False 483 if(stdDevs): self.
binStdDev = [tvaLib.Math.combineStdDev([stdDevs[ix], self.
binStdDev[ix]], [data[ix], self.
binData[ix]], [weights[ix], self.
binWeights[ix]])
for ix
in range(len(data))]
484 self.
binData = [tvaLib.Math.combineMean([data[ix], self.
binData[ix]], [weights[ix], self.
binWeights[ix]])
for ix
in range(len(data))]
485 if(mins): self.
binMins = [m1
if m1 < m2
else m2
for m1,m2
in zip(mins, self.
binMins)]
486 if(maxs): self.
binMaxs = [m1
if m1 > m2
else m2
for m1,m2
in zip(maxs, self.
binMaxs)]
487 for ix
in range(len(data)): self.
binWeights[ix] += weights[ix]
495 ''' data is a generic rate (int) ''' 496 if(
not isinstance(data, list)): data = [data]
502 ''' data is a generic total (int). ''' 508 ''' data is a list of objects (trajectories). ''' 509 if(len(data) <= 0):
return False 513 data_.append(sum(datum.velocities.positions[2])/float(len(datum.velocities.positions[2]))*speed_conv)
519 ''' data is a list of objects (trajectories). ''' 520 if(len(data) <= 0):
return False 522 data_ = [0
for i
in range(len(self.
metadata.site.alignments))]
524 data_[datum.curvilinearPositions.getLanes()[location]] += 1
525 for i
in range(len(data_)):
526 data_[i] = data_[i] * 3600/duration
527 if(len(filter(
None, data_))): mean_val = self.
incrementByMean(filter(
None, data_))
533 ''' data is a list of objects (trajectories). ''' 534 if(len(data) <= 0):
return False 537 value += sum(datum.velocities.positions[2])/1000.0
550 ''' data is a a list of indicators 551 weights is a list of weights (0-1) of equal size to data 553 self.
total += len(data)
559 for weight,datum
in zip(weights,data):
561 self.
weights[thresh].append(weight)
564 def getValue(self, thresh):
return self.values[thresh]
def getNonZeroValueIndeces(self)
def verifyBinStops(self, data)
def getSiteIds(self, NonZeroOnly=False)
def increment(self, data, weights=None)
def increment(self, data, weights=1)
def increment(self, data)
def getSiteRankingByMeasure(self, round_=-1)
def getMean(self, min_up_coverage=0, min_down_coverage=0)
def getMeasuresAggregatedBySite(self, round_=-1)
def getSiteIdRanking(self)
def addInstructions(self, metadata=None)
def printResult(self, label='default', round_=-1)
def getCurve(self, bins=24, type='rate', emptyVal=None, divideBy=1, divideByValueVariety=False, strictDepthCompleteness=False)
def setBins(self, values)
def __init__(self, binSize=0.25, xstart=0, xend=10)
def getMAXIMAofBins(self, mode='max')
def __init__(self, thresh_ranges)
def increment(self, data, startTime, duration, framerate, mps_kmh=3.6)
def increment(self, data, duration=3600, location=0)
def increment(self, data)
def increment(self, data)
def ksTestWith(self, refData)
def getSiteRankingByIndex(self)
def getMeasureRankingByMeasure(self, round_=-1)
centroidIxs
Search for centroids: pick top nklusters according to minimum mean K-S-test with all others...
def increment(self, data, speed_conv)
def increment(self, data, weights, stdDevs=None, mins=None, maxs=None)
def __init__(self, prototype=None)
def incrementByMean(self, data, weights=1)
def rateString(obs, total)
def getSiteRankBySaIx(self, saIx)
def getWeightedValue(self, thresh)
def getSumFrequency(self)
def getMeasureRankingByIndex(self, style='verbose')
def clusterByKStest(self, nklusters)
def getDepths(self, round_=-1)
def increment(self, args, kwargs)
def getNonZeroValueIndeces(self)
def increment(self, data, startTimes, startTime, duration, framerate)
def getValue(self, thresh)
def increment(self, data, duration=3600)
def increment(self, data, weights=None, method='byData', keepSource=False)
def getMeasures(self, round_=-1)
def getStdDev(self, min_up_coverage=0, min_down_coverage=0)