tvaLib
tools.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # tvaLib Copyright (c) 2012-2016 Paul G. St-Aubin
3 # Ecole Polytechnique de Montreal, McGill University
4 # Python 2.7; (dt) Spyder Windows 10 64-bit; ipython Ubuntu 15.04 64-bit
5 
9 import math as m
10 import time, sys, os
11 from traceback import format_exception as traceback_format_exception
12 from types import MethodType as types_MethodType
13 from fnmatch import filter as fnmatch_filter
14 from copy_reg import pickle as copy_reg_pickle
15 from itertools import izip, chain, repeat
16 from inspect import isclass as inspect_isclass
17 from inspect import getargspec as inspect_getargspec
18 try: import include.config as tvaConfig
19 except ImportError: pass
20 
22 try: import numpy as np
23 except ImportError: raise Exception, [0101, 'Numpy is not installed.']
24 try: from colorama import Fore, Back, Style
25 except ImportError:
26  try:
27  Fore = tvaConfig.Fore()
28  Back = tvaConfig.Back()
29  Style = tvaConfig.Style()
30  except NameError: print('Warning: Tools imported directly. Some non-essential functionality (such as coloured ouput on Linux) may be impaired or unstable.')
31 
32 import tools_obj as Obj
33 import tools_parse as Parse
34 import tools_geo as Geo
35 import tools_math as Math
36 import tools_constructors as Constructors
37 
38 
39 
def printTimeStamp(message, justifyWidth=50):
    ''' Print a three-line blue banner followed by message.

        The banner is a rule of '=' characters, a line holding the current
        local time and the value of time.clock() rounded to whole seconds,
        and a second rule. Every banner line is justifyWidth characters wide.

        Output:
        =======
        Always returns True.
        '''
    rule = Back.BLUE+''.rjust(justifyWidth,'=')+Style.RESET_ALL
    print(rule)
    now = str(time.asctime(time.localtime(time.time())))
    elapsed = str(round(time.clock()))
    stamp = (now+' (Runtime: '+elapsed+'s)').ljust(justifyWidth,' ')
    print(Back.BLUE+stamp+Style.RESET_ALL)
    print(rule)
    print(message)
    return True
48 
def printWarning(message, label='Warning', indent=0):
    ''' Print message preceded by a highlighted label (bright red text on a
        yellow background), shifted right by indent spaces.

        Output:
        =======
        Always returns True.
        '''
    highlight = Back.YELLOW+Fore.RED+Style.BRIGHT
    margin = ''.rjust(indent,' ')
    print(highlight+margin+label+':'+Style.RESET_ALL+' '+message)
    return True
52 
def printNotice(message, label='Notice', indent=0):
    ''' Print message preceded by a highlighted label (bright white text on
        a blue background), shifted right by indent spaces.

        Output:
        =======
        Always returns True.
        '''
    highlight = Back.BLUE+Fore.WHITE+Style.BRIGHT
    margin = ''.rjust(indent,' ')
    print(highlight+margin+label+':'+Style.RESET_ALL+' '+message)
    return True
56 
def grabError():
    ''' Print the traceback of the exception currently being handled and
        return it as a string.

        Most useful just after an except Exception: clause.

        Output:
        =======
        The formatted traceback (the same text that is printed). Earlier
        versions only printed it and returned None, despite being documented
        as returning the string.
        '''
    etype, value, tb = sys.exc_info()
    try:
        report = ''.join(traceback_format_exception(etype, value, tb))
    finally:
        # Drop the traceback reference so that this frame does not keep the
        # failing frames alive through a reference cycle.
        del tb
    print(report)
    return report
62 
def printTable(tableData, indent=4, padding=' '):
    ''' Print tableData as left-aligned columns.

        Each row is a sequence of cells. A row that is one of the strings
        '--', '++', '==', '##' or '**' is drawn as a horizontal line made of
        that string's first character, spanning the full table width.

        Input:
        ======
        indent:  number of blank characters printed before every row
        padding: string inserted between adjacent columns
        '''
    acceptedHLs = ['--','++','==','##','**']

    # Widest cell of every column, ignoring the horizontal-line rows
    cellWidths = []
    for row in tableData:
        if(row in acceptedHLs): continue
        cellWidths.append([len(str(col)) for col in row])
    colwidths = [max(col) for col in pointList2ColumnList(cellWidths)]

    margin = ''.ljust(indent)
    for row in tableData:
        if(row in acceptedHLs):
            fill = row[0]
            # One fill character for every padding character between columns,
            # then one for every character of column width
            gaps = fill*((len(colwidths)-1)*len(padding))
            print(margin+gaps+''.ljust(sum(colwidths), fill))
        else:
            cells = [str(col).ljust(width) for col,width in zip(row, colwidths)]
            print(margin+padding.join(cells))
73 
74 
75 
def toChunks(threads, iterable, padvalue=None, asList=True):
    ''' Split iterable into consecutive chunks of threads items each, filling
        the last chunk with padvalue when it would otherwise be short.

        Example:
        ========
        toChunks(3, 'abcdefg', 'x')
        returns [['a','b','c'], ['d','e','f'], ['g','x','x']]

        Output:
        =======
        A list of lists if asList is True, otherwise the izip iterator itself
        (which yields tuples).
        '''
    # The same iterator object repeated threads times: izip pulls threads
    # consecutive items for every chunk it emits. At most threads-1 pad values
    # are ever needed to complete the final chunk.
    padded = chain(iterable, repeat(padvalue, threads-1))
    chunks = izip(*[padded]*threads)
    if(not asList): return chunks
    return [list(chunk) for chunk in chunks]
85 
86 
def explodeList(haystack, needle=None, returnIndeces=False):
    ''' Return a list of sublists separated by needle in haystack.

        Runs of one or more needle items act as a single separator and never
        appear in the result.

        Input:
        ======
        haystack:      must be a list (exact type); anything else yields False
        needle:        separator value, compared with ==
        returnIndeces: return the haystack indices of the items instead of the
                       items themselves

        Output:
        =======
        False if haystack is not a list or is empty, otherwise a (possibly
        empty) list of non-empty sublists.

        A single-item haystack is handled like any other, so
        explodeList([needle]) returns [] (it used to return [[needle]]).
        '''
    if(type(haystack) is not list): return False
    if(len(haystack) < 1): return False

    groups = []
    current = []
    for index, item in enumerate(haystack):
        if(item == needle):
            # A separator closes the group being built, if there is one
            if(current):
                groups.append(current)
                current = []
        else: current.append(index)
    if(current): groups.append(current)

    if(returnIndeces): return groups
    return [[haystack[index] for index in group] for group in groups]
110 
111 
def flatten_list(data):
    ''' Recursively flatten nested iterables into a new flat list (this
        forces a memory copy).

        Asymmetric nesting such as [1,2,[3,4]] is supported. Anything exposing
        __iter__ is descended into, except strings, which are kept whole. A
        symmetric list can be turned back into a square matrix with
        wrapList2SquareMatrix().

        If recursion is not needed, use something like this instead:
            data = [i for s in data for i in s]
        https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python
        '''
    flat = []
    for item in data:
        descend = hasattr(item, '__iter__') and not isinstance(item, basestring)
        if(not descend):
            flat.append(item)
            continue
        flat.extend(flatten_list(item))
    return flat
127 
128 
def wrapList2SquareMatrix(data):
    ''' Wrap a flat list into a 2D matrix with square/equal dimensions.
        Functionally, this is the opposite of flatten_list(), assuming a
        square size.

        Output:
        =======
        A list of size rows of size items each, filled row by row, where
        size*size == len(data). Returns False if len(data) is not a perfect
        square.
        '''
    count = len(data)
    # Check squareness with integers: the fractional part of a float square
    # root is not a reliable test for large lengths.
    size = int(round(m.sqrt(count)))
    if(size*size != count): return False
    return [[data[row*size+col] for col in range(size)] for row in range(size)]
142 
def drange(start, stop, step):
    ''' Generate a list of values from start up to stop in increments of
        step, using double-precision floats.

        Every value is computed as start + i*step rather than by repeated
        addition, so rounding errors do not accumulate.

        Output:
        =======
        A list that always begins with start and ends with the first value
        that is >= stop. The end point is therefore included, and overshot
        when (stop - start) is not a multiple of step. If start >= stop the
        result is [start].

        Raises:
        =======
        ValueError if step is not positive while start < stop (this used to
        loop forever).
        '''
    if(start < stop and not step > 0):
        raise ValueError('drange() requires a positive step when start < stop')
    values = [start]
    i = 0
    while values[-1] < stop:
        i += 1
        values.append(start + i*step)
    return values
151 
152 
def pointList2ColumnList(data):
    ''' Take a list of points and convert it to a list of columns.
        Essentially, a transpose.
        [[p1x,p1y,p1z,..],[p2x,p2y,p2z,..],..] -> [[p1x,p2x,..],[p1y,p2y,..],..]

        Output:
        =======
        A list of lists. Points of unequal length are truncated to the
        shortest one, as zip() does.
        '''
    # Built with a comprehension so it also works where zip() is lazy and
    # does not support item assignment.
    return [list(column) for column in zip(*data)]
162 
def unique(nonUniqueList):
    ''' Return the items of nonUniqueList with duplicates removed, keeping
        the first occurrence of each. Unlike list(set(...)) the original
        order is preserved. Items must be hashable. '''
    seen = set()
    ordered = []
    for item in nonUniqueList:
        if(item in seen): continue
        seen.add(item)
        ordered.append(item)
    return ordered
168 
def mergeDicts(dictA, dictB, overwrite=False, customMergeCommand=None):
    ''' Merge dicts of lists, ints or floats.

        Input:
        ======
        overwrite:          return a new dict in which duplicate keys take
                            the value from dictB
        customMergeCommand: Not implemented

        Output:
        =======
        With overwrite, a new dict. Otherwise dictA itself, updated in place:
        values under keys present in both dicts are combined with +=, and keys
        only found in dictB are copied over. An empty list passed as dictA is
        treated as an empty dict.
        '''
    if(overwrite):
        merged = list(dictA.items())
        merged = merged + list(dictB.items())
        return dict(merged)

    # Combine the values of the shared keys
    for key in dictA:
        if(key not in dictB): continue
        dictA[key] += dictB[key]

    # Bring over the keys that only dictB has
    for key in dictB:
        if(key in dictA): continue
        if(dictA == []): dictA = {key:dictB[key]}
        else: dictA[key] = dictB[key]
    return dictA
185 
def list2int(var):
    ''' Convert every item of var to int, descending into nested lists (exact
        list type only). Items are not type-checked before conversion. '''
    converted = []
    for item in var:
        if(type(item) is list): converted.append(list2int(item))
        else: converted.append(int(item))
    return converted
189 
def list2str(var):
    ''' Convert every item of var to str, descending into nested lists (exact
        list type only). Items are not type-checked before conversion. '''
    converted = []
    for item in var:
        if(type(item) is list): converted.append(list2str(item))
        else: converted.append(str(item))
    return converted
193 
194 
def crawlDirsForFilePattern(pattern, path=None, returnList=False, prependPath=True):
    ''' Search recursively through the directory tree specified by path to
        find a file that matches pattern (can have * wild cards).

        Input:
        ======
        path defaults to the current working directory, looked up when the
        function is called (a default of os.getcwd() in the signature would be
        frozen at import time and ignore any later os.chdir()).

        Output:
        =======
        First matching filename, with its directory prepended unless
        prependPath is False, or '' if nothing matches. If returnList=True,
        returns a list of all matching filenames instead (possibly empty).
        '''
    if(path is None): path = os.getcwd()
    matches = []
    for rootDir, dirs, files in os.walk(path):
        for filename in fnmatch_filter(files, pattern):
            if(prependPath): match = os.path.join(rootDir, filename)
            else: match = filename
            if(not returnList): return match
            matches.append(match)
    if(not returnList): return ''
    return matches
219 
def fileLineLen(filename, countLimit=None):
    ''' Return the number of lines in the target filename.

        Input:
        ======
        If countLimit is passed as an int, True will be returned immediately
        if the file contains more lines than countLimit. This is a
        significantly more efficient way of determining minimum file sizes
        for very large files.

        Output:
        =======
        The line count as an int (0 for an empty file, which used to raise an
        UnboundLocalError), or True if countLimit was exceeded.
        '''
    limited = type(countLimit) is int
    count = 0
    with open(filename) as f:
        for count, _ in enumerate(f, 1):
            if(limited and count > countLimit): return True
    return count
233 
234 
235 
def stripKwargsForUncleanTarget(kwargs, func):
    ''' Use this function to seamlessly pass kwargs into a function (or class
        instance constructor) that does not support **kwargs.

        Input:
        ======
        kwargs: dict of candidate keyword arguments (left untouched)
        func:   target function, or a class whose __init__ is inspected

        Output:
        =======
        A new dict holding only the entries of kwargs whose key is a named
        argument of the target.

        Entries are selected by name only. Previously, unsupported keys were
        blanked out inside the caller's dict and the result was filtered on
        truthiness, which also discarded supported arguments set to 0, False,
        '' or None.
        '''
    if(len(kwargs) <= 0): return {}
    import inspect
    # getargspec() is the only choice on Python 2 and was removed from later
    # Python 3 releases, where getfullargspec() replaces it.
    getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if(inspect.isclass(func)): target = func.__init__
    else: target = func
    accepted = getspec(target).args
    return dict((key, value) for key, value in kwargs.items() if key in accepted)
248 
249 
def prepareInstanceMethodForPickling():
    ''' Prepare instance methods for multithreading by registering pickle
        support for them with the copy_reg library.

        This call to copy_reg.pickle allows you to pass methods as the first
        arg to mp.Pool methods. Without it, `pool.map(self.foo, ...)` results
        in
            PicklingError: Can't pickle <type 'instancemethod'>: attribute
            lookup __builtin__.instancemethod failed

        Output:
        =======
        Always returns True.
        '''
    copy_reg_pickle(types_MethodType, _pickle_method, _unpickle_method)
    return True
261 
def _pickle_method(method):
    ''' Reduce a bound method to (_unpickle_method, (name, instance, class))
        so that it can be pickled. Private (double-underscore) method names
        are stored in their mangled _Class__name form. '''
    # Author: Steven Bethard
    # http://bytes.com/topic/python/answers/552476-why-cant-you-pickle-instancemethods
    func_name = method.im_func.__name__
    obj = method.im_self
    cls = method.im_class
    isPrivate = func_name.startswith('__') and not func_name.endswith('__')
    if(isPrivate): cls_name = cls.__name__.lstrip('_')
    else: cls_name = ''
    if(cls_name): func_name = '_' + cls_name + func_name
    return _unpickle_method, (func_name, obj, cls)
274 
275 def _unpickle_method(func_name, obj, cls):
276  # Author: Steven Bethard
277  # http://bytes.com/topic/python/answers/552476-why-cant-you-pickle-instancemethods
278  for cls in cls.mro():
279  try:
280  func = cls.__dict__[func_name]
281  except KeyError:
282  pass
283  else:
284  break
285  return func.__get__(obj, cls)
286 
287 
288 
def commandStacktoShellString(cmd_stack):
    ''' Prepare a shell-friendly command with a list of arguments.

        Every argument is converted to str and the results are joined with
        single spaces. Arguments are wrapped in double quotes unless they
        start with '-', with a double quote, or with a digit.

        Arguments are converted before they are inspected, so non-string
        items such as ints are accepted.
        '''
    unquotedPrefixes = ('-', '"') + tuple('0123456789')
    parts = []
    for arg in cmd_stack:
        arg = str(arg)
        if(arg.startswith(unquotedPrefixes)): parts.append(arg)
        else: parts.append('"'+arg+'"')
    return ' '.join(parts)
294 
295 
def command_timeout(cmd, timeout=0, prependMsg='', descriptor='command', debug=False, logging=None, verbose=0):
    ''' Call a shell command and wait for it to exit, killing it if it does
        not exit within timeout seconds.

        Input:
        ======
        cmd:        argument list, or a string that is split on single spaces
        timeout:    seconds to wait before killing the process; 0 waits
                    indefinitely
        prependMsg: text placed in front of every status message
        descriptor: name given to the command in failure messages
        debug:      with a logging object, send messages to logging.debug()
                    instead of printing them
        logging:    object exposing a debug() method
        verbose:    non-zero prints status messages; 2 or more returns the
                    command's standard output

        Output:
        =======
        None if the command was killed on timeout. Otherwise True when
        verbose < 2 (whatever the exit code), or the captured standard output
        when verbose >= 2.
        '''
    import subprocess
    from datetime import datetime

    if(type(cmd) == str): cmd = cmd.split(' ')
    if(debug and logging): logging.debug(' '.join(cmd))
    elif(verbose): print(str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') Processing...')
    start = datetime.now()
    # NOTE(review): both pipes are only read after the process has exited; a
    # command producing more output than the pipe buffer holds may block here.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # The process is polled once per second
    while process.poll() is None:
        time.sleep(1)
        # total_seconds() rather than .seconds, which leaves out whole days
        # and so wraps around after 24 hours
        elapsed = (datetime.now() - start).total_seconds()
        if(timeout and elapsed > timeout):
            terminate_process(process.pid)
            return None

    finish_code = process.returncode
    if(finish_code!=0):
        failure = str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') '+str(descriptor).title()+' failed with code '+str(finish_code)
        if(debug and logging): logging.debug(failure)
        elif(verbose): print(failure)

    if(verbose < 2): return True
    return process.stdout.read()
323 
def command_async_pool(cmds, threads=1, timeout=0, prependMsg='', descriptor='command', sungrid=False, debug=False, logging=None, stopIfError=False, verbose=0):
    ''' Pass a list of cmds to run asynchronously with a timeout.

        At most threads commands run at the same time. The pool is polled
        once per second until every command has either exited or been
        terminated.

        Sungrid is not yet supported.

        Input:
        ======
        cmds:        a command or list of commands; each is an argument list
                     or a string that is split on single spaces
        threads:     maximum number of simultaneous processes
        timeout:     seconds after which a running command is terminated;
                     0 disables the timeout
        prependMsg:  text placed in front of every status message
        descriptor:  name given to the commands in status messages
        sungrid:     unused
        debug:       with a logging object, send messages to logging.debug()
                     instead of printing them
        logging:     object exposing a debug() method
        stopIfError: terminate the remaining commands and stop as soon as any
                     command has a non-zero exit code
        verbose:     non-zero prints status messages; below 3 the commands'
                     stdout is captured, at 3 it is inherited with stderr
                     discarded, at 4 and above both are inherited

        Output:
        =======
        False if threads < 1. When verbose < 3, a flag per command that is
        True if that command exited by itself, otherwise the out list. A
        single value instead of a list is returned if only one command was
        passed.
        NOTE(review): out is only filled when verbose < 3, so the value
        returned when verbose >= 3 holds None for every command -- confirm
        whether the two return branches are inverted.
        NOTE(review): the nesting below was reconstructed from a listing that
        had lost its indentation -- confirm against the original file.
        '''
    if(threads < 1): return False
    if(type(cmds) is not list): cmds = [cmds]

    import subprocess
    from datetime import datetime


    # Per-command bookkeeping, all indexed like cmds
    starts = [None for _ in cmds]              # launch time
    finishConditions = [False for _ in cmds]   # True once exited or terminated
    notTerminated = [False for _ in cmds]      # True only if it exited by itself
    procs = [None for _ in cmds]               # Popen object while running
    out = [None for _ in cmds]                 # captured stdout
    finish_codes = [0 for _ in cmds]           # exit codes

    # Turn string commands into argument lists, dropping double quotes except
    # for an argument that is exactly ""
    for cIx in range(len(cmds)):
        if(type(cmds[cIx]) == str):
            cmds[cIx] = cmds[cIx].strip().split(' ')
            cmds[cIx] = [x.replace('"','') if x!='""' else x for x in cmds[cIx]]

    # thread is only a label used in messages; it cycles from 1 to threads as
    # commands are started
    thread = 1
    while False in finishConditions:

        for cIx in range(len(procs)):
            if(finishConditions[cIx]): continue
            #Record closed processes
            if(procs[cIx] and procs[cIx].poll() is not None):
                finishConditions[cIx] = True
                notTerminated[cIx] = True
                if(verbose < 3): out[cIx] = procs[cIx].stdout.read()
                finish_codes[cIx] = procs[cIx].returncode
                if(finish_codes[cIx]!=0):
                    if(debug and logging): logging.debug(str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') '+str(descriptor).title()+' #'+str(cIx+1)+' on thread #'+str(thread)+' (PID '+str(procs[cIx].pid)+') failed with code '+str(finish_codes[cIx]))
                    elif(verbose): print(str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') '+str(descriptor).title()+' #'+str(cIx+1)+' on thread #'+str(thread)+' (PID '+str(procs[cIx].pid)+') failed with code '+str(finish_codes[cIx]))
                # Clearing the entry frees a slot in the pool
                procs[cIx] = None
            #Verify if thread is due for timeout
            elif(timeout and starts[cIx] and (datetime.now() - starts[cIx]).seconds > timeout):
                try: terminate_process(procs[cIx].pid)
                except: pass # Handle race condition errors when trying to kill a process
                finishConditions[cIx] = True
                if(verbose < 3): out[cIx] = procs[cIx].stdout.read()
                finish_codes[cIx] = procs[cIx].returncode
                if(debug and logging): logging.debug(str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') '+str(descriptor).title()+' #'+str(cIx+1)+' on thread #'+str(thread)+' (PID '+str(procs[cIx].pid)+') timeout out and was terminated')
                elif(verbose): print(str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') '+str(descriptor).title()+' #'+str(cIx+1)+' on thread #'+str(thread)+' (PID '+str(procs[cIx].pid)+') timeout out and was terminated')
                procs[cIx] = None
            #Start command if below dedicated number of threads
            elif(len(filter(None, procs)) < threads and procs[cIx]==None):
                starts[cIx] = datetime.now()
                # The verbosity level decides which output streams are
                # inherited, captured or discarded
                if(verbose >= 4): procs[cIx] = subprocess.Popen(cmds[cIx])
                elif(verbose >= 3): procs[cIx] = subprocess.Popen(cmds[cIx], stderr=open(os.devnull))
                else: procs[cIx] = subprocess.Popen(cmds[cIx], stdout=subprocess.PIPE, stderr=open(os.devnull))
                if(debug and logging): logging.debug(' '.join(cmds[cIx]))
                elif(verbose): print(str(prependMsg)+'('+str(time.strftime('%H:%M:%S'))+') Running '+str(descriptor)+' #'+str(cIx+1)+' on thread #'+str(thread)+' (PID '+str(procs[cIx].pid)+')')
                thread += 1
                if(thread > threads): thread = 1

        #Check errors and stop if this mode elected
        if(stopIfError and not all([x==0 for x in finish_codes])):
            for cIx in range(len(procs)):
                if(finishConditions[cIx]): continue
                # Commands that were never started have no process; the
                # resulting error is swallowed together with kill failures
                try: terminate_process(procs[cIx].pid)
                except: pass
            break

        time.sleep(1)

    if(len(cmds) == 1):
        if(verbose < 3): return notTerminated[0]
        return out[0]
    else:
        if(verbose < 3): return notTerminated
        return out
400 
def terminate_process(pid):
    ''' Forcefully terminate the process with the given pid.

        On Windows the process is opened with terminate rights and ended
        through kernel32.TerminateProcess with exit code -1. Elsewhere it is
        sent SIGKILL, followed by a non-blocking os.waitpid(-1, ...) call.
        NOTE(review): that waitpid call collects the status of any one child
        process that has already exited, not specifically pid -- confirm that
        this is intended.

        Output:
        =======
        Always returns True; errors from the underlying system calls
        propagate to the caller.
        '''
    if(sys.platform == 'win32'):
        import ctypes
        PROCESS_TERMINATE = 1
        handle = ctypes.windll.kernel32.OpenProcess(PROCESS_TERMINATE, False, pid)
        ctypes.windll.kernel32.TerminateProcess(handle, -1)
        ctypes.windll.kernel32.CloseHandle(handle)
    else:
        from signal import SIGKILL
        os.kill(pid, SIGKILL)
        os.waitpid(-1, os.WNOHANG)
    return True
413 
414 
def memory_inspection(variable, name='', indent=0, column_size=45, verbose=0):
    ''' Return the real size of variable in memory, as measured by
        dev.memoryUsage.asizeof(), optionally printing it next to the
        referential size reported by sys.getsizeof().

        Input:
        ======
        name:        label used in the printed line; defaults to
                     str(variable), shortened to 20 characters plus '...'
        indent:      minimum width to which the real size is right-justified
        column_size: width to which the label column is left-justified
        verbose:     non-zero prints the sizes

        Alternative Usage inline (for more advanced debugging):
        =======================================================
        import sys; import dev.memoryUsage as memUs; print('Memory usage in bytes; referential: {0}, real: {1}'.format(sys.getsizeof(variable),memUs.asizeof(variable)))

        Inspect full memory in variable space:
        ======================================
        [tvaLib.memory_inspection(eval(var), name=var, verbose=2) for var in dir() if var!=sys]
        '''
    import dev.memoryUsage as memUs
    if(not name):
        name = str(variable)
        if(len(name) > 20): name = name[:20]+'...'
    real_size = memUs.asizeof(variable)
    if(not verbose): return real_size

    labelColumn = ('Memory usage of '+str(name)).ljust(column_size)
    refColumn = (' ref: '+'{:,}'.format(sys.getsizeof(variable))+' Bytes;').ljust(20)
    realColumn = '{:,}'.format(real_size).rjust(indent, ' ')
    print(labelColumn+refColumn+' real: '+realColumn+' Bytes')
    return real_size
435 
def memory_inspection_obj(obj, verbose=1):
    ''' Inspect the memory used by obj, then by each attribute listed by
        dir(obj), using memory_inspection().

        Output:
        =======
        The real size of obj as a whole.
        '''
    objectSize = memory_inspection(obj, name='object', verbose=verbose)
    for attributeName in dir(obj):
        attribute = getattr(obj, attributeName)
        memory_inspection(attribute, name='object.'+attributeName, indent=4, verbose=verbose)
    return objectSize
443 
def memory_inspection_list(list_, names=None, verbose=1):
    ''' Measure the real size in memory of every object in list_.

        Input:
        ======
        names:   one label per object; ignored unless there are exactly as
                 many labels as objects. Missing labels default to str() of
                 the object, shortened to 20 characters plus '...'
        verbose: 1 prints the average object size, 2 or more also prints the
                 size of each object

        Output:
        =======
        The list of real sizes, in the order of list_ (nothing used to be
        returned).

        The per-object line reports that object's own size (it used to print
        the running average), and an empty list_ no longer causes a division
        by zero.
        '''
    if(names is None or len(names)!=len(list_)): names = ['' for x in list_]
    real_sizes = []
    for l_,name in zip(list_,names):
        if(not name):
            name = str(l_)
            if(len(name) > 20): name = name[:20]+'...'
        real_sizes.append(memory_inspection(l_, name=name))
        if(verbose >= 2): print('Real size of '+name+': '+'{:,}'.format(real_sizes[-1])+' Bytes')
    if(verbose and real_sizes): print('Average object size: '+'{:,}'.format(int(sum(real_sizes)/float(len(real_sizes))))+' Bytes')
    return real_sizes
455 
456 
class ProgressBar(object):
    ''' Creates a text-based progress bar. Call the object with the `print'
        command to see the progress bar, which looks something like this:

        [=======>        22%                  ]

        You may specify the progress bar's width, min and max values on init.
        The bar is also printed by updateAmount() whenever its text changes.
        '''

    def __init__(self, minValue = 0, maxValue = 100, totalWidth=80):
        self.progBar = "[]"     # This holds the progress bar string
        self.min = minValue
        self.max = maxValue
        self.span = maxValue - minValue
        self.width = totalWidth
        self.amount = 0         # When amount == max, we are 100% done
        self.oldProg = 0        # Last bar string that was printed
        self.updateAmount(0)    # Build progress bar string

    def __str__(self):
        ''' Return the current progress bar string. '''
        return self.progBar

    def updateAmount(self, newAmount = 0):
        ''' Update the progress bar with the new amount (with min and max
            values set at initialization; if it is over or under, it takes
            the min or max value as a default). The bar is printed if its
            text has changed since the last call. '''

        if newAmount < self.min: newAmount = self.min
        if newAmount > self.max: newAmount = self.max
        self.amount = newAmount

        # Figure out the new percent done, round to an integer. An empty
        # range (min == max) counts as done instead of dividing by zero.
        if self.span:
            diffFromMin = float(self.amount - self.min)
            percentDone = (diffFromMin / float(self.span)) * 100.0
        else:
            percentDone = 100.0
        percentDone = int(round(percentDone))

        # Figure out how many hash bars the percentage should be
        allFull = self.width - 2
        numHashes = int(round((percentDone / 100.0) * allFull))

        # Build a progress bar with an arrow of equal signs; special cases
        # for empty and full
        if numHashes == 0:
            self.progBar = "[>%s]" % (' '*(allFull-1))
        elif numHashes == allFull:
            self.progBar = "[%s]" % ('='*allFull)
        else:
            self.progBar = "[%s>%s]" % ('='*(numHashes-1),
                                        ' '*(allFull-numHashes))

        # Figure out where to put the percentage, roughly centered. Floor
        # division keeps the slice index an integer.
        percentPlace = (len(self.progBar) // 2) - len(str(percentDone))
        percentString = str(percentDone) + "%"

        # Slice the percentage into the bar
        self.progBar = ''.join([self.progBar[0:percentPlace], percentString, self.progBar[percentPlace+len(percentString):]])

        # Output
        if(self.progBar != self.oldProg):
            print(self.progBar+'\r')
            self.oldProg = self.progBar
ProgressBar.
Definition: tools.py:459
def fileLineLen(filename, countLimit=None)
Definition: tools.py:220
def printTimeStamp(message, justifyWidth=50)
Output & debug.
Definition: tools.py:42
def __init__(self, minValue=0, maxValue=100, totalWidth=80)
Definition: tools.py:467
def join(obj1, obj2, postSmoothing=True)
Definition: tools_obj.py:816
def toChunks(threads, iterable, padvalue=None, asList=True)
Iterable operations.
Definition: tools.py:78
def pointList2ColumnList(data)
Definition: tools.py:153
def grabError()
Definition: tools.py:57
def flatten_list(data)
Definition: tools.py:112
def explodeList(haystack, needle=None, returnIndeces=False)
Definition: tools.py:87
def commandStacktoShellString(cmd_stack)
Process manipulation.
Definition: tools.py:291
def memory_inspection(variable, name='', indent=0, column_size=45, verbose=0)
Debug.
Definition: tools.py:417
def unique(nonUniqueList)
Definition: tools.py:163
def mergeDicts(dictA, dictB, overwrite=False, customMergeCommand=None)
Definition: tools.py:169
def list2str(var)
Definition: tools.py:190
def memory_inspection_list(list_, names=[], verbose=1)
Definition: tools.py:444
def printNotice(message, label='Notice', indent=0)
Definition: tools.py:53
def drange(start, stop, step)
Definition: tools.py:143
def terminate_process(pid)
Definition: tools.py:401
def list2int(var)
Definition: tools.py:186
def command_timeout(cmd, timeout=0, prependMsg='', descriptor='command', debug=False, logging=None, verbose=0)
Definition: tools.py:296
def memory_inspection_obj(obj, verbose=1)
Definition: tools.py:436
def printWarning(message, label='Warning', indent=0)
Definition: tools.py:49
def stripKwargsForUncleanTarget(kwargs, func)
Function tools.
Definition: tools.py:238
def updateAmount(self, newAmount=0)
Definition: tools.py:477
def prepareInstanceMethodForPickling()
Pickling hacks.
Definition: tools.py:252
def printTable(tableData, indent=4, padding=' ')
Definition: tools.py:63
def crawlDirsForFilePattern(pattern, path=os.getcwd(), returnList=False, prependPath=True)
File IO and metadata.
Definition: tools.py:197
def wrapList2SquareMatrix(data)
Definition: tools.py:129
def command_async_pool(cmds, threads=1, timeout=0, prependMsg='', descriptor='command', sungrid=False, debug=False, logging=None, stopIfError=False, verbose=0)
Definition: tools.py:324