tvaLib
tools_parse.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # tvaLib Copyright (c) 2012-2016 Paul G. St-Aubin
3 # Ecole Polytechnique de Montreal, McGill University
4 # Python 2.7; (dt) Spyder Windows 10 64-bit; ipython Ubuntu 15.04 64-bit
5 
8 import sys, unicodedata, os
9 from re import search as re_search
10 from re import sub as re_sub
11 
12 
def list1D(item, i_type='int', autosort=True, removeEmpty=True):
    ''' Parse string format into 1 dimensional array (values). Will
        automatically generate range for int values
        (i.e. [1,3-5] -> [1,3,4,5]).

        Input:
        ======
        item:        string such as '[1,3-5]'; None and False are returned
                     unchanged
        i_type:      'int', 'float' or 'bool'; any other value keeps the
                     tokens as strings
        autosort:    sort the result (int values only)
        removeEmpty: drop empty tokens (i.e. '1,,2') before conversion

        Output:
        =======
        List of parsed values. An empty list is returned when nothing is
        left to parse or when a token cannot be converted.

        Notes:
        ======
        For int values, '-' is always read as a range separator, so a
        negative number such as '-5' fails conversion and yields [].
        Expanded ranges are placed after the individually listed values
        (only visible when autosort is False). Values produced by range
        expansion are de-duplicated unless the input itself already listed
        the same token twice.

        Alternative:
        ============
        import ast
        x = '[0.90837,0.90837]'
        ast.literal_eval(x)
        '''
    if item in (None, False):
        return item

    # A regex is used instead of str.translate(None, ...) because the latter
    # only works on Python 2 byte strings.
    tokens = re_sub(r'[\[\] \n]', '', item).split(',')
    if removeEmpty:
        tokens = [tok for tok in tokens if tok]
    if not tokens:
        return []

    try:
        if i_type == 'float':
            return [float(tok) for tok in tokens]
        if i_type == 'bool':
            return [str2bool(tok) for tok in tokens]
        if i_type != 'int':
            return list(tokens)

        # Duplicates are only tolerated when the caller wrote them explicitly.
        allow_duplicates = len(set(tokens)) != len(tokens)

        singles = []
        expanded = []
        for tok in tokens:
            if '-' in tok:
                bounds = tok.split('-')
                expanded.extend(range(int(bounds[0]), int(bounds[-1]) + 1))
            elif tok:
                singles.append(int(tok))
        # Empty tokens are skipped explicitly above; filtering on truthiness
        # here would also throw away a legitimate value of 0.
        values = singles + expanded

        if not allow_duplicates:
            seen = set()
            unique = []
            for value in values:
                if value not in seen:
                    seen.add(value)
                    unique.append(value)
            values = unique

        if autosort:
            values.sort()
        return values
    except ValueError:
        return []
53 
54 
def list2D(item, i_type='int'):
    ''' Parse string format into 2 dimensional array (values).

        Input:
        ======
        item:   string such as '[[1,2],[3,4]]'
        i_type: 'int', 'float' or 'bool'; any other value keeps the cells
                as strings

        Output:
        =======
        List of rows. Rows containing a value that cannot be converted are
        skipped. [[]] is returned when the input holds no rows at all.
        '''
    # A regex is used instead of str.translate(None, ...) because the latter
    # only works on Python 2 byte strings.
    compact = re_sub(r'[ \n]', '', item)
    rows = [row for row in compact.split('],[') if row]
    if not rows:
        return [[]]

    result = []
    for row in rows:
        cells = [re_sub(r'[\[\](),]', '', cell) for cell in row.split(',')]
        try:
            if i_type == 'float':
                result.append([float(cell) for cell in cells])
            elif i_type == 'bool':
                result.append([str2bool(cell) for cell in cells])
            elif i_type == 'int':
                result.append([int(cell) for cell in cells])
            else:
                result.append(list(cells))
        except ValueError:
            # Best effort: a malformed row is dropped, the others are kept.
            continue
    return result
75 
76 
def list3D(item, i_type='int'):
    ''' Parse string format into 3 dimensional array (values).

        Input:
        ======
        item:   string such as '[[[1,2],[3,4]],[[5,6],[7,8]]]'
        i_type: 'int', 'float' or 'bool'; any other value keeps the cells
                as strings

        Output:
        =======
        Nested list (list of lists of lists) of parsed values. Conversion
        errors are not caught here: int() and float() raise ValueError on a
        cell they cannot convert.
        '''
    # Resolve the converter once; None means "keep the cleaned string".
    if i_type == 'float':
        convert = float
    elif i_type == 'bool':
        convert = str2bool
    elif i_type == 'int':
        convert = int
    else:
        convert = None

    # A regex is used instead of str.translate(None, ...) because the latter
    # only works on Python 2 byte strings.
    compact = re_sub(r'[ \n]', '', item)
    result = []
    for plane in compact.split(']],[['):
        rows = []
        for row in plane.split('],['):
            cells = [re_sub(r'[\[\] \n]', '', cell) for cell in row.split(',')]
            if convert is not None:
                cells = [convert(cell) for cell in cells]
            rows.append(cells)
        result.append(rows)
    return result
97 
def lists2str(data):
    ''' Format an iterable as a bracketed, comma-separated string (no space
        after the commas); each element is rendered with str(). '''
    body = ','.join(str(entry) for entry in data)
    return '[{0}]'.format(body)
100 
def str2bool(string):
    ''' Interpret a string as a boolean: True for 'True', 'true', '1', 'y'
        and 'Y'; False for anything else. '''
    return string in ('True', 'true', '1', 'y', 'Y')
105 
def versionStringParser(string):
    ''' Interpret a string into a specific version (handy for comparing
        version changes by level).

        Input:
        ======
        string: text containing a dotted version number, i.e. 'v1.2.3'

        Output:
        =======
        List of ints, one per level (i.e. [1, 2, 3]), taken from the first
        dotted number found; False if the string contains none.
        '''
    # Find version pattern
    match = re_search(r'(([0-9])+\.)+([0-9])+', string)
    if not match:
        return False
    # Split by period
    return [int(level) for level in match.group().split('.')]
115 
def versionIsMoreRecent(versionList1, versionList2, verificationDepth=3, orEqual=False):
    ''' Verify if versionList1 is more recent than versionList2.

        Input:
        ======
        versionList1:      version as a list of ints, or a string to parse
        versionList2:      version as a list of ints, or a string to parse
        verificationDepth: number of leading levels that are compared
        orEqual:           value returned when the versions are equal (or
                           equal over the first verificationDepth levels)

        Output:
        =======
        True or False. An empty/unparsable version 1 yields True; otherwise
        an empty/unparsable version 2 yields False.
        '''
    # Parse to version list if necessary
    if isinstance(versionList1, str):
        versionList1 = versionStringParser(versionList1)
    if isinstance(versionList2, str):
        versionList2 = versionStringParser(versionList2)
    if not versionList1:
        return True
    if not versionList2:
        return False
    if versionList1 == versionList2:
        return bool(orEqual)

    for depth, level in enumerate(versionList1):
        # Every compared level matched: the versions tie at this depth.
        if depth >= verificationDepth:
            return bool(orEqual)
        # Version 1 carries an extra level that version 2 does not have.
        if depth >= len(versionList2):
            return True
        if level < versionList2[depth]:
            return False
        # A higher level settles the comparison; lower-order levels must not
        # be allowed to overturn it (i.e. 2.0.0 is newer than 1.5.0).
        if level > versionList2[depth]:
            return True

    # Version 1 is a strict prefix of version 2.
    if len(versionList1) < len(versionList2):
        return False

    return True
136 
def versionFromHgCommit(default=''):
    ''' Get latest version number from mercurial repository, if it exists.

        The first line of .hg/last-message.txt (relative to the current
        working directory) is searched for a tag of the form
        'R<version> u. <number>-<number>-<number>'.

        Input:
        ======
        default: value returned when the file is missing, cannot be read or
                 holds no such tag

        Output:
        =======
        The matched tag text, or default.
        '''
    message_path = os.path.join('.hg', 'last-message.txt')
    if not os.path.isfile(message_path):
        return default
    try:
        with open(message_path) as fp:
            first_line = fp.readline()
    except (IOError, OSError, UnicodeError):
        # Best effort: an unreadable commit message is not worth failing on.
        return default
    match = re_search(r'R(([0-9])+\.)+([0-9])+ u. ([0-9])+-([0-9])+-([0-9])+', first_line)
    if match is None:
        return default
    return match.group(0)
144 
def clean_file_name(filename, repl='_'):
    ''' Substitute every problematic character of a candidate file name.
        ***THIS ALSO REPLACES SLASHES*** (forward and backward).

        Input:
        ======
        filename: candidate file name
        repl:     character to replace illegal characters with

        Output:
        =======
        The file name with each illegal character replaced by repl.
        '''
    illegal_characters = r'[\[\]\/\\;,><&\*:%=\+@!#\^\|\?]'
    return re_sub(illegal_characters, repl, filename)
154 
def clean_file_path(filename, repl='_'):
    ''' Substitute every problematic character of a candidate file path.
        Slashes are left untouched; use clean_file_name for file-specific
        cleaning. The colon is among the replaced characters, so a drive
        prefix such as 'C:' becomes 'C_'.

        Input:
        ======
        filename: candidate file path
        repl:     character to replace illegal characters with

        Output:
        =======
        The path with each illegal character replaced by repl.
        '''
    illegal_characters = r'[\[\];,><&\*:%=\+@!#\^\|\?]'
    return re_sub(illegal_characters, repl, filename)
164 
165 
def multilineTrim(docstring):
    ''' Trim leading and trailing white space of a multiline string entered
        using the standard multiline syntax in python. Useful for cleaning
        up snippets of markup embedded in code.

        Input:
        ======
        docstring: multiline string; None or '' give ''

        Output:
        =======
        The text with tabs expanded, the common indentation of every line
        after the first removed, trailing white space stripped from each
        line and blank lines removed from both ends.

        More reading:
        https://stackoverflow.com/questions/2504411/proper-indentation-for-python-multiline-strings
        '''
    if not docstring:
        return ''
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = docstring.expandtabs().splitlines()
    # Collect the indentation of every non-blank line (first line doesn't
    # count). A list is used rather than a sys.maxint sentinel, which is not
    # available on Python 3.
    indents = [len(line) - len(line.lstrip())
               for line in lines[1:] if line.strip()]
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indents:
        margin = min(indents)
        trimmed.extend(line[margin:].rstrip() for line in lines[1:])
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)
196 
def uni2tex(text):
    ''' Converts unicode text (i.e. french characters) into LaTeX-compatible
        markup.

        Input:
        ======
        text: unicode string

        Output:
        =======
        The text with every supported accented character rewritten as a
        LaTeX accent command (i.e. e-acute -> \\'{e}). Characters without a
        supported accent are passed through unchanged.

        Notes:
        ======
        A combining mark is applied to the character that precedes it; a
        mark with nothing before it is written over an empty group.
        Precomposed characters are only rewritten when their decomposition
        is a plain (base, mark) pair; tagged compatibility decompositions
        (i.e. the non-breaking space) are left as they are.

        https://tex.stackexchange.com/questions/23410/how-to-convert-characters-to-latex-code
        '''
    accents = {0x0300: '`', 0x0301: "'", 0x0302: '^', 0x0308: '"',
               0x030B: 'H', 0x0303: '~', 0x0327: 'c', 0x0328: 'k',
               0x0304: '=', 0x0331: 'b', 0x0307: '.', 0x0323: 'd',
               0x030A: 'r', 0x0306: 'u', 0x030C: 'v',}

    # unichr only exists on Python 2; chr covers the same range on Python 3.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    pieces = []
    for char in text:
        code = ord(char)

        # combining marks
        if unicodedata.category(char) in ("Mn", "Mc") and code in accents:
            # The mark modifies what was emitted just before it.
            base = pieces.pop() if pieces else ''
            pieces.append("\\%s{%s}" % (accents[code], base))
            continue

        # precomposed characters
        parts = unicodedata.decomposition(char).split()
        if len(parts) == 2 and not parts[0].startswith('<'):
            base_code = int(parts[0], 16)
            mark_code = int(parts[1], 16)
            if mark_code in accents:
                pieces.append("\\%s{%s}" % (accents[mark_code], to_char(base_code)))
                continue

        pieces.append(char)

    return ''.join(pieces)
def versionFromHgCommit(default='')
Definition: tools_parse.py:137
def versionStringParser(string)
Definition: tools_parse.py:106
def lists2str(data)
Definition: tools_parse.py:98
def join(obj1, obj2, postSmoothing=True)
Definition: tools_obj.py:816
def list1D(item, i_type='int', autosort=True, removeEmpty=True)
The following functions are used for parsing config string data.
Definition: tools_parse.py:15
def multilineTrim(docstring)
Definition: tools_parse.py:166
def list3D(item, i_type='int')
Definition: tools_parse.py:77
def uni2tex(text)
Definition: tools_parse.py:197
def list2D(item, i_type='int')
Definition: tools_parse.py:55
def str2bool(string)
Definition: tools_parse.py:101
def versionIsMoreRecent(versionList1, versionList2, verificationDepth=3, orEqual=False)
Definition: tools_parse.py:116
def clean_file_name(filename, repl='_')
Definition: tools_parse.py:145
def clean_file_path(filename, repl='_')
Definition: tools_parse.py:155