aggregator.py

#!/usr/bin/env python
# __author__ = "Ronie Martinez"
# __copyright__ = "Copyright 2016-2019, Ronie Martinez"
# __credits__ = ["Ronie Martinez"]
# __license__ = "MIT"
# __maintainer__ = "Ronie Martinez"
# __email__ = "ronmarti18@gmail.com"
from .commands import MATRICES
from .exceptions import EmptyGroupError, NumeratorNotFoundError, DenominatorNotFoundError
from .tokenizer import tokenize


def group(tokens, opening='{', closing='}'):
    # Collect tokens until the matching closing delimiter, recursing into
    # nested groups, then aggregate the collected tokens into a nested list.
    g = [] if opening == '{' else [r'\left' + opening]
    while True:
        token = next(tokens)
        if token == closing:
            if len(g):
                break
            else:
                raise EmptyGroupError
        elif token == opening:
            try:
                g.append(group(tokens))
            except EmptyGroupError:
                g += [opening, closing]
        else:
            g.append(token)
    if closing != '}':
        g.append(r'\right' + closing)
    return _aggregate(iter(g))
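
# Usage sketch (not part of the original module): group() assumes the opening
# delimiter has already been consumed, so iteration starts inside the group
# and stops at the matching closing token.
#
#     >>> group(iter(['a', '+', 'b', '}']))
#     ['a', '+', 'b']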


def process_row(tokens):
    # Split an already-flattened token list into rows: '&' tokens are dropped,
    # a row-break token closes the current row, and a single nested row is
    # unwrapped before returning.
    row = []
    content = []
    for token in tokens:
        if token == '&':
            pass
        elif token == '\\\\':
            if len(row):
                content.append(row)
                row = []
        else:
            row.append(token)
    if len(row):
        content.append(row)
    while len(content) == 1 and isinstance(content[0], list):
        content = content.pop()
    return content
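
# Usage sketch (not part of the original module): '&' tokens are dropped and
# the row-break token (two literal backslashes) closes the current row.
#
#     >>> process_row(['1', '&', '2', '\\\\', '3', '&', '4'])
#     [['1', '2'], ['3', '4']]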


def environment(begin, tokens):
    # Aggregate the body of an environment (\begin{...}...\end{...} or a
    # shorthand matrix command) into rows, capturing an optional alignment
    # specification along the way.
    if begin.startswith(r'\begin'):
        env = begin[7:-1]
    else:
        env = begin[1:]
    alignment = None
    content = []
    row = []
    while True:
        try:
            token = next_item_or_group(tokens)
            if isinstance(token, list):
                if env == 'array' and any(x in token for x in ['l', 'c', 'r', '|', ['l'], ['c'], ['r'], ['|']]):
                    alignment = token
                else:
                    row.append(process_row(token))
            elif token == r'\end{{{}}}'.format(env):
                break
            elif token == '&':
                pass
            elif token == '\\\\':
                content.append(row)
                row = []
            elif token == '[' and not len(content):
                try:
                    alignment = group(tokens, '[', ']')
                except EmptyGroupError:
                    pass
            elif token == '--':
                try:
                    next_token = next(tokens)
                    row.append([token, next_token])
                except StopIteration:
                    row.append(token)
            elif token in '_^':
                process_sub_sup(row, token, tokens)
            else:
                row.append(token)
        except EmptyGroupError:
            row += ['{', '}']
            continue
        except StopIteration:
            break
    if len(row):
        content.append(row)
    while len(content) == 1 and isinstance(content[0], list):
        content = content.pop()
    if alignment:
        # return r'\{}'.format(env), ''.join(alignment), content
        return content
    else:
        # TODO mark
        # return r'\{}'.format(env), content
        return content


def next_item_or_group(tokens):
    token = next(tokens)
    if token == '{':
        return group(tokens)
    return token


def _aggregate(tokens):
    # Walk the token stream and build a nested list structure: groups become
    # sublists, '\over' is rewritten as '\frac', '\sqrt[n]' becomes '\root',
    # and environments are expanded via environment().
    aggregated = []
    while True:
        try:
            token = next_item_or_group(tokens)
            if isinstance(token, list):
                aggregated.append(token)
            elif token == '[':
                try:
                    g = group(tokens, '[', ']')
                    if len(aggregated):
                        previous = aggregated[-1]
                        if previous == r'\sqrt':
                            root = next(tokens)
                            if root == '{':
                                try:
                                    root = group(tokens)
                                except EmptyGroupError:
                                    root = ''
                            aggregated[-1] = r'\root'
                            aggregated.append(root)
                        else:
                            pass  # FIXME: possible issues
                    aggregated.append(g)
                except EmptyGroupError:
                    aggregated += ['[', ']']
            elif token in '_^':
                process_sub_sup(aggregated, token, tokens)
            elif token.startswith(r'\begin') or token in MATRICES:
                aggregated += environment(token, tokens)
            elif token == r'\over':
                try:
                    numerator = aggregated.pop()
                    aggregated.append(r'\frac')
                    aggregated.append([numerator])
                    denominator = next_item_or_group(tokens)
                    aggregated.append([denominator])
                except IndexError:
                    raise NumeratorNotFoundError
                except (StopIteration, EmptyGroupError):
                    raise DenominatorNotFoundError
            else:
                aggregated.append(token)
        except EmptyGroupError:
            aggregated += ['{', '}']
            continue
        except StopIteration:
            break
    return aggregated


def aggregate(data):
    tokens = tokenize(data)
    return _aggregate(tokens)
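
# Usage sketch (not part of the original module); the expected results below
# assume the tokenizer splits the input into the usual single commands and
# characters, e.g. '\\frac', '{', '1', '}', ...
#
#     >>> aggregate(r'\frac{1}{2}')
#     ['\\frac', ['1'], ['2']]
#     >>> aggregate('x_{i}')
#     ['_', 'x', ['i']]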


def process_sub_sup(aggregated, token, tokens):
    # Attach a subscript/superscript to the previous item; when both '_' and
    # '^' apply to the same base they are merged into a single '_^' marker.
    try:
        previous = aggregated.pop()
        if isinstance(previous, str) and previous in '+-*/=[]_^{}':
            aggregated += [previous, token]
            return
        try:
            next_token = next_item_or_group(tokens)
            if len(aggregated) >= 2:
                if aggregated[-2] == '_' and token == '^':
                    aggregated[-2] = '_^'
                    aggregated += [previous, next_token]
                elif aggregated[-2] == '^' and token == '_':
                    aggregated[-2] = '_^'
                    aggregated += [next_token, previous]
                else:
                    aggregated += [token, previous, next_token]
            else:
                aggregated += [token, previous, next_token]
        except EmptyGroupError:
            aggregated += [previous, token, '{', '}']
        except StopIteration:
            return
    except IndexError:
        aggregated.append(token)
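
# Sketch of the '_^' merge (not part of the original module), again assuming
# the usual single-character tokens from tokenize():
#
#     >>> aggregate('a_b^c')
#     ['_^', 'a', 'b', 'c']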