123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
- #!/usr/bin/env python
- # __author__ = "Ronie Martinez"
- # __copyright__ = "Copyright 2018-2019, Ronie Martinez"
- # __credits__ = ["Ronie Martinez"]
- # __maintainer__ = "Ronie Martinez"
- # __email__ = "ronmarti18@gmail.com"
- def tokenize(data):
- iterable = iter(data)
- buffer = ''
- while True:
- try:
- char = next(iterable)
- if char == '\\':
- if buffer == '\\':
- yield buffer + char
- buffer = ''
- continue
- elif len(buffer):
- yield buffer
- buffer = char
- try:
- buffer += next(iterable)
- except StopIteration:
- break
- elif char.isalpha():
- if len(buffer):
- if buffer.endswith('}'):
- yield buffer
- yield char
- buffer = ''
- elif buffer.startswith('\\'):
- buffer += char
- else:
- yield char
- elif char.isdigit():
- if len(buffer):
- yield buffer
- buffer = char
- while True:
- try:
- char = next(iterable)
- except StopIteration:
- break
- if char.isspace():
- yield buffer
- buffer = ''
- break
- elif char.isdigit() or char == '.':
- buffer += char
- else:
- if buffer.endswith('.'):
- yield buffer[:-1]
- yield buffer[-1]
- else:
- yield buffer
- buffer = ''
- if char == '\\':
- buffer = char
- else:
- yield char
- break
- elif char.isspace():
- if len(buffer):
- yield buffer
- buffer = ''
- elif char in '{}*':
- if buffer.startswith(r'\begin') or buffer.startswith(r'\end'):
- if buffer.endswith('}'):
- yield buffer
- yield char
- buffer = ''
- else:
- buffer += char
- else:
- if len(buffer):
- yield buffer
- buffer = ''
- yield char
- else:
- if len(buffer):
- yield buffer
- buffer = ''
- if len(char):
- yield char
- except StopIteration:
- break
- if len(buffer):
- yield buffer
|