dnd-file-gen/parse_pdf_text_block.py
2025-02-21 10:20:44 +01:00

164 lines
5 KiB
Python

import os, sys, re, unicodedata
def slugify(value, allow_unicode=False):
    """
    Reduce ``value`` to a string that is safe to use as a file name.

    Adapted from https://github.com/django/django/blob/master/django/utils/text.py

    The value is converted with ``str()``. When ``allow_unicode`` is False the
    text is NFKD-normalised and every character that cannot be encoded as
    ASCII is dropped; otherwise it is only NFKC-normalised. Afterwards every
    character that is not a word character, whitespace or a hyphen is removed
    and leading/trailing whitespace is stripped. Letter case and interior
    whitespace are left as they are.
    """
    text = str(value)
    if not allow_unicode:
        # Decompose accented characters, then discard the non-ASCII remainder.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)
    cleaned = re.sub(r'[^\w\s-]', '', text)
    return cleaned.strip()
class Spell(object):
    """
    A single spell parsed from a block of plain text.

    The block is expected to contain, in this order and one per non-blank
    line: the spell name, a level/school line (e.g. ``1st-level evocation``
    or ``Evocation cantrip``), then ``Classes: ``, ``Casting Time: ``,
    ``Range: ``, ``Components: `` and ``Duration: `` lines, followed by the
    description. The parsed values are stored as instance attributes and can
    be rendered into a markdown card with :meth:`generate_spell_card`.
    """

    # Card template: front matter between the '---' markers, then the
    # markdown body. Placeholders are filled from the instance attributes.
    blank_card = '\n'.join(('---',
                            'level: {level}',
                            'components: [{mats}]',
                            'material: {priced}',
                            'consumed: {consumed}',
                            'cast_time: {cast_time}',
                            'upcast: {upcastable}',
                            'classes: [{classes}]',
                            'tags: [{tags}]',
                            '---',
                            '#### {name}',
                            '*{subtitle}*',
                            '***',
                            '- **Casting Time:** {casting}',
                            '- **Range:** {range}',
                            '- **Components:** {components}',
                            '- **Duration:** {duration}',
                            '***',
                            '{description}',
                            '',
                            '*{book}*'))

    def __init__(self, textblock, title=""):
        """
        Parse ``textblock`` into a spell.

        :param textblock: raw text of one spell entry.
        :param title: name of the source book, printed at the bottom of the card.
        :raises ValueError: if ``textblock`` is empty or only whitespace.

        Any exception raised while parsing is re-raised after the offending
        block has been printed, so the bad entry can be located in the input.
        """
        try:
            if not textblock or not textblock.strip():
                raise ValueError('Empty textblock given!')
            self.booktitle = title
            self._assign_attr_from_text(textblock)
        except Exception:
            print(textblock)
            # Bare raise keeps the original traceback intact.
            raise

    @staticmethod
    def _flag(condition):
        """Return the lowercase string 'true' or 'false' for the card front matter."""
        return 'true' if condition else 'false'

    def _assign_attr_from_text(self, textblock):
        """
        Fill the instance attributes from the lines of ``textblock``.

        Blank lines are ignored. The labelled lines are cut at fixed offsets
        matching the length of their labels (e.g. 9 for ``Classes: ``).
        Raises ``IndexError`` if the block has fewer lines than expected.
        """
        lines = [line for line in textblock.split('\n') if line.strip() != '']
        self.name = lines.pop(0)
        # TODO
        # make this into a function and catch missing lines with a default value!
        info = lines.pop(0).lower()
        is_cantrip = 'cantrip' in info
        # Levelled spells start with the level digit ("3rd-level evocation");
        # cantrips put the school first ("evocation cantrip").
        self.level = 0 if is_cantrip else int(info[0])
        self.school = info.split()[0 if is_cantrip else 1].title()
        self.ritual = 'ritual' in info
        self.classes = lines.pop(0)[9:]       # len('Classes: ')
        self.casting = lines.pop(0)[14:]      # len('Casting Time: ')
        self.range = lines.pop(0)[7:]         # len('Range: ')
        self.components = lines.pop(0)[12:]   # len('Components: ')
        if '(' in self.components:
            # The material list may wrap over several lines; keep consuming
            # lines until the parenthesis closes, joining with a single space
            # so words at the line break do not run together.
            while ')' not in self.components:
                self.components = (self.components.rstrip() + ' '
                                   + lines.pop(0).lstrip())
        self.duration = lines.pop(0)[10:]     # len('Duration: ')
        self.description = self._stich_description(lines)
        self.cast_time = self._parse_cast_time(self.casting)
        self.upcastable = self._flag('at higher levels' in self.description.lower())
        self.concentration = self._flag('concentration' in self.duration.lower())
        self.priced = self._flag('worth' in self.components.lower())
        self.consumed = self._flag('consume' in self.components.lower())
        # Component letters only, without the parenthesised material list.
        self.mats = self.components.split(' (')[0]
        self.tags = self._get_tags()
        self.book = self.booktitle
        self.subtitle = info

    def _stich_description(self, lines):
        """
        Join wrapped description lines into paragraphs.

        A line ending in '.' or ':' is treated as the end of a paragraph and
        is followed by a blank line in the output; all other lines are joined
        to the next one with a space. A '.' is appended to the last line if it
        does not already end a paragraph. Returns '' when ``lines`` is empty.
        The list passed in is not modified.
        """
        if not lines:
            return ''
        lines = list(lines)  # work on a copy, leave the caller's list alone
        breaks = [idx for idx, line in enumerate(lines)
                  if line and line[-1] in ('.', ':')]
        last = len(lines) - 1
        if last not in breaks:
            lines[-1] += '.'
            breaks.append(last)
        # Every paragraph end except the final one gets a blank line after it.
        for idx in breaks[:-1]:
            lines[idx] += '\n\n'
        # Drop the joining space that lands right after a paragraph break.
        return ' '.join(lines).replace('\n ', '\n')

    def _parse_cast_time(self, text):
        """
        Shorten a casting-time string for the card front matter.

        Returns 'Reaction', 'Bonus acn.' or 'Action' when ``text`` mentions
        the corresponding kind of action, otherwise ``text`` unchanged.
        """
        lowered = text.lower()
        # 'reaction' and 'bonus action' both contain 'action', so the order
        # of these checks matters.
        if 'reaction' in lowered:
            return 'Reaction'
        if 'bonus' in lowered:
            return 'Bonus acn.'
        if 'action' in lowered:
            return 'Action'
        return text

    def _get_tags(self):
        """Return the comma-separated tags: school, then Concentration/Ritual if set."""
        tags = [self.school]
        if self.concentration == 'true':
            tags.append('Concentration')
        if self.ritual:
            tags.append('Ritual')
        return ', '.join(tags)

    def generate_spell_card(self):
        """Return the card text: ``blank_card`` filled in from the instance attributes."""
        return self.blank_card.format(**self.__dict__)

    def write_spellcard(self, directory):
        """
        Write the card to ``<directory>/<slugified name>.md``.

        The file is written as UTF-8 regardless of the platform default so
        that non-ASCII characters in the spell text are stored consistently.
        """
        cardtext = self.generate_spell_card()
        cardname = slugify(self.name) + '.md'
        cardpath = os.path.join(directory, cardname)
        with open(cardpath, 'w', encoding='utf-8') as fileout:
            fileout.write(cardtext)
def parse_text_file(filepath, title, dirname):
    """
    Turn every spell entry in a text file into a markdown card.

    The file is split into entries on blank lines (``'\\n\\n'``); each entry is
    parsed into a ``Spell`` and written to ``./spells/<dirname>/``, which is
    created if it does not exist yet.

    :param filepath: path of the text file holding the spell entries.
    :param title: book title passed on to every ``Spell``.
    :param dirname: sub-directory of ``./spells/`` that receives the cards.
    """
    # NOTE(review): the input is read with the platform default encoding;
    # confirm the source files' encoding before pinning one here.
    with open(filepath) as filein:
        blocks = filein.read().split('\n\n')
    base_dir = os.path.join('./spells/', dirname)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(base_dir, exist_ok=True)
    for block in blocks:
        # Trailing newlines or runs of blank lines produce empty chunks that
        # are not spell entries; skip them instead of aborting the whole run.
        if not block.strip():
            continue
        s = Spell(block, title)
        s.write_spellcard(base_dir)
if __name__ == "__main__":
    # Each entry: (input text file, book title, output sub-directory).
    sources = (
        ('kibbles_compendium.txt',
         "Kibble's Compendium of Craft and Creation",
         'KCCC'),
        ('kibbles_generic_raw.txt',
         "Kibbles' Generic Spells",
         'KIBBLES'),
    )
    for fp, bt, dn in sources:
        parse_text_file(fp, bt, dn)