# webcrawl/APGTE/tex_epigraphs.py

import os, re


def extract_int(s: str) -> int:
	"""Return the first run of decimal digits in *s* as an integer.

	Used as a sort key so that file names are ordered by the number they
	contain rather than lexicographically.

	Raises:
		ValueError: if *s* contains no digit at all.
	"""
	match = re.search(r'\d+', s)
	if match is None:
		# Without this check the failure surfaces as an AttributeError on
		# None, which gives no hint about which string was at fault.
		raise ValueError('no integer found in %r' % (s,))
	return int(match.group())

def format_epigraph(epi):
	r"""Build an ``\epigraph{quote}{source}`` command from raw quote text.

	The text is split at its last ``--`` (or, failing that, its last ``-``)
	into the quotation and its attribution. ``\emph`` wrappers and braces
	are removed from both parts and surrounding whitespace is stripped.
	When no dash is present the attribution is left empty.
	"""
	# NOTE: the single-hyphen fallback is a heuristic; a hyphenated word in
	# an epigraph without a dash-introduced attribution will be split too.
	if '--' in epi:
		quote, quotee = epi.rsplit('--', 1)
	elif '-' in epi:
		quote, quotee = epi.rsplit('-', 1)
	else:
		quote, quotee = epi, ''

	for token in ('\\emph', '{', '}'):
		quote = quote.replace(token, '')
	for token in ('\\emph{', '}'):
		quotee = quotee.replace(token, '')

	return '\\epigraph{' + quote.strip() + '}{' + quotee.strip() + '}'


def split_epigraph(text):
	r"""Split chapter source into ``(title, epigraph, body)``.

	*title* is everything before the first ``\begin{quote}``, *epigraph* is
	the ``\epigraph`` command built from that first quote environment, and
	*body* is everything after its ``\end{quote}``.

	If *text* does not contain a complete quote environment, the whole text
	is returned as *title* with an empty epigraph and body.
	"""
	# maxsplit=2 keeps later quote environments inside the body; splitting
	# on every occurrence would make chapters with more than one quote look
	# as if they had no epigraph at all.
	parts = text.split('{quote}', 2)
	if len(parts) != 3:
		return text, '', ''

	title, epi, body = parts
	if '\\begin' in title:
		# Drop the dangling "\begin" left over from "\begin{quote}".
		title = title.rpartition('\\begin')[0]
	# Likewise drop the "\end" that belonged to "\end{quote}".
	epi = epi.partition('\\end')[0]
	return title, format_epigraph(epi), body


def convert_heading(title):
	r"""Turn the ``\section`` heading in *title* into a chapter heading.

	Returns ``(new_title, heading)`` where *heading* is the original heading
	text with newlines replaced by spaces.

	* Headings containing "chapter" (case-insensitive) become ``\chapter``
	  and keep only the text after the first ``': '``; headings without
	  that separator are kept as they are.
	* All other headings become ``\chapter*`` with an explicit table of
	  contents entry and a ``\chaptermark``. Unless they mention interlude,
	  prologue or epilogue they are also prefixed with "Bonus Chapter: ".

	Raises:
		IndexError: if *title* has no ``section{`` heading, or a
			non-chapter heading has no ``label{``.
	"""
	hlp = title.split('section{')[1].split('}', 1)[0].replace('\n', ' ')

	if 'chapter' in hlp.lower():
		title = title.replace('\\section', '\\chapter')
		hlp_title = title.split('chapter{')[1].split('}', 1)[0]
		# partition keeps any further ': ' inside the name and tolerates
		# headings that have no separator at all.
		_, sep, short = hlp.partition(': ')
		if sep:
			# Anchor on "chapter{" so only the heading argument is replaced.
			title = title.replace('chapter{' + hlp_title, 'chapter{' + short, 1)
		return title, hlp

	title = title.replace('\\section', '\\chapter*')
	ref = title.split('label{')[1].split('}', 1)[0].replace('\n', '')
	# Starred chapters are not listed automatically, so add the entry by hand.
	title += '\\addcontentsline{toc}{chapter}{\\nameref{' + ref + '}}'

	if any(s in hlp.lower() for s in ('interlude', 'prologue', 'epilogue')):
		title += ' \\chaptermark{' + hlp + '}'
	else:
		hlp_title = title.split('chapter*{')[1].split('}', 1)[0]
		title = title.replace('chapter*{' + hlp_title, 'chapter*{Bonus Chapter: ' + hlp_title, 1)
		title += ' \\chaptermark{Bonus Chapter: ' + hlp_title + '}'
	return title, hlp


# Every directory in the working directory whose name contains "Book".
dirs = [d for d in os.listdir() if os.path.isdir(d) and 'Book' in d]

print(dirs)

for base in dirs:
	dir_in = os.path.join(base, 'tex')
	dir_out = os.path.join(base, 'out')
	os.makedirs(dir_out, exist_ok=True)

	# Keep only real .tex files *before* sorting: the numeric sort key fails
	# on names without digits, so stray files must not reach it.
	flist = sorted((f for f in os.listdir(dir_in) if f.endswith('.tex')), key=extract_int)

	# (file stem, heading text) for every chapter written, in reading order.
	index = []

	for fname in flist:
		file_in = os.path.join(dir_in, fname)
		file_out = os.path.join(dir_out, fname)

		with open(file_in, 'r') as texfile:
			text = texfile.read()

		title, epi, body = split_epigraph(text)
		title, hlp = convert_heading(title)

		with open(file_out, 'w') as texfile:
			texfile.write("\n\n".join([title, epi, body]))

		index.append((os.path.splitext(fname)[0], hlp))

	# One \include per chapter, with the heading as a trailing TeX comment.
	with open(os.path.join(base, 'index.tex'), 'w') as texfile:
		texfile.write('\n'.join(['\\include{out/' + f + '} %' + t for f, t in index]))