initial commit

This commit is contained in:
Daniel Mevec 2024-09-27 13:17:46 +02:00
commit 921ddc9faa
5 changed files with 242 additions and 0 deletions

132
TTOU_2023/index.txt Normal file
View file

@ -0,0 +1,132 @@
Ch-000.md: 001: JAVELIN
Ch-001.md: 002: DUTY
Ch-002.md: 003: ASSESSMENT
Ch-003.md: 004: TRAVEL
Ch-004.md: 005: GHOSTS
Ch-005.md: 006: RECOVERY
Ch-006.md: 007: PREDECESSOR
Ch-007.md: 008: OVERRIDE
Ch-008.md: 009: ASCEND
Ch-009.md: 010: ALONE
Ch-010.md: 011: POPULATION
Ch-011.md: 012: INFESTATION
Ch-012.md: 013: FALL
Ch-013.md: 014: COURSE
Ch-014.md: 015: FRIEND
Ch-015.md: 016: SUPPORT
Ch-016.md: 017: DEGRADATION
Ch-017.md: 018: CREW
Ch-018.md: 019: PLAN
Ch-019.md: 020: MASSACRE
Ch-020.md: 021: EVACUATE
Ch-021.md: 022: SACRIFICE
Ch-022.md: 023: RETRIEVE
Ch-023.md: 024: ACCESS
Ch-024.md: 025: FOX
Ch-025.md: 026: PURIFY
Ch-026.md: 027: PAST
Ch-027.md: 028: TIME
Ch-028.md: 029: PRESERVE
Ch-029.md: 030: BONE
Ch-030.md: 031: CONSPIRACY
Ch-031.md: 032: CONVICTION
Ch-032.md: 033: PREDICAMENT
Ch-033.md: 034: COMORBIDITY
Ch-034.md: 035: MORALE
Ch-035.md: 036: HEIGHTS
Ch-036.md: 037: MODIFY
Ch-037.md: 038: NERVES
Ch-038.md: 039: PEOPLE
Ch-039.md: 040: RECAP
Ch-040.md: 041: TARGET
Ch-041.md: 042: DECEPTION
Ch-042.md: 043: OUTSOURCING
Ch-043.md: 044: EARTH
Ch-044.md: 045: ZOMBIES
Ch-045.md: 046: INTERDEPENDENCE
Ch-046.md: 047: HELP
Ch-047.md: 048: ADAPTATION
Ch-048.md: 049: SAFETY
Ch-049.md: 050: SELECTION
Ch-050.md: 051: HULL
Ch-051.md: 052: EXPAND
Ch-052.md: 053: AUTONOMY
Ch-053.md: 054: RECORD
Ch-054.md: 055: ICEBREAKER
Ch-055.md: 056: HISTORY
Ch-056.md: 057: FAMILY
Ch-057.md: 058: DISCLOSURE
Ch-058.md: 059: RESPONSIBILITY
Ch-059.md: 060: INFORMATION
Ch-060.md: 061: NEGOTIATION
Ch-061.md: 062: LUMINSCENCE
Ch-062.md: 063: INVESTIGATE
Ch-063.md: 064: SABOTAGE
Ch-064.md: 065: VECTOR
Ch-065.md: 066: PROGRESS
Ch-066.md: 067: CYCLE
Ch-067.md: 068: RATE
Ch-068.md: 069: TRANSPLANT
Ch-069.md: 070: GENETICS
Ch-070.md: 071: HOUSEKEEPING
Ch-071.md: 072: FREEZE
Ch-072.md: 073: TRUST
Ch-073.md: 074: CULPABILITY
Ch-074.md: 075: RECIDIVISM
Ch-075.md: 076: TIES
Ch-076.md: 077: NOTES
Ch-077.md: 078: DIVIDED
Ch-078.md: 079: STABILITY
Ch-079.md: 080: ACACIA
Ch-080.md: 081: ANTARCTICA
Ch-081.md: 082: ALIENS
Ch-082.md: 083: SCENE
Ch-083.md: 084: INTERROGATION
Ch-084.md: 085: DRUG
Ch-085.md: 086: CONFESSION
Ch-086.md: 087: ACCOMPLICE
Ch-087.md: 088: RIFT
Ch-088.md: 089: TRUMPS
Ch-089.md: 090: MANAGEMENT
Ch-090.md: 091: DURESS
Ch-091.md: 092: GUILT
Ch-092.md: 093: DESPERATION
Ch-093.md: 094: IDENTITY
Ch-094.md: 095: DEFUSE
Ch-095.md: 096: REVIVE
Ch-096.md: 097: STABILISE
Ch-097.md: 098: LOBOTOMY
Ch-098.md: 099: TAL
Ch-099.md: 100: CAPTAIN
Ch-100.md: 101: GRIEF
Ch-101.md: 102: BED
Ch-102.md: 103: IRREPARABLE
Ch-103.md: 104: SHELL
Ch-104.md: 105: DOR
Ch-105.md: 106: PIRATES
Ch-106.md: 107: IMPLICATION
Ch-107.md: 108: GROWTH
Ch-108.md: 109: NORMAL
Ch-109.md: 110: GRAVES
Ch-110.md: 111: SCAN
Ch-111.md: 112: COHERENCE
Ch-112.md: 113: SENTENCE
Ch-113.md: 114: REACTION
Ch-114.md: 115: SAFE
Ch-115.md: 116: REVIEW
Ch-116.md: 117: TUBE
Ch-117.md: 118: VIEW
Ch-118.md: 119: SIGHT
Ch-119.md: 120: DELIVERY
Ch-120.md: 121: RESPONSE
Ch-121.md: 122: INFORMATION
Ch-122.md: 123: SIENNA
Ch-123.md: 124: UPDATE
Ch-124.md: 125: WIPED
Ch-125.md: 126: ASTRONAUTS
Ch-126.md: 127: HERITAGE
Ch-127.md: 128: CODE
Ch-128.md: 129: PREPARE
Ch-129.md: 130: EVE
Ch-130.md: 131: BOARD
Ch-131.md: 132: DROP

7
md_to_ebook.sh Normal file
View file

@ -0,0 +1,7 @@
# Build the cover image and the EPUB for every book directory found directly
# below the current working directory.
for dir in */; do
    # When no sub-directory exists the glob stays literal ("*/"); skip it.
    [ -d "$dir" ] || continue
    # Work in a subshell: the caller's working directory is never changed, and
    # a failed `cd` cannot make the tools run in (or climb out of) the parent.
    (
        cd "$dir" || exit 1
        # Options are placed before the file name so the TeX engine parses
        # them as options rather than as part of the input line.
        latex -interaction=nonstopmode cover.tex
        dvipng -p 1 cover.dvi
        # NOTE(review): metadata.txt is resolved relative to the book
        # directory -- confirm a copy exists there.
        pandoc metadata.txt md/Ch-*.md -o apgte.epub --top-level-division=chapter --toc
    )
done

5
metadata.txt Normal file
View file

@ -0,0 +1,5 @@
---
title: Time To Orbit Unknown
author: Derin Edala
lang: en-US
---

10
requirements.txt Normal file
View file

@ -0,0 +1,10 @@
beautifulsoup4==4.12.2
bs4==0.0.1
certifi==2023.11.17
charset-normalizer==3.3.2
idna==3.6
markdownify==0.11.6
requests==2.31.0
six==1.16.0
soupsieve==2.5
urllib3==2.1.0

88
ttou_download.py Normal file
View file

@ -0,0 +1,88 @@
import os
import bs4
import logging
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify
# Directory that receives the downloaded chapters (in its md/ sub-folder)
# and the generated index.txt.
output_dir = "./TTOU_2023"
# Emit timestamped log records at INFO level and above.
logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
def parse_apgte(url, timeout=30):
    """Download one chapter page and pull out its title, body and next link.

    Args:
        url: Address of the chapter page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole download run.

    Returns:
        A tuple ``(next_url, header, article)`` where ``next_url`` is the
        ``href`` of the element carrying ``rel="next"`` (``None`` when the
        page has no such element), ``header`` is the element with class
        ``entry-title`` (``None`` when absent) and ``article`` is the element
        with class ``entry-content`` with its first nested ``<div>`` removed.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``entry-content`` element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    header = soup.find(attrs={"class": "entry-title"})
    article = soup.find(attrs={"class": "entry-content"})
    if article is None:
        raise ValueError(f"no entry-content element found at {url}")
    # Drop the first nested <div> (the "sharedaddy" sharing widget); pages
    # without one are left untouched instead of raising AttributeError.
    if article.div is not None:
        article.div.decompose()
    next_link = soup.find(attrs={"rel": "next"})
    return (next_link.get("href") if next_link else None, header, article)
def chop_between_finds(s, f1, f2):
    """Return the lines of *s* strictly between two marker lines.

    The opening marker is the first line containing ``f1``; the closing
    marker is the last line containing ``f2``. Neither marker line is part
    of the result.

    Args:
        s: Text to trim.
        f1: Substring identifying the opening marker line.
        f2: Substring identifying the closing marker line.

    Returns:
        The lines between the two markers joined with ``"\\n"``. When ``f1``
        is absent the result starts at the first line; when ``f2`` is absent
        it runs through the last line. If the closing marker does not come
        after the opening marker (for example a single matching line when
        ``f1 == f2``), the result is an empty string.
    """
    lines = s.splitlines()
    f1_occ = [idx for idx, line in enumerate(lines) if f1 in line]
    f2_occ = f1_occ if f1 == f2 else [idx for idx, line in enumerate(lines) if f2 in line]
    # A missing marker must not cost a line of real content: start right at
    # the beginning / stop right at the end instead of chopping line 0 or
    # the final line.
    start = f1_occ[0] + 1 if f1_occ else 0
    stop = f2_occ[-1] if f2_occ else len(lines)
    return "\n".join(lines[start:stop])
if __name__ == "__main__":
    # Crawl the story chapter by chapter, following each page's "next" link,
    # write every chapter as Markdown into <output_dir>/md and finally write
    # an index of "<file name>\t<chapter title>" lines to <output_dir>/index.txt.
    next_url = (
        "https://derinstories.com/2022/06/04/001-the-problem-with-the-javelin-program/"
    )
    chapter = 0
    index = []
    md_dir = f"{output_dir}/md"
    # Creates output_dir as well; done once instead of on every iteration.
    os.makedirs(md_dir, exist_ok=True)
    while next_url:
        logging.info("parsing: %s", next_url)
        try:
            next_url, title, text = parse_apgte(next_url)
        except Exception:
            logging.exception("Failed to parse: %s", next_url)
            # next_url is unchanged after a failure, so continuing would
            # retry the same page forever (or reuse the previous chapter's
            # data). Stop here and keep what has been downloaded so far.
            break
        filename = f"{md_dir}/Ch-{chapter:03d}.md"
        index.append(f"Ch-{chapter:03d}.md:\t{title.string}")
        # Explicit UTF-8 so non-ASCII characters in the chapter text do not
        # depend on the platform's default encoding.
        with open(filename, "w", encoding="utf-8") as mdfile:
            mdfile.write(markdownify(str(title)))
            txt_md = markdownify(str(text))
            # Keep only the text between the first and the last line that
            # reference a .png image.
            txt_md = chop_between_finds(txt_md, ".png", ".png")
            mdfile.write(txt_md)
        chapter += 1
    with open(f"{output_dir}/index.txt", "w", encoding="utf-8") as txtfile:
        txtfile.write("\n".join(index))
    logging.info("Done!")