commit 921ddc9faa99a52de910372218e0242dd407d821 Author: polymachine Date: Fri Sep 27 13:17:46 2024 +0200 initial commit diff --git a/TTOU_2023/index.txt b/TTOU_2023/index.txt new file mode 100644 index 0000000..8f8f016 --- /dev/null +++ b/TTOU_2023/index.txt @@ -0,0 +1,132 @@ +Ch-000.md: 001: JAVELIN +Ch-001.md: 002: DUTY +Ch-002.md: 003: ASSESSMENT +Ch-003.md: 004: TRAVEL +Ch-004.md: 005: GHOSTS +Ch-005.md: 006: RECOVERY +Ch-006.md: 007: PREDECESSOR +Ch-007.md: 008: OVERRIDE +Ch-008.md: 009: ASCEND +Ch-009.md: 010: ALONE +Ch-010.md: 011: POPULATION +Ch-011.md: 012: INFESTATION +Ch-012.md: 013: FALL +Ch-013.md: 014: COURSE +Ch-014.md: 015: FRIEND +Ch-015.md: 016: SUPPORT +Ch-016.md: 017: DEGRADATION +Ch-017.md: 018: CREW +Ch-018.md: 019: PLAN +Ch-019.md: 020: MASSACRE +Ch-020.md: 021: EVACUATE +Ch-021.md: 022: SACRIFICE +Ch-022.md: 023: RETRIEVE +Ch-023.md: 024: ACCESS +Ch-024.md: 025: FOX +Ch-025.md: 026: PURIFY +Ch-026.md: 027: PAST +Ch-027.md: 028: TIME +Ch-028.md: 029: PRESERVE +Ch-029.md: 030: BONE +Ch-030.md: 031: CONSPIRACY +Ch-031.md: 032: CONVICTION +Ch-032.md: 033: PREDICAMENT +Ch-033.md: 034: COMORBIDITY +Ch-034.md: 035: MORALE +Ch-035.md: 036: HEIGHTS +Ch-036.md: 037: MODIFY +Ch-037.md: 038: NERVES +Ch-038.md: 039: PEOPLE +Ch-039.md: 040: RECAP +Ch-040.md: 041: TARGET +Ch-041.md: 042: DECEPTION +Ch-042.md: 043: OUTSOURCING +Ch-043.md: 044: EARTH +Ch-044.md: 045: ZOMBIES +Ch-045.md: 046: INTERDEPENDENCE +Ch-046.md: 047: HELP +Ch-047.md: 048: ADAPTATION +Ch-048.md: 049: SAFETY +Ch-049.md: 050: SELECTION +Ch-050.md: 051: HULL +Ch-051.md: 052: EXPAND +Ch-052.md: 053: AUTONOMY +Ch-053.md: 054: RECORD +Ch-054.md: 055: ICEBREAKER +Ch-055.md: 056: HISTORY +Ch-056.md: 057: FAMILY +Ch-057.md: 058: DISCLOSURE +Ch-058.md: 059: RESPONSIBILITY +Ch-059.md: 060: INFORMATION +Ch-060.md: 061: NEGOTIATION +Ch-061.md: 062: LUMINSCENCE +Ch-062.md: 063: INVESTIGATE +Ch-063.md: 064: SABOTAGE +Ch-064.md: 065: VECTOR +Ch-065.md: 066: PROGRESS +Ch-066.md: 067: CYCLE +Ch-067.md: 
#!/bin/sh
# Build an EPUB for every book directory below the current directory.
#
# For each sub-directory this script:
#   1. compiles cover.tex to cover.dvi with latex,
#   2. converts page 1 of cover.dvi to a PNG with dvipng,
#   3. runs pandoc over metadata.txt and md/Ch-*.md to produce an EPUB with a
#      table of contents, treating top-level headings as chapters.
#
# NOTE(review): metadata.txt is resolved relative to each book directory, so a
# copy must exist inside every directory that is built - confirm this is the
# intended layout.
# NOTE(review): the output name "apgte.epub" looks inherited from another
# project; it is kept unchanged so existing consumers keep working.

for dir in */; do
    # Each build runs in a subshell: a failed `cd` can no longer make the
    # remaining commands run in the wrong directory, and there is no `cd ..`
    # that could walk out of the project root.
    (
        # Quoted so directory names containing spaces are not word-split.
        cd -- "$dir" || exit 1
        latex -interaction=nonstopmode cover.tex
        dvipng -p 1 cover.dvi
        pandoc metadata.txt md/Ch-*.md -o apgte.epub --top-level-division=chapter --toc
    ) || echo "md_to_ebook: build failed in ${dir}" >&2
done
"""Scrape "Time To Orbit Unknown" from derinstories.com into Markdown files.

Starting at the first chapter, the script follows each page's ``rel="next"``
link, writes every chapter to ``<output_dir>/md/Ch-NNN.md`` and finally
writes ``<output_dir>/index.txt`` mapping file names to chapter titles.

Third-party requirements: requests, beautifulsoup4 and markdownify.
They are imported inside the functions that use them so that the pure-text
helper ``chop_between_finds`` stays importable without the scraping stack.
"""
import logging
import os

output_dir = "./TTOU_2023"

# First chapter; every later chapter is discovered through rel="next" links.
FIRST_CHAPTER_URL = (
    "https://derinstories.com/2022/06/04/001-the-problem-with-the-javelin-program/"
)

logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)


def parse_apgte(url, timeout=30):
    """Fetch one chapter page and extract its title, body and successor link.

    Args:
        url: Address of the chapter page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(next_url, header, article)`` tuple. ``next_url`` is the ``href``
        of the page's ``rel="next"`` element, or ``None`` when the page has
        none. ``header`` and ``article`` are the parsed elements with class
        ``entry-title`` and ``entry-content``.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If the page lacks the title or content element.
    """
    import requests
    from bs4 import BeautifulSoup

    # Without a timeout a stalled connection would hang the scrape forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    header = soup.find(attrs={"class": "entry-title"})
    article = soup.find(attrs={"class": "entry-content"})
    if header is None or article is None:
        raise ValueError(f"no entry-title/entry-content element found at {url}")

    # The first <div> of the article is the sharing widget ("sharedaddy");
    # drop it when present so it does not end up in the Markdown.
    if article.div is not None:
        article.div.decompose()

    next_link = soup.find(attrs={"rel": "next"})
    return (next_link.get("href") if next_link else None, header, article)


def chop_between_finds(s, f1, f2):
    """Return the lines of *s* enclosed by two marker lines.

    The result starts right after the first line containing ``f1`` and stops
    right before the last line containing ``f2`` that comes after it. The
    marker lines themselves are excluded.

    A missing marker never costs content: without an ``f1`` line the result
    starts at the first line, and without a later ``f2`` line it runs to the
    last line. In particular, when ``f1 == f2`` and only one marker line
    exists, everything after that line is kept.

    Args:
        s: Text to cut.
        f1: Substring identifying the opening marker line.
        f2: Substring identifying the closing marker line.

    Returns:
        The selected lines joined with ``"\\n"``.
    """
    lines = s.splitlines()

    opening = next((idx for idx, line in enumerate(lines) if f1 in line), None)
    begin = 0 if opening is None else opening + 1

    # Only lines after the opening marker can close the range; this also
    # keeps a lone marker from acting as both the opening and the closing.
    closing = [idx for idx, line in enumerate(lines[begin:], begin) if f2 in line]
    end = closing[-1] if closing else len(lines)

    return "\n".join(lines[begin:end])


def main():
    """Download every chapter and write the Markdown files plus the index.

    Returns:
        ``0`` when the chain of chapters was followed to its end, ``1`` when
        a page could not be fetched or parsed. In both cases the index of
        the chapters saved so far is written.
    """
    from markdownify import markdownify

    os.makedirs(f"{output_dir}/md", exist_ok=True)

    index = []
    chapter = 0
    status = 0
    next_url = FIRST_CHAPTER_URL

    while next_url:
        logging.info("parsing: %s", next_url)

        try:
            following_url, title, text = parse_apgte(next_url)
        except Exception:
            # Stop here: retrying the same URL would loop forever, and going
            # on would save stale data from the previous chapter.
            logging.exception("Failed to parse: %s", next_url)
            status = 1
            break

        filename = f"{output_dir}/md/Ch-{chapter:03d}.md"
        index.append(f"Ch-{chapter:03d}.md:\t{title.string}")

        txt_md = markdownify(str(text))
        # Keep only what lies between the first and the last image line.
        txt_md = chop_between_finds(txt_md, ".png", ".png")

        # Explicit encoding: chapter text is not guaranteed to fit the
        # platform's default code page.
        with open(filename, "w", encoding="utf-8") as mdfile:
            mdfile.write(markdownify(str(title)))
            mdfile.write(txt_md)

        next_url = following_url
        chapter += 1

    with open(f"{output_dir}/index.txt", "w", encoding="utf-8") as txtfile:
        txtfile.write("\n".join(index))
    logging.info("Done!")
    return status


if __name__ == "__main__":
    raise SystemExit(main())