initial commit
This commit is contained in:
commit
921ddc9faa
5 changed files with 242 additions and 0 deletions
132
TTOU_2023/index.txt
Normal file
132
TTOU_2023/index.txt
Normal file
|
|
@ -0,0 +1,132 @@
|
||||||
|
Ch-000.md: 001: JAVELIN
|
||||||
|
Ch-001.md: 002: DUTY
|
||||||
|
Ch-002.md: 003: ASSESSMENT
|
||||||
|
Ch-003.md: 004: TRAVEL
|
||||||
|
Ch-004.md: 005: GHOSTS
|
||||||
|
Ch-005.md: 006: RECOVERY
|
||||||
|
Ch-006.md: 007: PREDECESSOR
|
||||||
|
Ch-007.md: 008: OVERRIDE
|
||||||
|
Ch-008.md: 009: ASCEND
|
||||||
|
Ch-009.md: 010: ALONE
|
||||||
|
Ch-010.md: 011: POPULATION
|
||||||
|
Ch-011.md: 012: INFESTATION
|
||||||
|
Ch-012.md: 013: FALL
|
||||||
|
Ch-013.md: 014: COURSE
|
||||||
|
Ch-014.md: 015: FRIEND
|
||||||
|
Ch-015.md: 016: SUPPORT
|
||||||
|
Ch-016.md: 017: DEGRADATION
|
||||||
|
Ch-017.md: 018: CREW
|
||||||
|
Ch-018.md: 019: PLAN
|
||||||
|
Ch-019.md: 020: MASSACRE
|
||||||
|
Ch-020.md: 021: EVACUATE
|
||||||
|
Ch-021.md: 022: SACRIFICE
|
||||||
|
Ch-022.md: 023: RETRIEVE
|
||||||
|
Ch-023.md: 024: ACCESS
|
||||||
|
Ch-024.md: 025: FOX
|
||||||
|
Ch-025.md: 026: PURIFY
|
||||||
|
Ch-026.md: 027: PAST
|
||||||
|
Ch-027.md: 028: TIME
|
||||||
|
Ch-028.md: 029: PRESERVE
|
||||||
|
Ch-029.md: 030: BONE
|
||||||
|
Ch-030.md: 031: CONSPIRACY
|
||||||
|
Ch-031.md: 032: CONVICTION
|
||||||
|
Ch-032.md: 033: PREDICAMENT
|
||||||
|
Ch-033.md: 034: COMORBIDITY
|
||||||
|
Ch-034.md: 035: MORALE
|
||||||
|
Ch-035.md: 036: HEIGHTS
|
||||||
|
Ch-036.md: 037: MODIFY
|
||||||
|
Ch-037.md: 038: NERVES
|
||||||
|
Ch-038.md: 039: PEOPLE
|
||||||
|
Ch-039.md: 040: RECAP
|
||||||
|
Ch-040.md: 041: TARGET
|
||||||
|
Ch-041.md: 042: DECEPTION
|
||||||
|
Ch-042.md: 043: OUTSOURCING
|
||||||
|
Ch-043.md: 044: EARTH
|
||||||
|
Ch-044.md: 045: ZOMBIES
|
||||||
|
Ch-045.md: 046: INTERDEPENDENCE
|
||||||
|
Ch-046.md: 047: HELP
|
||||||
|
Ch-047.md: 048: ADAPTATION
|
||||||
|
Ch-048.md: 049: SAFETY
|
||||||
|
Ch-049.md: 050: SELECTION
|
||||||
|
Ch-050.md: 051: HULL
|
||||||
|
Ch-051.md: 052: EXPAND
|
||||||
|
Ch-052.md: 053: AUTONOMY
|
||||||
|
Ch-053.md: 054: RECORD
|
||||||
|
Ch-054.md: 055: ICEBREAKER
|
||||||
|
Ch-055.md: 056: HISTORY
|
||||||
|
Ch-056.md: 057: FAMILY
|
||||||
|
Ch-057.md: 058: DISCLOSURE
|
||||||
|
Ch-058.md: 059: RESPONSIBILITY
|
||||||
|
Ch-059.md: 060: INFORMATION
|
||||||
|
Ch-060.md: 061: NEGOTIATION
|
||||||
|
Ch-061.md: 062: LUMINSCENCE
|
||||||
|
Ch-062.md: 063: INVESTIGATE
|
||||||
|
Ch-063.md: 064: SABOTAGE
|
||||||
|
Ch-064.md: 065: VECTOR
|
||||||
|
Ch-065.md: 066: PROGRESS
|
||||||
|
Ch-066.md: 067: CYCLE
|
||||||
|
Ch-067.md: 068: RATE
|
||||||
|
Ch-068.md: 069: TRANSPLANT
|
||||||
|
Ch-069.md: 070: GENETICS
|
||||||
|
Ch-070.md: 071: HOUSEKEEPING
|
||||||
|
Ch-071.md: 072: FREEZE
|
||||||
|
Ch-072.md: 073: TRUST
|
||||||
|
Ch-073.md: 074: CULPABILITY
|
||||||
|
Ch-074.md: 075: RECIDIVISM
|
||||||
|
Ch-075.md: 076: TIES
|
||||||
|
Ch-076.md: 077: NOTES
|
||||||
|
Ch-077.md: 078: DIVIDED
|
||||||
|
Ch-078.md: 079: STABILITY
|
||||||
|
Ch-079.md: 080: ACACIA
|
||||||
|
Ch-080.md: 081: ANTARCTICA
|
||||||
|
Ch-081.md: 082: ALIENS
|
||||||
|
Ch-082.md: 083: SCENE
|
||||||
|
Ch-083.md: 084: INTERROGATION
|
||||||
|
Ch-084.md: 085: DRUG
|
||||||
|
Ch-085.md: 086: CONFESSION
|
||||||
|
Ch-086.md: 087: ACCOMPLICE
|
||||||
|
Ch-087.md: 088: RIFT
|
||||||
|
Ch-088.md: 089: TRUMPS
|
||||||
|
Ch-089.md: 090: MANAGEMENT
|
||||||
|
Ch-090.md: 091: DURESS
|
||||||
|
Ch-091.md: 092: GUILT
|
||||||
|
Ch-092.md: 093: DESPERATION
|
||||||
|
Ch-093.md: 094: IDENTITY
|
||||||
|
Ch-094.md: 095: DEFUSE
|
||||||
|
Ch-095.md: 096: REVIVE
|
||||||
|
Ch-096.md: 097: STABILISE
|
||||||
|
Ch-097.md: 098: LOBOTOMY
|
||||||
|
Ch-098.md: 099: TAL
|
||||||
|
Ch-099.md: 100: CAPTAIN
|
||||||
|
Ch-100.md: 101: GRIEF
|
||||||
|
Ch-101.md: 102: BED
|
||||||
|
Ch-102.md: 103: IRREPARABLE
|
||||||
|
Ch-103.md: 104: SHELL
|
||||||
|
Ch-104.md: 105: DOR
|
||||||
|
Ch-105.md: 106: PIRATES
|
||||||
|
Ch-106.md: 107: IMPLICATION
|
||||||
|
Ch-107.md: 108: GROWTH
|
||||||
|
Ch-108.md: 109: NORMAL
|
||||||
|
Ch-109.md: 110: GRAVES
|
||||||
|
Ch-110.md: 111: SCAN
|
||||||
|
Ch-111.md: 112: COHERENCE
|
||||||
|
Ch-112.md: 113: SENTENCE
|
||||||
|
Ch-113.md: 114: REACTION
|
||||||
|
Ch-114.md: 115: SAFE
|
||||||
|
Ch-115.md: 116: REVIEW
|
||||||
|
Ch-116.md: 117: TUBE
|
||||||
|
Ch-117.md: 118: VIEW
|
||||||
|
Ch-118.md: 119: SIGHT
|
||||||
|
Ch-119.md: 120: DELIVERY
|
||||||
|
Ch-120.md: 121: RESPONSE
|
||||||
|
Ch-121.md: 122: INFORMATION
|
||||||
|
Ch-122.md: 123: SIENNA
|
||||||
|
Ch-123.md: 124: UPDATE
|
||||||
|
Ch-124.md: 125: WIPED
|
||||||
|
Ch-125.md: 126: ASTRONAUTS
|
||||||
|
Ch-126.md: 127: HERITAGE
|
||||||
|
Ch-127.md: 128: CODE
|
||||||
|
Ch-128.md: 129: PREPARE
|
||||||
|
Ch-129.md: 130: EVE
|
||||||
|
Ch-130.md: 131: BOARD
|
||||||
|
Ch-131.md: 132: DROP
|
||||||
7
md_to_ebook.sh
Normal file
7
md_to_ebook.sh
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Build the cover image and the EPUB inside every sub-directory of the
# current working directory.
for dir in */; do
    # Each book is built in a subshell: the working directory change is
    # undone automatically, and a failed cd can no longer make the build
    # commands run in (or the loop climb out of) the wrong directory.
    (
        cd "$dir" || exit 1
        # Options are placed ahead of the file name, the conventional order.
        latex -interaction=nonstopmode cover.tex
        dvipng -p 1 cover.dvi
        pandoc metadata.txt md/Ch-*.md -o apgte.epub --top-level-division=chapter --toc
    ) || echo "md_to_ebook: build failed in $dir" >&2
done
|
||||||
5
metadata.txt
Normal file
5
metadata.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
title: Time To Orbit Unknown
|
||||||
|
author: Derin Edala
|
||||||
|
lang: en-US
|
||||||
|
---
|
||||||
10
requirements.txt
Normal file
10
requirements.txt
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
beautifulsoup4==4.12.2
|
||||||
|
bs4==0.0.1
|
||||||
|
certifi==2023.11.17
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
idna==3.6
|
||||||
|
markdownify==0.11.6
|
||||||
|
requests==2.31.0
|
||||||
|
six==1.16.0
|
||||||
|
soupsieve==2.5
|
||||||
|
urllib3==2.1.0
|
||||||
88
ttou_download.py
Normal file
88
ttou_download.py
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
import os
|
||||||
|
import bs4
|
||||||
|
import logging
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from markdownify import markdownify
|
||||||
|
|
||||||
|
output_dir = "./TTOU_2023"
|
||||||
|
|
||||||
|
logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_apgte(url, timeout=30):
    """Download one chapter page and extract its title, body and next link.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole crawl.

    Returns:
        A tuple ``(next_url, header, article)``: ``next_url`` is the ``href``
        of the element carrying ``rel="next"`` (``None`` when the page has
        no such element), ``header`` is the element with class
        ``entry-title`` and ``article`` is the element with class
        ``entry-content``.

    Raises:
        requests.RequestException: On a network failure, a timeout or an
            HTTP error status.
        ValueError: When the page has no title or no content element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    header = soup.find(attrs={"class": "entry-title"})
    article = soup.find(attrs={"class": "entry-content"})
    if header is None or article is None:
        raise ValueError(f"no entry-title/entry-content element found at {url}")

    # The first <div> of the content is removed; per the original author's
    # note this is the "sharedaddy" block. Pages without any <div> are
    # left untouched rather than crashing.
    if article.div is not None:
        article.div.decompose()  # delete sharedaddy

    next_link = soup.find(attrs={"rel": "next"})
    return (next_link.get("href") if next_link else None, header, article)
|
||||||
|
|
||||||
|
|
||||||
|
def chop_between_finds(s, f1, f2):
    """Return the lines of *s* lying strictly between two marker lines.

    The start marker is the first line containing ``f1``; the end marker is
    the last line containing ``f2`` that comes after the start marker.
    Neither marker line is included in the result.

    A marker that cannot be found does not trim anything on its side: with
    no ``f1`` line the result starts at the first line, and with no ``f2``
    line after the start the result runs to the last line. In particular,
    when ``f1 == f2`` and only one line matches, that line acts as the start
    marker only and everything after it is kept.

    Args:
        s: Text to cut, split on line boundaries.
        f1: Substring identifying the start marker line.
        f2: Substring identifying the end marker line.

    Returns:
        The selected lines joined with ``"\\n"`` (empty string when nothing
        lies between the markers).
    """
    lines = s.splitlines()

    # -1 means "no start marker", so that start + 1 is the very first line.
    start = next((idx for idx, line in enumerate(lines) if f1 in line), -1)
    begin = start + 1

    # Default to "no end marker": keep everything up to the end of the text.
    end = len(lines)
    # Scan backwards for the last end marker, never reaching the start line.
    for idx in range(len(lines) - 1, begin - 1, -1):
        if f2 in lines[idx]:
            end = idx
            break

    return "\n".join(lines[begin:end])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Crawl the story from its first chapter, following each page's "next"
    # link, and write one Markdown file per chapter plus an index file.
    next_url = (
        "https://derinstories.com/2022/06/04/001-the-problem-with-the-javelin-program/"
    )
    chapter = 0
    index = []

    # Create the output tree once up front; exist_ok keeps re-runs harmless.
    md_dir = f"{output_dir}/md"
    os.makedirs(md_dir, exist_ok=True)

    while next_url:
        logging.info(f"parsing: {next_url}")

        try:
            following_url, title, text = parse_apgte(next_url)
        except Exception:
            logging.exception(f"Failed to parse: {next_url}")
            # Stop the crawl: without a parsed page there is no next link to
            # follow, and continuing would either reuse the previous
            # chapter's data forever or fail on undefined names.
            break

        filename = f"{md_dir}/Ch-{chapter:03d}.md"
        index.append(f"Ch-{chapter:03d}.md:\t{title.string}")

        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding.
        with open(filename, "w", encoding="utf-8") as mdfile:
            mdfile.write(markdownify(str(title)))

            txt_md = markdownify(str(text))
            # Keep only the text between the first and last lines that
            # mention a ".png".
            txt_md = chop_between_finds(txt_md, ".png", ".png")
            mdfile.write(txt_md)

        chapter += 1
        next_url = following_url

    # The index is written even after an aborted crawl, so it always
    # matches the chapter files that were actually produced.
    with open(f"{output_dir}/index.txt", "w", encoding="utf-8") as txtfile:
        txtfile.write("\n".join(index))
    logging.info("Done!")
|
||||||
Loading…
Add table
Reference in a new issue