initial commit
This commit is contained in:
commit
921ddc9faa
5 changed files with 242 additions and 0 deletions
132
TTOU_2023/index.txt
Normal file
132
TTOU_2023/index.txt
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
Ch-000.md: 001: JAVELIN
|
||||
Ch-001.md: 002: DUTY
|
||||
Ch-002.md: 003: ASSESSMENT
|
||||
Ch-003.md: 004: TRAVEL
|
||||
Ch-004.md: 005: GHOSTS
|
||||
Ch-005.md: 006: RECOVERY
|
||||
Ch-006.md: 007: PREDECESSOR
|
||||
Ch-007.md: 008: OVERRIDE
|
||||
Ch-008.md: 009: ASCEND
|
||||
Ch-009.md: 010: ALONE
|
||||
Ch-010.md: 011: POPULATION
|
||||
Ch-011.md: 012: INFESTATION
|
||||
Ch-012.md: 013: FALL
|
||||
Ch-013.md: 014: COURSE
|
||||
Ch-014.md: 015: FRIEND
|
||||
Ch-015.md: 016: SUPPORT
|
||||
Ch-016.md: 017: DEGRADATION
|
||||
Ch-017.md: 018: CREW
|
||||
Ch-018.md: 019: PLAN
|
||||
Ch-019.md: 020: MASSACRE
|
||||
Ch-020.md: 021: EVACUATE
|
||||
Ch-021.md: 022: SACRIFICE
|
||||
Ch-022.md: 023: RETRIEVE
|
||||
Ch-023.md: 024: ACCESS
|
||||
Ch-024.md: 025: FOX
|
||||
Ch-025.md: 026: PURIFY
|
||||
Ch-026.md: 027: PAST
|
||||
Ch-027.md: 028: TIME
|
||||
Ch-028.md: 029: PRESERVE
|
||||
Ch-029.md: 030: BONE
|
||||
Ch-030.md: 031: CONSPIRACY
|
||||
Ch-031.md: 032: CONVICTION
|
||||
Ch-032.md: 033: PREDICAMENT
|
||||
Ch-033.md: 034: COMORBIDITY
|
||||
Ch-034.md: 035: MORALE
|
||||
Ch-035.md: 036: HEIGHTS
|
||||
Ch-036.md: 037: MODIFY
|
||||
Ch-037.md: 038: NERVES
|
||||
Ch-038.md: 039: PEOPLE
|
||||
Ch-039.md: 040: RECAP
|
||||
Ch-040.md: 041: TARGET
|
||||
Ch-041.md: 042: DECEPTION
|
||||
Ch-042.md: 043: OUTSOURCING
|
||||
Ch-043.md: 044: EARTH
|
||||
Ch-044.md: 045: ZOMBIES
|
||||
Ch-045.md: 046: INTERDEPENDENCE
|
||||
Ch-046.md: 047: HELP
|
||||
Ch-047.md: 048: ADAPTATION
|
||||
Ch-048.md: 049: SAFETY
|
||||
Ch-049.md: 050: SELECTION
|
||||
Ch-050.md: 051: HULL
|
||||
Ch-051.md: 052: EXPAND
|
||||
Ch-052.md: 053: AUTONOMY
|
||||
Ch-053.md: 054: RECORD
|
||||
Ch-054.md: 055: ICEBREAKER
|
||||
Ch-055.md: 056: HISTORY
|
||||
Ch-056.md: 057: FAMILY
|
||||
Ch-057.md: 058: DISCLOSURE
|
||||
Ch-058.md: 059: RESPONSIBILITY
|
||||
Ch-059.md: 060: INFORMATION
|
||||
Ch-060.md: 061: NEGOTIATION
|
||||
Ch-061.md: 062: LUMINSCENCE
|
||||
Ch-062.md: 063: INVESTIGATE
|
||||
Ch-063.md: 064: SABOTAGE
|
||||
Ch-064.md: 065: VECTOR
|
||||
Ch-065.md: 066: PROGRESS
|
||||
Ch-066.md: 067: CYCLE
|
||||
Ch-067.md: 068: RATE
|
||||
Ch-068.md: 069: TRANSPLANT
|
||||
Ch-069.md: 070: GENETICS
|
||||
Ch-070.md: 071: HOUSEKEEPING
|
||||
Ch-071.md: 072: FREEZE
|
||||
Ch-072.md: 073: TRUST
|
||||
Ch-073.md: 074: CULPABILITY
|
||||
Ch-074.md: 075: RECIDIVISM
|
||||
Ch-075.md: 076: TIES
|
||||
Ch-076.md: 077: NOTES
|
||||
Ch-077.md: 078: DIVIDED
|
||||
Ch-078.md: 079: STABILITY
|
||||
Ch-079.md: 080: ACACIA
|
||||
Ch-080.md: 081: ANTARCTICA
|
||||
Ch-081.md: 082: ALIENS
|
||||
Ch-082.md: 083: SCENE
|
||||
Ch-083.md: 084: INTERROGATION
|
||||
Ch-084.md: 085: DRUG
|
||||
Ch-085.md: 086: CONFESSION
|
||||
Ch-086.md: 087: ACCOMPLICE
|
||||
Ch-087.md: 088: RIFT
|
||||
Ch-088.md: 089: TRUMPS
|
||||
Ch-089.md: 090: MANAGEMENT
|
||||
Ch-090.md: 091: DURESS
|
||||
Ch-091.md: 092: GUILT
|
||||
Ch-092.md: 093: DESPERATION
|
||||
Ch-093.md: 094: IDENTITY
|
||||
Ch-094.md: 095: DEFUSE
|
||||
Ch-095.md: 096: REVIVE
|
||||
Ch-096.md: 097: STABILISE
|
||||
Ch-097.md: 098: LOBOTOMY
|
||||
Ch-098.md: 099: TAL
|
||||
Ch-099.md: 100: CAPTAIN
|
||||
Ch-100.md: 101: GRIEF
|
||||
Ch-101.md: 102: BED
|
||||
Ch-102.md: 103: IRREPARABLE
|
||||
Ch-103.md: 104: SHELL
|
||||
Ch-104.md: 105: DOR
|
||||
Ch-105.md: 106: PIRATES
|
||||
Ch-106.md: 107: IMPLICATION
|
||||
Ch-107.md: 108: GROWTH
|
||||
Ch-108.md: 109: NORMAL
|
||||
Ch-109.md: 110: GRAVES
|
||||
Ch-110.md: 111: SCAN
|
||||
Ch-111.md: 112: COHERENCE
|
||||
Ch-112.md: 113: SENTENCE
|
||||
Ch-113.md: 114: REACTION
|
||||
Ch-114.md: 115: SAFE
|
||||
Ch-115.md: 116: REVIEW
|
||||
Ch-116.md: 117: TUBE
|
||||
Ch-117.md: 118: VIEW
|
||||
Ch-118.md: 119: SIGHT
|
||||
Ch-119.md: 120: DELIVERY
|
||||
Ch-120.md: 121: RESPONSE
|
||||
Ch-121.md: 122: INFORMATION
|
||||
Ch-122.md: 123: SIENNA
|
||||
Ch-123.md: 124: UPDATE
|
||||
Ch-124.md: 125: WIPED
|
||||
Ch-125.md: 126: ASTRONAUTS
|
||||
Ch-126.md: 127: HERITAGE
|
||||
Ch-127.md: 128: CODE
|
||||
Ch-128.md: 129: PREPARE
|
||||
Ch-129.md: 130: EVE
|
||||
Ch-130.md: 131: BOARD
|
||||
Ch-131.md: 132: DROP
|
||||
7
md_to_ebook.sh
Normal file
7
md_to_ebook.sh
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Build the cover image and the EPUB inside every subdirectory of the
# current directory.
for dir in */; do
    # Run each build in a subshell: the caller's working directory is never
    # changed, and a failed cd skips this entry instead of running the tools
    # in the wrong place (and then climbing out of the tree with "cd ..").
    (
        cd "$dir" || exit
        # Typeset the cover and render the first page of the DVI to PNG.
        latex cover.tex -interaction=nonstopmode
        dvipng -p 1 cover.dvi
        # Assemble all chapter files into one EPUB with a table of contents.
        # NOTE(review): metadata.txt is resolved relative to "$dir" here, and
        # the output name "apgte.epub" looks inherited from another project;
        # confirm both are intended.
        pandoc metadata.txt md/Ch-*.md -o apgte.epub --top-level-division=chapter --toc
    )
done
|
||||
5
metadata.txt
Normal file
5
metadata.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Time To Orbit Unknown
|
||||
author: Derin Edala
|
||||
lang: en-US
|
||||
---
|
||||
10
requirements.txt
Normal file
10
requirements.txt
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
beautifulsoup4==4.12.2
|
||||
bs4==0.0.1
|
||||
certifi==2023.11.17
|
||||
charset-normalizer==3.3.2
|
||||
idna==3.6
|
||||
markdownify==0.11.6
|
||||
requests==2.31.0
|
||||
six==1.16.0
|
||||
soupsieve==2.5
|
||||
urllib3==2.1.0
|
||||
88
ttou_download.py
Normal file
88
ttou_download.py
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
import os
|
||||
import bs4
|
||||
import logging
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from markdownify import markdownify
|
||||
|
||||
# Directory that receives the downloaded chapter files and the generated index.
output_dir = "./TTOU_2023"
|
||||
|
||||
# Emit timestamped log lines for INFO level and above.
logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
|
||||
|
||||
|
||||
def parse_apgte(url, timeout=30):
    """Fetch one chapter page and extract its title, body and next-page link.

    Args:
        url: Address of the chapter page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl indefinitely.

    Returns:
        A ``(next_url, header, article)`` tuple: the ``href`` of the element
        marked ``rel="next"`` (``None`` when the page has none), the element
        with class ``entry-title`` (``None`` when absent) and the element with
        class ``entry-content``.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If the page has no ``entry-content`` element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    header = soup.find(attrs={"class": "entry-title"})
    article = soup.find(attrs={"class": "entry-content"})
    if article is None:
        raise ValueError(f"no entry-content element found at {url}")

    # Drop the first nested <div>; presumably the "sharedaddy" sharing
    # widget - confirm against the live markup. Skipped when there is none.
    if article.div is not None:
        article.div.decompose()

    next_link = soup.find(attrs={"rel": "next"})
    next_url = next_link.get("href") if next_link is not None else None
    return (next_url, header, article)
|
||||
|
||||
|
||||
def chop_between_finds(s, f1, f2):
    """Return the lines of *s* strictly between two marker lines.

    The start marker is the first line containing ``f1``; the end marker is
    the last line containing ``f2`` that comes after the start marker. Both
    marker lines are excluded from the result.

    A missing marker no longer costs a line of content: without a start
    marker the result begins at the first line, and without an end marker
    after the start it runs through the last line. In particular, when
    ``f1 == f2`` and only one line matches, that line is treated as the start
    marker and everything after it is kept (previously the whole text was
    discarded).

    Args:
        s: Text to trim; split with ``str.splitlines``.
        f1: Substring identifying the start-marker line.
        f2: Substring identifying the end-marker line.

    Returns:
        The kept lines joined with ``"\\n"`` (empty string when nothing is left).
    """
    lines = s.splitlines()

    # First line mentioning f1, or None when the text has no start marker.
    start = next((idx for idx, line in enumerate(lines) if f1 in line), None)
    begin = 0 if start is None else start + 1

    # Scan backwards for the last f2 line, looking only after the start
    # marker so a lone shared marker cannot serve as both ends.
    end = len(lines)
    for idx in range(len(lines) - 1, begin - 1, -1):
        if f2 in lines[idx]:
            end = idx
            break

    return "\n".join(lines[begin:end])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The crawl starts at the first chapter and follows each page's
    # rel="next" link until a page has none.
    next_url = (
        "https://derinstories.com/2022/06/04/001-the-problem-with-the-javelin-program/"
    )
    chapter = 0  # zero-based counter used for the Ch-NNN.md file names
    index = []  # one "file name<TAB>title" entry per saved chapter

    # Creates output_dir together with its md/ subdirectory, once, up front.
    md_dir = f"{output_dir}/md"
    os.makedirs(md_dir, exist_ok=True)

    while next_url:
        logging.info(f"parsing: {next_url}")

        try:
            next_url, title, text = parse_apgte(next_url)
        except Exception:
            logging.exception(f"Failed to parse: {next_url}")
            # next_url was not advanced, so continuing would retry the same
            # page forever while re-saving the previous chapter's content.
            # Stop here; the index of what was downloaded is still written.
            break

        filename = f"{md_dir}/Ch-{chapter:03d}.md"
        index.append(f"Ch-{chapter:03d}.md:\t{title.string}")

        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding.
        with open(filename, "w", encoding="utf-8") as mdfile:
            mdfile.write(markdownify(str(title)))

            # Keep only the body between the first and last lines that
            # mention a .png (presumably header/footer images - confirm).
            txt_md = markdownify(str(text))
            txt_md = chop_between_finds(txt_md, ".png", ".png")
            mdfile.write(txt_md)

        chapter += 1

    with open(f"{output_dir}/index.txt", "w", encoding="utf-8") as txtfile:
        txtfile.write("\n".join(index))
    logging.info("Done!")
|
||||
Loading…
Add table
Reference in a new issue