NovelDownloader/novelDownloader.py

92 lines
2.1 KiB
Python
Raw Normal View History

#!/usr/bin/python3
"""
Script to downloaded webpages, extract text and merge all of them
together to create one ebook.
"""
import errno
import os
import shutil
import subprocess
# Book name
name = "Name"
# Base url, this is used as url = mainUrl + <number of chapter>
mainUrl = "http://example.com/chapter-"
# Number of all chapter
chapters = 1
# Start from 0 or 1 ?
fromZero = False
# numerical representation of start for script
start = 0 if fromZero else 1
def runInShell(command):
"""Run giver string in shell"""
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
process.wait()
process.communicate()
def download(mainUrl, name, number):
"""Download webpage, extract test, add some empty lines"""
fileName = os.path.join('chapters', '%s-%d.txt' % (name, number))
# download webpage
command = 'wget -q -O- "%s%d" | unfluff | jq -r ".title, .text" > "%s"' %\
(mainUrl, number, fileName)
runInShell(command)
# New line after title
command = "sed -i '1 a\\\\' '%s'" % (fileName)
runInShell(command)
# New line at the end of the file
command = "sed -i -e '$a\\' '%s'" % (fileName)
runInShell(command)
if number != start:
# New lne at beginning of the file
command = "sed -i '1i\\\\' '%s'" % (fileName)
runInShell(command)
def main():
"""Start main downloader and converter"""
try:
os.makedirs("chapters")
except OSError as e:
if e.errno != errno.EEXIST:
raise
# Download all chapters one by one
print("Downloading...")
for i in range(start, chapters + 1, 1):
print("Downloading: ", name, i)
download(mainUrl, name, i)
# merge all chapter to one file
print("Merging...")
command = 'cat "chapters/%s-"{%d..%d}.txt > "%s.txt"' % (name, start, chapters, name)
runInShell(command)
# convert to epub
print("Converting...")
command = 'ebook-convert "%s.txt" "%s.epub"' % (name, name)
runInShell(command)
# remove download directory
print("Removing temporary data...")
shutil.rmtree('chapters')
os.remove("%s.txt" % (name))
print("Done")
# if yused standalone start the script
if __name__ == '__main__':
main()