From a2a08a4f9578d2d6651f9b56f71ed6b1298f1b27 Mon Sep 17 00:00:00 2001 From: Juraj Oravec Date: Tue, 7 Nov 2017 19:40:55 +0100 Subject: [PATCH] Add downloader script Signed-off-by: Juraj Oravec --- novelDownloader.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100755 novelDownloader.py diff --git a/novelDownloader.py b/novelDownloader.py new file mode 100755 index 0000000..e655247 --- /dev/null +++ b/novelDownloader.py @@ -0,0 +1,91 @@ +#!/usr/bin/python3 +""" +Script to downloaded webpages, extract text and merge all of them +together to create one ebook. +""" + +import errno +import os +import shutil +import subprocess + + +# Book name +name = "Name" +# Base url, this is used as url = mainUrl + +mainUrl = "http://example.com/chapter-" +# Number of all chapter +chapters = 1 +# Start from 0 or 1 ? +fromZero = False + + +# numerical representation of start for script +start = 0 if fromZero else 1 + + +def runInShell(command): + """Run giver string in shell""" + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) + process.wait() + process.communicate() + + +def download(mainUrl, name, number): + """Download webpage, extract test, add some empty lines""" + fileName = os.path.join('chapters', '%s-%d.txt' % (name, number)) + + # download webpage + command = 'wget -q -O- "%s%d" | unfluff | jq -r ".title, .text" > "%s"' %\ + (mainUrl, number, fileName) + runInShell(command) + + # New line after title + command = "sed -i '1 a\\\\' '%s'" % (fileName) + runInShell(command) + + # New line at the end of the file + command = "sed -i -e '$a\\' '%s'" % (fileName) + runInShell(command) + + if number != start: + # New lne at beginning of the file + command = "sed -i '1i\\\\' '%s'" % (fileName) + runInShell(command) + + +def main(): + """Start main downloader and converter""" + try: + os.makedirs("chapters") + except OSError as e: + if e.errno != errno.EEXIST: + raise + + # Download all chapters one by one + print("Downloading...") + for i in range(start, chapters + 1, 1): + print("Downloading: ", name, i) + download(mainUrl, name, i) + + # merge all chapter to one file + print("Merging...") + command = 'cat "chapters/%s-"{%d..%d}.txt > "%s.txt"' % (name, start, chapters, name) + runInShell(command) + + # convert to epub + print("Converting...") + command = 'ebook-convert "%s.txt" "%s.epub"' % (name, name) + runInShell(command) + + # remove download directory + print("Removing temporary data...") + shutil.rmtree('chapters') + os.remove("%s.txt" % (name)) + + print("Done") + + +# if yused standalone start the script +if __name__ == '__main__': + main()