2017-11-07 19:40:55 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
"""
|
|
|
|
Script to download webpages, extract text and merge all of them
|
|
|
|
together to create one ebook.
|
|
|
|
"""
|
|
|
|
|
2020-12-21 03:03:32 +01:00
|
|
|
import argparse
|
2017-11-07 19:40:55 +01:00
|
|
|
|
2020-12-21 03:03:32 +01:00
|
|
|
from importlib import import_module
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
from typing import List
|
2017-11-07 19:40:55 +01:00
|
|
|
|
2020-12-21 03:03:32 +01:00
|
|
|
from chapter import Chapter
|
|
|
|
from ebook import Ebook
|
|
|
|
|
|
|
|
|
|
|
|
class NovelDownload:
    """Download the chapters of a web novel and bundle them into an ebook.

    All site-specific work is delegated to a parser object that is looked up
    from the network location of the given URL (see ``loadModule``).
    """

    def __init__(self, url):
        """Store *url* and try to load the parser module that matches it.

        When no parser can be loaded, "Url is not supported" is printed and
        ``self.parser`` stays ``None``; ``download`` then does nothing.
        """
        self.url = url

        self.title: str = ""
        self.author: str = ""

        self.chapters: List[Chapter] = []
        self.parser = None

        if not self.loadModule():
            print("Url is not supported")

    def loadModule(self):
        """Import the parser module for ``self.url`` and instantiate its parser.

        The module name is the URL's network location with every "." replaced
        by "_", looked up inside the ``parsers`` package.

        Returns:
            True when ``self.parser`` was set to ``Parser(self.url)`` from the
            imported module, False when the module could not be imported.
        """
        moduleName = urlparse(self.url).netloc.replace(".", "_")

        try:
            parserPackage = import_module('parsers.' + moduleName)
        except ImportError:
            print("Parser module not found: " + moduleName)
            return False

        self.parser = parserPackage.Parser(self.url)
        return True

    def download(self):
        """Fetch author, title and every chapter through the loaded parser.

        Prints "There was an error" and returns without doing anything when
        no parser is available. Otherwise each chapter is printed as
        "<number> - <title>" and appended to ``self.chapters``.
        """
        if not self.parser:
            print("There was an error")
            return

        self.parser.prepare()
        self.author = self.parser.getAuthor()
        self.title = self.parser.getTitle()

        # One nextChapter() call per URL reported by the parser. The counter
        # starts at 1 because it doubles as the fallback chapter number, so
        # the upper bound has to be count + 1; range(1, count) would stop one
        # short and silently drop the last chapter.
        chapterCount = len(self.parser.getUrls())
        for position in range(1, chapterCount + 1):
            number, title, content = self.parser.nextChapter()
            # A chapter number of -1 from the parser is replaced by the
            # 1-based position of the chapter in the download order.
            if number == -1:
                number = position
            print(f"{number} - {title}")
            self.chapters.append(Chapter(title, content, number))

    def save(self):
        """Build an ``Ebook`` from the collected data and call its ``create()``."""
        book = Ebook(self.title, self.chapters, self.author)
        book.create()
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the arguments, download, then save.

    Exits with status 1, without downloading or saving anything, when no
    parser could be loaded for the given URL.
    """
    parser = argparse.ArgumentParser(description='Webnovel downloader')
    parser.add_argument('-u', '--url', metavar='URL', type=str, required=True,
                        help='Url of the index page or first chapter, depends on parser support.')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0.0')
    args = parser.parse_args()

    nd = NovelDownload(args.url)
    if not nd.parser:
        # NovelDownload has already printed why the URL cannot be handled.
        # Stop here instead of going on to save an ebook with no chapters.
        raise SystemExit(1)

    # Download all chapters one by one
    print("Downloading...")
    nd.download()
    print("Saving...")
    nd.save()
    print("Done")
|
2017-11-07 19:40:55 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|