')
+
+ # Get title
+ bcol = page.findChild("ol", {"class": "breadcrumb"}, recursive=True)
+ titleText = bcol.findChild("li", {"class": "active"}, recursive=True)
+ titleText = titleText.text
+
+ try:
+ number, title = re.findall('^Chapter\s?([0-9]+)[^\s]?(.*)', titleText)[0]
+ title = title.strip()
+ except Exception:
+ number = -1
+ title = titleText
+
+ self.decompose(content.findChildren("script", recursive=True))
+ self.decompose(content.findChildren("ins", {"class": "adsbygoogle"}, recursive=True))
+ self.decompose(content.findChildren("div", {"data-endpoint": "//trends.revcontent.com"}, recursive=True))
+
+ return number, title.strip(), content.contents[1]
+
+ def getAuthor(self):
+     """Return the author text taken from the index page.
+
+     Looks up the first ``div`` with class ``author-content`` in
+     ``self.indexPage`` and returns its text with surrounding whitespace
+     removed.
+     """
+     authorTag = self.indexPage.find("div", {"class": "author-content"})
+     rawAuthor = authorTag.text
+     return rawAuthor.strip()
+
+ def getTitle(self):
+     """Return the title text taken from the index page.
+
+     Looks up the first ``div`` with class ``post-title`` in
+     ``self.indexPage`` and returns its text with surrounding whitespace
+     removed.
+     """
+     titleTag = self.indexPage.find("div", {"class": "post-title"})
+     rawTitle = titleTag.text
+     return rawTitle.strip()
+
+ def getUrls(self):
+     """Return ``self.chapterUrls`` as stored (no copy is made)."""
+     urls = self.chapterUrls
+     return urls
+
+ def decompose(self, objects):
+     """Call ``decompose()`` on each item of *objects*, in iteration order."""
+     for node in objects:
+         node.decompose()
+
+ def downloadIndex(self):
+     """Fetch ``self.indexUrl`` and store the parsed page in ``self.indexPage``.
+
+     The response body is parsed with BeautifulSoup using the "lxml" parser.
+
+     Raises:
+         FileNotFoundError: If the response status code is anything other
+             than 200.
+         requests.exceptions.Timeout: If connecting to the server or
+             reading from it stalls for longer than 30 seconds.
+     """
+     # Without an explicit timeout, requests waits indefinitely on an
+     # unresponsive server and the caller never regains control.
+     response = requests.get(self.indexUrl, timeout=30)
+     if response.status_code != 200:
+         raise FileNotFoundError("Unable to download {url}".format(url=self.indexUrl))
+
+     self.indexPage = BeautifulSoup(response.text, "lxml")
+
+ def parseIndex(self):
+ chapterListTag = self.indexPage.find("ul", {"class": "version-chap"})
+ if chapterListTag is None:
+ raise RuntimeError('Failed to find a chapter list