#!/usr/bin/env python3
# ============================================================
# Slovak Driver Tests extractor
# Copyright (C) 2025 Juraj Oravec
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ============================================================
"""Download the Slovak online driver-test data and print it as HTML.

Identifier glossary (as used by the HTML output below):
okruh = section, otazka = question, odpoved = answer, obrazok = image.
"""

import argparse
import bisect
import importlib
import importlib.util
import itertools
import os
import shutil
from urllib.request import urlretrieve

SCRIPT_VERSION = '0.2.0'

LANGUAGE_INDEX_SK: int = 0
LANGUAGE_INDEX_EN: int = 1
LANGUAGE_INDEX_HU: int = 2

# Textual language codes accepted by --language, next to the numeric ones.
LANGUAGE_CODES: dict = {
    "SK": LANGUAGE_INDEX_SK,
    "EN": LANGUAGE_INDEX_EN,
    "HU": LANGUAGE_INDEX_HU,
}

configuration: dict = {
    "language": LANGUAGE_INDEX_EN,
    "force_data_download": False
}

BASE_URL = "https://www.minv.sk/egovinet02/PCPZobrazFile?fileName=pcpfiles/"
TESTS_DATA_URL: str = BASE_URL + "data5.js"


class Autoskola:
    """Downloads, parses and renders the driver-test question data."""

    def __init__(self):
        # First question number (within one test) of every section, ascending.
        self.okruhy: tuple = (1, 9, 11, 19, 23, 24, 27, 29, 31, 39)
        self.dataFilenameTmp: str = "data.js"
        self.dataFilename: str = "autoskola_data.py"
        self.rawData: dict = dict()
        self.parsedData: list = list()

    def get_index(self, id_otazka):
        """Return the zero-based section index for a question number.

        The result is the number of entries in ``self.okruhy`` that are
        less than or equal to ``int(id_otazka)``, minus one; a number
        below the first entry therefore yields ``-1``.
        """
        return bisect.bisect_right(self.okruhy, int(id_otazka)) - 1

    def otazka_exist_id(self, otazka_id, okruh_id: int):
        """Return True when section *okruh_id* already holds *otazka_id*."""
        return any(
            otazka_id == dic["otazka_id"]
            for dic in self.parsedData[okruh_id]["data"]
        )

    def downloadData(self):
        """Fetch the data file if needed and convert it to a Python file.

        The download is skipped when the temporary file already exists,
        unless ``force_data_download`` is set in the configuration.
        """
        if (not os.path.exists(self.dataFilenameTmp)
                or configuration["force_data_download"]):
            urlretrieve(TESTS_DATA_URL, self.dataFilenameTmp)

        if not os.path.exists(self.dataFilenameTmp):
            return

        # Copy bytes verbatim so the result does not depend on the locale
        # encoding of the machine running the script.
        with open(self.dataFilenameTmp, 'rb') as readFile, \
                open(self.dataFilename, 'wb') as writeFile:
            # Remove the leading "var " from the file; keep the bytes if the
            # file unexpectedly starts with something else.
            prefix = readFile.read(4)
            if prefix != b"var ":
                writeFile.write(prefix)
            shutil.copyfileobj(readFile, writeFile)

    def loadData(self):
        """Execute the converted data file and keep its ``data`` value.

        NOTE(review): this runs the downloaded file as Python code, so the
        remote server is fully trusted here. Consider parsing it instead.
        """
        # Load from the exact path that downloadData() wrote, rather than
        # relying on that directory being on sys.path.
        spec = importlib.util.spec_from_file_location(
            'autoskola_data', self.dataFilename)
        if spec is None or spec.loader is None:
            raise ImportError(
                "Cannot load data module from " + self.dataFilename)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        self.dataModule = module
        self.rawData = module.data

    def downloadImages(self):
        """Download every image referenced by the parsed questions.

        Images are stored under ``images/`` using the relative path from
        the data. Existing files are kept unless ``force_data_download``
        is set, in which case they are removed and fetched again.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order.
        images = dict.fromkeys(
            dic["obrazok"]
            for okruh in self.parsedData
            for dic in okruh["data"]
            if dic["obrazok"]
        )

        for obr in images:
            imagePath = 'images/' + obr
            # Works for any nesting depth, including a bare file name.
            os.makedirs(os.path.dirname(imagePath), exist_ok=True)

            if configuration["force_data_download"] and os.path.exists(imagePath):
                os.remove(imagePath)

            if not os.path.exists(imagePath):
                urlretrieve(BASE_URL + obr, imagePath)

    def parseData(self):
        """Fill ``self.parsedData`` from the raw data of the chosen language.

        Each entry of ``self.parsedData`` is one section with its name
        ("nazov") and the list of its questions ("data"). A question is
        added only once per section, identified by its "otazka_id".
        """
        for data_set in self.rawData[configuration["language"]]:
            # Initialize the sections from the first data set.
            if not self.parsedData:
                for data in data_set["okruhy"].values():
                    self.parsedData.append({
                        "nazov": data[0]["txt"],
                        "data": []
                    })

            for index, data in data_set["otazky"].items():
                otazka = data[0]
                okruh_id = self.get_index(index)

                if self.otazka_exist_id(otazka["id"], okruh_id):
                    continue

                # "platna" is the 1-based position of the answer to keep.
                odpoved = data_set["odpovede"][index][otazka["platna"] - 1]
                self.parsedData[okruh_id]["data"].append({
                    "cislo_otazky": index,
                    "otazka_id": otazka["id"],
                    "otazka": otazka["text"],
                    "odpoved": odpoved["odpoved"],
                    "obrazok": otazka["obrazok"]
                })

    def print_to_html(self):
        """Print all sections as an HTML document to stdout.

        Every section becomes one table whose rows are sorted by question
        id. The printed id is ``otazka_id + 1 - offset`` of its section.

        NOTE(review): the HTML tags were missing from the string literals
        in the reviewed source; the markup below is reconstructed from the
        format placeholders and the CSS selectors. Confirm against the
        intended output. Texts are inserted unescaped, as before.
        """
        # Id offset of every section: 1000, then running sums of the sizes
        # (presumably the number of ids reserved per section).
        okruhy_limits: tuple = tuple(itertools.accumulate(
            (1000, 525, 75, 189, 89, 28, 92, 69, 59, 158)
        ))

        styles = """
table {
    border-spacing: 4px;
    border: 1px solid black;
    border: 1px solid black;
}
table td, table th {
    padding: 5px;
    border: 1px solid black;
}
img {
    max-height: 100px;
    float: left;
    margin-right: 1rem;
}
.section_4 img {
    max-height: 150px;
}
@media print {
    .pagebreak {
        clear: both;
        page-break-after: always;
    }
    tr {
        page-break-inside: avoid;
    }
    nav {
        display: none;
    }
}
"""

        print("<!DOCTYPE html>")
        print("<html>")
        print("<head><title>Car test data</title>")
        print("<style>{styles}</style></head>".format(styles=styles))
        print("<body><h1>Car Test data</h1>")

        sections = zip(self.parsedData, okruhy_limits)
        for index, (okruh, id_offset) in enumerate(sections):
            section_id = index + 1

            if index != 0:
                print('<div class="pagebreak"></div>')

            print('<div class="section_{section}">'.format(section=section_id))
            print('<table id="section_{section_id}">'.format(
                section_id=section_id))
            print("<caption>{section_id}. {caption}</caption>".format(
                section_id=section_id,
                caption=okruh["nazov"])
            )
            print("<tr><th>Id</th><th>Question</th><th>Answer</th></tr>")

            rows = sorted(okruh["data"], key=lambda dic: dic["otazka_id"])
            for otazka_data in rows:
                obrazok: str = ''
                if otazka_data["obrazok"]:
                    # downloadImages() stores the files below images/.
                    obrazok = '<img src="images/{image}">'.format(
                        image=otazka_data["obrazok"]
                    )

                print("<tr>")
                print("<td>{otazka_id}</td>".format(
                    otazka_id=(otazka_data["otazka_id"] + 1 - id_offset))
                )
                print("<td>{obrazok}{question}</td>".format(
                    obrazok=obrazok,
                    question=otazka_data["otazka"])
                )
                print("<td>{answer}</td>".format(
                    answer=otazka_data["odpoved"]))
                print("</tr>")

            print("</table>")
            print("</div>")

        print("</body>")
        print("</html>")


def parse_language(value) -> int:
    """Translate a --language value into a language index.

    Accepts the codes ``SK``/``EN``/``HU`` (case-insensitive) or the
    numeric indexes ``0``/``1``/``2``.

    Raises:
        ValueError: when the value names no known language.
    """
    text = str(value).strip().upper()
    if text in LANGUAGE_CODES:
        return LANGUAGE_CODES[text]

    try:
        index = int(text)
    except ValueError:
        index = None

    if index in LANGUAGE_CODES.values():
        return index

    raise ValueError("Unknown language: {value!r}".format(value=value))


def main():
    """Parse the command line, then download, parse and print the data."""
    parser = argparse.ArgumentParser(
        description='Downloads and extracts the data from the Slovak online driver tests. '
                    'Outputs the resulted HTML file into the stdout.',
    )
    parser.add_argument('-l', '--language',
                        dest='language',
                        action='store',
                        default=str(LANGUAGE_INDEX_EN),
                        help='Test language: '
                             '0,SK - Slovak; '
                             '1,EN - English; '
                             '2,HU - Hungarian;')
    parser.add_argument('-f', '--force-data-download',
                        dest='force_data_download',
                        action='store_true',
                        help='Download data even when the files already exist.')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))

    args = parser.parse_args()

    try:
        configuration["language"] = parse_language(args.language)
    except ValueError as err:
        # Report a bad value as a usage error instead of a traceback.
        parser.error(str(err))

    configuration["force_data_download"] = args.force_data_download

    autoskola = Autoskola()

    autoskola.downloadData()
    autoskola.loadData()
    autoskola.parseData()
    autoskola.downloadImages()
    autoskola.print_to_html()


if __name__ == "__main__":
    main()