slovak_driver_tests/autoskola.py
Juraj Oravec 779fdfaead
Add LICENSE
Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
2025-05-17 15:09:02 +02:00

292 lines
9.8 KiB
Python

#!/usr/bin/env python
# ============================================================
# Slovak Driver Tests extractor
# Copyright (C) 2025 Juraj Oravec <jurajoravec@mailo.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ============================================================
import argparse
import importlib
import os
import shutil
from urllib.request import urlretrieve
# Version string reported by the --version command-line flag.
SCRIPT_VERSION = '0.2.0'

# Indices used to select one language's data set from the raw test data.
LANGUAGE_INDEX_SK, LANGUAGE_INDEX_EN, LANGUAGE_INDEX_HU = range(3)

# Run-time settings; main() overwrites both values from the command line.
configuration: dict = {
    "language": LANGUAGE_INDEX_EN,
    "force_data_download": False,
}

# Common prefix of every downloaded file (test data and images).
BASE_URL = "https://www.minv.sk/egovinet02/PCPZobrazFile?fileName=pcpfiles/"
TESTS_DATA_URL: str = f"{BASE_URL}data5.js"
class Autoskola():
    """Download, parse and render the Slovak driver-test question data.

    The methods are meant to be called in this order: ``downloadData``,
    ``loadData``, ``parseData``, ``downloadImages``, ``print_to_html``.
    """

    def __init__(self):
        # Ascending lower bounds used by get_index() to map a question
        # number to the index of its section.
        self.okruhy: tuple = (1, 9, 11, 19, 23, 24, 27, 29, 31, 39)
        # Raw JavaScript file as downloaded from TESTS_DATA_URL.
        self.dataFilenameTmp: str = "data.js"
        # The same content with its first four bytes removed, loadable as
        # a Python module.
        self.dataFilename: str = "autoskola_data.py"
        # Module object created by loadData(); None until then.
        self.dataModule = None
        # ``data`` attribute of the loaded module.
        self.rawData: dict = {}
        # One {"nazov": ..., "data": [...]} dict per section, filled by
        # parseData().
        self.parsedData: list = []

    def get_index(self, id_otazka):
        """Return the section index a question number belongs to.

        ``id_otazka`` is converted with ``int()``. The result is the
        position of the last entry of ``self.okruhy`` that is less than or
        equal to the number. A number below the first entry yields ``-1``.
        """
        number = int(id_otazka)
        # self.okruhy is ascending, so counting the bounds that do not
        # exceed the number gives the position of the matching section.
        return sum(1 for bound in self.okruhy if bound <= number) - 1

    def otazka_exist_id(self, otazka_id: str, okruh_id: int):
        """Return True if section ``okruh_id`` already stores ``otazka_id``.

        The id is compared by equality with the ``"otazka_id"`` value of
        every question stored in ``self.parsedData[okruh_id]["data"]``.
        """
        return any(
            otazka_id == question["otazka_id"]
            for question in self.parsedData[okruh_id]["data"]
        )

    def downloadData(self):
        """Fetch the raw test data and convert it into a Python module file.

        The download is skipped when ``self.dataFilenameTmp`` already
        exists, unless ``configuration["force_data_download"]`` is set.
        The conversion to ``self.dataFilename`` runs whenever the raw file
        is present.
        """
        if configuration["force_data_download"] or not os.path.exists(self.dataFilenameTmp):
            urlretrieve(TESTS_DATA_URL, self.dataFilenameTmp)

        if not os.path.exists(self.dataFilenameTmp):
            return

        # Copy in binary mode so the bytes are written unchanged regardless
        # of the platform's default text encoding; the result is later read
        # as Python source.
        with open(self.dataFilenameTmp, 'rb') as read_file, \
                open(self.dataFilename, 'wb') as write_file:
            # Remove leading "var " from the file
            read_file.read(4)
            shutil.copyfileobj(read_file, write_file)

    def loadData(self):
        """Load ``self.dataFilename`` as a module and keep its ``data``.

        The file is loaded by path, so it is found wherever downloadData()
        wrote it, and a file created after interpreter start is picked up.

        Raises:
            ImportError: if no loader can be created for the file.
        """
        import importlib.util

        # NOTE(security): this executes the downloaded file as Python code.
        # Only use it with a trusted data source.
        module_name = os.path.splitext(os.path.basename(self.dataFilename))[0]
        spec = importlib.util.spec_from_file_location(module_name, self.dataFilename)
        if spec is None or spec.loader is None:
            raise ImportError(
                "Cannot load test data from {path!r}".format(path=self.dataFilename)
            )
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        self.dataModule = module
        self.rawData = module.data

    def downloadImages(self):
        """Download every image referenced by the parsed questions.

        Images are stored under ``images/`` using the relative path from
        the question data. Existing files are kept unless
        ``configuration["force_data_download"]`` is set.
        """
        # dict.fromkeys() removes duplicates while keeping first-seen order.
        images = list(dict.fromkeys(
            question["obrazok"]
            for section in self.parsedData
            for question in section["data"]
            if question["obrazok"]
        ))

        for image in images:
            image_path = 'images/' + image
            # Create whatever directory the image lives in, at any depth.
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            if configuration["force_data_download"] or not os.path.exists(image_path):
                urlretrieve(BASE_URL + image, image_path)

    def parseData(self):
        """Collect the unique questions of the selected language by section.

        Iterates the data sets in ``self.rawData[configuration["language"]]``.
        The first data set seen creates one entry per section in
        ``self.parsedData``. Each question is then appended to its section
        unless a question with the same id is already stored there.
        """
        for data_set in self.rawData[configuration["language"]]:
            # Initialize the dataset
            if not self.parsedData:
                for section in data_set["okruhy"].values():
                    self.parsedData.append({
                        "nazov": section[0]["txt"],
                        "data": []
                    })

            for number, question in data_set["otazky"].items():
                details = question[0]
                okruh_id = self.get_index(number)
                if self.otazka_exist_id(details["id"], okruh_id):
                    continue
                self.parsedData[okruh_id]["data"].append({
                    "cislo_otazky": number,
                    "otazka_id": details["id"],
                    "otazka": details["text"],
                    # "platna" is used as a 1-based position in the answer list.
                    "odpoved": data_set["odpovede"][number][details["platna"] - 1]["odpoved"],
                    "obrazok": details["obrazok"]
                })

    def print_to_html(self):
        """Print the parsed questions to stdout as one HTML document.

        The document has a navigation list followed by one table per
        section, with the rows ordered by question id. The id shown for a
        question is ``otazka_id + 1 - offset``, where ``offset`` is the
        section's entry in the offset table built below.
        """
        # Each section's id offset is the running total of these steps.
        offset_steps = (1000, 525, 75, 189, 89, 28, 92, 69, 59, 158)
        okruhy_limits = []
        running_total = 0
        for step in offset_steps:
            running_total += step
            okruhy_limits.append(running_total)

        styles = (
            "\n"
            "table {\n"
            "border-spacing: 4px;\n"
            "border: 1px solid black;\n"
            "border: 1px solid black;\n"
            "}\n"
            "table td, table th {\n"
            "padding: 5px;\n"
            "border: 1px solid black;\n"
            "}\n"
            "img {\n"
            "max-height: 100px;\n"
            "float: left;\n"
            "margin-right: 1rem;\n"
            "}\n"
            ".section_4 img {\n"
            "max-height: 150px;\n"
            "}\n"
            "@media print {\n"
            ".pagebreak {\n"
            "clear: both;\n"
            "page-break-after: always;\n"
            "}\n"
            "tr {\n"
            "page-break-inside: avoid;\n"
            "}\n"
            "nav {\n"
            "display: none;\n"
            "}\n"
            "}\n"
        )

        # Pair every parsed section with its offset. zip() stops at the
        # shorter input, so a short parsedData cannot cause an IndexError.
        sections = list(zip(self.parsedData, okruhy_limits))

        print("<html><head>")
        print("<title>Car test data</title>")
        print(f"<style>{styles}</style></head><body><h1>Car Test data</h1>")

        print("<nav><ol>")
        for section_id, (section, _) in enumerate(sections, start=1):
            section_name = section["nazov"]
            print(f"<li><a href='#section{section_id}'>{section_name}</a></li>")
        print("</ol></nav>")

        for section_id, (section, offset) in enumerate(sections, start=1):
            if section_id != 1:
                print("<div class='pagebreak'> </div>")

            caption = section["nazov"]
            print(f"<a name='section{section_id}'></a>")
            print(f"<table class='section_{section_id}'>")
            print(f"<caption>{section_id}. {caption}</caption>")
            print("<thead><tr><th>Id</th><th>Question</th><th>Answer</th></tr></thead>")

            # Sort the question records directly rather than sorting the
            # ids and searching for each record again.
            for question in sorted(section["data"], key=lambda item: item["otazka_id"]):
                obrazok = ''
                if question["obrazok"]:
                    image = question["obrazok"]
                    obrazok = (
                        f"<a href='images/{image}' target='_blank'>"
                        f"<img src='images/{image}'></a>"
                    )
                shown_id = question["otazka_id"] + 1 - offset
                text = question["otazka"]
                answer = question["odpoved"]

                print("<tr>")
                print(f"<td>{shown_id}</td>")
                print(f"<td>{obrazok}{text}</td>")
                print(f"<td>{answer}</td>")
                print("</tr>")
            print("</table>")

        print("</body></html>")
def parse_language(value, languages):
    """Translate a ``--language`` value into a language index.

    Args:
        value: Text given on the command line, either a language code
            (matched case-insensitively) or a numeric index.
        languages: Mapping of upper-case language code to language index.

    Returns:
        The matching index, or ``None`` when the value is neither a known
        code nor a known index.
    """
    text = str(value).strip()
    code = text.upper()
    if code in languages:
        return languages[code]
    # Only try the numeric form once the code lookup has failed, so a
    # valid code such as "EN" never reaches int().
    try:
        number = int(text)
    except ValueError:
        return None
    return number if number in languages.values() else None


def main():
    """Parse the command line and run the whole extraction pipeline.

    Stores the selected language and the force-download flag in
    ``configuration``, then downloads, loads and parses the data, downloads
    the images and prints the HTML document to stdout. An unsupported
    language value ends the program through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description='Downloads and extracts the data from the Slovak online driver tests. '
                    'Outputs the resulted HTML file into the stdout.',
    )
    parser.add_argument('-l', '--language', dest='language', action='store',
                        default=str(LANGUAGE_INDEX_EN),
                        help='Test language: '
                             '0,SK - Slovak; '
                             '1,EN - English; '
                             '2,HU - Hungarian;')
    parser.add_argument('-f', '--force-data-download', dest='force_data_download', action='store_true',
                        help='Download data even when the files already exist.')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))
    args = parser.parse_args()

    languages = {
        'SK': LANGUAGE_INDEX_SK,
        'EN': LANGUAGE_INDEX_EN,
        'HU': LANGUAGE_INDEX_HU,
    }
    language = parse_language(args.language, languages)
    if language is None:
        parser.error(
            "unsupported language {value!r}; expected 0/SK, 1/EN or 2/HU".format(
                value=args.language
            )
        )
    configuration["language"] = language
    configuration["force_data_download"] = args.force_data_download

    autoskola = Autoskola()
    autoskola.downloadData()
    autoskola.loadData()
    autoskola.parseData()
    autoskola.downloadImages()
    autoskola.print_to_html()
# Run the extractor only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()