292 lines
9.8 KiB
Python
292 lines
9.8 KiB
Python
#!/usr/bin/env python
|
|
# ============================================================
|
|
# Slovak Driver Tests extractor
|
|
# Copyright (C) 2025 Juraj Oravec <jurajoravec@mailo.com>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
# ============================================================
|
|
|
|
|
|
import argparse
|
|
import importlib
|
|
import os
|
|
import shutil
|
|
from urllib.request import urlretrieve
|
|
|
|
SCRIPT_VERSION = '0.2.0'

# Index of each supported language inside the downloaded data set
# (used to select the language entry when parsing).
LANGUAGE_INDEX_SK, LANGUAGE_INDEX_EN, LANGUAGE_INDEX_HU = range(3)

# Runtime settings; the defaults here are overwritten from the command line.
configuration: dict = dict(
    language=LANGUAGE_INDEX_EN,
    force_data_download=False,
)

# Common prefix of every downloadable resource (data file and images).
BASE_URL = "https://www.minv.sk/egovinet02/PCPZobrazFile?fileName=pcpfiles/"
TESTS_DATA_URL: str = f"{BASE_URL}data5.js"
|
|
|
|
|
|
class Autoskola():
    """Download, parse and render the Slovak driver-test question catalogue.

    The script entry point calls the methods in this order:
    ``downloadData`` -> ``loadData`` -> ``parseData`` -> ``downloadImages``
    -> ``print_to_html``.
    """

    def __init__(self):
        # Thresholds used by ``get_index`` to map a question number to a
        # section; must stay sorted in ascending order.
        self.okruhy: tuple = (1, 9, 11, 19, 23, 24, 27, 29, 31, 39)
        # Raw download target (JavaScript, starts with "var ").
        self.dataFilenameTmp: str = "data.js"
        # Importable Python copy of the download.
        self.dataFilename: str = "autoskola_data.py"
        # Module object produced by ``loadData``; ``None`` until then.
        self.dataModule = None
        self.rawData: dict = dict()
        # One ``{"nazov": str, "data": [question dict, ...]}`` per section.
        self.parsedData: list = list()

    def get_index(self, id_otazka):
        """Return the zero-based section index for a question number.

        The result is the position of the last entry of ``self.okruhy`` that
        is less than or equal to ``id_otazka``.

        Args:
            id_otazka: Question number; anything accepted by ``int()``.

        Returns:
            The section index, or ``-1`` when the number is smaller than the
            first entry of ``self.okruhy`` (callers indexing a list with it
            will then address the last section).

        Raises:
            ValueError: If ``id_otazka`` cannot be converted to ``int``.
        """
        question_number: int = int(id_otazka)

        for position, first_number in enumerate(self.okruhy):
            if question_number < first_number:
                return position - 1

        return len(self.okruhy) - 1

    def otazka_exist_id(self, otazka_id: str, okruh_id: int):
        """Tell whether section ``okruh_id`` already holds ``otazka_id``.

        Args:
            otazka_id: Value compared against each stored ``"otazka_id"``.
            okruh_id: Index into ``self.parsedData``.

        Returns:
            ``True`` when a question with that id is stored, else ``False``.
        """
        stored_questions: list = self.parsedData[okruh_id]["data"]
        return any(otazka_id == question["otazka_id"] for question in stored_questions)

    def downloadData(self):
        """Fetch the test data and convert it into an importable Python file.

        The download is skipped when the temporary file already exists, unless
        ``configuration["force_data_download"]`` is set. The importable copy is
        regenerated on every call.
        """
        if configuration["force_data_download"] or not os.path.exists(self.dataFilenameTmp):
            urlretrieve(TESTS_DATA_URL, self.dataFilenameTmp)

        if not os.path.exists(self.dataFilenameTmp):
            return

        # Copy bytes rather than text so the result does not depend on the
        # platform's default encoding.
        js_prefix = b"var "
        with open(self.dataFilenameTmp, 'rb') as source, \
                open(self.dataFilename, 'wb') as target:
            head = source.read(len(js_prefix))
            # Drop the leading "var " so the JavaScript assignment becomes a
            # Python one; anything else is kept untouched.
            if head != js_prefix:
                target.write(head)
            shutil.copyfileobj(source, target)

    def loadData(self):
        """Import the generated data module and keep its ``data`` attribute.

        Raises:
            ImportError: If the generated module cannot be found.
            AttributeError: If the module defines no ``data`` name.
        """
        # NOTE(review): this executes downloaded content as Python code. It is
        # only acceptable while the remote source is fully trusted.
        # NOTE(review): the module is looked up on ``sys.path`` while the file
        # is written relative to the working directory - confirm both match
        # when the script is started from another directory.
        module_name = os.path.splitext(os.path.basename(self.dataFilename))[0]

        # The module file may have been created after the interpreter started,
        # so the import system's directory caches have to be refreshed.
        importlib.invalidate_caches()
        self.dataModule = importlib.import_module(module_name)
        self.rawData = self.dataModule.data

    def downloadImages(self):
        """Download every image referenced by the parsed questions.

        Images are stored below ``images/`` using the relative path found in
        the data. Existing files are kept unless
        ``configuration["force_data_download"]`` is set.
        """
        # A dict keeps first-seen order while dropping duplicates.
        wanted: dict = {}
        for section in self.parsedData[:len(self.okruhy)]:
            for question in section["data"]:
                if question["obrazok"]:
                    wanted.setdefault(question["obrazok"], None)

        force: bool = configuration["force_data_download"]
        for image in wanted:
            # NOTE(review): the relative path comes from the downloaded data
            # and is used as-is; it is assumed not to escape ``images/``.
            image_path = 'images/' + image

            # Create the full parent directory, whatever its depth.
            os.makedirs(os.path.dirname(image_path), exist_ok=True)

            if force or not os.path.exists(image_path):
                urlretrieve(BASE_URL + image, image_path)

    def parseData(self):
        """Fill ``self.parsedData`` from the raw data of the chosen language.

        Sections are created from the first data set; questions from every
        data set are then appended to their section, skipping ids that are
        already stored there.
        """
        for data_set in self.rawData[configuration["language"]]:
            # Initialize the sections once, from the first data set.
            if not self.parsedData:
                for section in data_set["okruhy"].values():
                    self.parsedData.append({
                        "nazov": section[0]["txt"],
                        "data": [],
                    })

            for number, entries in data_set["otazky"].items():
                question = entries[0]
                okruh_id = self.get_index(number)

                if self.otazka_exist_id(question["id"], okruh_id):
                    continue

                self.parsedData[okruh_id]["data"].append({
                    "cislo_otazky": number,
                    "otazka_id": question["id"],
                    "otazka": question["text"],
                    # "platna" is the one-based position of the valid answer.
                    "odpoved": data_set["odpovede"][number][question["platna"] - 1]["odpoved"],
                    "obrazok": question["obrazok"],
                })

    def print_to_html(self):
        """Write the parsed questions to stdout as one HTML document.

        Each section becomes a table whose rows are ordered by question id.
        The id shown is ``otazka_id + 1 - offset`` where ``offset`` is the
        section's entry in the cumulative table below.
        """
        # Cumulative id offsets per section: presumably a base id followed by
        # the number of questions in each section - TODO confirm.
        offsets: list = []
        running_total = 0
        for size in (1000, 525, 75, 189, 89, 28, 92, 69, 59, 158):
            running_total += size
            offsets.append(running_total)

        sections = self.parsedData[:len(self.okruhy)]

        styles = """
        table {
            border-spacing: 4px;
            border: 1px solid black;
            border: 1px solid black;
        }
        table td, table th {
            padding: 5px;
            border: 1px solid black;
        }
        img {
            max-height: 100px;
            float: left;
            margin-right: 1rem;
        }

        .section_4 img {
            max-height: 150px;
        }
        @media print {
            .pagebreak {
                clear: both;
                page-break-after: always;
            }
            tr {
                page-break-inside: avoid;
            }
            nav {
                display: none;
            }
        }
        """

        print("<html><head>")
        print("<title>Car test data</title>")
        print("<style>" + styles + "</style></head><body><h1>Car Test data</h1>")

        # Table of contents.
        print("<nav><ol>")
        for number, section in enumerate(sections, start=1):
            title = section["nazov"]
            print(f"<li><a href='#section{number}'>{title}</a></li>")
        print("</ol></nav>")

        for position, section in enumerate(sections):
            number = position + 1
            title = section["nazov"]

            # Every section but the first starts on a new printed page.
            if position != 0:
                print("<div class='pagebreak'> </div>")

            print(f"<a name='section{number}'></a>")
            print(f"<table class='section_{number}'>")
            print(f"<caption>{number}. {title}</caption>")

            print("<thead><tr><th>Id</th><th>Question</th><th>Answer</th></tr></thead>")

            ordered = sorted(section["data"], key=lambda item: item["otazka_id"])
            for question in ordered:
                print("<tr>")

                picture: str = ''
                image = question["obrazok"]
                if image:
                    picture = (
                        f"<a href='images/{image}' target='_blank'>"
                        f"<img src='images/{image}'></a>"
                    )

                shown_id = question["otazka_id"] + 1 - offsets[position]
                text = question["otazka"]
                answer = question["odpoved"]

                print(f"<td>{shown_id}</td>")
                print(f"<td>{picture}{text}</td>")
                print(f"<td>{answer}</td>")
                print("</tr>")

            print("</table>")

        print("</body></html>")
|
|
|
|
|
|
def main():
    """Parse the command line, then download, parse and print the tests.

    The language may be given by name (``SK``, ``EN``, ``HU``, any case) or by
    its numeric index. An unknown language aborts with the usual argparse
    usage error instead of a traceback or a silent fallback.
    """
    parser = argparse.ArgumentParser(
        description='Downloads and extracts the data from the Slovak online driver tests. '
                    'Outputs the resulted HTML file into the stdout.',
    )
    parser.add_argument('-l', '--language', dest='language', action='store',
                        default=str(LANGUAGE_INDEX_EN),
                        help='Test language: '
                             '0,SK - Slovak; '
                             '1,EN - English; '
                             '2,HU - Hungarian;')
    parser.add_argument('-f', '--force-data-download', dest='force_data_download', action='store_true',
                        help='Download data even when the files already exist.')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))

    args = parser.parse_args()

    languages_by_name: dict = {
        'SK': LANGUAGE_INDEX_SK,
        'EN': LANGUAGE_INDEX_EN,
        'HU': LANGUAGE_INDEX_HU,
    }

    requested: str = args.language.strip()
    language = languages_by_name.get(requested.upper())
    if language is None:
        # Not a known name: accept it only if it is one of the known indexes.
        # Converting first and comparing afterwards keeps a name such as "EN"
        # from ever reaching int().
        try:
            language = int(requested)
        except ValueError:
            language = None
        if language not in languages_by_name.values():
            parser.error("unsupported language: {language!r}".format(language=args.language))

    configuration["language"] = language
    configuration["force_data_download"] = args.force_data_download

    autoskola = Autoskola()
    autoskola.downloadData()
    autoskola.loadData()
    autoskola.parseData()
    autoskola.downloadImages()
    autoskola.print_to_html()
|
|
|
|
|
|
# Run the extractor only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|