1
mirror of https://invent.kde.org/network/falkon.git synced 2024-09-21 17:52:10 +02:00

[AutoFill] Fixed extracting form on Twitter.com

Twitter uses multiple nearly identical forms (differing only in class and id names).
Now PageFormCompleter also tests for the occurrence of the username name=value pair
in the sent post data.
This commit is contained in:
nowrep 2013-02-16 11:20:03 +01:00
parent 12e49d1bf4
commit a84b180444
4 changed files with 124 additions and 59 deletions

View File

@ -32,14 +32,9 @@ PageFormCompleter::PageFormCompleter(QWebPage* page)
PageFormData PageFormCompleter::extractFormData(const QByteArray &postData) const
{
QString usernameName;
QString usernameValue;
QString passwordName;
QString passwordValue;
QWebElementCollection allForms;
QWebElement foundForm;
QByteArray data = convertWebKitFormBoundaryIfNecessary(postData);
PageFormData formData = {false, QString(), QString(), data};
@ -52,65 +47,41 @@ PageFormData PageFormCompleter::extractFormData(const QByteArray &postData) cons
return formData;
}
/* Find all form elements in page (in all frames) */
QList<QWebFrame*> frames;
frames.append(m_page->mainFrame());
while (!frames.isEmpty()) {
QWebFrame* frame = frames.takeFirst();
allForms.append(frame->findAllElements("form"));
frames += frame->childFrames();
}
const QWebElementCollection &allForms = getAllElementsFromPage(m_page, "form");
const QueryItems &queryItems = createQueryItems(data);
/* Find form that contains password value sent in data */
// Find form that contains password value sent in data
foreach(const QWebElement & formElement, allForms) {
const QWebElementCollection &inputs = formElement.findAll("input[type=\"password\"]");
foreach(QWebElement inputElement, inputs) {
passwordName = inputElement.attribute("name");
passwordValue = inputElement.evaluateJavaScript("this.value").toString();
if (queryItemsContains(queryItems, passwordName, passwordValue)) {
foundForm = formElement;
break;
}
}
if (!foundForm.isNull()) {
break;
}
}
if (foundForm.isNull()) {
return formData;
}
/* Try to find username (or email) field in the form. */
bool found = false;
QStringList selectors;
selectors << "input[type=\"text\"][name*=\"user\"]"
<< "input[type=\"text\"][name*=\"name\"]"
<< "input[type=\"text\"]"
<< "input[type=\"email\"]"
<< "input:not([type=\"hidden\"][type=\"password\"])";
const QWebElementCollection &inputs = formElement.findAll("input[type=\"password\"]");
foreach(const QString & selector, selectors) {
const QWebElementCollection &inputs = foundForm.findAll(selector);
foreach(QWebElement element, inputs) {
usernameName = element.attribute("name");
usernameValue = element.evaluateJavaScript("this.value").toString();
foreach(QWebElement inputElement, inputs) {
const QString &passName = inputElement.attribute("name");
const QString &passValue = inputElement.evaluateJavaScript("this.value").toString();
if (!usernameName.isEmpty() && !usernameValue.isEmpty()) {
if (queryItemsContains(queryItems, passName, passValue)) {
// Set passwordValue if not empty (to make it possible extract forms without username field)
passwordValue = passValue;
const QueryItem &item = findUsername(formElement);
if (queryItemsContains(queryItems, item.first, item.second)) {
usernameValue = item.second;
found = true;
break;
}
}
}
if (found) {
break;
}
}
// It is necessary only to find password, as there may be form without username field
if (passwordValue.isEmpty()) {
return formData;
}
formData.found = true;
formData.username = usernameValue;
formData.password = passwordValue;
@ -122,19 +93,12 @@ void PageFormCompleter::completePage(const QByteArray &data) const
{
const QueryItems &queryItems = createQueryItems(data);
/* Input types that are being completed */
// Input types that are being completed
QStringList inputTypes;
inputTypes << "text" << "password" << "email";
/* Find all input elements in the page */
QWebElementCollection inputs;
QList<QWebFrame*> frames;
frames.append(m_page->mainFrame());
while (!frames.isEmpty()) {
QWebFrame* frame = frames.takeFirst();
inputs.append(frame->findAllElements("input"));
frames += frame->childFrames();
}
// Find all input elements in the page
const QWebElementCollection &inputs = getAllElementsFromPage(m_page, "input");
for (int i = 0; i < queryItems.count(); i++) {
const QString &key = queryItems.at(i).first;
@ -222,6 +186,34 @@ QByteArray PageFormCompleter::convertWebKitFormBoundaryIfNecessary(const QByteAr
return formatedData;
}
// Looks for the username (or e-mail) field of the given form.
//
// The selectors below are tried in order, from the most specific to the most
// generic. The first input that has both a non-empty "name" attribute and a
// non-empty current value is returned as a (name, value) pair. When no input
// qualifies, a default-constructed QueryItem is returned.
PageFormCompleter::QueryItem PageFormCompleter::findUsername(const QWebElement &form) const
{
    QStringList selectors;
    selectors << "input[type=\"text\"][name*=\"user\"]"
              << "input[type=\"text\"][name*=\"name\"]"
              << "input[type=\"text\"]"
              << "input[type=\"email\"]"
              // Last resort: any input that is neither hidden nor a password
              // field. Each exclusion needs its own :not(); a single
              // :not([a][b]) would (where compound arguments are accepted at
              // all) only exclude elements matching both tests at once.
              << "input:not([type=\"hidden\"]):not([type=\"password\"])";

    foreach(const QString & selector, selectors) {
        const QWebElementCollection &inputs = form.findAll(selector);

        // Iterate by value: evaluateJavaScript() is called on the element.
        foreach(QWebElement element, inputs) {
            const QString &name = element.attribute("name");
            // Read the live value through JavaScript rather than the "value" attribute.
            const QString &value = element.evaluateJavaScript("this.value").toString();

            if (!name.isEmpty() && !value.isEmpty()) {
                QueryItem item;
                item.first = name;
                item.second = value;
                return item;
            }
        }
    }

    // No candidate field found in this form.
    return QueryItem();
}
PageFormCompleter::QueryItems PageFormCompleter::createQueryItems(const QByteArray &data) const
{
/* Why not to use encodedQueryItems = QByteArrays ?
@ -247,3 +239,18 @@ PageFormCompleter::QueryItems PageFormCompleter::createQueryItems(const QByteArr
return arguments;
}
// Gathers every element matching the selector from the main frame of the page
// and from all frames nested below it.
QWebElementCollection PageFormCompleter::getAllElementsFromPage(QWebPage* page, const QString &selector) const
{
    QWebElementCollection matches;

    // The list doubles as a work queue: frames are visited in insertion order
    // and each visited frame appends its children to the end.
    QList<QWebFrame*> pending;
    pending << page->mainFrame();

    for (int i = 0; i < pending.count(); ++i) {
        QWebFrame* current = pending.at(i);
        matches.append(current->findAllElements(selector));
        pending.append(current->childFrames());
    }

    return matches;
}

View File

@ -26,6 +26,7 @@
class QWebPage;
class QWebElement;
class QWebElementCollection;
struct PageFormData {
bool found;
@ -49,7 +50,9 @@ private:
bool queryItemsContains(const QueryItems &queryItems, const QString &attributeName,
const QString &attributeValue) const;
QByteArray convertWebKitFormBoundaryIfNecessary(const QByteArray &data) const;
QueryItem findUsername(const QWebElement &form) const;
QueryItems createQueryItems(const QByteArray &data) const;
QWebElementCollection getAllElementsFromPage(QWebPage* page, const QString &selector) const;
QWebPage* m_page;
};

View File

@ -209,6 +209,60 @@ void FormCompleterTest::extractFormTest4()
QCOMPARE(form.password, QString("tst_password"));
}
// Twitter.com: the page contains several almost identical login forms.
//
// All three forms below carry the same password value ("pass"), but only the
// second one holds the username that is present in the post data ("user1");
// the first and third hold "user2". The extracted data is expected to come
// from the second form.
void FormCompleterTest::extractFormTest5()
{
    QByteArray data = "session%5Busername_or_email%5D=user1&session%5Bpassword%5D=pass&"
                      "return_to_ssl=true&scribe_log=&redirect_after_login=%2F&"
                      "authenticity_token=0d37030972c34b021d4a5ebab35817821dc0358b";

    // Adjacent string literals are concatenated verbatim, so a literal that
    // ends in the middle of a tag must end with a space to keep the
    // attributes separated.
    QString html = "<!-- 1) -->"
                   "<form action='https://twitter.com/sessions' class='js-signin signin' method='post'>"
                   "<input class='js-username-field email-input' type='text' name='session[username_or_email]' "
                   "autocomplete='on' value='user2'>"
                   "<input class='js-password-field' type='password' value='pass' name='session[password]'>"
                   "<input type='checkbox' value='1' name='remember_me'>"
                   "<button type='submit' class='btn submit'>Login</button>"
                   "<input type='hidden' name='scribe_log'>"
                   "<input type='hidden' name='redirect_after_login' value='/'>"
                   "<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
                   "</form>"

                   "<!-- 2) Correct -->"
                   "<form action='https://twitter.com/sessions' class='signin' method='post'>"
                   "<input type='text' id='signin-email' class='text-input email-input' "
                   "name='session[username_or_email]' title='' autocomplete='on' tabindex='1' value='user1'>"
                   "<input type='password' id='signin-password' class='text-' "
                   "name='session[password]' title='' tabindex='2' value='pass'>"
                   "<button type='submit' class='submit btn primary-btn flex-table-btn js-submit' tabindex='4'></button>"
                   "<input type='checkbox' value='1' name='remember_me' tabindex='3'>"
                   "<input type='hidden' name='return_to_ssl' value='true'>"
                   "<input type='hidden' name='scribe_log'>"
                   "<input type='hidden' name='redirect_after_login' value='/'>"
                   "<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
                   "</form>"

                   "<!-- 3) -->"
                   "<form action='https://twitter.com/sessions' class='signin' method='post'>"
                   "<input class='js-username-field email-input' type='text' "
                   "name='session[username_or_email]' autocomplete='on' value='user2' tabindex='1'>"
                   "<input class='js-password-field' type='password' name='session[password]' tabindex='2' value='pass'>"
                   "<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
                   "<input type='hidden' name='scribe_log'>"
                   "<input type='hidden' name='redirect_after_login' value='/'>"
                   "<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
                   "<button type='submit' class='submit btn primary-btn' tabindex='4'></button>"
                   "<input type='checkbox' value='1' name='remember_me' tabindex='3'>"
                   "</form>";

    PageFormData form = extractFormData(html, data);

    QVERIFY(form.found);
    QCOMPARE(form.username, QString("user1"));
    QCOMPARE(form.password, QString("pass"));
}
void FormCompleterTest::completeWithData(const QString &html, const QByteArray &data)
{
view->setHtml(html);

View File

@ -42,6 +42,7 @@ private slots:
void extractFormTest2();
void extractFormTest3();
void extractFormTest4();
void extractFormTest5();
private:
void completeWithData(const QString &html, const QByteArray &data);