1
mirror of https://invent.kde.org/network/falkon.git synced 2024-12-20 10:46:35 +01:00

[AutoFill] Fixed extracting form on Twitter.com

Twitter uses multiple nearly identical forms (differing only in class and id names).
PageFormCompleter now also checks for the occurrence of the username name=value pair
in the sent post data.
This commit is contained in:
nowrep 2013-02-16 11:20:03 +01:00
parent 12e49d1bf4
commit a84b180444
4 changed files with 124 additions and 59 deletions

View File

@ -32,14 +32,9 @@ PageFormCompleter::PageFormCompleter(QWebPage* page)
PageFormData PageFormCompleter::extractFormData(const QByteArray &postData) const PageFormData PageFormCompleter::extractFormData(const QByteArray &postData) const
{ {
QString usernameName;
QString usernameValue; QString usernameValue;
QString passwordName;
QString passwordValue; QString passwordValue;
QWebElementCollection allForms;
QWebElement foundForm;
QByteArray data = convertWebKitFormBoundaryIfNecessary(postData); QByteArray data = convertWebKitFormBoundaryIfNecessary(postData);
PageFormData formData = {false, QString(), QString(), data}; PageFormData formData = {false, QString(), QString(), data};
@ -52,57 +47,28 @@ PageFormData PageFormCompleter::extractFormData(const QByteArray &postData) cons
return formData; return formData;
} }
/* Find all form elements in page (in all frames) */ const QWebElementCollection &allForms = getAllElementsFromPage(m_page, "form");
QList<QWebFrame*> frames;
frames.append(m_page->mainFrame());
while (!frames.isEmpty()) {
QWebFrame* frame = frames.takeFirst();
allForms.append(frame->findAllElements("form"));
frames += frame->childFrames();
}
const QueryItems &queryItems = createQueryItems(data); const QueryItems &queryItems = createQueryItems(data);
/* Find form that contains password value sent in data */ // Find form that contains password value sent in data
foreach(const QWebElement & formElement, allForms) { foreach(const QWebElement & formElement, allForms) {
bool found = false;
const QWebElementCollection &inputs = formElement.findAll("input[type=\"password\"]"); const QWebElementCollection &inputs = formElement.findAll("input[type=\"password\"]");
foreach(QWebElement inputElement, inputs) { foreach(QWebElement inputElement, inputs) {
passwordName = inputElement.attribute("name"); const QString &passName = inputElement.attribute("name");
passwordValue = inputElement.evaluateJavaScript("this.value").toString(); const QString &passValue = inputElement.evaluateJavaScript("this.value").toString();
if (queryItemsContains(queryItems, passwordName, passwordValue)) { if (queryItemsContains(queryItems, passName, passValue)) {
foundForm = formElement; // Set passwordValue if not empty (to make it possible extract forms without username field)
break; passwordValue = passValue;
}
}
if (!foundForm.isNull()) { const QueryItem &item = findUsername(formElement);
break; if (queryItemsContains(queryItems, item.first, item.second)) {
} usernameValue = item.second;
} found = true;
break;
if (foundForm.isNull()) { }
return formData;
}
/* Try to find username (or email) field in the form. */
bool found = false;
QStringList selectors;
selectors << "input[type=\"text\"][name*=\"user\"]"
<< "input[type=\"text\"][name*=\"name\"]"
<< "input[type=\"text\"]"
<< "input[type=\"email\"]"
<< "input:not([type=\"hidden\"][type=\"password\"])";
foreach(const QString & selector, selectors) {
const QWebElementCollection &inputs = foundForm.findAll(selector);
foreach(QWebElement element, inputs) {
usernameName = element.attribute("name");
usernameValue = element.evaluateJavaScript("this.value").toString();
if (!usernameName.isEmpty() && !usernameValue.isEmpty()) {
found = true;
break;
} }
} }
@ -111,6 +77,11 @@ PageFormData PageFormCompleter::extractFormData(const QByteArray &postData) cons
} }
} }
// It is necessary only to find password, as there may be form without username field
if (passwordValue.isEmpty()) {
return formData;
}
formData.found = true; formData.found = true;
formData.username = usernameValue; formData.username = usernameValue;
formData.password = passwordValue; formData.password = passwordValue;
@ -122,19 +93,12 @@ void PageFormCompleter::completePage(const QByteArray &data) const
{ {
const QueryItems &queryItems = createQueryItems(data); const QueryItems &queryItems = createQueryItems(data);
/* Input types that are being completed */ // Input types that are being completed
QStringList inputTypes; QStringList inputTypes;
inputTypes << "text" << "password" << "email"; inputTypes << "text" << "password" << "email";
/* Find all input elements in the page */ // Find all input elements in the page
QWebElementCollection inputs; const QWebElementCollection &inputs = getAllElementsFromPage(m_page, "input");
QList<QWebFrame*> frames;
frames.append(m_page->mainFrame());
while (!frames.isEmpty()) {
QWebFrame* frame = frames.takeFirst();
inputs.append(frame->findAllElements("input"));
frames += frame->childFrames();
}
for (int i = 0; i < queryItems.count(); i++) { for (int i = 0; i < queryItems.count(); i++) {
const QString &key = queryItems.at(i).first; const QString &key = queryItems.at(i).first;
@ -222,6 +186,34 @@ QByteArray PageFormCompleter::convertWebKitFormBoundaryIfNecessary(const QByteAr
return formatedData; return formatedData;
} }
// Looks for the input of the given form that most likely holds the username (or email).
//
// The selectors below are tried in order, from the most to the least specific one.
// The first input that has both a non-empty "name" attribute and a non-empty current
// value (read through JavaScript "this.value") is returned as a (name, value) pair.
// A default-constructed QueryItem is returned when no such input exists.
PageFormCompleter::QueryItem PageFormCompleter::findUsername(const QWebElement &form) const
{
    QStringList selectors;
    selectors << "input[type=\"text\"][name*=\"user\"]"
              << "input[type=\"text\"][name*=\"name\"]"
              << "input[type=\"text\"]"
              << "input[type=\"email\"]"
              // Last resort: any remaining input that can carry typed text (for example
              // one without a type attribute, which the selectors above do not match).
              // Every excluded type needs its own :not() - a single negation holding
              // several attribute selectors is not a valid Selectors Level 3 negation,
              // so the previous form of this fallback could not match as intended.
              // Buttons, toggles and file inputs are excluded as well, because they can
              // never be a username field.
              << "input:not([type=\"hidden\"]):not([type=\"password\"])"
                 ":not([type=\"submit\"]):not([type=\"button\"]):not([type=\"reset\"])"
                 ":not([type=\"image\"]):not([type=\"checkbox\"]):not([type=\"radio\"])"
                 ":not([type=\"file\"])";

    foreach(const QString & selector, selectors) {
        const QWebElementCollection &inputs = form.findAll(selector);

        // Not a const reference: evaluateJavaScript() needs a modifiable element
        foreach(QWebElement element, inputs) {
            const QString &name = element.attribute("name");
            const QString &value = element.evaluateJavaScript("this.value").toString();

            if (!name.isEmpty() && !value.isEmpty()) {
                QueryItem item;
                item.first = name;
                item.second = value;
                return item;
            }
        }
    }

    // Nothing usable was found in this form
    return QueryItem();
}
PageFormCompleter::QueryItems PageFormCompleter::createQueryItems(const QByteArray &data) const PageFormCompleter::QueryItems PageFormCompleter::createQueryItems(const QByteArray &data) const
{ {
/* Why not to use encodedQueryItems = QByteArrays ? /* Why not to use encodedQueryItems = QByteArrays ?
@ -247,3 +239,18 @@ PageFormCompleter::QueryItems PageFormCompleter::createQueryItems(const QByteArr
return arguments; return arguments;
} }
// Gathers every element matching selector from the main frame of page and from
// all of its descendant frames.
//
// Frames are visited breadth-first starting at the main frame, so the matches of a
// frame are placed before the matches of its child frames in the returned collection.
QWebElementCollection PageFormCompleter::getAllElementsFromPage(QWebPage* page, const QString &selector) const
{
    QWebElementCollection matches;

    // pending doubles as the work queue: child frames are appended to its end
    // while it is being walked by index
    QList<QWebFrame*> pending;
    pending << page->mainFrame();

    for (int i = 0; i < pending.count(); ++i) {
        QWebFrame* current = pending.at(i);
        matches.append(current->findAllElements(selector));
        pending.append(current->childFrames());
    }

    return matches;
}

View File

@ -26,6 +26,7 @@
class QWebPage; class QWebPage;
class QWebElement; class QWebElement;
class QWebElementCollection;
struct PageFormData { struct PageFormData {
bool found; bool found;
@ -49,7 +50,9 @@ private:
bool queryItemsContains(const QueryItems &queryItems, const QString &attributeName, bool queryItemsContains(const QueryItems &queryItems, const QString &attributeName,
const QString &attributeValue) const; const QString &attributeValue) const;
QByteArray convertWebKitFormBoundaryIfNecessary(const QByteArray &data) const; QByteArray convertWebKitFormBoundaryIfNecessary(const QByteArray &data) const;
QueryItem findUsername(const QWebElement &form) const;
QueryItems createQueryItems(const QByteArray &data) const; QueryItems createQueryItems(const QByteArray &data) const;
QWebElementCollection getAllElementsFromPage(QWebPage* page, const QString &selector) const;
QWebPage* m_page; QWebPage* m_page;
}; };

View File

@ -209,6 +209,60 @@ void FormCompleterTest::extractFormTest4()
QCOMPARE(form.password, QString("tst_password")); QCOMPARE(form.password, QString("tst_password"));
} }
// Regression test modelled on the Twitter.com login page: the document contains
// three login forms that share the same field names and the same password value,
// so the password alone cannot identify the submitted form.
void FormCompleterTest::extractFormTest5()
{
// Twitter.com : Multiple almost same forms
// Submitted post data: username "user1" and password "pass" (keys are percent-encoded)
QByteArray data = "session%5Busername_or_email%5D=user1&session%5Bpassword%5D=pass&"
"return_to_ssl=true&scribe_log=&redirect_after_login=%2F&"
"authenticity_token=0d37030972c34b021d4a5ebab35817821dc0358b";
// Forms 1) and 3) carry the username value "user2"; only form 2) carries "user1",
// the value that is present in the post data above.
QString html = "<!-- 1) -->"
"<form action='https://twitter.com/sessions' class='js-signin signin' method='post'>"
"<input class='js-username-field email-input' type='text' name='session[username_or_email]'"
"autocomplete='on' value='user2'>"
"<input class='js-password-field' type='password' value='pass' name='session[password]'>"
"<input type='checkbox' value='1' name='remember_me'>"
"<button type='submit' class='btn submit'>Login</button>"
"<input type='hidden' name='scribe_log'>"
"<input type='hidden' name='redirect_after_login' value='/'>"
"<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
"</form>"
"<!-- 2) Correct -->"
"<form action='https://twitter.com/sessions' class='signin' method='post'>"
"<input type='text' id='signin-email' class='text-input email-input'"
"name='session[username_or_email]' title='' autocomplete='on' tabindex='1' value='user1'>"
"<input type='password' id='signin-password' class='text-'"
"name='session[password]' title='' tabindex='2' value='pass'>"
"<button type='submit' class='submit btn primary-btn flex-table-btn js-submit' tabindex='4'>"
"<input type='checkbox' value='1' name='remember_me' tabindex='3'>"
"<input type='hidden' name='return_to_ssl' value='true'>"
"<input type='hidden' name='scribe_log'>"
"<input type='hidden' name='redirect_after_login' value='/'>"
"<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
"</form>"
"<!-- 3) -->"
"<form action='https://twitter.com/sessions' class='signin' method='post'>"
"<input class='js-username-field email-input' type='text' "
"name='session[username_or_email]' autocomplete='on' value='user2' tabindex='1'>"
"<input class='js-password-field' type='password' name='session[password]' tabindex='2' value='pass'>"
"<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
"<input type='hidden' name='scribe_log'>"
"<input type='hidden' name='redirect_after_login' value='/'>"
"<input type='hidden' value='0d37030972c34b021d4a5ebab35817821dc0358b' name='authenticity_token'>"
"<button type='submit' class='submit btn primary-btn' tabindex='4'></button>"
"<input type='checkbox' value='1' name='remember_me' tabindex='3'>"
"</form>";
// NOTE(review): extractFormData(html, data) is a test helper defined outside this view;
// presumably it loads the html and runs the form extraction against data - confirm.
PageFormData form = extractFormData(html, data);
// The extracted credentials must be those of form 2), i.e. the ones that were posted
QVERIFY(form.found == true);
QCOMPARE(form.username, QString("user1"));
QCOMPARE(form.password, QString("pass"));
}
void FormCompleterTest::completeWithData(const QString &html, const QByteArray &data) void FormCompleterTest::completeWithData(const QString &html, const QByteArray &data)
{ {
view->setHtml(html); view->setHtml(html);

View File

@ -42,6 +42,7 @@ private slots:
void extractFormTest2(); void extractFormTest2();
void extractFormTest3(); void extractFormTest3();
void extractFormTest4(); void extractFormTest4();
void extractFormTest5();
private: private:
void completeWithData(const QString &html, const QByteArray &data); void completeWithData(const QString &html, const QByteArray &data);