From ebec81a6fb2dff0b2fe6b569b021e057995ee6c7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Fri, 19 Apr 2013 13:17:28 +0400 Subject: [PATCH] subscribe: verify XML before adding to the database; fetch: try to work around entity problems if initial parsing fails --- include/functions.php | 19 +++++++++++++++++++ include/rssfuncs.php | 29 ++++++++++++++++++++++++++++- js/functions.js | 38 +++++--------------------------------- 3 files changed, 52 insertions(+), 34 deletions(-) diff --git a/include/functions.php b/include/functions.php index 4cc8f134d..8ac5753c9 100644 --- a/include/functions.php +++ b/include/functions.php @@ -1558,6 +1558,7 @@ * Here you should call extractfeedurls in rpc-backend * to get all possible feeds. * 5 - Couldn't download the URL content. + * 6 - Content is an invalid XML. */ function subscribe_to_feed($url, $cat_id = 0, $auth_login = '', $auth_pass = '') { @@ -1588,6 +1589,18 @@ $url = key($feedUrls); } + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadXML(html_entity_decode($contents)); + $error = libxml_get_last_error(); + libxml_clear_errors(); + + if ($error) { + $error_message = format_libxml_error($error); + + return array("code" => 6, "message" => $error_message); + } + if ($cat_id == "0" || !$cat_id) { $cat_qpart = "NULL"; } else { @@ -4203,4 +4216,10 @@ return LABEL_BASE_INDEX - 1 + abs($feed); } + function format_libxml_error($error) { + return T_sprintf("LibXML error %s at line %d (column %d): %s", + $error->code, $error->line, $error->column, + $error->message); + } + ?> diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 31d35bf8e..47d622169 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -316,6 +316,25 @@ _debug("update_rss_feed: fetch done."); } + $error = verify_feed_xml($feed_data); + + if ($error) { + if ($debug_enabled) { + _debug("update_rss_feed: error verifying XML, code: " . $error->code); + } + + if ($error->code == 26) { + if ($debug_enabled) { + _debug("update_rss_feed: got error 26, trying to decode entities..."); + } + + $feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8'); + + $error = verify_feed_xml($feed_data); + + if ($error) $feed_data = ''; + } + } } if (!$feed_data) { @@ -559,7 +578,7 @@ _debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]"); } - $entry_title = html_entity_decode($item->get_title()); + $entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8'); $entry_link = rewrite_relative_url($site_url, $item->get_link()); @@ -1421,5 +1440,13 @@ mb_strtolower(strip_tags($title), 'utf-8')); } + function verify_feed_xml($feed_data) { + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadXML($feed_data); + $error = libxml_get_last_error(); + libxml_clear_errors(); + return $error; + } ?> diff --git a/js/functions.js b/js/functions.js index e02767504..04be58bf7 100644 --- a/js/functions.js +++ b/js/functions.js @@ -816,39 +816,6 @@ function quickAddFeed() { alert(__("Specified URL doesn't seem to contain any feeds.")); break; case 4: - /* notify_progress("Searching for feed urls...", true); - - new Ajax.Request("backend.php", { - parameters: 'op=rpc&method=extractfeedurls&url=' + param_escape(feed_url), - onComplete: function(transport, dialog, feed_url) { - - notify(''); - - var reply = JSON.parse(transport.responseText); - - var feeds = reply['urls']; - - console.log(transport.responseText); - - var select = dijit.byId("feedDlg_feedContainerSelect"); - - while (select.getOptions().length > 0) - select.removeOption(0); - - var count = 0; - for (var feedUrl in feeds) { - select.addOption({value: feedUrl, label: feeds[feedUrl]}); - count++; - } - -// if (count > 5) count = 5; -// select.size = count; - - Effect.Appear('feedDlg_feedsContainer', {duration : 0.5}); - } - }); - break; */ - feeds = rc['feeds']; var select = dijit.byId("feedDlg_feedContainerSelect"); @@ -871,6 +838,11 @@ function quickAddFeed() { alert(__("Couldn't download the specified URL: %s"). replace("%s", rc['message'])); break; + case 6: + alert(__("XML validation failed: %s"). + replace("%s", rc['message'])); + break; + break; case 0: alert(__("You are already subscribed to this feed.")); break;