From 2c08214a7f4dd693cb401714412b7a1ad635ed41 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 13 Dec 2011 16:06:25 +0400 Subject: [PATCH] split rss updating stuff into separate include file --- include/functions.php | 1272 +---------------------------------------- include/rssfuncs.php | 1271 ++++++++++++++++++++++++++++++++++++++++ update.php | 1 + update_daemon2.php | 1 + 4 files changed, 1278 insertions(+), 1267 deletions(-) create mode 100644 include/rssfuncs.php diff --git a/include/functions.php b/include/functions.php index 7bd64cc5b..2246e6bf3 100644 --- a/include/functions.php +++ b/include/functions.php @@ -389,1117 +389,6 @@ } } - function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false) { - - global $memcache; - - /* Update all feeds with the same URL to utilize memcache */ - - if ($memcache) { - $result = db_query($link, "SELECT f1.id - FROM ttrss_feeds AS f1, ttrss_feeds AS f2 - WHERE f2.feed_url = f1.feed_url AND f2.id = '$feed'"); - - while ($line = db_fetch_assoc($result)) { - update_rss_feed_real($link, $line["id"], $ignore_daemon, $no_cache); - } - } else { - update_rss_feed_real($link, $feed, $ignore_daemon, $no_cache); - } - } - - function update_rss_feed_real($link, $feed, $ignore_daemon = false, $no_cache = false, - $override_url = false) { - - require_once "lib/simplepie/simplepie.inc"; - require_once "lib/magpierss/rss_fetch.inc"; - require_once 'lib/magpierss/rss_utils.inc'; - - global $memcache; - - $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']; - - if (!$_REQUEST["daemon"] && !$ignore_daemon) { - return false; - } - - if ($debug_enabled) { - _debug("update_rss_feed: start"); - } - - if (!$ignore_daemon) { - - if (DB_TYPE == "pgsql") { - $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')"; - } else { - $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))"; - } - - $result = db_query($link, "SELECT id,update_interval,auth_login, - auth_pass,cache_images,update_method - FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart"); - - } else { - - $result = db_query($link, "SELECT id,update_interval,auth_login, - feed_url,auth_pass,cache_images,update_method,last_updated, - mark_unread_on_update, owner_uid, update_on_checksum_change, - pubsub_state - FROM ttrss_feeds WHERE id = '$feed'"); - - } - - if (db_num_rows($result) == 0) { - if ($debug_enabled) { - _debug("update_rss_feed: feed $feed NOT FOUND/SKIPPED"); - } - return false; - } - - $update_method = db_fetch_result($result, 0, "update_method"); - $last_updated = db_fetch_result($result, 0, "last_updated"); - $owner_uid = db_fetch_result($result, 0, "owner_uid"); - $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, - 0, "mark_unread_on_update")); - $update_on_checksum_change = sql_bool_to_bool(db_fetch_result($result, - 0, "update_on_checksum_change")); - $pubsub_state = db_fetch_result($result, 0, "pubsub_state"); - - db_query($link, "UPDATE ttrss_feeds SET last_update_started = NOW() - WHERE id = '$feed'"); - - $auth_login = db_fetch_result($result, 0, "auth_login"); - $auth_pass = db_fetch_result($result, 0, "auth_pass"); - - if ($update_method == 0) - $update_method = DEFAULT_UPDATE_METHOD + 1; - - // 1 - Magpie - // 2 - SimplePie - // 3 - Twitter OAuth - - if ($update_method == 2) - $use_simplepie = true; - else - $use_simplepie = false; - - if ($debug_enabled) { - _debug("update method: $update_method (feed setting: $update_method) (use simplepie: $use_simplepie)\n"); - } - - if ($update_method == 1) { - $auth_login = urlencode($auth_login); - $auth_pass = urlencode($auth_pass); - } - - $update_interval = db_fetch_result($result, 0, "update_interval"); - $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); - $fetch_url = db_fetch_result($result, 0, "feed_url"); - - if ($update_interval < 0) { return false; } - - $feed = db_escape_string($feed); - - if ($auth_login && $auth_pass ){ - $url_parts = array(); - preg_match("/(^[^:]*):\/\/(.*)/", $fetch_url, $url_parts); - - if ($url_parts[1] && $url_parts[2]) { - $fetch_url = $url_parts[1] . "://$auth_login:$auth_pass@" . $url_parts[2]; - } - - } - - if ($override_url) - $fetch_url = $override_url; - - if ($debug_enabled) { - _debug("update_rss_feed: fetching [$fetch_url]..."); - } - - $obj_id = md5("FDATA:$use_simplepie:$fetch_url"); - - if ($memcache && $obj = $memcache->get($obj_id)) { - - if ($debug_enabled) { - _debug("update_rss_feed: data found in memcache."); - } - - $rss = $obj; - - } else { - - if ($update_method == 3) { - $rss = fetch_twitter_rss($link, $fetch_url, $owner_uid); - } else if ($update_method == 1) { - - define('MAGPIE_CACHE_AGE', get_feed_update_interval($link, $feed) * 60); - define('MAGPIE_CACHE_ON', !$no_cache); - define('MAGPIE_FETCH_TIME_OUT', 60); - define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie"); - - $rss = @fetch_rss($fetch_url); - } else { - $simplepie_cache_dir = CACHE_DIR . "/simplepie"; - - if (!is_dir($simplepie_cache_dir)) { - mkdir($simplepie_cache_dir); - } - - $rss = new SimplePie(); - $rss->set_useragent(SELF_USER_AGENT); - # $rss->set_timeout(10); - $rss->set_feed_url($fetch_url); - $rss->set_output_encoding('UTF-8'); - $rss->force_feed(true); - - if (SIMPLEPIE_CACHE_IMAGES && $cache_images) { - - if ($debug_enabled) { - _debug("enabling image cache"); - } - - $rss->set_image_handler("image.php", 'i'); - } - - if ($debug_enabled) { - _debug("feed update interval (sec): " . - get_feed_update_interval($link, $feed)*60); - } - - $rss->enable_cache(!$no_cache); - - if (!$no_cache) { - $rss->set_cache_location($simplepie_cache_dir); - $rss->set_cache_duration(get_feed_update_interval($link, $feed) * 60); - } - - $rss->init(); - } - - if ($memcache && $rss) $memcache->add($obj_id, $rss, 0, 300); - } - -// print_r($rss); - - if ($debug_enabled) { - _debug("update_rss_feed: fetch done, parsing..."); - } - - $feed = db_escape_string($feed); - - if ($update_method == 2) { - $fetch_ok = !$rss->error(); - } else { - $fetch_ok = !!$rss; - } - - if ($fetch_ok) { - - if ($debug_enabled) { - _debug("update_rss_feed: processing feed data..."); - } - -// db_query($link, "BEGIN"); - - $result = db_query($link, "SELECT title,icon_url,site_url,owner_uid - FROM ttrss_feeds WHERE id = '$feed'"); - - $registered_title = db_fetch_result($result, 0, "title"); - $orig_icon_url = db_fetch_result($result, 0, "icon_url"); - $orig_site_url = db_fetch_result($result, 0, "site_url"); - - $owner_uid = db_fetch_result($result, 0, "owner_uid"); - - if ($use_simplepie) { - $site_url = $rss->get_link(); - } else { - $site_url = $rss->channel["link"]; - } - - $site_url = rewrite_relative_url($fetch_url, $site_url); - - if ($debug_enabled) { - _debug("update_rss_feed: checking favicon..."); - } - - check_feed_favicon($site_url, $feed, $link); - - if (!$registered_title || $registered_title == "[Unknown]") { - - if ($use_simplepie) { - $feed_title = db_escape_string($rss->get_title()); - } else { - $feed_title = db_escape_string($rss->channel["title"]); - } - - if ($debug_enabled) { - _debug("update_rss_feed: registering title: $feed_title"); - } - - db_query($link, "UPDATE ttrss_feeds SET - title = '$feed_title' WHERE id = '$feed'"); - } - - // weird, weird Magpie - if (!$use_simplepie) { - if (!$site_url) $site_url = db_escape_string($rss->channel["link_"]); - } - - if ($site_url && $orig_site_url != db_escape_string($site_url)) { - db_query($link, "UPDATE ttrss_feeds SET - site_url = '$site_url' WHERE id = '$feed'"); - } - -// print "I: " . $rss->channel["image"]["url"]; - - if (!$use_simplepie) { - $icon_url = db_escape_string($rss->image["url"]); - } else { - $icon_url = db_escape_string($rss->get_image_url()); - } - - $icon_url = substr($icon_url, 0, 250); - - if ($icon_url && $orig_icon_url != $icon_url) { - db_query($link, "UPDATE ttrss_feeds SET icon_url = '$icon_url' WHERE id = '$feed'"); - } - - if ($debug_enabled) { - _debug("update_rss_feed: loading filters..."); - } - - $filters = load_filters($link, $feed, $owner_uid); - -// if ($debug_enabled) { -// print_r($filters); -// } - - if ($use_simplepie) { - $iterator = $rss->get_items(); - } else { - $iterator = $rss->items; - if (!$iterator || !is_array($iterator)) $iterator = $rss->entries; - if (!$iterator || !is_array($iterator)) $iterator = $rss; - } - - if (!is_array($iterator)) { - /* db_query($link, "UPDATE ttrss_feeds - SET last_error = 'Parse error: can\'t find any articles.' - WHERE id = '$feed'"); */ - - // clear any errors and mark feed as updated if fetched okay - // even if it's blank - - if ($debug_enabled) { - _debug("update_rss_feed: entry iterator is not an array, no articles?"); - } - - db_query($link, "UPDATE ttrss_feeds - SET last_updated = NOW(), last_error = '' WHERE id = '$feed'"); - - return; // no articles - } - - if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) { - - if ($debug_enabled) _debug("update_rss_feed: checking for PUSH hub..."); - - $feed_hub_url = false; - if ($use_simplepie) { - $links = $rss->get_links('hub'); - - if ($links && is_array($links)) { - foreach ($links as $l) { - $feed_hub_url = $l; - break; - } - } - - } else { - $atom = $rss->channel['atom']; - - if ($atom) { - if ($atom['link@rel'] == 'hub') { - $feed_hub_url = $atom['link@href']; - } - - if (!$feed_hub_url && $atom['link#'] > 1) { - for ($i = 2; $i <= $atom['link#']; $i++) { - if ($atom["link#$i@rel"] == 'hub') { - $feed_hub_url = $atom["link#$i@href"]; - break; - } - } - } - } else { - $feed_hub_url = $rss->channel['link_hub']; - } - } - - if ($debug_enabled) _debug("update_rss_feed: feed hub url: $feed_hub_url"); - - if ($feed_hub_url && function_exists('curl_init') && - !ini_get("open_basedir")) { - - require_once 'lib/pubsubhubbub/subscriber.php'; - - $callback_url = get_self_url_prefix() . - "/public.php?op=pubsub&id=$feed"; - - $s = new Subscriber($feed_hub_url, $callback_url); - - $rc = $s->subscribe($fetch_url); - - if ($debug_enabled) - _debug("update_rss_feed: feed hub url found, subscribe request sent."); - - db_query($link, "UPDATE ttrss_feeds SET pubsub_state = 1 - WHERE id = '$feed'"); - } - } - - if ($debug_enabled) { - _debug("update_rss_feed: processing articles..."); - } - - foreach ($iterator as $item) { - - if ($_REQUEST['xdebug'] == 2) { - print_r($item); - } - - if ($use_simplepie) { - $entry_guid = $item->get_id(); - if (!$entry_guid) $entry_guid = $item->get_link(); - if (!$entry_guid) $entry_guid = make_guid_from_title($item->get_title()); - - } else { - - $entry_guid = $item["id"]; - - if (!$entry_guid) $entry_guid = $item["guid"]; - if (!$entry_guid) $entry_guid = $item["about"]; - if (!$entry_guid) $entry_guid = $item["link"]; - if (!$entry_guid) $entry_guid = make_guid_from_title($item["title"]); - } - - if ($debug_enabled) { - _debug("update_rss_feed: guid $entry_guid"); - } - - if (!$entry_guid) continue; - - $entry_timestamp = ""; - - if ($use_simplepie) { - $entry_timestamp = strtotime($item->get_date()); - } else { - $rss_2_date = $item['pubdate']; - $rss_1_date = $item['dc']['date']; - $atom_date = $item['issued']; - if (!$atom_date) $atom_date = $item['updated']; - - if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date); - if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date); - if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date); - - } - - if ($entry_timestamp == "" || $entry_timestamp == -1 || !$entry_timestamp) { - $entry_timestamp = time(); - $no_orig_date = 'true'; - } else { - $no_orig_date = 'false'; - } - - $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); - - if ($debug_enabled) { - _debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]"); - } - - if ($use_simplepie) { - $entry_title = $item->get_title(); - } else { - $entry_title = trim(strip_tags($item["title"])); - } - - if ($use_simplepie) { - $entry_link = $item->get_link(); - } else { - // strange Magpie workaround - $entry_link = $item["link_"]; - if (!$entry_link) $entry_link = $item["link"]; - } - - $entry_link = rewrite_relative_url($site_url, $entry_link); - - if ($debug_enabled) { - _debug("update_rss_feed: title $entry_title"); - _debug("update_rss_feed: link $entry_link"); - } - - if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);; - - $entry_link = strip_tags($entry_link); - - if ($use_simplepie) { - $entry_content = $item->get_content(); - if (!$entry_content) $entry_content = $item->get_description(); - } else { - $entry_content = $item["content:escaped"]; - - if (!$entry_content) $entry_content = $item["content:encoded"]; - if (!$entry_content) $entry_content = $item["content"]["encoded"]; - if (!$entry_content) $entry_content = $item["content"]; - - if (is_array($entry_content)) $entry_content = $entry_content[0]; - - // Magpie bugs are getting ridiculous - if (trim($entry_content) == "Array") $entry_content = false; - - if (!$entry_content) $entry_content = $item["atom_content"]; - if (!$entry_content) $entry_content = $item["summary"]; - - if (!$entry_content || - strlen($entry_content) < strlen($item["description"])) { - $entry_content = $item["description"]; - }; - - // WTF - if (is_array($entry_content)) { - $entry_content = $entry_content["encoded"]; - if (!$entry_content) $entry_content = $entry_content["escaped"]; - } - } - - if ($_REQUEST["xdebug"] == 2) { - print "update_rss_feed: content: "; - print_r(htmlspecialchars($entry_content)); - } - - $entry_content_unescaped = $entry_content; - - if ($use_simplepie) { - $entry_comments = strip_tags($item->data["comments"]); - if ($item->get_author()) { - $entry_author_item = $item->get_author(); - $entry_author = $entry_author_item->get_name(); - if (!$entry_author) $entry_author = $entry_author_item->get_email(); - - $entry_author = db_escape_string($entry_author); - } - } else { - $entry_comments = strip_tags($item["comments"]); - - $entry_author = db_escape_string(strip_tags($item['dc']['creator'])); - - if ($item['author']) { - - if (is_array($item['author'])) { - - if (!$entry_author) { - $entry_author = db_escape_string(strip_tags($item['author']['name'])); - } - - if (!$entry_author) { - $entry_author = db_escape_string(strip_tags($item['author']['email'])); - } - } - - if (!$entry_author) { - $entry_author = db_escape_string(strip_tags($item['author'])); - } - } - } - - if (preg_match('/^[\t\n\r ]*$/', $entry_author)) $entry_author = ''; - - $entry_guid = db_escape_string(strip_tags($entry_guid)); - $entry_guid = mb_substr($entry_guid, 0, 250); - - $result = db_query($link, "SELECT id FROM ttrss_entries - WHERE guid = '$entry_guid'"); - - $entry_content = db_escape_string($entry_content, false); - - $content_hash = "SHA1:" . sha1(strip_tags($entry_content)); - - $entry_title = db_escape_string($entry_title); - $entry_link = db_escape_string($entry_link); - $entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250); - $entry_author = mb_substr($entry_author, 0, 250); - - if ($use_simplepie) { - $num_comments = 0; #FIXME# - } else { - $num_comments = db_escape_string($item["slash"]["comments"]); - } - - if (!$num_comments) $num_comments = 0; - - if ($debug_enabled) { - _debug("update_rss_feed: looking for tags [1]..."); - } - - // parse entries into tags - - $additional_tags = array(); - - if ($use_simplepie) { - - $additional_tags_src = $item->get_categories(); - - if (is_array($additional_tags_src)) { - foreach ($additional_tags_src as $tobj) { - array_push($additional_tags, $tobj->get_term()); - } - } - - if ($debug_enabled) { - _debug("update_rss_feed: category tags:"); - print_r($additional_tags); - } - - } else { - - $t_ctr = $item['category#']; - - if ($t_ctr == 0) { - $additional_tags = array(); - } else if ($t_ctr > 0) { - $additional_tags = array($item['category']); - - if ($item['category@term']) { - array_push($additional_tags, $item['category@term']); - } - - for ($i = 0; $i <= $t_ctr; $i++ ) { - if ($item["category#$i"]) { - array_push($additional_tags, $item["category#$i"]); - } - - if ($item["category#$i@term"]) { - array_push($additional_tags, $item["category#$i@term"]); - } - } - } - - // parse elements - - $t_ctr = $item['dc']['subject#']; - - if ($t_ctr > 0) { - array_push($additional_tags, $item['dc']['subject']); - - for ($i = 0; $i <= $t_ctr; $i++ ) { - if ($item['dc']["subject#$i"]) { - array_push($additional_tags, $item['dc']["subject#$i"]); - } - } - } - } - - if ($debug_enabled) { - _debug("update_rss_feed: looking for tags [2]..."); - } - - /* taaaags */ - // , // - - $entry_tags = null; - - preg_match_all("/([^<]+)<\/a>/i", - $entry_content_unescaped, $entry_tags); - - $entry_tags = $entry_tags[1]; - - $entry_tags = array_merge($entry_tags, $additional_tags); - $entry_tags = array_unique($entry_tags); - - for ($i = 0; $i < count($entry_tags); $i++) - $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8'); - - if ($debug_enabled) { - _debug("update_rss_feed: unfiltered tags found:"); - print_r($entry_tags); - } - - # sanitize content - - $entry_content = sanitize_article_content($entry_content); - $entry_title = sanitize_article_content($entry_title); - - if ($debug_enabled) { - _debug("update_rss_feed: done collecting data [TITLE:$entry_title]"); - } - - db_query($link, "BEGIN"); - - if (db_num_rows($result) == 0) { - - if ($debug_enabled) { - _debug("update_rss_feed: base guid not found"); - } - - // base post entry does not exist, create it - - $result = db_query($link, - "INSERT INTO ttrss_entries - (title, - guid, - link, - updated, - content, - content_hash, - no_orig_date, - date_updated, - date_entered, - comments, - num_comments, - author) - VALUES - ('$entry_title', - '$entry_guid', - '$entry_link', - '$entry_timestamp_fmt', - '$entry_content', - '$content_hash', - $no_orig_date, - NOW(), - NOW(), - '$entry_comments', - '$num_comments', - '$entry_author')"); - } else { - // we keep encountering the entry in feeds, so we need to - // update date_updated column so that we don't get horrible - // dupes when the entry gets purged and reinserted again e.g. - // in the case of SLOW SLOW OMG SLOW updating feeds - - $base_entry_id = db_fetch_result($result, 0, "id"); - - db_query($link, "UPDATE ttrss_entries SET date_updated = NOW() - WHERE id = '$base_entry_id'"); - } - - // now it should exist, if not - bad luck then - - $result = db_query($link, "SELECT - id,content_hash,no_orig_date,title, - ".SUBSTRING_FOR_DATE."(date_updated,1,19) as date_updated, - ".SUBSTRING_FOR_DATE."(updated,1,19) as updated, - num_comments - FROM - ttrss_entries - WHERE guid = '$entry_guid'"); - - $entry_ref_id = 0; - $entry_int_id = 0; - - if (db_num_rows($result) == 1) { - - if ($debug_enabled) { - _debug("update_rss_feed: base guid found, checking for user record"); - } - - // this will be used below in update handler - $orig_content_hash = db_fetch_result($result, 0, "content_hash"); - $orig_title = db_fetch_result($result, 0, "title"); - $orig_num_comments = db_fetch_result($result, 0, "num_comments"); - $orig_date_updated = strtotime(db_fetch_result($result, - 0, "date_updated")); - - $ref_id = db_fetch_result($result, 0, "id"); - $entry_ref_id = $ref_id; - - // check for user post link to main table - - // do we allow duplicate posts with same GUID in different feeds? - if (get_pref($link, "ALLOW_DUPLICATE_POSTS", $owner_uid, false)) { - $dupcheck_qpart = "AND (feed_id = '$feed' OR feed_id IS NULL)"; - } else { - $dupcheck_qpart = ""; - } - - /* Collect article tags here so we could filter by them: */ - - $article_filters = get_article_filters($filters, $entry_title, - $entry_content, $entry_link, $entry_timestamp, $entry_author, - $entry_tags); - - if ($debug_enabled) { - _debug("update_rss_feed: article filters: "); - if (count($article_filters) != 0) { - print_r($article_filters); - } - } - - if (find_article_filter($article_filters, "filter")) { - db_query($link, "COMMIT"); // close transaction in progress - continue; - } - - $score = calculate_article_score($article_filters); - - if ($debug_enabled) { - _debug("update_rss_feed: initial score: $score"); - } - - $query = "SELECT ref_id, int_id FROM ttrss_user_entries WHERE - ref_id = '$ref_id' AND owner_uid = '$owner_uid' - $dupcheck_qpart"; - -// if ($_REQUEST["xdebug"]) print "$query\n"; - - $result = db_query($link, $query); - - // okay it doesn't exist - create user entry - if (db_num_rows($result) == 0) { - - if ($debug_enabled) { - _debug("update_rss_feed: user record not found, creating..."); - } - - if ($score >= -500 && !find_article_filter($article_filters, 'catchup')) { - $unread = 'true'; - $last_read_qpart = 'NULL'; - } else { - $unread = 'false'; - $last_read_qpart = 'NOW()'; - } - - if (find_article_filter($article_filters, 'mark') || $score > 1000) { - $marked = 'true'; - } else { - $marked = 'false'; - } - - if (find_article_filter($article_filters, 'publish')) { - $published = 'true'; - } else { - $published = 'false'; - } - - $result = db_query($link, - "INSERT INTO ttrss_user_entries - (ref_id, owner_uid, feed_id, unread, last_read, marked, - published, score, tag_cache, label_cache, uuid) - VALUES ('$ref_id', '$owner_uid', '$feed', $unread, - $last_read_qpart, $marked, $published, '$score', '', '', '')"); - - if (PUBSUBHUBBUB_HUB && $published == 'true') { - $rss_link = get_self_url_prefix() . - "/public.php?op=rss&id=-2&key=" . - get_feed_access_key($link, -2, false, $owner_uid); - - $p = new Publisher(PUBSUBHUBBUB_HUB); - - $pubsub_result = $p->publish_update($rss_link); - } - - $result = db_query($link, - "SELECT int_id FROM ttrss_user_entries WHERE - ref_id = '$ref_id' AND owner_uid = '$owner_uid' AND - feed_id = '$feed' LIMIT 1"); - - if (db_num_rows($result) == 1) { - $entry_int_id = db_fetch_result($result, 0, "int_id"); - } - } else { - if ($debug_enabled) { - _debug("update_rss_feed: user record FOUND"); - } - - $entry_ref_id = db_fetch_result($result, 0, "ref_id"); - $entry_int_id = db_fetch_result($result, 0, "int_id"); - } - - if ($debug_enabled) { - _debug("update_rss_feed: RID: $entry_ref_id, IID: $entry_int_id"); - } - - $post_needs_update = false; - $update_insignificant = false; - - if ($orig_num_comments != $num_comments) { - $post_needs_update = true; - $update_insignificant = true; - } - - if ($content_hash != $orig_content_hash) { - $post_needs_update = true; - $update_insignificant = false; - } - - if (db_escape_string($orig_title) != $entry_title) { - $post_needs_update = true; - $update_insignificant = false; - } - - // if post needs update, update it and mark all user entries - // linking to this post as updated - if ($post_needs_update) { - - if (defined('DAEMON_EXTENDED_DEBUG')) { - _debug("update_rss_feed: post $entry_guid needs update..."); - } - -// print ""; - - db_query($link, "UPDATE ttrss_entries - SET title = '$entry_title', content = '$entry_content', - content_hash = '$content_hash', - updated = '$entry_timestamp_fmt', - num_comments = '$num_comments' - WHERE id = '$ref_id'"); - - if (!$update_insignificant) { - if ($mark_unread_on_update) { - db_query($link, "UPDATE ttrss_user_entries - SET last_read = null, unread = true WHERE ref_id = '$ref_id'"); - } else if ($update_on_checksum_change) { - db_query($link, "UPDATE ttrss_user_entries - SET last_read = null WHERE ref_id = '$ref_id' - AND unread = false"); - } - } - } - } - - db_query($link, "COMMIT"); - - if ($debug_enabled) { - _debug("update_rss_feed: assigning labels..."); - } - - assign_article_to_labels($link, $entry_ref_id, $article_filters, - $owner_uid); - - if ($debug_enabled) { - _debug("update_rss_feed: looking for enclosures..."); - } - - // enclosures - - $enclosures = array(); - - if ($use_simplepie) { - $encs = $item->get_enclosures(); - - if (is_array($encs)) { - foreach ($encs as $e) { - $e_item = array( - $e->link, $e->type, $e->length); - - array_push($enclosures, $e_item); - } - } - - } else { - // - - $e_ctr = $item['enclosure#']; - - if ($e_ctr > 0) { - $e_item = array($item['enclosure@url'], - $item['enclosure@type'], - $item['enclosure@length']); - - array_push($enclosures, $e_item); - - for ($i = 0; $i <= $e_ctr; $i++ ) { - - if ($item["enclosure#$i@url"]) { - $e_item = array($item["enclosure#$i@url"], - $item["enclosure#$i@type"], - $item["enclosure#$i@length"]); - array_push($enclosures, $e_item); - } - } - } - - // - // can there be many of those? yes -fox - - $m_ctr = $item['media']['content#']; - - if ($m_ctr > 0) { - $e_item = array($item['media']['content@url'], - $item['media']['content@medium'], - $item['media']['content@length']); - - array_push($enclosures, $e_item); - - for ($i = 0; $i <= $m_ctr; $i++ ) { - - if ($item["media"]["content#$i@url"]) { - $e_item = array($item["media"]["content#$i@url"], - $item["media"]["content#$i@medium"], - $item["media"]["content#$i@length"]); - array_push($enclosures, $e_item); - } - } - - } - } - - - if ($debug_enabled) { - _debug("update_rss_feed: article enclosures:"); - print_r($enclosures); - } - - db_query($link, "BEGIN"); - - foreach ($enclosures as $enc) { - $enc_url = db_escape_string($enc[0]); - $enc_type = db_escape_string($enc[1]); - $enc_dur = db_escape_string($enc[2]); - - $result = db_query($link, "SELECT id FROM ttrss_enclosures - WHERE content_url = '$enc_url' AND post_id = '$entry_ref_id'"); - - if (db_num_rows($result) == 0) { - db_query($link, "INSERT INTO ttrss_enclosures - (content_url, content_type, title, duration, post_id) VALUES - ('$enc_url', '$enc_type', '', '$enc_dur', '$entry_ref_id')"); - } - } - - db_query($link, "COMMIT"); - - // check for manual tags (we have to do it here since they're loaded from filters) - - foreach ($article_filters as $f) { - if ($f[0] == "tag") { - - $manual_tags = trim_array(explode(",", $f[1])); - - foreach ($manual_tags as $tag) { - if (tag_is_valid($tag)) { - array_push($entry_tags, $tag); - } - } - } - } - - // Skip boring tags - - $boring_tags = trim_array(explode(",", mb_strtolower(get_pref($link, - 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8'))); - - $filtered_tags = array(); - $tags_to_cache = array(); - - if ($entry_tags && is_array($entry_tags)) { - foreach ($entry_tags as $tag) { - if (array_search($tag, $boring_tags) === false) { - array_push($filtered_tags, $tag); - } - } - } - - $filtered_tags = array_unique($filtered_tags); - - if ($debug_enabled) { - _debug("update_rss_feed: filtered article tags:"); - print_r($filtered_tags); - } - - // Save article tags in the database - - if (count($filtered_tags) > 0) { - - db_query($link, "BEGIN"); - - foreach ($filtered_tags as $tag) { - - $tag = sanitize_tag($tag); - $tag = db_escape_string($tag); - - if (!tag_is_valid($tag)) continue; - - $result = db_query($link, "SELECT id FROM ttrss_tags - WHERE tag_name = '$tag' AND post_int_id = '$entry_int_id' AND - owner_uid = '$owner_uid' LIMIT 1"); - - if ($result && db_num_rows($result) == 0) { - - db_query($link, "INSERT INTO ttrss_tags - (owner_uid,tag_name,post_int_id) - VALUES ('$owner_uid','$tag', '$entry_int_id')"); - } - - array_push($tags_to_cache, $tag); - } - - /* update the cache */ - - $tags_to_cache = array_unique($tags_to_cache); - - $tags_str = db_escape_string(join(",", $tags_to_cache)); - - db_query($link, "UPDATE ttrss_user_entries - SET tag_cache = '$tags_str' WHERE ref_id = '$entry_ref_id' - AND owner_uid = $owner_uid"); - - db_query($link, "COMMIT"); - } - - if ($debug_enabled) { - _debug("update_rss_feed: article processed"); - } - } - - if (!$last_updated) { - if ($debug_enabled) { - _debug("update_rss_feed: new feed, catching it up..."); - } - catchup_feed($link, $feed, false, $owner_uid); - } - - if ($debug_enabled) { - _debug("purging feed..."); - } - - purge_feed($link, $feed, 0, $debug_enabled); - - db_query($link, "UPDATE ttrss_feeds - SET last_updated = NOW(), last_error = '' WHERE id = '$feed'"); - -// db_query($link, "COMMIT"); - - } else { - - if ($use_simplepie) { - $error_msg = mb_substr($rss->error(), 0, 250); - } else { - $error_msg = mb_substr(magpie_error(), 0, 250); - } - - if ($debug_enabled) { - _debug("update_rss_feed: error fetching feed: $error_msg"); - } - - $error_msg = db_escape_string($error_msg); - - db_query($link, - "UPDATE ttrss_feeds SET last_error = '$error_msg', - last_updated = NOW() WHERE id = '$feed'"); - } - - if ($use_simplepie) { - unset($rss); - } - - if ($debug_enabled) { - _debug("update_rss_feed: done"); - } - - } - function print_select($id, $default, $values, $attributes = "") { print "