use transactions in feed update process, better handle feeds without entry dates (schema updated)

master
Andrew Dolgov 19 years ago
parent 9d9c9dd645
commit b82af8c382

@ -16,6 +16,14 @@
if ($op == "rpc") {
$subop = $_GET["subop"];
if ($subop == "forceUpdateAll") {
update_all_feeds($link, true);
}
if ($subop == "updateAll") {
update_all_feeds($link, false);
}
if ($subop == "catchupPage") {

@ -3,6 +3,8 @@
function update_all_feeds($link, $fetch) {
pg_query("BEGIN");
if (!$fetch) {
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE
@ -21,6 +23,8 @@
$num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
}
pg_query("COMMIT");
}
function update_rss_feed($link, $feed_url, $feed) {
@ -30,13 +34,14 @@
$num_unread = 0;
if ($rss) {
pg_query("BEGIN");
$result = pg_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'");
$registered_title = pg_fetch_result($result, 0, "title");
if (!$registered_title) {
$feed_title = $rss->channel["title"];
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
}
@ -49,15 +54,22 @@
if (!$entry_guid) $entry_guid = $item["link"];
$entry_timestamp = "";
$rss_2_date = $item['pubdate'];
$rss_1_date = $item['dc']['date'];
$atom_date = $item['issued'];
$no_orig_date = 'false';
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
if ($entry_timestamp == "") $entry_timestamp = 0;
// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
if ($entry_timestamp == "") {
$entry_timestamp = time();
$no_orig_date = 'true';
}
if (!$entry_timestamp) continue;
@ -77,7 +89,7 @@
$result = pg_query($link, "
SELECT
id,unread,md5_hash,last_read,
id,unread,md5_hash,last_read,no_orig_date,title,
EXTRACT(EPOCH FROM updated) as updated_timestamp
FROM
ttrss_entries
@ -89,11 +101,12 @@
$entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
$query = "INSERT INTO ttrss_entries
(title, guid, link, updated, content, feed_id, md5_hash)
(title, guid, link, updated, content, feed_id,
md5_hash, no_orig_date)
VALUES
('$entry_title', '$entry_guid', '$entry_link',
'$entry_timestamp', '$entry_content', '$feed',
'$content_md5')";
'$content_md5', $no_orig_date)";
$result = pg_query($link, $query);
@ -108,33 +121,41 @@
$unread = pg_fetch_result($result, 0, "unread");
$md5_hash = pg_fetch_result($result, 0, "md5_hash");
// if ($md5_hash != $content_md5 && CONTENT_CHECK_MD5)
// $unread = "true";
$no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
$orig_title = pg_fetch_result($result, 0, "title");
if (!$last_read || $md5_hash != $content_md5) {
$last_read = 'null';
// disable update detection for posts which didn't have correct
// publishment date, because they will always register as updated
// sadly this doesn't catch feed generators which input current date
// in posts all the time (some planets do this)
if ($no_orig_date != 't' && (!$last_read || $md5_hash != $content_md5)) {
$last_read_qpart = 'last_read = null,';
} else {
$last_read = "'$last_read'";
$last_read_qpart = '';
}
// if ($unread || !CONTENT_CHECK_MD5) {
// $updated_query_part = "updated = '$entry_timestamp',";
// }
// mark post as updated on title change
// maybe we should mark it as unread instead?
// if ($updated_timestamp > $entry_timestamp) {
// $unread = "true";
// print "$updated_timestamp : $entry_timestamp<br>";
// }
if ($orig_title != $entry_title) {
$last_read_qpart = 'last_read = null,';
}
// don't bother updating timestamps on posts with broken pubDate
if ($no_orig_date != 't') {
$update_timestamp_qpart = "updated = '$entry_timestamp_fmt',";
}
$query = "UPDATE ttrss_entries
SET
title ='$entry_title',
link = '$entry_link',
updated = '$entry_timestamp_fmt',
$update_timestamp_qpart
$last_read_qpart
content = '$entry_content',
md5_hash = '$content_md5',
last_read = $last_read,
unread = '$unread'
WHERE
id = '$entry_id'";
@ -151,6 +172,8 @@
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
}
pg_query("COMMIT");
}
}

@ -34,5 +34,6 @@ create table ttrss_entries (id serial not null primary key,
md5_hash varchar(200) not null unique,
content text not null,
last_read timestamp,
unread boolean default true);
no_orig_date boolean not null default false,
unread boolean not null default true);

Loading…
Cancel
Save