diff --git a/src/reader/_update.py b/src/reader/_update.py index 4e3a74c5..685e3502 100644 --- a/src/reader/_update.py +++ b/src/reader/_update.py @@ -114,42 +114,39 @@ def stale(self) -> bool: def should_update_feed(self, new: FeedData, entries_to_update: bool) -> bool: old = self.old_feed - self.log.debug("old updated %s, new updated %s", old.updated, new.updated) + + if self.stale: + # logging for stale happened in process_feed_for_update() + return True if not old.last_updated: self.log.info("feed has no last_updated, treating as updated") assert not old.updated, "updated must be None if last_updated is None" return True - if not new.updated: - self.log.info("feed has no updated, treating as updated") - return True - - if self.stale: - # logging for stale happened in process_feed_for_update() - return True - - # we only care if feed.updated changed if any entries changed: - # https://github.com/lemon24/reader/issues/231#issuecomment-812601988 - # - # for RSS, if there's no date element, - # feedparser user lastBuildDate as .updated, - # which may (obviously) change without the feed actually changing - # - if entries_to_update and (not old.updated or new.updated > old.updated): - self.log.info("feed updated") + # Some feeds have entries newer than the feed; + # we always update the feed if entries changed, for simplicity. + # https://github.com/lemon24/reader/issues/76 + if entries_to_update: + self.log.info("feed has entries to update, treating as updated") return True - # check if the feed content actually changed: + # Check if the feed content actually changed: # https://github.com/lemon24/reader/issues/179 if not old.hash or new.hash != old.hash: - self.log.debug("feed hash changed, treating as updated") + self.log.info("feed hash changed, treating as updated") return True - # Some feeds have entries newer than the feed. - # https://github.com/lemon24/reader/issues/76 - self.log.info("feed not updated, updating entries anyway") + # For RSS feeds with no date element, + # feedparser user lastBuildDate as .updated, + # which can change without the feed actually changing, + # so feed.updated is excluded from the hash. + # https://github.com/lemon24/reader/issues/231#issuecomment-812601988 + if new.updated != old.updated: + self.log.info("only feed updated changed, skipping") + return False + self.log.info("feed not updated, skipping") return False def should_update_entry( @@ -236,8 +233,6 @@ def get_feed_to_update( parsed_feed.http_etag, parsed_feed.http_last_modified, ) - if entries_to_update: - return FeedUpdateIntent(self.url, self.now) return None def update( diff --git a/tests/test_reader.py b/tests/test_reader.py index 327ee52b..cdab8dcc 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -113,7 +113,7 @@ def test_update_feed_updated(reader, update_feed, caplog): last_updated=datetime(2010, 1, 3), ), } - assert "feed not updated, updating entries anyway" in caplog.text + assert "feed has entries to update, treating as updated" in caplog.text caplog.clear() # Feed gets updated because content (hash) changed. @@ -130,7 +130,7 @@ def test_update_feed_updated(reader, update_feed, caplog): assert "feed hash changed, treating as updated" in caplog.text caplog.clear() - # The feed doesn't change, because .updated is older. + # The feed changes if entries changed, regardless of .updated being older. # Entries get updated regardless. old_feed = parser.feed(1, datetime(2009, 1, 1), title='old-different-title') entry_three = parser.entry(1, 3, datetime(2010, 2, 1)) @@ -141,9 +141,7 @@ def test_update_feed_updated(reader, update_feed, caplog): feed = old_feed.as_feed( added=datetime(2010, 1, 1), - # doesn't change because it's not newer - updated=datetime(2010, 1, 1), - # changes because entries changed + updated=datetime(2009, 1, 1), last_updated=datetime(2010, 1, 4), ) assert set(reader.get_entries()) == { @@ -163,10 +161,10 @@ def test_update_feed_updated(reader, update_feed, caplog): last_updated=datetime(2010, 1, 4), ), } - assert "feed not updated, updating entries anyway" in caplog.text + assert "feed has entries to update, treating as updated" in caplog.text caplog.clear() - # The feed doesn't change; despite being newer, no entries have changed. + # The feed doesn't change if only .updated changes (and there are no updated entries). old_feed = parser.feed(1, datetime(2010, 1, 2), title='old-different-title') reader._now = lambda: datetime(2010, 1, 4, 12) @@ -175,8 +173,8 @@ def test_update_feed_updated(reader, update_feed, caplog): feed = old_feed.as_feed( added=datetime(2010, 1, 1), - # doesn't change because no entries have changed - updated=datetime(2010, 1, 1), + # doesn't change because nothing except .updated changed + updated=datetime(2009, 1, 1), # doesn't change because nothing changed last_updated=datetime(2010, 1, 4), ) @@ -197,10 +195,11 @@ def test_update_feed_updated(reader, update_feed, caplog): last_updated=datetime(2010, 1, 4), ), } - assert "feed not updated, updating entries anyway" in caplog.text + assert "only feed updated changed, skipping" in caplog.text caplog.clear() - # The feeds changes because it is newer *and* entries get updated. + # The feeds changes because ~~it is newer *and*~~ entries get updated. + # TODO: same as "The feed changes if entries changed, regardless ...", remove? new_feed = parser.feed(1, datetime(2010, 1, 2), title='new') entry_four = parser.entry(1, 4, datetime(2010, 2, 1)) reader._now = lambda: datetime(2010, 1, 5) @@ -233,7 +232,7 @@ def test_update_feed_updated(reader, update_feed, caplog): last_updated=datetime(2010, 1, 5), ), } - assert "feed updated" in caplog.text + assert "feed has entries to update, treating as updated" in caplog.text caplog.clear()