Add <enclosure> support.

lwindolf · Jun 16, 2024 · 3b593d1 · 3b593d1
1 parent 72cf275
commit 3b593d1
Show file tree

Hide file tree

Showing 7 changed files with 178 additions and 246 deletions.
diff --git a/tests/rdf.test.js b/tests/rdf.test.js
@@ -54,67 +54,4 @@ test('rss 1.0 parse', () => {
         expect(feed.items[0].description).toBe(`Processing document inclusions with general XML tools can be ...`);
         expect(feed.items[0].source).toBe('http://xml.com/pub/2000/08/09/xslt/xslt.html');
         expect(feed.items[0].time).toBe(1694950440);
-});
-
-test('rss Dublin Core', () => {
-    // XML example from spec https://web.resource.org/rss/1.0/modules/dc/
-    let feed = RDFParser.parse(`<?xml version="1.0" encoding="utf-8"?> 
-    
-    <rdf:RDF 
-      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
-      xmlns:dc="http://purl.org/dc/elements/1.1/"
-      xmlns="http://purl.org/rss/1.0/"
-    > 
-    
-      <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
-        <title>Meerkat</title>
-        <link>http://meerkat.oreillynet.com</link>
-        <description>Meerkat: An Open Wire Service</description>
-        <dc:publisher>The O'Reilly Network</dc:publisher>
-        <dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
-        <dc:rights>Copyright © 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
-        <dc:date>2000-01-01T12:00+00:00</dc:date>
-    
-        <image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
-    
-        <items>
-          <rdf:Seq>
-            <rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
-          </rdf:Seq>
-        </items>
-    
-        <textinput rdf:resource="http://meerkat.oreillynet.com" />
-    
-      </channel>
-    
-      <image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
-        <title>Meerkat Powered!</title>
-        <url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
-        <link>http://meerkat.oreillynet.com</link>
-      </image>
-    
-      <item rdf:about="http://c.moreover.com/click/here.pl?r123">
-        <title>XML: A Disruptive Technology</title> 
-        <link>http://c.moreover.com/click/here.pl?r123</link>
-        <dc:description>XML is placing increasingly heavy...</dc:description>
-        <dc:publisher>The O'Reilly Network</dc:publisher>
-        <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
-        <dc:rights>Copyright © 2000 O'Reilly &amp; Associates, Inc.</dc:rights>
-        <dc:subject>XML</dc:subject>
-      </item> 
-    
-      <textinput rdf:about="http://meerkat.oreillynet.com">
-        <title>Search Meerkat</title>
-        <description>Search Meerkat's RSS Database...</description>
-        <name>s</name>
-        <link>http://meerkat.oreillynet.com/</link>
-      </textinput>
-    
-    </rdf:RDF>`);
-
-    expect(feed.error).toBe(undefined);
-    expect(feed.items.length).toBe(1);
-    expect(feed.items[0].description).toBe(`XML is placing increasingly heavy...`);   
-    // with no date given date should be similar to current date
-    expect(Math.floor(Date.now() / 10000) - Math.floor(feed.items[0].time / 10000)).toBe(0)
 });
diff --git a/www/assets/js/feed.js b/www/assets/js/feed.js
@@ -5,74 +5,84 @@
 import { FeedUpdater } from './feedupdater.js';
 
 export class Feed {
-        // state
-        id;
-        error;
-        orig_source;
-        last_updated;
-        etag;
+    // state
+    id;
+    error;
+    orig_source;
+    last_updated;
+    etag;
 
-        // feed content
-        title;
-        source;
-        description;
-        icon;
-        metadata = {};
-        items = [];
+    // feed content
+    title;
+    source;
+    description;
+    icon;
+    metadata = {};
+    items = [];
 
-        // error code constants
-        static ERROR_NONE     = 0;
-        static ERROR_AUTH     = 1 << 0;
-        static ERROR_NET      = 1 << 1;
-        static ERROR_DISCOVER = 1 << 2;
-        static ERROR_XML      = 1 << 3;
+    // error code constants
+    static ERROR_NONE = 0;
+    static ERROR_AUTH = 1 << 0;
+    static ERROR_NET = 1 << 1;
+    static ERROR_DISCOVER = 1 << 2;
+    static ERROR_XML = 1 << 3;
 
-        constructor(defaults) {
-            Object.keys(defaults).forEach((k) => { this[k] = defaults[k] });
+    constructor(defaults) {
+        Object.keys(defaults).forEach((k) => { this[k] = defaults[k] });
+    }
+
+    async update() {
+        const f = await FeedUpdater.fetch(this.source);
+        if (Feed.ERROR_NONE == f.error) {
+            this.title = f.title;
+            this.source = f.source;
+            this.homepage = f.homepage;
+            this.description = f.description;
+            this.items = f.items;
+            this.metadata = f.metadata;
+            this.items.forEach((i) => {
+                i.node = this;
+            })
+
+            // feed provided favicon should always win
+            if (f.icon)
+                this.icon = f.icon;
         }
 
-        async update() {
-            const f = await FeedUpdater.fetch(this.source);
-            if(Feed.ERROR_NONE == f.error) {                 
-                this.title       = f.title;
-                this.source      = f.source;
-                this.homepage    = f.homepage;
-                this.description = f.description;
-                this.items       = f.items;
-                this.metadata    = f.metadata;
-                this.items.forEach((i) => {
-                    i.node = this;
-                })
+        this.last_updated = f.last_updated;
+        this.error = f.error;
+        document.dispatchEvent(new CustomEvent('nodeUpdated', { detail: this }));
+    }
 
-                // feed provided favicon should always win
-                if(f.icon)
-                    this.icon = f.icon;
-            }
+    // Return the next unread item after the given id
+    getNextUnread(id) {
+        let item, idx = 0;
 
-            this.last_updated = f.last_updated;
-            this.error = f.error;
-            document.dispatchEvent(new CustomEvent('nodeUpdated', { detail: this }));
+        // search forward in feed items starting from id
+        if (id) {
+            this.items.find((i) => { idx++; return (i.id === id); });   // find current item index
+            item = this.items.slice(idx).find((i) => !i.read);          // find next unread item
+            if (item)
+                return item;
         }
 
-        // Return the next unread item after the given id
-        getNextUnread(id) {
-            let item, idx = 0;
+        // if nothing found search from start of feed
+        return this.items.find((i) => !i.read);
+    }
 
-            // search forward in feed items starting from id
-            if(id) {
-                this.items.find((i) => { idx++; return (i.id === id); });   // find current item index
-                item = this.items.slice(idx).find((i) => !i.read);          // find next unread item
-                if(item)
-                    return item;
-            }
+    getItemById(id) {
+        let itemsById = {};
+        this.items.forEach((i) => { itemsById[i.id] = i; });
+        return itemsById[id];
+    }
 
-            // if nothing found search from start of feed
-            return this.items.find((i) => !i.read);
-        }
+    addItem(item) {
+        // Finally some guessing: the item has no time yet, so fall back to now. NOTE(review): Date.now() is in milliseconds while the parsed time expected by the rdf test looks like epoch seconds - confirm the unit
+        if (!item.time)
+            item.time = Date.now();
 
-        getItemById(id) {
-            let itemsById = {};
-            this.items.forEach((i) => { itemsById[i.id] = i; });
-            return itemsById[id];
-        }
+        // FIXME: set an id if sourceId is missing
+
+        this.items.push(item);
+    }
 }
diff --git a/www/assets/js/item.js b/www/assets/js/item.js
@@ -33,6 +33,23 @@ export class Item {
         }
 
         addMedia(url, mime, length) {
-            this.media.push({ url, mime, length });
+            /* Register an enclosure for this item. Unusable (no url/mime) or
+               redundant (gravatar, already embedded) media is silently skipped. */
+            if (!url || !mime)
+                return;
+
+            /* gravatars are often supplied as media:content with medium='image'
+               so we do not treat such occurrences as enclosures */
+            if (-1 !== url.indexOf('www.gravatar.com'))
+                return;
+
+            /* Never add enclosures for images already contained in the description
+               (guard against an unset description, which would otherwise throw) */
+            if (this.description && -1 !== this.description.indexOf(url))
+                return;
+
+            // expose the size under the key length; non-numeric input becomes undefined
+            const l = parseInt(length, 10);
+            this.media.push({ url, mime, length: Number.isNaN(l) ? undefined : l });
         }
 }
diff --git a/www/assets/js/parsers/atom.js b/www/assets/js/parsers/atom.js
@@ -5,6 +5,7 @@
 // Specification https://www.ietf.org/rfc/rfc4287.txt
 
 import { DateParser } from './date.js';
+import { NamespaceParser } from './namespace.js'
 import { XPath } from './xpath.js';
 import { Feed } from '../feed.js';
 import { Item } from '../item.js';
@@ -44,11 +45,11 @@ class AtomParser {
                         time        : DateParser.parse(XPath.lookup(node, 'ns:updated'))
                 });
 
-                if(!item.time)
-                        item.time = DateParser.parse(XPath.lookup(node, 'dc:date'));
+                NamespaceParser.parseItem(node, ['dc', 'content', 'media'], feed, item);
 
                 XPath.foreach(node, 'ns:link', AtomParser.parseEntryLink, item);
-                feed.items.push(item);
+                console.log(feed)
+                feed.addItem(item);
         }
 
         static parse(str) {              

diff --git a/www/assets/js/parsers/namespace.js b/www/assets/js/parsers/namespace.js
@@ -63,29 +63,11 @@ export class NamespaceParser {
                 (example quoted from specification)
             */
             XPath.foreach(node, '//media:content', (n) => {
-                try {
-                    const url = n.lookup('@url');
-                    const mime = n.lookup('@type') || n.lookup('@medium');
-                    let add = true;
-                    let length = parseInt(n.lookup('@duration'), 10);
-
-                    if (Number.isNaN(length))
-                        length = undefined;
-
-                    /* gravatars are often supplied as media:content with medium='image'
-                       so we do not treat such occurences as enclosures */
-                    if (-1 !== url.indexOf('www.gravatar.com'))
-                        add = false;
-
-                    /* Never add enclosures for images already contained in the description */
-                    if (-1 !== item.description.indexOf(url))
-                        add = false;
-
-                    if (add)
-                        item.addMedia(url, mime, length);
-                } catch (e) {
-                    console.log(`Failed to parse <media:content> (${e})!`);
-                }
+                    item.addMedia(
+                        n.lookup('@url'),
+                        n.lookup('@type') || n.lookup('@medium'),
+                        n.lookup('@duration')
+                    );    
             });
         }
     }

diff --git a/www/assets/js/parsers/rdf.js b/www/assets/js/parsers/rdf.js
@@ -2,62 +2,48 @@
 
 // RSS 1.0 parser
 
-import { DateParser } from './date.js';
 import { NamespaceParser } from './namespace.js'
 import { XPath } from './xpath.js';
 import { Feed } from '../feed.js';
 import { Item } from '../item.js';
 
 class RDFParser {
-        static id = 'rdf';
-        static autoDiscover = [
-                '/rdf:RDF/ns:channel'
-        ];
-
-        static parseItem(node, feed) {
-                let item = new Item({
-                        title       : XPath.lookup(node, 'ns:title'),
-                        description : XPath.lookup(node, 'ns:description'),
-                        source      : XPath.lookup(node, 'ns:link'),
-                });
-
-                // Dublin Core support
-                if(!item.description)
-                        item.description = XPath.lookup(node, 'dc:description');
-                if(!item.time)
-                        item.time = DateParser.parse(XPath.lookup(node, 'dc:date'));
-
-                // Finally some guessing
-                if(!item.time)
-                        item.time = Date.now();
-                // FIXME: set an id
-
-                NamespaceParser.parseItem(node, ["dc", "content", "media"], feed, item);
-
-                feed.items.push(item);
-        }
-
-        static parse(str) {              
-                const parser = new DOMParser();
-                const doc = parser.parseFromString(str, 'application/xml');
-                const root = doc.firstChild;
-                let feed = new Feed({
-                        error       : XPath.lookup(root, '/parsererror'),
-                });
-
-                // RSS 1.0
-                if(doc.firstChild.nodeName === 'rdf:RDF') {
-                        feed = {...feed, ...{
-                                title       : XPath.lookup(root, '/rdf:RDF/ns:channel/ns:title'),
-                                description : XPath.lookup(root, '/rdf:RDF/ns:channel/ns:description'),
-                                homepage    : XPath.lookup(root, '/rdf:RDF/ns:channel/ns:link')
-                        }};
-
-                        XPath.foreach(root, '/rdf:RDF/ns:item', this.parseItem, feed);
-                }
-
-                return feed;
-        }
+	static id = 'rdf';
+	static autoDiscover = [
+		'/rdf:RDF/ns:channel'
+	];
+
+	static parseItem(node, feed) {
+		let item = new Item({
+			title: XPath.lookup(node, 'ns:title'),
+			description: XPath.lookup(node, 'ns:description'),
+			source: XPath.lookup(node, 'ns:link'),
+		});
+
+		NamespaceParser.parseItem(node, ['dc', 'content', 'media'], feed, item);
+
+		feed.addItem(item);
+	}
+
+	static parse(str) {
+		// Parse an RSS 1.0 document string into a Feed; the document element is used as root so that leading comments, doctypes or processing instructions are skipped
+		const doc = new DOMParser().parseFromString(str, 'application/xml');
+		const root = doc.documentElement;
+		const feed = new Feed({
+			error: XPath.lookup(root, '/parsererror'),
+		});
+
+		// RSS 1.0: channel metadata and items are only read when the root element is rdf:RDF
+		if (root.nodeName === 'rdf:RDF') {
+			feed.title = XPath.lookup(root, '/rdf:RDF/ns:channel/ns:title');
+			feed.description = XPath.lookup(root, '/rdf:RDF/ns:channel/ns:description');
+			feed.homepage = XPath.lookup(root, '/rdf:RDF/ns:channel/ns:link');
+
+			XPath.foreach(root, '/rdf:RDF/ns:item', this.parseItem, feed);
+		}
+
+		return feed;
+	}
 }
 
 export { RDFParser };