#!/usr/bin/env python
# ------------------------------------------------------------------------------
# TODO: look at changing entire makefile to python
"""Render the new items of a WaterBlogged! RSS feed into a mail body file.

Usage: SCRIPT FEED_FILE HTM_FILE LAST_UPDATE

FEED_FILE is handed to ``feedparser.parse``, HTM_FILE is the output file and
LAST_UPDATE is a timestamp in seconds since the epoch; only entries updated
after it are written.
"""

import calendar
import os
import re
import sys

# ------------------------------------------------------------------------------
# for simplicity, I'll define these in the script, rather than in separate
# files. I'll use the $ symbol to represent variables
#
# NOTE(review): parse_item() substitutes $link but no template below contains
# it, and the output file is called htm_filename -- these templates look like
# their markup was lost at some point. Confirm against the deployed copy.

# this will appear once at the top of the output file
frm_header = ''' WaterBlogged! RSS $feed_type Mail
WaterBlogged! RSS $feed_type Mail
'''

# this is what each new feed item is transformed into. this may appear multiple
# times within the output file
frm_item = '''

$title

Author: $creator
Published: $pubdate
$content
---
Comment
'''

# this will appear once at the bottom of the output file
frm_footer = '''
Email mr.wb@waterblogged.net to unsubscribe.
'''

# ------------------------------------------------------------------------------
# functions to replace variables in header and items

# one pattern for every item variable, so a template is scanned exactly once
_ITEM_VARIABLE = re.compile(r'\$(content|creator|link|pubdate|title)')


def parse_header(header, feed_type):
    """Return *header* with every ``$feed_type`` replaced by *feed_type*.

    The value is inserted literally: backslashes in *feed_type* are not
    interpreted as regular-expression escapes.
    """
    return header.replace('$feed_type', feed_type)


def parse_item(item, content, creator, link, pubdate, title):
    """Return the *item* template with its ``$variables`` filled in.

    Recognised variables are ``$content``, ``$creator``, ``$link``,
    ``$pubdate`` and ``$title``. Values are inserted literally and in a
    single pass, so backslashes in feed text are kept as-is and a ``$name``
    occurring inside an inserted value is not substituted again.
    """
    values = {
        'content': content,
        'creator': creator,
        'link': link,
        'pubdate': pubdate,
        'title': title,
    }
    # a callable replacement bypasses re's escape processing of the value
    return _ITEM_VARIABLE.sub(lambda match: values[match.group(1)], item)


# ------------------------------------------------------------------------------
def main(argv=None):
    """Write the feed entries newer than the given timestamp to the output file.

    *argv* defaults to ``sys.argv`` and must hold the feed file name, the
    output file name and the last-update timestamp, in that order.

    When there are new entries the output file receives header, items and
    footer (UTF-8 encoded) and its timestamps are set to the newest entry's
    update time. Otherwise the file is emptied and the modification time it
    had before this run is restored.

    Returns 0 on success, 2 when arguments are missing.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 4:
        prog = argv[0] if argv else 'script'
        sys.stderr.write(
            'usage: %s feed_file htm_file last_update\n' % prog)
        return 2

    # imported here so the template helpers above stay importable (and
    # testable) on machines without the third-party feedparser package
    import feedparser

    # args
    feed_filename = argv[1]
    htm_filename = argv[2]
    # TODO: calculate this in script
    last_update = float(argv[3])

    d = feedparser.parse(feed_filename)
    # TODO: account for HTML sanitization http://www.feedparser.org/docs/html-sanitization.html

    # HACK: determine feed type
    feed_type = 'Comments' if 'Comments' in d.feed.title else 'Entries'

    # grab and parse new items
    # NOTE(review): stopping at the first old entry assumes the feed lists
    # its newest entries first -- confirm for the feeds in use
    new_items = []
    for entry in d.entries:
        if calendar.timegm(entry.updated_parsed) <= last_update:
            break
        new_items.append(parse_item(frm_item,
                                    entry.content[0]['value'],
                                    entry.author,
                                    entry.link,
                                    entry.updated,
                                    entry.title))

    if new_items:
        stamp = calendar.timegm(d.entries[0].updated_parsed)
        document = (parse_header(frm_header, feed_type)
                    + ''.join(new_items)
                    + frm_footer)
        # encode once at the I/O boundary; the file is opened in binary mode
        payload = document.encode('utf-8')
    else:
        # dump file contents, but remember the old timestamp first: opening
        # the file for writing truncates it and resets its mtime
        try:
            stamp = os.stat(htm_filename).st_mtime
        except OSError:
            stamp = None  # no previous file, nothing to preserve
        payload = b''

    # write to file
    with open(htm_filename, 'wb') as f:
        f.write(payload)

    # touch file to last item date
    if stamp is not None:
        os.utime(htm_filename, (stamp, stamp))

    return 0


# ------------------------------------------------------------------------------
if __name__ == '__main__':
    sys.exit(main())