#!/usr/bin/env python
# ------------------------------------------------------------------------------
# TODO: look at changing entire makefile to python
import calendar
import feedparser
import os
import re
import sys
# ------------------------------------------------------------------------------
# Output templates. For simplicity they are defined here in the script rather
# than in separate files. A `$name` token marks a placeholder that is replaced
# with a real value when the output is generated.
#
# Header template: appears once at the top of the output file.
# Placeholder used: $feed_type.
frm_header='''
WaterBlogged! RSS $feed_type Mail
'''
# Item template: every new feed item is rendered through this template, so it
# may appear multiple times within the output file.
# Placeholders used: $creator, $pubdate, $content.
frm_item='''
Author: $creator
Published: $pubdate
$content
'''
# Footer template: appears once at the bottom of the output file.
frm_footer='''
'''
# ------------------------------------------------------------------------------
# functions to replace variables in header and items
def parse_header(header,
                 feed_type):
    """Fill in the ``$feed_type`` placeholder of a header template.

    Args:
        header: Template text containing zero or more ``$feed_type`` tokens.
        feed_type: Text inserted in place of every ``$feed_type`` token.

    Returns:
        A copy of ``header`` with every ``$feed_type`` token replaced by
        ``feed_type``, inserted verbatim.
    """
    # A plain string replacement is used instead of re.sub(): re.sub() treats
    # its replacement argument as a template, so a backslash in feed_type
    # (e.g. "\1" or "\n") would be expanded or rejected rather than copied.
    return header.replace('$feed_type', feed_type)
def parse_item(item,
               content,
               creator,
               link,
               pubdate,
               title):
    """Fill in the placeholders of an item template.

    Args:
        item: Template text containing any of the tokens ``$content``,
            ``$creator``, ``$link``, ``$pubdate`` and ``$title``.
        content: Text inserted for ``$content``.
        creator: Text inserted for ``$creator``.
        link: Text inserted for ``$link``.
        pubdate: Text inserted for ``$pubdate``.
        title: Text inserted for ``$title``.

    Returns:
        A copy of ``item`` with every recognised token replaced by the
        matching value, inserted verbatim.
    """
    values = {
        'content': content,
        'creator': creator,
        'link': link,
        'pubdate': pubdate,
        'title': title,
    }
    # All tokens are substituted in a single pass with a callable replacement:
    # * a callable's return value is inserted literally, so backslashes in the
    #   values are not interpreted as regex escapes or group references;
    # * text that has just been inserted is never rescanned, so a value that
    #   happens to contain e.g. "$title" is left untouched.
    return re.sub(r'\$(content|creator|link|pubdate|title)',
                  lambda match: values[match.group(1)],
                  item)
# ------------------------------------------------------------------------------
def main(argv = None):
    """Render the new items of a feed into the output file.

    Args:
        argv: Argument list laid out like ``sys.argv``: ``argv[1]`` is the
            feed passed to ``feedparser.parse``, ``argv[2]`` is the output
            file name and ``argv[3]`` is the last update time in seconds
            since the epoch. Defaults to ``sys.argv``.

    Returns:
        0 on success, 2 when too few arguments were supplied.
    """
    if argv is None:
        argv = sys.argv
    # args
    if len(argv) < 4:
        sys.stderr.write('usage: %s FEED_FILE OUTPUT_FILE LAST_UPDATE\n'
                         % (argv[0] if argv else 'script'))
        return 2
    feed_filename = argv[1]
    htm_filename = argv[2]
    # TODO: calculate this in script
    last_update = float(argv[3])

    d = feedparser.parse(feed_filename)
    # TODO: account for HTML sanitization http://www.feedparser.org/docs/html-sanitization.html
    # HACK: determine feed type (a feed without a title counts as 'Entries')
    if 'Comments' in d.feed.get('title', ''):
        feed_type = 'Comments'
    else:
        feed_type = 'Entries'

    # grab and parse new items
    new_items = []
    for entry in d.entries:
        # NOTE(review): stopping at the first old entry assumes the feed lists
        # its entries newest first - confirm against the feeds in use.
        if calendar.timegm(entry.updated_parsed) <= last_update:
            break
        new_items.append(parse_item(frm_item,
                                    entry.content[0]['value'],
                                    entry.author,
                                    entry.link,
                                    entry.updated,
                                    entry.title))

    if new_items:
        stamp = calendar.timegm(d.entries[0].updated_parsed)
        # Assemble the whole document as text and encode it once, so only
        # bytes reach the binary-mode file.
        output = (parse_header(frm_header, feed_type) +
                  ''.join(new_items) +
                  frm_footer).encode('utf-8')
    else:
        # dump file contents, but keep the file's previous timestamp. It has
        # to be read before the file is opened for writing, because
        # truncating the file updates its modification time.
        output = b''
        try:
            stamp = os.stat(htm_filename).st_mtime
        except OSError:
            # no previous output file: fall back to its creation time below
            stamp = None

    # write to file
    with open(htm_filename, 'wb') as f:
        f.write(output)
    if stamp is None:
        stamp = os.stat(htm_filename).st_mtime

    # touch file to last item date
    os.utime(htm_filename, (stamp, stamp))
    return 0
# ------------------------------------------------------------------------------
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)