#!/usr/bin/env python
"""Archive the entries of a blog feed into a directory, one file per entry.

Usage: blog-archive.py FEED_URL ARCHIVE_DIR

Every entry newer than the timestamp stored in ``ARCHIVE_DIR/latest`` is
written to ``ARCHIVE_DIR/<timestamp>``; afterwards ``latest`` is updated to
the newest timestamp seen.  The code runs unchanged on Python 2.6+ and 3.
"""

import io
import os.path
import sys
import time

# Name of the marker file (inside the archive directory) holding the
# timestamp of the newest entry archived so far.
LATEST_NAME = "latest"


class blog(object):
    """A parsed feed whose entries can be iterated as plain dictionaries."""

    def __init__(self, url):
        """Download and parse the feed found at *url* using feedparser."""
        # Imported here rather than at module level so that usage errors can
        # still be reported, and the helpers below imported, on a machine
        # where feedparser is not installed.
        import feedparser

        self.feed = feedparser.parse(url)

    def entries(self, after_time=0):
        """Yield the feed entries dated strictly later than *after_time*.

        Each yielded dict has the keys ``title``, ``content`` (the value of
        the first content block, or the summary when the entry carries no
        content), ``time`` (integer timestamp) and ``tags`` (list of the tag
        terms, empty when the entry has none).  Entries without a parsed
        date are skipped because they cannot be placed on the timeline.
        """
        for entry in self.feed["entries"]:
            parsed_date = entry.get("date_parsed")
            if parsed_date is None:
                continue
            # NOTE(review): time.mktime() reads the tuple as local time while
            # feedparser documents its parsed dates as UTC.  Kept as is,
            # because existing archives' file names and the "latest" marker
            # were produced with this conversion.
            stamp = int(time.mktime(parsed_date))
            if stamp <= after_time:
                continue
            content_blocks = entry.get("content")
            if content_blocks:
                body = content_blocks[0]["value"]
            else:
                # Feeds that only publish a summary have no "content" key.
                body = entry.get("summary", "")
            yield {
                "title": entry["title"],
                "content": body,
                "time": stamp,
                "tags": [tag["term"] for tag in entry.get("tags") or []],
            }


def write_entry(dir, entry):
    """Write *entry* to the file ``<dir>/<entry time>`` unless it exists.

    *entry* is a dict as yielded by ``blog.entries``.  The file is encoded
    as UTF-8 so titles and bodies containing non-ASCII text can be stored.
    An existing file is never overwritten.

    Returns True when a file was created and False when it already existed.
    """
    filename = os.path.join(dir, str(int(entry["time"])))
    if os.path.exists(filename):
        return False
    text = u"%(time)s: %(title)s\ntags:%(tags)s\n%(content)s\n" % entry
    with io.open(filename, "w", encoding="utf-8") as out:
        out.write(text)
    return True


def help(status=0):
    """Print the usage line and terminate the process with *status*."""
    print("Usage: blog-archive.py FEED_URL ARCHIVE_DIR")
    sys.exit(status)


def _read_latest(archive_dir):
    """Return the timestamp stored in the marker file, or 0 if unusable."""
    marker = os.path.join(archive_dir, LATEST_NAME)
    if not os.path.isfile(marker):
        return 0
    with open(marker, "r") as handle:
        first_line = handle.readline()
    try:
        return int(first_line)
    except ValueError:
        # An empty or damaged marker must not block every later run.
        # Starting from 0 is safe: write_entry() never overwrites a file.
        return 0


def _write_latest(archive_dir, stamp):
    """Record *stamp* as the newest archived timestamp."""
    with open(os.path.join(archive_dir, LATEST_NAME), "w") as handle:
        handle.write(str(int(stamp)) + "\n")


def main(argv=None):
    """Run the archiver; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("I need a feed URL, please.")
        help(1)
    if len(argv) < 3:
        print("I need a directory to archive into.")
        help(1)
    if not os.path.isdir(argv[2]):
        print("I need to archive into a directory. %s isn't a directory"
              % argv[2])
        help(1)

    archive_dir = os.path.normpath(argv[2])
    the_blog = blog(argv[1])

    # Discover the latest time archived.
    previous = _read_latest(archive_dir)
    newest = previous
    entry_count = 0
    for entry in the_blog.entries(after_time=previous):
        # Count only the entries that actually produced a new file.
        if write_entry(archive_dir, entry):
            entry_count += 1
        newest = max(newest, entry["time"])

    if entry_count > 0:
        print("Added %d entries" % entry_count)
    else:
        print("No new entries")

    # Write the latest time only if we have seen a later item.
    if newest > previous:
        _write_latest(archive_dir, newest)


if __name__ == "__main__":
    main()