#!/usr/bin/env python3
"""Poll RSS feeds and hand matching torrent links to Transmission.

Feed definitions come from a JSON config file; the set of already-seen
and already-downloaded links is persisted in a shelve database so that
restarts do not re-add torrents.  The .torrent files are downloaded by
this process (not by transmission) because the installed opener may
carry tracker cookies that transmission does not have.
"""

import json
import re
import shelve
import subprocess
import tempfile
import time
import urllib.error
import urllib.request

import feedparser


class App(object):
    """Top-level driver: owns the feed list and the persistent state."""

    def __init__(self):
        # Links already inspected, bounded to the most recent 500.
        self.seen = []
        # Links already handed to transmission, bounded to the most recent 500.
        self.downloaded = []
        self.feeds = []

    def load_config(self, ini):
        """Load feed definitions from the JSON config file *ini*.

        The config must contain a top-level "feeds" list; each entry is
        passed to Feed.load_config.
        """
        with open(ini) as f:
            config = json.load(f)
        for entry in config["feeds"]:
            feed = Feed()
            feed.load_config(entry)
            self.add_feed(feed)

    def load_stor(self, data):
        """Open the shelve state file *data*, creating any missing keys."""
        self.stor = shelve.open(data)
        if "downloaded" not in self.stor:
            self.stor["downloaded"] = []
        if "seen" not in self.stor:
            self.stor["seen"] = []
        self.downloaded = self.stor["downloaded"]
        self.seen = self.stor["seen"]

    def setup_env(self):
        """Install a global cookie-aware URL opener.

        Some trackers require the same cookies for the RSS fetch and the
        .torrent download, which is why downloads happen in-process.
        """
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor())
        urllib.request.install_opener(opener)

    def add_feed(self, feed):
        """Register *feed* for polling."""
        self.feeds.append(feed)

    def get_next_feed(self):
        """Return a (delay, feed) tuple of time to delay before next feed,
        and next feed to run.

        Assumes at least one feed is configured.
        """
        next_expiration = None
        next_feed = None
        for feed in self.feeds:
            expir = feed.poll_delay + feed.last_load
            # BUG FIX: the original kept the LARGEST expiration (Py2
            # `None < int` made the comparison "work"), which selected the
            # feed due furthest in the future and starved the others.  We
            # want the feed whose next poll comes soonest.
            if next_expiration is None or expir < next_expiration:
                next_expiration = expir
                next_feed = feed
        delay = next_expiration - int(time.time())
        if delay < 0:
            delay = 0
        return (delay, next_feed)

    def _run_relavent(self, feed):
        """Poll *feed* once and add any newly-matched links to transmission.

        Persists the bounded seen/downloaded histories to the shelve
        store after each poll.
        """
        found = feed.find_relavant(self.seen)
        self.seen = self.seen[-500:]
        self.stor["seen"] = self.seen
        for link in found:
            if link in self.downloaded:
                continue
            self.downloaded.append(link)
            # download file here instead of relying on transmission since we
            # may have cookies that transmission does not
            resp = urllib.request.urlopen(link)
            try:
                payload = resp.read()
            finally:
                resp.close()
            with tempfile.NamedTemporaryFile() as temp:
                temp.write(payload)
                temp.flush()
                ret = subprocess.call(
                    ["transmission-remote", "localhost", "-a", temp.name])
            if ret:
                print("Error adding torrent")
        self.downloaded = self.downloaded[-500:]
        self.stor["downloaded"] = self.downloaded
        self.stor.sync()

    def main(self):
        """Run every feed once, then poll forever until interrupted."""
        try:
            for feed in self.feeds:
                self._run_relavent(feed)
            while True:
                (delay, feed) = self.get_next_feed()
                time.sleep(delay)
                try:
                    self._run_relavent(feed)
                except urllib.error.URLError as e:
                    # Network hiccups are expected; report and keep polling.
                    print(e)
        except KeyboardInterrupt:
            pass


class Feed(object):
    """A single RSS feed plus its poll interval and title filter."""

    def __init__(self):
        self.url = None
        # in seconds
        self.poll_delay = 60 * 60
        # compiled regular expression match object
        self.match = None
        # initialize to Epoch. Should always be integer number of seconds
        # since Epoch
        self.last_load = 0

    def _load_feed(self):
        """Fetch and parse the feed, recording the poll timestamp."""
        self.last_load = int(time.time())
        resp = urllib.request.urlopen(self.url)
        try:
            data = resp.read()
        finally:
            resp.close()
        return feedparser.parse(data)

    def find_relavant(self, seen):
        """Return links of unseen items whose titles match the filter.

        *seen* is mutated in place: every new link is appended whether
        its title matched or not.
        """
        d = self._load_feed()
        found = []
        print("New RSS Items:")
        for item in d['items']:
            if item.link in seen:
                continue
            seen.append(item.link)
            print(" ", item.title)
            if not self.match.match(item.title):
                continue
            print("  Matched")
            found.append(item.link)
        return found

    def load_config(self, config):
        """Configure this feed from one entry of the JSON config.

        Expects "url", "poll_delay", and a "matches" list of regular
        expressions which are OR-ed into a single title filter.
        """
        url = str(config["url"])
        poll_delay = int(config["poll_delay"])
        matches = list(config["matches"])
        valid = []
        for i, m in enumerate(matches):
            try:
                re.compile(m)
            except re.error:
                # BUG FIX: the original reported the bad pattern but still
                # joined it into the combined expression, so the final
                # re.compile below raised anyway.  Skip invalid patterns.
                print("Invalid regular expression in matches list "
                      "with index %d: %s" % (i, m))
                continue
            valid.append(m)
        self.url = url
        if poll_delay:
            self.poll_delay = poll_delay
        self.match = re.compile("|".join(valid))


if __name__ == "__main__":
    app = App()
    app.load_config('.feed-transmission.json')
    app.load_stor(".feed-transmission.data")
    app.setup_env()
    app.main()