From 8ab545b6b0453d2bd05017b4dca07d09b313dbbc Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Fri, 13 Aug 2010 21:32:15 -0500 Subject: Implement get_next_feed() and move configuration around slightly --- feed-transmission.py | 92 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/feed-transmission.py b/feed-transmission.py index f98651c..dbb9a8f 100755 --- a/feed-transmission.py +++ b/feed-transmission.py @@ -15,31 +15,19 @@ class App(object): self.downloaded = [] self.feeds = [] - def load_config(self): + def load_config(self, ini): config = ConfigParser.RawConfigParser({}) - config.readfp(file('.feed-transmission.ini')) + config.readfp(file(ini)) feed_names = config.get("config", "feeds") feed_names = feed_names.split(",") for i in feed_names: i = i.strip() - f = {} - url = config.get(i, "url") - poll_delay = config.getint(i, "poll_delay") - match = [] - for m in config.items(i): - if not m[0].startswith("match"): - continue - try: - re.compile(m[1]) - except re.error: - print "Invalid regular expression at %s, %s: %s" % (i, m[0], m[1]) - match.append(m[1]) - match = "|".join(match) - match = re.compile(match) - self.add_feed(Feed(url, poll_delay, match)) + feed = Feed() + feed.load_config(config, i) + self.add_feed(feed) - def load_stor(self): - self.stor = shelve.open(".feed-transmission.data") + def load_stor(self, data): + self.stor = shelve.open(data) if not "downloaded" in self.stor: self.stor["downloaded"] = [] @@ -59,10 +47,19 @@ class App(object): """Returns a (delay, feed) tuple of time to delay before next feed, and next feed to run """ - # TODO - return (self.feeds[0].poll_delay, self.feeds[0]) + next_expiration = None + next_feed = None + for feed in self.feeds: + expir = feed.poll_delay + feed.last_load + if next_expiration < expir: + next_expiration = expir + next_feed = feed + delay = next_expiration - int(time.time()) + if delay < 0: + delay = 0 + return (delay, next_feed) - def run_relavent(self, feed): + def _run_relavent(self, feed): found = feed.find_relavant(self.seen) self.seen = self.seen[-500:] self.stor["seen"] = self.seen @@ -70,6 +67,8 @@ class App(object): if i in self.downloaded: continue self.downloaded.append(i) + # download file here instead of relying on transmission since we may + # have cookies that transmission does not f = urllib2.urlopen(i) temp = tempfile.NamedTemporaryFile() temp.write(f.read()) @@ -86,37 +85,43 @@ class App(object): def main(self): try: for feed in self.feeds: - self.run_relavent(feed) + self._run_relavent(feed) while True: (delay, feed) = self.get_next_feed() time.sleep(delay) try: - self.run_relavent(feed) + self._run_relavent(feed) except urllib2.URLError, e: print e except KeyboardInterrupt: pass class Feed(object): - def __init__(self, url, poll_delay, match): - self.url = url - self.poll_delay = poll_delay - self.match = match + def __init__(self): + self.url = None + # in seconds + self.poll_delay = 60 * 60 + # compiled regular expression match object + self.match = None + # initialize to Epoch. Should always be integer number of seconds since + # Epoch + self.last_load = 0 - def load_feed(self): + def _load_feed(self): + self.last_load = int(time.time()) f = urllib2.urlopen(self.url) d = feedparser.parse(f.read()) f.close() return d def find_relavant(self, seen): - d = self.load_feed() + d = self._load_feed() found = [] print "New RSS Items:" for i in d['items']: - if i.title in seen: + if i.link in seen: continue - seen.append(i.title) + seen.append(i.link) print " ", i.title if not self.match.match(i.title): continue @@ -124,10 +129,29 @@ class Feed(object): found.append(i.link) return found + def load_config(self, config, section): + url = config.get(section, "url") + poll_delay = config.getint(section, "poll_delay") + match = [] + for m in config.items(section): + if not m[0].startswith("match"): + continue + try: + re.compile(m[1]) + except re.error: + print "Invalid regular expression at %s, %s: %s" % (section, m[0], m[1]) + match.append(m[1]) + match = "|".join(match) + match = re.compile(match) + self.url = url + if poll_delay: + self.poll_delay = poll_delay + self.match = match + if __name__ == "__main__": app = App() - app.load_config() - app.load_stor() + app.load_config('.feed-transmission.ini') + app.load_stor(".feed-transmission.data") app.setup_env() app.main() -- cgit v1.2.3-54-g00ecf