From da0ce517795493bd348e5b28dfbd764bc2d9f484 Mon Sep 17 00:00:00 2001
From: Eric Anderson
Date: Sat, 21 Aug 2010 13:59:42 -0500
Subject: Use JSON instead of shelve, implement ability to save configuration, and reorganization and other features

---
 Review notes (this area is ignored by git-am and is not part of the commit
 message).

 What the patch does:
  * Replaces the shelve-backed "seen"/"downloaded" store with two JSON files:
    a config file (feeds, matchers) and a data file (per-feed seen links,
    per-matcher matched_count).
  * Adds ConfigParticipant, a base class with helpers that cast config values
    and fall back to a default (printing a message) when the cast fails.
  * Replaces the single combined regular expression per feed with a list of
    Matcher objects; each matcher is white (download) or black (skip), and
    the last matcher that matches an item decides.

 Amendments made during review, relative to the commit as originally
 published (the blob ids in the "index" lines are those of the original
 commit and were not recomputed):
  * feed-transmission.example.json: added the missing closing brace of the
    second matcher object; the example was not valid JSON. The hunk header
    and diffstat were adjusted for the extra line.
  * ConfigParticipant._import_list: the failure message now prints the
    element that failed instead of the whole list, and re.error is caught as
    well (in Python 2 it is not a StandardError), so one invalid pattern is
    reported and skipped instead of aborting the load.
  * App.save_config: writes export_config() rather than export_data().
  * Feed.__init__: matches starts as an empty list instead of None, because
    find_relavant/export_config/export_data iterate it.
  * Feed.export_config: reads self.default_white (self.white does not exist).
  * Matcher.export_config: writes the pattern under "value", the key that
    Matcher.import_config reads.

 feed-transmission.example.json |  14 ++-
 feed-transmission.py           | 225 +++++++++++++++++++++++++++++++----------
 2 files changed, 181 insertions(+), 58 deletions(-)

diff --git a/feed-transmission.example.json b/feed-transmission.example.json
index d934246..7252575 100644
--- a/feed-transmission.example.json
+++ b/feed-transmission.example.json
@@ -2,8 +2,18 @@
     "feeds": [
         {
             "matches": [
-                ".*Thread: Some thread",
-                ".*Thread: Some other thread"
+                {
+                    "type": "re",
+                    "value": ".*Thread: Some thread",
+                    "white": true,
+                    "matched_count": 0
+                },
+                {
+                    "type": "re",
+                    "value": ".*Thread: Some other thread",
+                    "white": true,
+                    "matched_count": 0
+                }
             ],
             "name": "example.com",
             "poll_delay": 3600,
diff --git a/feed-transmission.py b/feed-transmission.py
index 521fecf..167dd6b 100755
--- a/feed-transmission.py
+++ b/feed-transmission.py
@@ -6,39 +6,102 @@ import time
 import subprocess
 import urllib2
 import tempfile
-import shelve
 import json
 
-class App(object):
+class ConfigParticipant(object):
+    def _import_attribute(self, config, key, cast, default):
+        if key in config:
+            try:
+                v = cast(config[key])
+            except StandardError as e:
+                print "Failed to cast value of %s '%s' to %s: %s" % (key, config[key], cast, e)
+                return default
+            return v
+        else:
+            return default
+
+    def _import_list(self, config, key, cast):
+        va = self._import_attribute(config, key, list, [])
+        va_objs = []
+        for v in va:
+            try:
+                v = cast(v)
+            except (StandardError, re.error) as e:
+                print "Failed to cast value of %s '%s' to %s: %s" % (key, v, cast, e)
+                continue
+            va_objs.append(v)
+        return va_objs
+
+    def _import_data_for_list(self, data, key, obj_list):
+        if key not in data:
+            return
+        try:
+            l = list(data[key])
+        except TypeError:
+            print "Failed to load list for key %s" % key
+            return
+        for i, v in enumerate(l):
+            if i < len(obj_list):
+                obj_list[i].import_data(v)
+            else:
+                print "Too many elements in data - data may be out-of-date"
+
+class App(ConfigParticipant):
     def __init__(self):
-        self.seen = []
-        self.downloaded = []
         self.feeds = []
+        self.config_filename = None
+        self.data_filename = None
+
+    def load_config(self, json_filename):
+        self.config_filename = json_filename
+        try:
+            fd = file(self.config_filename)
+        except IOError:
+            print "No config file"
+            return
+        self.import_config(json.load(fd))
+        fd.close()
+
+    def save_config(self):
+        fd = file(self.config_filename, "w")
+        json.dump(self.export_config(), fd)
+        fd.close()
+
+    def load_data(self, json_filename):
+        self.data_filename = json_filename
+        try:
+            fd = file(json_filename)
+        except IOError:
+            print "No data file"
+            return
+        self.import_data(json.load(fd))
+        fd.close()
+
+    def save_data(self):
+        fd = file(self.data_filename, "w")
+        json.dump(self.export_data(), fd)
+        fd.close()
+
+    def import_config(self, config):
+        self.feeds = self._import_list(config, "feeds", Feed.import_config)
 
-    def load_config(self, ini):
-        config = json.load(file(ini))
-        for i in config["feeds"]:
-            feed = Feed()
-            feed.load_config(i)
-            self.add_feed(feed)
+    def export_config(self):
+        return {
+            "feeds": [f.export_config() for f in self.feeds],
+        }
 
-    def load_stor(self, data):
-        self.stor = shelve.open(data)
+    def import_data(self, data):
+        self._import_data_for_list(data, "feeds", self.feeds)
 
-        if not "downloaded" in self.stor:
-            self.stor["downloaded"] = []
-        if not "seen" in self.stor:
-            self.stor["seen"] = []
-        self.downloaded = self.stor["downloaded"]
-        self.seen = self.stor["seen"]
+    def export_data(self):
+        return {
+            "feeds": [f.export_data() for f in self.feeds],
+        }
 
     def setup_env(self):
         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
         urllib2.install_opener(opener)
 
-    def add_feed(self, feed):
-        self.feeds.append(feed)
-
     def get_next_feed(self):
         """Returns a (delay, feed) tuple of time to delay before next feed, and
         next feed to run
@@ -56,13 +119,8 @@ class App(object):
         return (delay, next_feed)
 
     def _run_relavent(self, feed):
-        found = feed.find_relavant(self.seen)
-        self.seen = self.seen[-500:]
-        self.stor["seen"] = self.seen
+        found = feed.find_relavant()
         for i in found:
-            if i in self.downloaded:
-                continue
-            self.downloaded.append(i)
             # download file here instead of relying on transmission since we may
             # have cookies that transmission does not
             f = urllib2.urlopen(i)
@@ -74,9 +132,7 @@ class App(object):
             temp.close()
             if ret:
                 print "Error adding torrent"
-        self.downloaded = self.downloaded[-500:]
-        self.stor["downloaded"] = self.downloaded
-        self.stor.sync()
+        self.save_data()
 
     def main(self):
         try:
@@ -92,16 +148,20 @@ class App(object):
         except KeyboardInterrupt:
             pass
 
-class Feed(object):
+class Feed(ConfigParticipant):
     def __init__(self):
         self.url = None
         # in seconds
         self.poll_delay = 60 * 60
-        # compiled regular expression match object
-        self.match = None
+        # list of Matcher objects
+        self.matches = []
         # initialize to Epoch. Should always be integer number of seconds since
         # Epoch
         self.last_load = 0
+        # true when the default action (when no matcher matches) should be to
+        # count the item as relavant
+        self.default_white = False
+        self.seen = []
 
     def _load_feed(self):
         self.last_load = int(time.time())
@@ -110,41 +170,94 @@ class Feed(object):
         f.close()
         return d
 
-    def find_relavant(self, seen):
+    def find_relavant(self):
         d = self._load_feed()
         found = []
         print "New RSS Items:"
         for i in d['items']:
-            if i.link in seen:
+            if i.link in self.seen:
                 continue
-            seen.append(i.link)
+            self.seen.append(i.link)
             print " ", i.title
-            if not self.match.match(i.title):
-                continue
-            print " Matched"
-            found.append(i.link)
+            white = self.default_white
+            for m in self.matches:
+                if not m.matches(i):
+                    continue
+                print " Matched. White:", m.white
+                m.matched_count += 1
+                white = m.white
+            if white:
+                found.append(i.link)
+        self.seen = self.seen[-len(d['items']):]
         return found
 
-    def load_config(self, config):
-        url = str(config["url"])
-        poll_delay = int(config["poll_delay"])
-        matches = list(config["matches"])
-        for i, m in enumerate(matches):
-            try:
-                re.compile(m)
-            except re.error:
-                print "Invalid regular expression in matches list with index %d: %s" % (i, m)
-        matches = "|".join(matches)
-        matches = re.compile(matches)
-        self.url = url
-        if poll_delay:
-            self.poll_delay = poll_delay
-        self.match = matches
+    @staticmethod
+    def import_config(config):
+        f = Feed()
+        f.url = f._import_attribute(config, "url", str, f.url)
+        f.poll_delay = f._import_attribute(config, "poll_delay", int, f.poll_delay)
+        f.matches = f._import_list(config, "matches", Matcher.import_config)
+        f.default_white = f._import_attribute(config, "default_white", bool, f.default_white)
+        return f
+
+    def export_config(self):
+        return {
+            "url": self.url,
+            "poll_delay": self.poll_delay,
+            "matches": [m.export_config() for m in self.matches],
+            "default_white": self.default_white,
+        }
+
+    def import_data(self, data):
+        self.seen = self._import_list(data, "seen", str)
+        self._import_data_for_list(data, "matches", self.matches)
+
+    def export_data(self):
+        return {
+            "seen": self.seen,
+            "matches": [m.export_data() for m in self.matches],
+        }
+
+class Matcher(ConfigParticipant):
+    def __init__(self):
+        self.str = None
+        self.re = None
+        # white marks item to be downloaded, black means don't download
+        # similar to white and black lists
+        self.white = True
+        self.matched_count = 0
+
+    @staticmethod
+    def import_config(config):
+        m = Matcher()
+        m.white = m._import_attribute(config, "white", bool, m.white)
+        m.str = m._import_attribute(config, "value", str, m.str)
+        m.matched_count = m._import_attribute(config, "matched_count", int, m.matched_count)
+        m.re = re.compile(m.str)
+        return m
+
+    def export_config(self):
+        return {
+            "white": self.white,
+            "value": self.str,
+            "matched_count": self.matched_count,
+        }
+
+    def import_data(self, data):
+        self.matched_count = self._import_attribute(data, "matched_count", int, self.matched_count)
+
+    def export_data(self):
+        return {
+            "matched_count": self.matched_count,
+        }
+
+    def matches(self, item):
+        return self.re.match(item.title)
 
 if __name__ == "__main__":
     app = App()
     app.load_config('.feed-transmission.json')
-    app.load_stor(".feed-transmission.data")
+    app.load_data(".feed-transmission.data")
     app.setup_env()
     app.main()
 
-- 
cgit v1.2.3-54-g00ecf