summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anderson <ejona86@gmail.com>2010-08-21 13:59:42 -0500
committerEric Anderson <ejona86@gmail.com>2010-08-21 13:59:42 -0500
commitda0ce517795493bd348e5b28dfbd764bc2d9f484 (patch)
treeec961de05b165773bba432d679b0a55c583c42f3
parentd3d34b73a427973c450d705e9a6acf496c5e9384 (diff)
downloadfeed-transmission-da0ce517795493bd348e5b28dfbd764bc2d9f484.tar.gz
feed-transmission-da0ce517795493bd348e5b28dfbd764bc2d9f484.zip
Use JSON instead of shelve, implement ability to save configuration, and reorganization and other features
-rw-r--r--feed-transmission.example.json13
-rwxr-xr-xfeed-transmission.py225
2 files changed, 180 insertions, 58 deletions
diff --git a/feed-transmission.example.json b/feed-transmission.example.json
index d934246..7252575 100644
--- a/feed-transmission.example.json
+++ b/feed-transmission.example.json
@@ -2,8 +2,18 @@
"feeds": [
{
"matches": [
-        ".*Thread: Some thread",
-        ".*Thread: Some other thread"
+      {
+        "type": "re",
+        "value": ".*Thread: Some thread",
+        "white": true,
+        "matched_count": 0
+      },
+      {
+        "type": "re",
+        "value": ".*Thread: Some other thread",
+        "white": true,
+        "matched_count": 0
+      }
],
"name": "example.com",
"poll_delay": 3600,
diff --git a/feed-transmission.py b/feed-transmission.py
index 521fecf..167dd6b 100755
--- a/feed-transmission.py
+++ b/feed-transmission.py
@@ -6,39 +6,102 @@ import time
import subprocess
import urllib2
import tempfile
-import shelve
import json
-class App(object):
+class ConfigParticipant(object):
+ def _import_attribute(self, config, key, cast, default):
+ if key in config:
+ try:
+ v = cast(config[key])
+ except StandardError as e:
+ print "Failed to cast value of %s '%s' to %s: %s" % (key, config[key], cast, e)
+ return default
+ return v
+ else:
+ return default
+
+ def _import_list(self, config, key, cast):
+ va = self._import_attribute(config, key, list, [])
+ va_objs = []
+ for v in va:
+ try:
+ v = cast(v)
+ except StandardError as e:
+                print "Failed to cast value of %s '%s' to %s: %s" % (key, v, cast, e)
+ continue
+ va_objs.append(v)
+ return va_objs
+
+ def _import_data_for_list(self, data, key, obj_list):
+ if key not in data:
+ return
+ try:
+ l = list(data[key])
+ except TypeError:
+ print "Failed to load list for key %s" % key
+ return
+ for i, v in enumerate(l):
+ if i < len(obj_list):
+ obj_list[i].import_data(v)
+ else:
+ print "Too many elements in data - data may be out-of-date"
+
+class App(ConfigParticipant):
def __init__(self):
- self.seen = []
- self.downloaded = []
self.feeds = []
+ self.config_filename = None
+ self.data_filename = None
+
+ def load_config(self, json_filename):
+ self.config_filename = json_filename
+ try:
+ fd = file(self.config_filename)
+ except IOError:
+ print "No config file"
+ return
+ self.import_config(json.load(fd))
+ fd.close()
+
+ def save_config(self):
+ fd = file(self.config_filename, "w")
+        json.dump(self.export_config(), fd)
+ fd.close()
+
+ def load_data(self, json_filename):
+ self.data_filename = json_filename
+ try:
+ fd = file(json_filename)
+ except IOError:
+ print "No data file"
+ return
+ self.import_data(json.load(fd))
+ fd.close()
+
+ def save_data(self):
+ fd = file(self.data_filename, "w")
+ json.dump(self.export_data(), fd)
+ fd.close()
+
+ def import_config(self, config):
+ self.feeds = self._import_list(config, "feeds", Feed.import_config)
- def load_config(self, ini):
- config = json.load(file(ini))
- for i in config["feeds"]:
- feed = Feed()
- feed.load_config(i)
- self.add_feed(feed)
+ def export_config(self):
+ return {
+ "feeds": [f.export_config() for f in self.feeds],
+ }
- def load_stor(self, data):
- self.stor = shelve.open(data)
+ def import_data(self, data):
+ self._import_data_for_list(data, "feeds", self.feeds)
- if not "downloaded" in self.stor:
- self.stor["downloaded"] = []
- if not "seen" in self.stor:
- self.stor["seen"] = []
- self.downloaded = self.stor["downloaded"]
- self.seen = self.stor["seen"]
+ def export_data(self):
+ return {
+ "feeds": [f.export_data() for f in self.feeds],
+ }
def setup_env(self):
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
urllib2.install_opener(opener)
- def add_feed(self, feed):
- self.feeds.append(feed)
-
def get_next_feed(self):
"""Returns a (delay, feed) tuple of time to delay before next feed, and
next feed to run
@@ -56,13 +119,8 @@ class App(object):
return (delay, next_feed)
def _run_relavent(self, feed):
- found = feed.find_relavant(self.seen)
- self.seen = self.seen[-500:]
- self.stor["seen"] = self.seen
+ found = feed.find_relavant()
for i in found:
- if i in self.downloaded:
- continue
- self.downloaded.append(i)
# download file here instead of relying on transmission since we may
# have cookies that transmission does not
f = urllib2.urlopen(i)
@@ -74,9 +132,7 @@ class App(object):
temp.close()
if ret:
print "Error adding torrent"
- self.downloaded = self.downloaded[-500:]
- self.stor["downloaded"] = self.downloaded
- self.stor.sync()
+ self.save_data()
def main(self):
try:
@@ -92,16 +148,20 @@ class App(object):
except KeyboardInterrupt:
pass
-class Feed(object):
+class Feed(ConfigParticipant):
def __init__(self):
self.url = None
# in seconds
self.poll_delay = 60 * 60
- # compiled regular expression match object
- self.match = None
+ # list of Matcher objects
+ self.matches = None
# initialize to Epoch. Should always be integer number of seconds since
# Epoch
self.last_load = 0
+ # true when the default action (when no matcher matches) should be to
+ # count the item as relavant
+ self.default_white = False
+ self.seen = []
def _load_feed(self):
self.last_load = int(time.time())
@@ -110,41 +170,94 @@ class Feed(object):
f.close()
return d
- def find_relavant(self, seen):
+ def find_relavant(self):
d = self._load_feed()
found = []
print "New RSS Items:"
for i in d['items']:
- if i.link in seen:
+ if i.link in self.seen:
continue
- seen.append(i.link)
+ self.seen.append(i.link)
print " ", i.title
- if not self.match.match(i.title):
- continue
- print " Matched"
- found.append(i.link)
+ white = self.default_white
+ for m in self.matches:
+ if not m.matches(i):
+ continue
+ print " Matched. White:", m.white
+ m.matched_count += 1
+ white = m.white
+ if white:
+ found.append(i.link)
+ self.seen = self.seen[-len(d['items']):]
return found
- def load_config(self, config):
- url = str(config["url"])
- poll_delay = int(config["poll_delay"])
- matches = list(config["matches"])
- for i, m in enumerate(matches):
- try:
- re.compile(m)
- except re.error:
- print "Invalid regular expression in matches list with index %d: %s" % (i, m)
- matches = "|".join(matches)
- matches = re.compile(matches)
- self.url = url
- if poll_delay:
- self.poll_delay = poll_delay
- self.match = matches
+ @staticmethod
+ def import_config(config):
+ f = Feed()
+ f.url = f._import_attribute(config, "url", str, f.url)
+ f.poll_delay = f._import_attribute(config, "poll_delay", int, f.poll_delay)
+ f.matches = f._import_list(config, "matches", Matcher.import_config)
+ f.default_white = f._import_attribute(config, "default_white", bool, f.default_white)
+ return f
+
+ def export_config(self):
+ return {
+ "url": self.url,
+ "poll_delay": self.poll_delay,
+ "matches": [m.export_config() for m in self.matches],
+            "default_white": self.default_white,
+ }
+
+ def import_data(self, data):
+ self.seen = self._import_list(data, "seen", str)
+ self._import_data_for_list(data, "matches", self.matches)
+
+ def export_data(self):
+ return {
+ "seen": self.seen,
+ "matches": [m.export_data() for m in self.matches],
+ }
+
+class Matcher(ConfigParticipant):
+ def __init__(self):
+ self.str = None
+ self.re = None
+ # white marks item to be downloaded, black means don't download
+ # similar to white and black lists
+ self.white = True
+ self.matched_count = 0
+
+ @staticmethod
+ def import_config(config):
+ m = Matcher()
+ m.white = m._import_attribute(config, "white", bool, m.white)
+ m.str = m._import_attribute(config, "value", str, m.str)
+ m.matched_count = m._import_attribute(config, "matched_count", int, m.matched_count)
+ m.re = re.compile(m.str)
+ return m
+
+ def export_config(self):
+ return {
+ "white": self.white,
+            "value": self.str,
+ "matched_count": self.matched_count,
+ }
+
+ def import_data(self, data):
+ self.matched_count = self._import_attribute(data, "matched_count", int, self.matched_count)
+
+ def export_data(self):
+ return {
+ "matched_count": self.matched_count,
+ }
+
+ def matches(self, item):
+ return self.re.match(item.title)
if __name__ == "__main__":
app = App()
app.load_config('.feed-transmission.json')
- app.load_stor(".feed-transmission.data")
+ app.load_data(".feed-transmission.data")
app.setup_env()
app.main()