import rssparser, rfc822, sys, os, re, operator, cPickle, xml.utils.iso8601, time, RSS
from bloginfo import FOAF
from RSS import ns

class Aggregator:
    """Merge the items of several RSS feeds into one newest-first list.

    Attributes:
        old_datas: maps a feed URI to the result of the last
            rssparser.parse() call for that feed; its 'etag' and
            'modified' entries, when present, are sent with the next
            request.
        item_hashes: maps the item_hash() of every stored item to 1
            (a dict used as a set, for de-duplication).
        blogs: the collected item dicts, ordered by descending
            'timestamp'.
    """

    def __init__(self):
        self.old_datas = {}
        self.item_hashes = {}
        self.blogs = []

    def top(self, count):
        """Return a new list holding the first `count` (newest) items."""
        return self.blogs[:count]

    def aggregate(self, foaf):
        """Fetch the feed of every blog in `foaf` and store unseen items.

        `foaf` must provide blogs() and feed(blog); every blog and every
        feed must have a `uri` attribute.  Each item is tagged with a
        'timestamp' and a 'from' (the blog URI) entry before it is hashed.

        Returns True when at least one new item was added, else False.
        """
        new = False
        for blog in foaf.blogs():
            print("Checking %s" % (blog,))
            rss = str(foaf.feed(blog).uri)
            etag, modified = self._cache_validators(rss)
            data = rssparser.parse(rss, etag=etag, modified=modified,
                                   agent='Chumpologica 0.1')
            self.old_datas[rss] = data

            # Items are handled in reverse feed order -- presumably feeds
            # list their newest entry first, so this processes oldest
            # first (TODO confirm against rssparser).
            data['items'].reverse()
            for item in data['items']:
                timestamp = self._timestamp_of(item)
                item['timestamp'] = timestamp
                item['from'] = str(blog.uri)
                digest = self.item_hash(item)
                if digest in self.item_hashes:
                    continue
                # An entry without a link must not abort the whole run.
                print("New: %s" % (item.get('link', '(no link)'),))
                new = True
                self.blogs.insert(self.find_place_for(timestamp), item)
                self.item_hashes[digest] = 1
        return new

    def _cache_validators(self, rss):
        """Return the (etag, modified) pair remembered for feed `rss`.

        Either value is None when nothing was remembered for it.
        """
        etag = None
        modified = None
        if rss in self.old_datas:
            old_data = self.old_datas[rss]
            if 'etag' in old_data:
                etag = old_data['etag']
            if 'modified' in old_data:
                modified = old_data['modified']
        return etag, modified

    def _timestamp_of(self, item):
        """Return the time of `item` as seconds since the epoch.

        item['date'] is parsed as ISO 8601 first and as an RFC 822 date
        second.  The current time is used when the item has no 'date' or
        when neither parser accepts it.
        """
        if 'date' in item:
            try:
                return xml.utils.iso8601.parse(item['date'])
            except ValueError:
                parsed = rfc822.parsedate_tz(item['date'])
                if parsed is not None:
                    return rfc822.mktime_tz(parsed)
        return time.time()

    def find_place_for(self, timestamp):
        """Return the index at which an item with `timestamp` belongs.

        That is the position of the first stored item that is strictly
        older, or len(self.blogs) when there is none, which keeps
        self.blogs sorted newest first.
        """
        for index, entry in enumerate(self.blogs):
            if entry['timestamp'] < timestamp:
                return index
        return len(self.blogs)

    def item_hash(self, item):
        """Return an integer identifying the content of `item`.

        The result is the product of (hash(key) + 10) * (hash(value) + 10)
        over every entry except 'timestamp', so two fetches of the same
        entry compare equal even when their timestamps differ.

        NOTE: the formula is kept as it is because the hashes are stored
        in self.item_hashes, which is saved between runs; changing it
        would make every stored item look new again.  An item with no
        entry besides 'timestamp' hashes to 1.
        """
        product = 1
        for key, value in item.items():
            if key == 'timestamp':
                continue
            product *= (hash(key) + 10) * (self._value_hash(value) + 10)
        return product

    def _value_hash(self, value):
        """Return hash(value), or the hash of its repr if unhashable."""
        try:
            return hash(value)
        except TypeError:
            # Lists and dicts cannot be hashed; their repr can.
            return hash(repr(value))

def html2text(text):
    """Return `text` with every ``<...>`` tag replaced by one space."""
    tag_pattern = r'<[^>]+>'
    return re.sub(tag_pattern, " ", text)

if __name__ == '__main__':
    # Usage: <script> FOAF_FILE
    #
    # Loads the aggregator state pickled in ./blogs (a fresh Aggregator is
    # used when that file cannot be opened), polls every feed named in the
    # FOAF file and, when new items arrived, rewrites chumpologica.rdf with
    # the 30 newest items.  The state is saved back to ./blogs at the end.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s FOAF_FILE\n" % sys.argv[0])
        sys.exit(2)
    foaf = FOAF("file:"+sys.argv[1])

    # NOTE(review): unpickling can execute arbitrary code, so 'blogs' must
    # only ever be a file written by this script.
    try:
        state = open('blogs')
        try:
            ag = cPickle.load(state)
        finally:
            state.close()
    except IOError:
        ag = Aggregator()

    if ag.aggregate(foaf):
        channel = RSS.CollectionChannel(
                {(ns.rss10,'channel'):
                  {(ns.rss10,'link') : 'http://pants.heddley.com/logica/',
                   (ns.rss10,'title') : 'chumpologica',
                   (ns.rss10,'description') : 'the collected writings of the chumps',
                   (ns.dc,'date') : xml.utils.iso8601.tostring(time.time()),
                  }
                })
        # top() is newest first; the channel is filled oldest first.
        items = ag.top(30)
        items.reverse()
        for item in items:
            description = ""
            if 'description' in item:
                description = html2text(item['description'])
            source = item['from']
            channel.addItem({
                    # Title and link default to "" like the description,
                    # so an entry lacking one does not abort the output.
                    (ns.rss10,"title"): item.get('title', ""),
                    (ns.dc,"source"): str(foaf.property(source,foaf.TITLE)),
                    (ns.dc,"relation"): source,
                    (ns.dc,"creator"): str(foaf.property(foaf.owner(source),foaf.NICK)),
                    (ns.rss10,'link'): item.get('link', ""),
                    (ns.rss10,'description'): description,
                    (ns.dc,'date'): xml.utils.iso8601.tostring(item['timestamp']),
                    })
        # Render before opening, so a failure while serialising does not
        # leave a truncated output file behind.
        rendered = str(channel)
        output = open("chumpologica.rdf","w")
        try:
            output.write(rendered)
        finally:
            output.close()

    state = open('blogs','w')
    try:
        cPickle.dump(ag,state)
    finally:
        state.close()
