from __future__ import generators
import time
import sys
from zipfile import ZipFile
from xml.dom.ext.reader import PyExpat
from xml.xpath import Evaluate
from xml.xpath.Context import Context
import re

# XML namespace URIs that the XPath queries in this module bind to the
# 'draw' and 'text' prefixes respectively.
drawing_ns = "http://openoffice.org/2000/drawing"
text_ns = "http://openoffice.org/2000/text"

class OoDrawingModel:
    """Graph of labelled shapes and connectors read from a drawing file.

    The file is opened as a zip archive and its ``content.xml`` member is
    parsed once, at construction time.  Afterwards:

    * ``self.nodes`` maps each ellipse/rectangle ``draw:id`` to an OoNode;
    * ``self.arcs`` lists an OoArc for every connector that has child nodes.
    """

    def __init__(self,filename):
        """Load the drawing stored at *filename*."""
        self.read(filename)

    def read(self,filename):
        """Parse ``content.xml`` from *filename* and populate nodes and arcs."""
        archive = ZipFile(filename)
        try:
            content = archive.read("content.xml")
        finally:
            # Release the file handle even when the member is missing.
            archive.close()
        reader = PyExpat.Reader()
        dom = reader.fromString(content)
        self.read_nodes(dom)
        self.read_arcs(dom)

    def _label_of(self,element):
        """Return the label text found under *element*, or "" if none.

        Text nodes directly inside a paragraph, or inside a hyperlink within
        a paragraph, are considered; when several match, the last one wins.
        """
        context = Context(element, processorNss={
            'draw' : drawing_ns,
            'text' : text_ns })
        label = ""
        for text in Evaluate('text:p/text()|text:p/text:a/text()',context=context):
            label = text.data
        return label

    def read_nodes(self,dom):
        """Rebuild ``self.nodes`` from every ellipse and rectangle in *dom*."""
        self.nodes = {}
        context = Context(dom, processorNss={
            'draw' : drawing_ns })
        for shape in Evaluate('//draw:ellipse|//draw:rect', context=context):
            shape_id = shape.getAttributeNS(drawing_ns,"id")
            self.nodes[shape_id] = OoNode(shape_id,self._label_of(shape))

    def read_arcs(self,dom):
        """Rebuild ``self.arcs`` from every connector in *dom* that has children."""
        self.arcs = []
        context = Context(dom, processorNss={
            'draw' : drawing_ns })
        for connector in Evaluate('//draw:connector', context=context):
            if not connector.hasChildNodes():
                continue
            from_id = connector.getAttributeNS(drawing_ns,"start-shape")
            to_id = connector.getAttributeNS(drawing_ns,"end-shape")
            # The label is computed afresh for each connector, so one without
            # text gets "" instead of the previous connector's label.
            self.arcs.append(OoArc(from_id,to_id,self._label_of(connector)))

    def statements(self):
        """Yield one ``(subject, predicate, object)`` tuple of strings per arc.

        Arcs that cannot form a complete statement are reported on stderr
        and skipped: those whose ``as_ntriple_component()`` is None, and
        those whose start or end shape id is not a key of ``self.nodes``.
        """
        for arc in self.arcs:
            # Resolved before the node lookups so that it is always bound
            # when the "missing node" message below is built.
            predicate = arc.as_ntriple_component()
            if predicate is None:
                sys.stderr.write("Skipping arc from "+arc.from_id+" to "+arc.to_id+": its label is not a URI\n")
                continue
            try:
                subject = self.nodes[arc.from_id].as_ntriple_component()
                target = self.nodes[arc.to_id].as_ntriple_component()
            except KeyError:
                # Diagnostics go to stderr so they cannot be mistaken for
                # statements by whoever consumes the generated output.
                sys.stderr.write("Couldn't find a node for arc "+predicate+" from "+arc.from_id+" to "+arc.to_id+"\n")
                continue
            yield (subject,predicate,target)

class OoArc:
    """A connector between two shapes, identified by their shape ids."""

    def __init__(self,from_id,to_id,label):
        """Remember the end-point ids and the connector's label text."""
        self.label = label
        self.from_id, self.to_id = from_id, to_id

    def as_ntriple_component(self):
        """Return the label as ``<uri>`` if it looks like ``scheme://...``.

        Any other label yields None.
        """
        found = re.match(r'^([a-z]+:\/\/.*)$', self.label)
        if found is None:
            return None
        return "<"+found.group(1)+">"

class OoNode:
    """A shape from the drawing, held as a ready-to-print statement term.

    The constructor rewrites the label once, using the first rule that fits:

    * an empty label becomes ``_:a`` + id + the current time in whole seconds;
    * a label wrapped in curly quotes (U+201C ... U+201D) becomes a
      double-quoted literal with its content escaped;
    * a label of the form ``scheme://...`` is wrapped in angle brackets;
    * any other label is kept unchanged.
    """

    def __init__(self,id,label):
        """Store *id* and the term derived from *label* (see class docstring)."""
        self.id = id
        self.label = label
        literal_re = re.compile(u'^\u201c(.*)\u201d$')
        uri_re = re.compile(r'^([a-z]+:\/\/.*)$')
        literal = literal_re.match(self.label)
        uri = uri_re.match(self.label)
        # The three forms are mutually exclusive: a generated "_:a..." name
        # and a quoted literal can never match the URI pattern, so an elif
        # chain gives the same result as testing each rule in turn.
        if self.label == "":
            self.label = "_:a"+self.id+str(int(time.time()))
        elif literal:
            self.label = "\""+self.escape(literal.group(1))+"\""
        elif uri:
            self.label = "<"+uri.group(1)+">"

    def escape(self,text):
        """Return *text* escaped for use inside a double-quoted literal.

        Backslash, double quote, newline, carriage return and tab are
        replaced by their backslash escape sequences.
        """
        # Backslashes go first so the ones added below are not doubled again.
        text = text.replace("\\","\\\\")
        text = text.replace("\"","\\\"")
        text = text.replace("\n","\\n")
        text = text.replace("\r","\\r")
        text = text.replace("\t","\\t")
        return text

    def as_ntriple_component(self):
        """Return the term computed by the constructor."""
        return self.label

if __name__ == '__main__':
    # Command-line entry point: read the drawing named by the first argument
    # and write one "subject predicate object ." line per statement to stdout.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: "+sys.argv[0]+" <drawing-file>\n")
        sys.exit(2)
    # The model has its own name; the loop below uses `o` for the object term
    # and would otherwise rebind the model while it is being iterated.
    model = OoDrawingModel(sys.argv[1])
    for (s,p,o) in model.statements():
        if p is None:
            # An arc whose label is not a URI has no predicate term, so no
            # line can be written for it.
            continue
        sys.stdout.write(s+" "+p+" "+o+" .\n")