# sitemap.py -*- coding: iso-8859-1 -*-
#
# Copyright © 2000, 2004 Carey Evans.
#
# Permission to use, copy, modify, and distribute this software and
# its documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and
# that both that copyright notice and this permission notice appear in
# supporting documentation.
#
# $Id: sitemap.py 17 2004-01-18 01:33:03Z carey $

"""sitemap.py

Parse a sitemap XML file, and return an object encapsulating the data.

The recognised elements are <sitemap> (the root), <dir path="...">,
<index file="...">, <page file="..." [type="..."]> and
<data file="..." [type="..."]>.  The text content of an index, page or
data element becomes that node's title.
"""

import os
import xml.sax


def parse(filename):
    """Parse the sitemap XML file *filename* and return its top SiteDir.

    The filename is handed to xml.sax.parse() as-is (it accepts plain
    filenames), so relative paths and paths containing characters that
    are special in URLs work as well as absolute ones.

    Returns None if the document contains no <sitemap> element.
    """
    handler = SiteMapHandler()
    xml.sax.parse(filename, handler)
    return handler.topdir


class SiteMapHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds a SiteDir tree from a sitemap.

    After parsing, the root directory is available as ``topdir``.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        # Initialise all state up front so that an unexpected document
        # (e.g. a root element other than <sitemap>) does not fail on a
        # missing attribute.
        self.text = ''        # character data seen since the last start tag
        self.topdir = None    # root SiteDir, set when <sitemap> opens
        self.curdir = None    # SiteDir currently being populated
        self.curnode = None   # page/index/data node awaiting its title

    def startElement(self, name, attrs):
        """Create the node for element *name* and reset the text buffer."""
        self.text = ''
        if name == 'sitemap':
            self.topdir = self.curdir = SiteDir()
            self.curnode = None
        elif name == 'dir':
            self.curdir = self.curdir.add_dir(attrs['path'])
        elif name == 'index':
            self.curnode = self.curdir.add_index(attrs['file'])
        elif name == 'page':
            # attrs.get() returns None when the attribute is absent; this
            # replaces has_key(), which SAX attributes lack on Python 3.
            self.curnode = self.curdir.add_page(attrs['file'],
                                                attrs.get('type'))
        elif name == 'data':
            self.curnode = self.curdir.add_data(attrs['file'],
                                                attrs.get('type'))

    def endElement(self, name):
        """Close element *name*, assigning any collected text as a title."""
        title = self.text.strip()
        if name == 'sitemap':
            self.curdir = None
        elif name == 'dir':
            self.curdir = self.curdir.parent
        elif self.curnode is not None and title:
            self.curnode.set_title(title)
        # The node is finished either way; never let a later element's
        # text be attributed to it.
        self.curnode = None

    def characters(self, s):
        """Accumulate character data for the current element."""
        self.text += s


class SiteFile:
    """Base class for every entry in the site tree.

    ``path`` is the list of path components from the root of the site
    down to this entry; the root itself has an empty list.
    """

    def __init__(self, parent=None, path=None):
        self.title = None
        self.parent = parent
        if self.parent:
            self.path = parent.path + [path]
        else:
            self.path = []

    def set_title(self, title):
        """Record *title* as this entry's title."""
        self.title = title

    def filename(self, basedir):
        """Return this entry's path joined on to *basedir*."""
        return os.path.join(basedir, *self.path)

    def srcfile(self, basedir):
        """Return the source file name under *basedir*."""
        return self.filename(basedir)

    def dstfile(self, basedir):
        """Return the destination file name under *basedir*."""
        return self.filename(basedir)


class SiteDir(SiteFile):
    """A directory holding sub-directories, pages and data files."""

    def __init__(self, parent=None, path=None):
        if parent:
            SiteFile.__init__(self, parent, path)
        else:
            SiteFile.__init__(self)
        self.dirs = []
        self.pages = []
        self.datafiles = []

    def all_dirs(self):
        """Return this directory and all its descendants, depth first."""
        found = [self]
        for subdir in self.dirs:
            found.extend(subdir.all_dirs())
        return found

    def rootpath(self):
        """Return the relative path from this directory to the site root."""
        return '../' * len(self.path)

    def navbar(self, count=0, skip=False):
        """Return a list of (title, relative path) pairs, root first.

        *count* is the number of levels between this directory and the
        directory the navigation bar is being built for.  If *skip* is
        true this directory's own entry is left out (its ancestors are
        still included).

        A directory without a title is labelled with the last component
        of its path.  Raises IndexError for a root directory that has no
        title, since its path is empty.
        """
        if self.parent is None:
            nb = []
        else:
            nb = self.parent.navbar(count + 1)
        if skip:
            return nb

        if self.title:
            title = self.title
        else:
            title = self.path[-1]
        if count == 0:
            relpath = './'
        else:
            relpath = '../' * count
        nb.append((title, relpath))
        return nb

    def add_dir(self, path):
        """Create, register and return a sub-directory named *path*."""
        newdir = SiteDir(self, path)
        self.dirs.append(newdir)
        return newdir

    def add_index(self, path):
        """Create, register and return the index page *path*."""
        newpage = SiteIndex(self, path)
        self.pages.append(newpage)
        return newpage

    def add_page(self, path, type):
        """Create, register and return the page *path* of the given type.

        *type* is passed to SitePage unchanged, so None overrides the
        constructor's default.
        """
        newpage = SitePage(self, path, type)
        self.pages.append(newpage)
        return newpage

    def add_data(self, path, type):
        """Create, register and return the data file *path*.

        *type* is passed to SiteData unchanged, so None overrides the
        constructor's default.
        """
        newdata = SiteData(self, path, type)
        self.datafiles.append(newdata)
        return newdata


class SitePage(SiteFile):
    """A page whose destination file has an '.html' extension."""

    def __init__(self, parent, path, type='xml'):
        SiteFile.__init__(self, parent, path)
        self.type = type

    def dstfile(self, basedir):
        """Return the destination name: the source name with '.html'."""
        return os.path.splitext(self.filename(basedir))[0] + '.html'

    def rootpath(self):
        """Return the relative path from this page to the site root."""
        return self.parent.rootpath()

    def navbar(self):
        """Return the navigation bar of the containing directory."""
        return self.parent.navbar()

    def is_text(self):
        """Return true if this page's type is 'text'."""
        return self.type == 'text'


class SiteIndex(SitePage):
    """The index page of a directory; it shares its title with it."""

    def set_title(self, title):
        """Set the title of both this page and its directory."""
        self.title = title
        self.parent.title = title

    def navbar(self):
        """Return the directory's navigation bar without its own entry."""
        return self.parent.navbar(skip=True)


class SiteData(SiteFile):
    """A data file that is not a page."""

    # NOTE: SiteDir.add_data() always passes a type explicitly, and the
    # sitemap handler passes None when the attribute is missing, so such
    # files get binary modes from openmode().
    def __init__(self, parent, path, type='text'):
        SiteFile.__init__(self, parent, path)
        self.type = type

    def openmode(self, mode):
        """Return *mode*, with 'b' appended unless the type is 'text'."""
        if self.type == 'text':
            return mode
        else:
            return mode + 'b'