23 import sys, zipfile, xml.dom.minidom
 
   24 from namespaces 
import nsdict
 
   25 from elementtypes 
import *
 
   35 ] + [ nsdict[item[0]]+
":"+item[1] 
for item 
in empty_elements]
 
   37 INLINE_TAGS = [ nsdict[item[0]]+
":"+item[1] 
for item 
in inline_elements]
 
   57         elif value == 
"normal":
 
   63         elif value == 
"normal":
 
   70         if value 
and value != 
"none":
 
   74         if value 
and value != 
"none":
 
   78         if value 
is None or value == 
'':
 
   80         posisize = value.split(
' ')
 
   82         if textpos.find(
'%') == -1:
 
   86             elif textpos == 
"super":
 
   90             itextpos = int(textpos[:textpos.find(
'%')])
 
  100         return "[italic=%s, bold=i%s, fixed=%s]" % (str(self.
italic),
 
  131         return "[bq=%s, h=%d, code=%s]" % (str(self.
blockquote),
 
  170         for tag 
in IGNORED_TAGS:
 
  173         for tag 
in INLINE_TAGS:
 
  186         for fontFace 
in fontDecl.getElementsByTagName(
"style:font-face"):
 
  187             if fontFace.getAttribute(
"style:font-pitch") == 
"fixed":
 
  188                 self.fixedFonts.append(fontFace.getAttribute(
"style:name"))
 
  199             parentProp = self.textStyles.get(parent, 
None)
 
  201                 textProp = parentProp
 
  203         textPropEl = style.getElementsByTagName(
"style:text-properties")
 
  204         if not textPropEl: 
return textProps
 
  206         textPropEl = textPropEl[0]
 
  208         textProps.setItalic(textPropEl.getAttribute(
"fo:font-style"))
 
  209         textProps.setBold(textPropEl.getAttribute(
"fo:font-weight"))
 
  210         textProps.setUnderlined(textPropEl.getAttribute(
"style:text-underline-style"))
 
  211         textProps.setStrikethrough(textPropEl.getAttribute(
"style:text-line-through-style"))
 
  212         textProps.setPosition(textPropEl.getAttribute(
"style:text-position"))
 
  214         if textPropEl.getAttribute(
"style:font-name") 
in self.
fixedFonts:
 
  215             textProps.setFixed(
True)
 
  225         name = style.getAttribute(
"style:name")
 
  227         if name.startswith(
"Heading_20_"):
 
  231                 paraProps.setHeading(level)
 
  236             paraProps.setTitle(
True)
 
  238         paraPropEl = style.getElementsByTagName(
"style:paragraph-properties")
 
  240             paraPropEl = paraPropEl[0]
 
  241             leftMargin = paraPropEl.getAttribute(
"fo:margin-left")
 
  244                     leftMargin = float(leftMargin[:-2])
 
  245                     if leftMargin > 0.01:
 
  246                         paraProps.setIndented(
True)
 
  252             paraProps.setCode(
True)
 
  262         for style 
in styleElements:
 
  264             name = style.getAttribute(
"style:name")
 
  266             if name == 
"Standard": 
continue 
  268             family = style.getAttribute(
"style:family")
 
  269             parent = style.getAttribute(
"style:parent-style-name")
 
  274             elif family == 
"paragraph":
 
  281         for style 
in listStyleElements:
 
  282             name = style.getAttribute(
"style:name")
 
  285             if style.hasChildNodes():
 
  286                 subitems = [el 
for el 
in style.childNodes
 
  287                      if el.nodeType == xml.dom.Node.ELEMENT_NODE
 
  288                      and el.tagName == 
"text:list-level-style-number"]
 
  289                 if len(subitems) > 0:
 
  290                     prop.setOrdered(
True)
 
  299         zip = zipfile.ZipFile(filepath)
 
  301         styles_doc = xml.dom.minidom.parseString(zip.read(
"styles.xml"))
 
  302         fontfacedecls = styles_doc.getElementsByTagName(
"office:font-face-decls")
 
  305         self.
processStyles(styles_doc.getElementsByTagName(
"style:style"))
 
  308         self.
content = xml.dom.minidom.parseString(zip.read(
"content.xml"))
 
  309         fontfacedecls = self.content.getElementsByTagName(
"office:font-face-decls")
 
  313         self.
processStyles(self.content.getElementsByTagName(
"style:style"))
 
  321         lines = text.split(
"\n")
 
  323         numLines = len(lines)
 
  324         for i 
in range(numLines):
 
  326             if (lines[i].strip() 
or i == numLines-1  
or i == 0 
or 
  327                 not ( lines[i-1].startswith(
"    ")
 
  328                       and lines[i+1].startswith(
"    ") ) ):
 
  329                 buffer.append(
"\n" + lines[i])
 
  331         return ''.join(buffer)
 
  342         link = node.getAttribute(
"xlink:href")
 
  343         if link 
and link[:2] == 
'./': 
 
  345         if link 
and link[:9] == 
'Pictures/':
 
  347         return "[[Image(%s)]]\n" % link
 
  351         link = node.getAttribute(
"xlink:href")
 
  352         if link.strip() == text.strip():
 
  353             return "[%s] " % link.strip()
 
  355             return "[%s %s] " % (link.strip(), text.strip())
 
  362         cite = (node.getElementsByTagName(
"text:note-citation")[0]
 
  363                     .childNodes[0].nodeValue)
 
  364         body = (node.getElementsByTagName(
"text:note-body")[0]
 
  371             num = int(node.getAttribute(
"text:c"))
 
  385         styleName = node.getAttribute(
"text:style-name")
 
  386         style = self.textStyles.get(styleName, 
TextProps())
 
  389             return "`" + text + 
"`" 
  399             if style.strikethrough:
 
  401             if style.superscript:
 
  407         return "%s%s%s" % (
''.join(mark), text, 
''.join(revmark))
 
  415         styleName = listElement.getAttribute(
"text:style-name")
 
  419         for item 
in listElement.childNodes:
 
  420             buffer.append(
" "*indent)
 
  424                 number = 
" " + number + 
". " 
  425                 buffer.append(
" 1. ")
 
  428             subitems = [el 
for el 
in item.childNodes
 
  429                           if el.tagName 
in [
"text:p", 
"text:h", 
"text:list"]]
 
  430             for subitem 
in subitems:
 
  431                 if subitem.tagName == 
"text:list":
 
  440         return ''.join(buffer)
 
  450         for item 
in tableElement.childNodes:
 
  452             if item.tagName == 
"table:table-header-rows":
 
  454             if item.tagName == 
"table:table-row":
 
  455                 buffer.append(
"\n||")
 
  456                 for cell 
in item.childNodes:
 
  460         return ''.join(buffer)
 
  468         body = self.content.getElementsByTagName(
"office:body")[0]
 
  469         text = body.childNodes[0]
 
  473         paragraphs = [el 
for el 
in text.childNodes
 
  474                       if el.tagName 
in [
"draw:page", 
"text:p", 
"text:h",
"text:section",
 
  475                                         "text:list", 
"table:table"]]
 
  477         for paragraph 
in paragraphs:
 
  478             if paragraph.tagName == 
"text:list":
 
  480             elif paragraph.tagName == 
"text:section":
 
  482             elif paragraph.tagName == 
"table:table":
 
  491             buffer.append(
"----")
 
  493                 buffer.append(
"%s: %s" % (cite, body))
 
  504         for node 
in element.childNodes:
 
  506             if node.nodeType == xml.dom.Node.TEXT_NODE:
 
  507                 buffer.append(node.nodeValue)
 
  509             elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
 
  512                 if tag 
in (
"draw:text-box", 
"draw:frame"):
 
  515                 elif tag 
in (
"text:p", 
"text:h"):
 
  519                 elif tag == 
"text:list":
 
  522                     method = self.elements.get(tag)
 
  524                         buffer.append(method(node))
 
  526                         buffer.append(
" {" + tag + 
"} ")
 
  528         return ''.join(buffer)
 
  534         style_name = paragraph.getAttribute(
"text:style-name")
 
  535         paraProps = self.paragraphStyles.get(style_name, dummyParaProps)
 
  538         if paraProps 
and not paraProps.code:
 
  541         if paragraph.tagName == 
"text:p" and self.
lastsegment == 
"text:p":
 
  548             return "= " + text + 
" =\n" 
  550         outlinelevel = paragraph.getAttribute(
"text:outline-level")
 
  553             level = int(outlinelevel)
 
  557                 return "=" * level + 
" " + text + 
" " + 
"=" * level + 
"\n" 
  560             return "{{{\n" + text + 
"\n}}}\n" 
  562         if paraProps.indented:
 
  563             return self.
wrapParagraph(text, indent = indent, blockquote = 
True)
 
  578         return ''.join(buffer) + text
 
  580         for token 
in text.split():
 
  582             if counter > LIMIT - indent:
 
  583                 buffer.append(
"\n" + 
" "*indent)
 
  588             buffer.append(token + 
" ")
 
  589             counter += len(token)
 
  591         return ''.join(buffer)
 
Holds properties for a text style. 
Holds properties of a paragraph style. 
def compressCodeBlocks
Removes extra blank lines from code blocks. 
def tableToString
MoinMoin uses || to delimit table cells. 
def extractParagraphProperties
Extracts paragraph properties from a style element. 
def extractTextProperties
Extracts text properties from a style element. 
def toString
Converts the document to a string. 
def processStyles
Runs through "style" elements extracting necessary information. 
def processFontDeclarations
Extracts necessary font information from a font-declaration element. 
Holds properties for a list style. 
def load
Loads an ODT file.