XML processing to CSV
From Apache OpenOffice Wiki
This script was published by Tommy in the OpenOffice forum section "Basic, Python, BeanShell, JavaScript".
This script converts XML data to CSV.
Original code
#-*- coding: utf-8 -*-
import os, sys, zipfile, xml.dom.minidom
##########################################################
# Script to export LibreOffice Auto Correct Entries
# into a flat file (e.g. to reuse some of them with autokey)
##########################################################
# ZIP archive in which LibreOffice stores its auto correct entries.
# The default below is a Windows location; adjust it for your own profile.
ACEfile = r'C:\Program Files\OpenOffice\User\LibreOffice 3\user\autocorr\acor_it-IT.dat'
ifname = 'DocumentList.xml'  # Name of the file inside the ZIP archive that contains auto correct entries
ofname = 'AutoCorrectEntries.csv'  # any desired output file name for the export
tagname = 'block-list:block'  # (as in DocumentList.xml)
schema = ['block-list:abbreviated-name', 'block-list:name']  # (as in DocumentList.xml)
default_encoding = 'UTF-8'  # (as in DocumentList.xml)
ofdelimiter = ";"  # any desired delimiter for export
##########################################################


def export_autocorrect_entries(archive_path, member_name, output_path,
                               tag=tagname, fields=schema,
                               delimiter=ofdelimiter,
                               fallback_encoding=default_encoding):
    """Export the entries of an XML member of a ZIP archive to a flat file.

    Every element named *tag* becomes one output line made of the values of
    the attributes listed in *fields*, joined with *delimiter*.  A missing
    attribute yields an empty field.  Values are written verbatim (no quoting
    or escaping), so pick a delimiter that does not occur in the entries.

    Args:
        archive_path: Path of the ZIP archive to read.
        member_name: Name of the XML file inside the archive.
        output_path: Path of the flat file to create (overwritten if present).
        tag: Qualified element name to export.
        fields: Qualified attribute names, in output column order.
        delimiter: String placed between fields on each line.
        fallback_encoding: Output encoding used when the XML document does
            not declare one.

    Returns:
        The number of lines written.

    Raises:
        OSError: If the archive cannot be read or the output cannot be written.
        zipfile.BadZipFile: If *archive_path* is not a valid ZIP archive.
        KeyError: If *member_name* is not present in the archive.
        xml.parsers.expat.ExpatError: If the member is not well-formed XML.
    """
    # Read and parse the input first, so a missing or broken archive does not
    # leave an empty output file behind.
    with zipfile.ZipFile(archive_path) as archive:
        with archive.open(member_name, "r") as member:
            doctree = xml.dom.minidom.parse(member)

    try:
        # Write the output in the encoding the XML document declares, if any.
        encoding = doctree.encoding or fallback_encoding
        rows = [
            [elem.getAttribute(fieldname) for fieldname in fields]
            for elem in doctree.getElementsByTagName(tag)
        ]
    finally:
        doctree.unlink()  # actually call it, so the DOM object is released

    # Open in text mode with the target encoding: the attribute values are
    # str, so encoding them by hand and appending "\n" would mix bytes and str.
    with open(output_path, "w", encoding=encoding) as out:
        for row in rows:
            out.write(delimiter.join(row) + "\n")
    return len(rows)


def main():
    """Run the export with the settings defined at the top of this script."""
    export_autocorrect_entries(
        ACEfile, ifname, ofname,
        tag=tagname, fields=schema,
        delimiter=ofdelimiter, fallback_encoding=default_encoding,
    )


if __name__ == "__main__":
    main()