当前位置 博文首页 > python解析xml模块封装代码

    python解析xml模块封装代码

    作者:admin 时间:2021-06-23 17:48

    有如下的xml文件:

    复制代码 代码如下:

    <?xml version="1.0" encoding="utf-8" ?> 
    <root> 
    <childs> 
    <child name='first' >1</child> 
    <child value="2">2</child> 
    </childs> 
    </root>

    下面介绍python解析xml文件的几种方法,使用python模块实现。

    方式1,python模块实现自动遍历所有节点:

    复制代码 代码如下:

    #!/usr/bin/env python 
    # -*- coding: utf-8 -*- 
    from xml.sax.handler import ContentHandler 
    from xml.sax import parse
    class TestHandle(ContentHandler):
        """SAX handler that traces every parse event and collects text chunks.

        Each callback prints one line describing the event.  Every chunk of
        character data reported by the parser (including the whitespace
        between elements) is appended to the list supplied by the caller.
        """

        def __init__(self, inlist):
            """Remember *inlist*, the list that receives the character data."""
            # Initialise the base class so its own state (e.g. the document
            # locator slot) exists.  ContentHandler is an old-style class on
            # Python 2, hence the explicit call instead of super().
            ContentHandler.__init__(self)
            self.inlist = inlist

        def startElement(self, name, attrs):
            """Print the element name and the names of its attributes."""
            # One pre-formatted argument prints identically on Python 2 and 3.
            print('name: %s attrs: %s' % (name, attrs.keys()))

        def endElement(self, name):
            """Print the name of the element that was just closed."""
            print('endname %s' % name)

        def characters(self, chars):
            """Print a chunk of character data and store it in the list."""
            print('chars %s' % chars)
            self.inlist.append(chars)

                 
    if __name__ == '__main__':
        # Parse test.xml from the working directory, tracing each SAX event,
        # then show every text chunk the handler collected.
        lt = []
        parse('test.xml', TestHandle(lt))
        # Parenthesised single argument: same output on Python 2 and 3.
        print(lt)

    结果:
    [html] view plaincopy
    name: root attrs: [] 
    chars  

    name: childs attrs: [] 
    chars  

    name: child attrs: [u'name'] 
    chars 1 
    endname child 
    chars  

    name: child attrs: [u'value'] 
    chars 2 
    endname child 
    chars  

    endname childs 
    chars  

    endname root 
    [u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

    方式2,python模块实现获取根节点,按需查找指定节点:

    复制代码 代码如下:

    #!/usr/bin/env python   
    # -*- coding: utf-8 -*-   
    from xml.dom import minidom   
    # Sample error response used by the demo.  The literal has to begin
    # directly with the XML declaration: any character in front of "<?xml"
    # (the original had two stray apostrophes) makes the parser reject it.
    xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
    <hash>
        <request name='first'>/2/photos/square/type.xml</request>
        <error_code>21301</error_code>
        <error>auth faild!</error>
    </hash>
    '''
    def doxml(xmlstr, show_help=True):
        """Parse *xmlstr* with minidom and print a walk over the root's children.

        The whole document and the root element are printed first.  Then every
        direct child of the root is printed; for element children the value of
        the ``name`` attribute, the node name, the number of child nodes and
        the leading text are printed as well.

        xmlstr    -- XML document as a string.
        show_help -- when true (the default) the built-in help() is run on
                     every attribute of each element child, which produces a
                     very large amount of output.
        """
        dom = minidom.parseString(xmlstr)
        print('Dom:')
        print(dom.toxml())

        # documentElement is the root element even when a comment or DOCTYPE
        # comes first; dom.firstChild would return that leading node instead.
        root = dom.documentElement
        print('root:')
        print(root.toxml())

        for child in root.childNodes:
            print(child.toxml())
            # Only elements have attributes; text, comment and other node
            # types are shown above but not inspected any further.
            if child.nodeType != child.ELEMENT_NODE:
                continue
            print('child node attribute name: %s' % child.getAttribute('name'))
            print('child node name: %s' % child.nodeName)
            print('child node len: %s' % len(child.childNodes))
            # An element may be empty or may start with a nested element, so
            # only read .data when the first child really is text.
            first = child.firstChild
            if first is not None and first.nodeType in (first.TEXT_NODE,
                                                        first.CDATA_SECTION_NODE):
                data = first.data
            else:
                data = ''
            print('child data: %s' % data)
            print('=======================================')
            print('more help info to see:')
            if show_help:
                for med in dir(child):
                    # Document the attribute itself; passing the bare name
                    # string makes help() look for a topic of that name.
                    # help() prints on its own and returns None, so its
                    # result is not printed.
                    help(getattr(child, med, None))

                   
    # Run the demo on the sample document when executed as a script.
    if __name__ == '__main__':
        doxml(xmlstr)

    结果:
    [html] view plaincopy
    Dom: 
    <?xml version="1.0" ?><hash> 
        <request name="first">/2/photos/square/type.xml</request> 
        <error_code>21301</error_code> 
        <error>auth faild!</error> 
    </hash> 
    root: 
    <hash> 
        <request name="first">/2/photos/square/type.xml</request> 
        <error_code>21301</error_code> 
        <error>auth faild!</error> 
    </hash> 

    <request name="first">/2/photos/square/type.xml</request> 
    child node attribute name: first 
    child node name: request 
    child node len: 1 
    child data: /2/photos/square/type.xml 
    ======================================= 
    more help info to see: 
    两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。

    =====补充分割线================
    实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而且xml文件的头部声明也必须是utf-8,声明为其它编码时会报错。
    网上的解决办法都是替换xml文件头部的编码声明,然后转换编码为utf-8再用minidom解码,实际测试为可行,不过有点累赘的感觉。

    本节是 python解析xml模块封装代码 的第二部分。
    ====写xml内容的分割线=========

    复制代码 代码如下:

    #!/usr/bin/env python
    #encoding: utf-8 
    from xml.dom import minidom 

    class xmlwrite(object):
        """Build a small XML result document with minidom and save it to a file.

        Parent nodes are addressed with a minimal XPath-like string: ``/`` is
        the root element and every further step is ``Name`` or ``Name[i]``,
        where ``i`` is the zero-based index among the children of that name
        (``Name`` alone means ``Name[0]``).  Attribute predicates are not
        supported.
        """

        def __init__(self, resultfile):
            """Create an empty document whose root element is ``<api>``.

            resultfile -- path that save_xml() writes the document to.
            """
            self.resultfile = resultfile
            self.rootname = 'api'
            self.__create_xml_dom()

        def __create_xml_dom(self):
            """Create the DOM document and keep a reference to its root."""
            xmlimpl = minidom.getDOMImplementation()
            self.dom = xmlimpl.createDocument(None, self.rootname, None)
            self.root = self.dom.documentElement

        def __get_spec_node(self, xpath):
            """Return the node addressed by *xpath*, or None when it is missing.

            Empty steps (leading, trailing or doubled slashes) are ignored, so
            ``/`` resolves to the root element.

            Raises ValueError when the text inside ``[...]`` is not an integer.
            """
            node = self.root
            for step in xpath.split('/'):
                step = step.strip()
                if not step:
                    continue
                bracket = step.find('[')
                if bracket > -1:
                    name = step[:bracket]
                    index = int(step[bracket + 1:-1])
                else:
                    # No "[i]" suffix: the whole step is the name.  Slicing
                    # with the -1 returned by find() would drop its last
                    # character and never match.
                    name = step
                    index = 0
                matches = [child for child in node.childNodes
                           if child.nodeName == name]
                if index < 0 or index >= len(matches):
                    # Stop at the first step that cannot be resolved instead
                    # of silently continuing from the parent.
                    return None
                node = matches[index]
            return node

        def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
            """Append a new ``<nodename>`` element under the node at *parent*.

            parent    -- path of the parent node (see the class docstring).
                         When it cannot be resolved a message is printed and
                         the root element is used instead.
            nodename  -- tag name of the new element.
            value     -- text content as a string; a false value (None, '')
                         creates an element without content.
            attribute -- optional dict of attribute names to values.
            CDATA     -- store *value* in a CDATA section instead of a plain
                         text node.
            """
            node = self.dom.createElement(nodename)
            if value:
                if CDATA:
                    content = self.dom.createCDATASection(value)
                else:
                    content = self.dom.createTextNode(value)
                node.appendChild(content)
            # Attributes do not depend on the element having text content.
            if attribute and isinstance(attribute, dict):
                for key, attr_value in attribute.items():
                    node.setAttribute(key, attr_value)
            try:
                parentnode = self.__get_spec_node(parent)
            except (AttributeError, TypeError, ValueError):
                # parent is not a path string, or an index is malformed.
                parentnode = None
            if parentnode is None:
                print('Get parent Node Fail, Use the Root as parent Node')
                parentnode = self.root
            parentnode.appendChild(node)

        def _write_case_field(self, index, nodename, value, CDATA=False):
            """Append ``<nodename>`` to the *index*-th ``<Case>`` element."""
            self.write_node('/Case[%s]/' % index, nodename, value, CDATA=CDATA)

        def write_start_time(self, time):
            """Append ``<StartTime>`` with text *time* to the root."""
            self.write_node('/', 'StartTime', time)

        def write_end_time(self, time):
            """Append ``<EndTime>`` with text *time* to the root."""
            self.write_node('/', 'EndTime', time)

        def write_pass_count(self, count):
            """Append ``<PassCount>`` with text *count* to the root."""
            self.write_node('/', 'PassCount', count)

        def write_fail_count(self, count):
            """Append ``<FailCount>`` with text *count* to the root."""
            self.write_node('/', 'FailCount', count)

        def write_case(self):
            """Append a new, empty ``<Case>`` element to the root."""
            self.write_node('/', 'Case', None)

        def write_case_no(self, index, value):
            """Append ``<No>`` to the *index*-th case."""
            self._write_case_field(index, 'No', value)

        def write_case_url(self, index, value):
            """Append ``<URL>`` to the *index*-th case."""
            self._write_case_field(index, 'URL', value)

        def write_case_dbdata(self, index, value):
            """Append ``<DBData>`` to the *index*-th case."""
            self._write_case_field(index, 'DBData', value)

        def write_case_apidata(self, index, value):
            """Append ``<APIData>`` to the *index*-th case."""
            self._write_case_field(index, 'APIData', value)

        def write_case_dbsql(self, index, value):
            """Append ``<DBSQL>`` (as a CDATA section) to the *index*-th case."""
            self._write_case_field(index, 'DBSQL', value, CDATA=True)

        def write_case_apixpath(self, index, value):
            """Append ``<APIXPath>`` to the *index*-th case."""
            self._write_case_field(index, 'APIXPath', value)

        def save_xml(self):
            """Serialise the document as UTF-8 and write it to ``resultfile``."""
            # Serialise to bytes and write in binary mode so the bytes on
            # disk always match the utf-8 encoding named in the XML
            # declaration, whatever the platform's default text encoding is.
            data = self.dom.toxml(encoding='utf-8')
            # The with-block closes the file even if the write fails.
            with open(self.resultfile, 'wb') as outfile:
                outfile.write(data)

    if __name__ == '__main__':
        # Demo: build a report with two cases and save it to D:\test.xml.
        writer = xmlwrite(r'D:\test.xml')
        writer.write_start_time('2223')
        writer.write_end_time('444')
        writer.write_pass_count('22')
        writer.write_fail_count('33')

        # One row per case: (url, sql, dbdata, xpath, apidata).
        samples = [
            ('http://www.google.com', 'select * from ', 'dbtata',
             '/xpath', 'apidata'),
            ('http://www.baidu.com', 'select 1 from ', 'dbtata1',
             '/xpath1', 'apidata1'),
        ]

        # Create all <Case> containers before any of them is filled in.
        for _ in samples:
            writer.write_case()

        for index, (url, sql, dbdata, xpath, apidata) in enumerate(samples):
            writer.write_case_no(index, str(index))
            # The URL node is written twice per case, as in the original demo.
            writer.write_case_url(index, url)
            writer.write_case_url(index, url)
            writer.write_case_dbsql(index, sql)
            writer.write_case_dbdata(index, dbdata)
            writer.write_case_apixpath(index, xpath)
            writer.write_case_apidata(index, apidata)

        writer.save_xml()

    以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。
    比如:/root/child[1], 表示root的第2个child节点。

    js
    下一篇:没有了