Issue
I wonder how I can run this code on multiple files coming from one directory:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import os
import xml.etree.ElementTree as ET
# Parse one XML export and pull out the part number together with the
# component numbers of the gear housing and the power pack.
tree = ET.parse('C:/Users/STJ2TW/Desktop/Pliki XML/0204N01323-00_2021082310500900')
root = tree.getroot()

# The part number is the `name` attribute of the <Type> element; if several
# <Type> elements exist, the last one visited is the one that is kept.
for type_elem in root.iter('Type'):
    PN = type_elem.get('name')
    print(f"Part number: {PN}")

# Only two named components are of interest; each carries its number in the
# `name` attribute of a nested <ComponentNo> element.
for component in root.iter('Component'):
    CName = component.get('name')
    if CName == 'Pos010_GearHousing':
        for component_no in component.iter('ComponentNo'):
            GH = component_no.get('name')
            print(f"Gear Housing: {GH}")
    elif CName == 'Pos058_PowerPack':
        for component_no in component.iter('ComponentNo'):
            PP = component_no.get('name')
            print(f"Power Pack: {PP}")

# Single-row table indexed by part number, exported as a ';'-separated CSV.
row = {
    "Part number:": [PN],
    "Gear Housing:": [GH],
    "Power Pack:": [PP],
}
df = pd.DataFrame(row).set_index('Part number:', drop=True)
df.to_csv("C:/Users/STJ2TW/Desktop/Pliki XML/plik.csv", sep=";")
df.head(10)
I think I should go with this:
# Walk the directory and parse every file whose name ends in '.xml'.
path = 'C:/Users/STJ2TW/Desktop/Pliki XML/'
for filename in os.listdir(path):
    if filename.endswith('.xml'):
        # os.listdir returns bare names, so rebuild the full path before parsing.
        fullname = os.path.join(path, filename)
        tree = ET.parse(fullname)
But I don't know how to apply the rest of the code to each of these files. Would a loop be helpful? Thanks in advance.
Solution
This should work, although it's hard to say for sure without knowing the structure of the XML file; it is based on the code you provided.
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import os
# Build one CSV row per XML file found in `path`.
#
# For every file ending in '.xml' the script reads the `name` attribute of the
# <Type> element (the part number) and of the <ComponentNo> elements nested
# inside the 'Pos010_GearHousing' and 'Pos058_PowerPack' <Component> elements.
# All rows are gathered in `master`, turned into a DataFrame indexed by part
# number, and written to a ';'-separated CSV file.
path = 'C:/Users/STJ2TW/Desktop/Pliki XML/'
columns = ["Part Number", "Gear Housing", "Power Pack"]
master = []  # one dictionary (row) per parsed file

for filename in os.listdir(path):
    if not filename.endswith('.xml'):
        continue
    fullname = os.path.join(path, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    # One record per file, with every column pre-filled with None so that a
    # file lacking a <Type> or one of the components still yields a full row.
    # (Indexing a per-<Type> list with the position of a <Component>, as an
    # earlier version did, raises IndexError or fills the wrong row.)
    item = dict.fromkeys(columns)

    # If a file holds several matching elements, the last one visited wins,
    # exactly as in the single-file version of this script.
    for Type in root.iter('Type'):
        PN = Type.get('name')
        item["Part Number"] = PN
        print(f"Part number: {PN}")

    for Component in root.iter('Component'):
        CName = Component.get('name')
        if CName == 'Pos010_GearHousing':
            for ComponentNo in Component.iter('ComponentNo'):
                GH = ComponentNo.get('name')
                item["Gear Housing"] = GH
                print(f"Gear Housing: {GH}")
        elif CName == 'Pos058_PowerPack':
            for ComponentNo in Component.iter('ComponentNo'):
                PP = ComponentNo.get('name')
                item["Power Pack"] = PP
                print(f"Power Pack: {PP}")

    master.append(item)  # add this file's row to the master list

# Passing `columns` keeps the 'Part Number' column present even when no file
# matched, so set_index does not fail on an empty result.
df = pd.DataFrame(master, columns=columns)
df = df.set_index('Part Number', drop=True)
df.to_csv("C:/Users/STJ2TW/Desktop/Pliki XML/plik.csv", sep=";")
df.head(10)
Answered By - Alexander
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.