完整代码如下:
def parse_and_remove(filename, path):
    """Incrementally parse an XML file, yielding each element found at *path*.

    Every yielded element is released once the consumer has finished with it,
    so the partially built tree does not accumulate already-processed
    elements.

    Args:
        filename: File name or binary file object handed to ``iterparse``.
        path: Slash-separated tag path of the elements to yield, relative to
            (and not including) the document's root element, for example
            ``'Job'`` or ``'row/row'``.

    Yields:
        Each matching element once its 'end' event has been seen. The element
        is cleared and detached from its parent as soon as the consumer asks
        for the next one, so it must not be kept across iterations.
    """
    path_parts = path.split('/')
    doc = iterparse(filename, ('start', 'end'))
    # Consume the root's 'start' event, but keep a handle on the root so that
    # matches sitting directly under it can be detached later.
    _, root = next(doc)

    tag_stack = []
    elem_stack = []
    for event, elem in doc:
        if event == 'start':
            tag_stack.append(elem.tag)
            elem_stack.append(elem)
        elif event == 'end':
            if tag_stack == path_parts:
                yield elem
                # clear() drops the element's children, attributes and text,
                # but the emptied element itself would still be listed among
                # its parent's children. Detach it as well; otherwise the
                # parent keeps one leftover element per match and memory
                # grows for the whole duration of the parse.
                elem.clear()
                parent = elem_stack[-2] if len(elem_stack) > 1 else root
                parent.remove(elem)
            # The root's own 'end' event arrives with both stacks empty
            # (its 'start' was consumed above), so there is nothing to pop.
            if tag_stack:
                tag_stack.pop()
                elem_stack.pop()
def count_stats(filepath, filename):
    """Stream the ``Job`` elements of *filepath* through ``parse_and_remove``.

    Only the beginning of this function is shown here; the visible part just
    walks the element stream without using the elements.

    Args:
        filepath: Path of the XML file to process.
        filename: Not used in the visible part of the function.
    """
    start = time.time()
    print('\nStart processing file ', filepath)
    data = parse_and_remove(filepath, 'Job')
    results = []
    count = 1
    # NOTE: the loop body is intentionally empty. Per the original author,
    # memory usage keeps growing even when nothing is done with each element.
    for job_node in data:
        pass
print 部分的代码如主楼所示。内存占用越来越大,直至 crash。
谢谢二位 @winglight2016 @weiwoxinyou