Wikipedia:Elenchi generati offline/Voci monitorate non in vetrina/ordina.py

import wikipedia, pagegenerators, re

# Wiki the listed titles are looked up on (pywikipedia's configured site).
site = wikipedia.getSite()

# lista.txt holds one UTF-8 encoded page title per line.  The context manager
# closes the file even if reading fails; splitlines() copes with both LF and
# CRLF line endings.
with open('lista.txt', 'r') as f:
	lista = f.read().splitlines()

pages = []
for title in lista:
	# Decode before stripping so whitespace handling works on characters,
	# not on raw UTF-8 bytes.
	title = title.decode('utf8').strip()
	# A trailing newline or a blank line would otherwise become a Page with
	# an empty title.
	if not title:
		continue
	pages.append(wikipedia.Page(site, title))

# Wrap the pages so their text is fetched in batches instead of one by one.
gen = pagegenerators.PreloadingGenerator(pages)

# Opening (or self-closing) <ref ...> tag; group 1 is the raw attribute text.
# The \b stops <references ...> from matching, whatever attributes it carries.
_REF_TAG = re.compile(r'< *ref\b(.*?)/?>')
# Only a ref carrying a name= attribute can be reused in several places.
_REF_NAME = re.compile(r'\bname\s*=')


def _count_refs(txt):
	"""Return the number of distinct footnotes declared in wikitext *txt*.

	Every <ref> tag without a name= attribute counts once per occurrence.
	Tags with a name= attribute are counted once per distinct attribute
	text, so a footnote that is defined once and cited again through
	<ref name="x"/> is not counted twice.
	"""
	seen = set()
	total = 0
	for attrs in _REF_TAG.findall(txt):
		attrs = attrs.strip()
		if _REF_NAME.search(attrs):
			if attrs in seen:
				# Repeated use of an already counted named footnote.
				continue
			seen.add(attrs)
		total += 1
	return total


# Maps a footnote count (stored as a string) to the titles of the pages
# that have exactly that many footnotes.
d = {}
for p in gen:
	# NOTE(review): p.get() is not guarded; presumably it raises for missing
	# or redirect pages, which would abort the whole run - confirm against
	# the pywikipedia version in use.
	i = str(_count_refs(p.get()))
	d.setdefault(i, []).append(p.title())
	
# Footnote counts, highest first.  The keys of d are strings, so they are
# compared as integers; plain string ordering would put '9' above '10'.
keys = sorted(d, key=int, reverse=True)

# One wiki list item per page: "*[[Title]] (count)".  The context manager
# closes out.txt even if formatting or encoding a line raises.
with open('out.txt', 'w') as f:
	for k in keys:
		# Alphabetical order among pages sharing the same count.
		d[k].sort()
		for t in d[k]:
			line = u"*[[%s]] (%d)\n" % (t, int(k))
			f.write(line.encode('utf8'))