Wikipedia:Elenchi generati offline/Collegamenti esterni/Elenco

Per generare l'elenco sono stati utilizzati i seguenti due script:

deadlinks.sh

#!/bin/bash
#
# deadlinks.sh, stage 1: dump the external links stored in the itwiki_p
# database and cut the URL list into chunks for the curl.sh workers.

# One row per external link on a page in namespace 0, printed as
# "page_title URL" (title and URL joined by a single space).
query='SELECT CONCAT(page_title, " ", el_to) FROM externallinks JOIN page ON el_from=page_id WHERE page_namespace=0'
mysql --defaults-file=~/replica.my.cnf --host=s2.labsdb --execute="$query" itwiki_p > url_raw.txt

# Keep only the lines that contain "http"; every other line is deleted in place.
sed -i '/http/!d' url_raw.txt

# The second whitespace-separated field of each line is the URL.
awk '{ print $2 }' url_raw.txt > url_pure.txt

# 20 chunks named x00..x19, cut on line boundaries so no URL is split in two.
split --number=l/20 --numeric-suffixes url_pure.txt

# collect_codes: concatenate the worker outputs c00..c19, in chunk order, into
# url_codes.txt and delete the chunk files (xNN) and worker outputs (cNN).
#
# url_codes.txt is emptied first. It is merged line by line with url_raw.txt
# later on, so codes left over from a previous run would shift every pairing.
collect_codes() {
	local i

	: > url_codes.txt

	for i in {00..19}
	do
		cat -- "c$i" >> url_codes.txt
		rm -- "x$i" "c$i"
	done
}

# Fan out: one background curl.sh worker per chunk x00..x19.
for i in {00..19}
do
	# curl.sh appends to cNN, so remove any leftover from an aborted run.
	rm -f -- "c$i"
	./curl.sh "$i" &
done

# Barrier: all workers must finish before their outputs are stitched together.
wait

collect_codes

# Pair line N of url_raw.txt ("title URL") with line N of url_codes.txt and
# normalise the result to "title URL code", separated by single spaces.
pr -m -t -s url_raw.txt url_codes.txt |
	awk '{ printf "%s %s %s\n", $1, $2, $3 }' > url_raw_codes.txt

# Order the entries by the third field (the status code), compared numerically.
sort -g -k 3 < url_raw_codes.txt > url_ord.txt

# Render every entry as a numbered-list wiki line: "# [[title]]: URL (code)".
awk '{ printf "# [[%s]]: %s (%s)\n", $1, $2, $3 }' url_ord.txt > url_form.txt

# Drop the entries whose code is 2xx or 3xx.
sed -i -E '/ \([23]..\)/d' url_form.txt

# What is left becomes 999.txt, the list the per-code files are carved from.
mv url_form.txt 999.txt

# split_by_status: move the entries of 999.txt into one file per status code.
#
# Input lines look like "# [[title]]: URL (code)". For each code listed below,
# the matching lines are written to CODE.txt without the trailing "(code)",
# since the file name already carries it, and are removed from 999.txt.
# Entries with any other code stay in 999.txt unchanged.
#
# Everything happens in a single awk pass that writes to files other than its
# input. Redirecting a command's output onto the file it is still reading
# truncates that file before it is read and leaves it empty.
#
# Returns non-zero, leaving 999.txt untouched, when 999.txt is missing or awk
# fails.
split_by_status() {
	local -a codes=(400 401 403 404 405 406 410 000 500)
	local code

	if [[ ! -f 999.txt ]]; then
		printf 'split_by_status: 999.txt not found\n' >&2
		return 1
	fi

	# Create (or empty) every per-code file so each one exists even when no
	# entry has that code.
	for code in "${codes[@]}"; do
		: > "$code.txt"
	done

	awk -v codes="${codes[*]}" '
		BEGIN {
			# Map the literal last field, e.g. "(404)", to its output file.
			n = split(codes, list, " ")
			for (i = 1; i <= n; i++)
				out["(" list[i] ")"] = list[i] ".txt"
		}
		($NF in out) { print $1, $2, $3 > (out[$NF]); next }
		{ print }
	' 999.txt > 999.txt.tmp || { rm -f -- 999.txt.tmp; return 1; }

	mv -- 999.txt.tmp 999.txt
}

split_by_status

curl.sh

#!/bin/bash
#
# curl.sh SUFFIX - request the headers of every URL listed in xSUFFIX and
# append the resulting HTTP status codes to cSUFFIX.

# check_urls SUFFIX
#
# Reads xSUFFIX line by line and appends exactly one status code to cSUFFIX
# for each input line. The caller pairs codes with URLs by line number, so a
# skipped or extra line would shift every later pairing.
#
# Returns 2 when SUFFIX is empty, non-zero when xSUFFIX cannot be read.
check_urls() {
	local suffix=$1
	local url code

	if [[ -z "$suffix" ]]; then
		printf 'usage: %s CHUNK_SUFFIX\n' "${0##*/}" >&2
		return 2
	fi

	# read -r takes each line verbatim: no word splitting and no filename
	# expansion of "?", "*" or "[" inside a URL. The "|| [[ -n ... ]]" part
	# keeps a last line that has no trailing newline.
	while IFS= read -r url || [[ -n "$url" ]]
	do
		# -I: headers only; -g: take {} and [] in the URL literally rather
		# than as curl's own URL globbing, which could produce several
		# transfers for one line. stdin is detached so curl can never
		# consume the URL list feeding this loop.
		code=$(curl -s -g -o /dev/null -I -w '%{http_code}' "$url" </dev/null)

		# If curl printed nothing at all, record 000 so the line count holds.
		printf '%s\n' "${code:-000}" >> "c$suffix"
	done < "x$suffix"
}

check_urls "$1"