b7fc0bf0121d — russes02 11 years ago
autocompare, scripts: Move memory use tracking into autocompare & logs
6 files changed, 115 insertions(+), 57 deletions(-)

M README.txt
M autocompare/main.go
M scripts/memuse.plot
M scripts/postprocess
M scripts/runstats
M scripts/stats
M README.txt +33 -20
@@ 3,33 3,46 @@ This autocompare slowly eats memory, at 
 	mkdir run1
 	cp page-report.csv run1
 	time autocompare -threads=8 -retries=3 run1/page-report.csv 2> run1/errs.txt | tee run1/out.csv
-	./scripts/memory | tee run1/memory.txt
+	./scripts/postprocess run1/page-report.csv
 
-run1/memory.txt will contain 10-second interval counts of: 
-	date : system VSZ RSZ .. autocompore VSZ RSZ
-
-	./scripts/filtermissing run1/page-report.csv run1/out.csv
+runX/page-report.csv 
+	The input data set from the Kapow bot run, for this run
 
-run1/page-report.csv.new will contain all of the pages that either have not yet
-been run (not in out.csv), or for which not all of the pages for the site have
-been run.
+runX/page-report.csv.new 
+	After processing, this file contains the data that was not run.
+	This can be used as the input for the next run.
 
-run1/out.csv.new will contain all of the pages for sites which have had all of
-their pages run.
+runX/out.csv 
+	The results of autocompare, unfiltered.
 
-	./scripts/mksites < run1/out.csv.new > run1/sites.csv
+runX/out.csv.new
+	The results of autocompare, filtered to remove pages for sites that
+	weren't fully processed. For example, if processing was interrupted,
+	not all of the pages of a site may have been compared; this file
+	contains only pages for sites where all of the site's pages were
+	compared.
+	
+runX/sites.csv 
+	The results of autocompare, collated to the site level. Collation rules
+	are:
 
-run1/sites.csv will contain success counts for all sites aggregated from their
-pages.
-
-	./scripts/stats run1
+	* If any page in the site fails, the site fails
+	* The NRMSD for the site is the highest NRMSD for any page in the site
+	* If any page has failing links, the links column will be false
 
-dumps a list of statistics about the run.
+runX/mem.dat 
+	Memory use statistics about the run.
 
-	./scripts/retrytimeouts run1 >> run1/page-report.csv.new
+runX/memuse.svgz 
+	A graph of the memory use statistics.
 
-will re-add all of the timeout failures to the new page report for the next run.
+runX/stats.txt 
+	Statistics about the run, including failure rates, summaries, and
+	timings.
 
-	./scripts/runstats run1
+runX/errs.txt 
+	Error log.  Contains run statistics at the end, but also information
+	about site timeouts and other errors.
 
-prints out progress statistics.
+runX/att.csv 
+	A list of sites that can be sent to AT&T (OK & links OK)

          
M autocompare/main.go +4 -1
@@ 55,9 55,10 @@ func main() {
 	// Memory printer
 	go func() {
 		mem := new(runtime.MemStats)
+		KB := uint64(1024)
 		for {
 			runtime.ReadMemStats(mem)
-			log.Printf("memory %d %d %d", mem.Alloc, mem.TotalAlloc, mem.Sys)
+			log.Printf("memory %d %d", mem.Alloc/KB, mem.Sys/KB)
 			time.Sleep(10 * time.Second)
 		}
 	}()

          
@@ 165,12 166,14 @@ func startProducers(urlStream chan Page)
 						proc1.Kill()
 						proc1.Release()
 						_, proc1 = forkPhantom(k * 2)
+						time.Sleep(2 * time.Second)
 					}
 					if proc2TimedOut {
 						log.Printf("kill/restart %s\n", uri2)
 						proc2.Kill()
 						proc2.Release()
 						_, proc2 = forkPhantom(k*2 + 1)
+						time.Sleep(2 * time.Second)
 					}
 				}
 				pageStream <- page

          
M scripts/memuse.plot +15 -6
@@ 1,16 1,25 @@ 
 #!/bin/bash
-gnuplot <<EOF
 
-set datafile separator " "
+if [[ $1 == "" ]]; then
+	echo "USAGE: $0 <dir>"
+	exit 1
+fi
+BASE=$1
+DAT=$BASE/mem.dat
+
+awk '/memory/ {print $2","$4","$5}' < $BASE/errs.txt > $DAT
+
+gnuplot <<EOF
+set datafile separator ","
 set term svg enhanced size 1200,900
-set output "$1/memuse.svg"
+set output "${BASE}/memuse.svg"
 set ylabel "Memory (KB)"
 set xlabel "Time"
 set xdata time  
-set timefmt "%s"
+set timefmt "%H:%M:%S"
 set xtics format "%H:%M:%S"
-plot '$1/mem.txt' using 1:6 title "VSZ" with lines, \
-     '$1/mem.txt' using 1:7 title "RSZ" with lines
+plot '${DAT}' using 1:2 title "Alloc" with lines, \
+     '${DAT}' using 1:3 title "System" with lines
 
 EOF
 gzip $1/memuse.svg

          
M scripts/postprocess +31 -10
@@ 1,22 1,43 @@ 
 #!/bin/bash
+# 	./scripts/filtermissing run1/page-report.csv run1/out.csv
+# 
+# run1/page-report.csv.new will contain all of the pages that either have not yet
+# been run (not in out.csv), or for which not all of the pages for the site have
+# been run.
+# 
+# run1/out.csv.new will contain all of the pages for sites which have had all of
+# their pages run.
+# 
+# 	./scripts/mksites < run1/out.csv.new > run1/sites.csv
+# 
+# run1/sites.csv will contain success counts for all sites aggregated from their
+# pages.
+# 
+# 	./scripts/stats run1
+# 
+# dumps a list of statistics about the run.
+# 
+# 	./scripts/retrytimeouts run1 >> run1/page-report.csv.new
+# 
+# will re-add all of the timeout failures to the new page report for the next run.
+# 
+# 	./scripts/runstats run1
+# 
+# prints out progress statistics.
 
 MYPATH=`dirname $0`
 
-PREV=$1
-if [[ $# -eq 0 ]]; then
-	echo "USAGE: $0 <dir>"
+INPUT=$1
+if [[ $# -ne 1 ]]; then
+	echo "USAGE: $0 <input>"
 	exit 1
 fi
+PREV=`dirname $1`
 
-${MYPATH}/filtermissing ${PREV}/page-report.csv ${PREV}/out.csv
+${MYPATH}/filtermissing ${INPUT} ${PREV}/out.csv
 ${MYPATH}/mksites < ${PREV}/out.csv.new > ${PREV}/sites.csv
 egrep -e 'OK.*true$' ${PREV}/sites.csv | awk -F, '{print $2","$4}' > ${PREV}/att.csv
-${MYPATH}/stats ${PREV} > ${PREV}/stats.txt
+${MYPATH}/stats ${INPUT} > ${PREV}/stats.txt
 ${MYPATH}/memuse.plot ${PREV}
 cp ${MYPATH}/../run_readme.txt ${PREV}/README.txt
 zip -r ${PREV}.zip ${PREV}
-
-#mkdir run$RUNNUM
-#mv run$PREV/page-report.csv.new run${RUNNUM}/page-report.csv
-#echo "time autocompare -threads=8 -retries=3 run${RUNNUM}/page-report.csv 2> run${RUNNUM}/errs.txt | tee run${RUNNUM}/out.csv"
-#echo "./scripts/memory | tee run${RUNNUM}/memory.txt"

          
M scripts/runstats +8 -5
@@ 15,12 15,15 @@ printf "Processed %d / %d (%g%%)\n" $P $
 E=`egrep 'FAIL|false' ${BASE}/out.csv | grep -v MIGRATION | wc -l`
 printf "Failures: %d (%g%%)\n" $E `dc -e "2k$E $P/100*p"`
 
-SS=`head -n1 ${BASE}/mem.txt | awk '{print $1}'`
-ES=`tail -n1 ${BASE}/mem.txt | awk '{print $1}'`
+SD=`grep memory ${BASE}/errs.txt | head -n 1 | awk '{print $1" "$2}'`
+SS=`date -d "$SD" +%s`
+ED=`grep memory ${BASE}/errs.txt | tail -n 1 | awk '{print $1" "$2}'`
+ES=`date -d "$ED" +%s`
 SECS=$(($ES - $SS))
 pph=`dc -e "2k$P $SECS 3600//p"`
 printf "Run time: %02d:%02d:%02d (%g p/h)\n" $(($SECS/3600)) $(($SECS%3600/60)) $(($SECS%60))  $pph
 
-ETA=`dc -e "4k$T $P-$pph/3600*p"`
-ETA=`dc -e "$ETA 1/p"`
-printf "Remaining time est.: %02d:%02d:%02d\n" $(($ETA/3600)) $(($ETA%3600/60)) $(($ETA%60))
+TIMEREMAIN=`dc -e "4k$T $P-$pph/3600*p"`
+TIMEREMAIN=`dc -e "$TIMEREMAIN 1/p"`
+ETA=`date -d "$TIMEREMAIN seconds" +%H:%M:%S`
+printf "Remaining time est.: %02d:%02d:%02d (%s)\n" $(($TIMEREMAIN/3600)) $(($TIMEREMAIN%3600/60)) $(($TIMEREMAIN%60)) $ETA

          
M scripts/stats +24 -15
@@ 1,15 1,22 @@ 
-#!/bin/sh
+#!/bin/bash
+
+if [[ $# -ne 1 ]]; then
+	echo "USAGE: $0 <input>"
+	exit 1
+fi
+INPUT=$1
+DIR=`dirname $INPUT`
 
-TOTAL_PAGE=`wc -l < $1/page-report.csv`
-TOTAL_SITE=`awk -F, '{print $3}' < $1/page-report.csv | sort | uniq | wc -l`
-COMP_PAGE=`wc -l <$1/out.csv.new`
-COMP_SITE=`wc -l <$1/sites.csv`
-FAIL_PAGE=`grep FAIL $1/out.csv.new | grep -v MIGRATION | wc -l`
-FAIL_SITE=`grep FAIL $1/sites.csv | grep -v MIGRATION | wc -l`
-AGGREGATED_FAIL_PAGE=`egrep 'FAIL|false$' $1/out.csv.new | grep -v MIGRATION | wc -l`
-AGGREGATED_FAIL_SITE=`egrep 'FAIL|false$' $1/sites.csv | grep -v MIGRATION | wc -l`
-LINK_FAIL_PAGE=`egrep "false$" $1/out.csv.new | wc -l`
-LINK_FAIL_SITE=`egrep "false$" $1/sites.csv | wc -l`
+TOTAL_PAGE=`wc -l < ${INPUT}`
+TOTAL_SITE=`awk -F, '{print $3}' < ${INPUT} | sort | uniq | wc -l`
+COMP_PAGE=`wc -l <${DIR}/out.csv.new`
+COMP_SITE=`wc -l <${DIR}/sites.csv`
+FAIL_PAGE=`grep FAIL ${DIR}/out.csv.new | grep -v MIGRATION | wc -l`
+FAIL_SITE=`grep FAIL ${DIR}/sites.csv | grep -v MIGRATION | wc -l`
+AGGREGATED_FAIL_PAGE=`egrep 'FAIL|false$' ${DIR}/out.csv.new | grep -v MIGRATION | wc -l`
+AGGREGATED_FAIL_SITE=`egrep 'FAIL|false$' ${DIR}/sites.csv | grep -v MIGRATION | wc -l`
+LINK_FAIL_PAGE=`egrep "false$" ${DIR}/out.csv.new | wc -l`
+LINK_FAIL_SITE=`egrep "false$" ${DIR}/sites.csv | wc -l`
 COMP_PAGE_P=`dc -e "$COMP_PAGE $TOTAL_PAGE 2k/100*p"`
 COMP_SITE_P=`dc -e "$COMP_SITE $TOTAL_SITE 2k/100*p"`
 FAIL_SITE_P=`dc -e "$FAIL_SITE $COMP_SITE 2k/100*p"`

          
@@ 18,7 25,7 @@ LINK_FAIL_SITE_P=`dc -e "$LINK_FAIL_SITE
 LINK_FAIL_PAGE_P=`dc -e "$LINK_FAIL_PAGE $COMP_PAGE 2k/100*p"`
 AGGREGATED_FAIL_PAGE_P=`dc -e "2k${AGGREGATED_FAIL_PAGE} ${COMP_PAGE}/100*p"`
 AGGREGATED_FAIL_SITE_P=`dc -e "2k${AGGREGATED_FAIL_SITE} ${COMP_SITE}/100*p"`
-TIMEOUTS_PAGE=`egrep 'FAIL timeout' $1/out.csv.new | wc -l`
+TIMEOUTS_PAGE=`egrep 'FAIL timeout' ${DIR}/out.csv.new | wc -l`
 TIMEOUTS_PAGE_P=`dc -e "2k$TIMEOUTS_PAGE $COMP_PAGE /100*p"`
 
 printf "Total pages       : %d\n" $TOTAL_PAGE

          
@@ 33,8 40,10 @@ printf "Aggregated page failures: %d (%g
 printf "Aggregated site failures: %d (%g%%)\n" $AGGREGATED_FAIL_SITE $AGGREGATED_FAIL_SITE_P
 printf "Timeouts          : %d (%g%%)\n" $TIMEOUTS_PAGE $TIMEOUTS_PAGE_P
 
-SS=`head -n1 $1/mem.txt | awk '{print $1}'`
-ES=`tail -n1 $1/mem.txt | awk '{print $1}'`
+SD=`grep memory ${DIR}/errs.txt | head -n 1 | awk '{print $1" "$2}'`
+SS=`date -d "$SD" +%s`
+ED=`grep memory ${DIR}/errs.txt | tail -n 1 | awk '{print $1" "$2}'`
+ES=`date -d "$ED" +%s`
 SECS=$(($ES - $SS))
-PPH=`dc -e "2k$COMP_PAGE $SECS 3600//p"`
+PPH=`dc -e "2k$COMP_PAGE $SECS 3600//p"`
 printf "Run time: %02d:%02d:%02d (%g p/h)\n" $(($SECS/3600)) $(($SECS%3600/60)) $(($SECS%60))  $PPH