753138334a45 — russes02 12 years ago
autocompare: log memory use; use the logger for errors and timeouts; kill/restart the phantom process if the POST returns an error
scripts: postprocess resolves helper paths relative to its own directory; runstats takes the input file as its argument and derives the run directory from it
5 files changed, 43 insertions(+), 18 deletions(-)

M autocompare/main.go
M fetchpage.go
M kop.go
M scripts/postprocess
M scripts/runstats
M autocompare/main.go +14 -3
@@ 18,6 18,7 @@ import (
 	"strconv"
 	"sync"
 	"time"
+	"runtime"
 )
 
 const (

          
@@ 51,6 52,16 @@ func main() {
 		os.Exit(0)
 	}
 
+	// Memory printer
+	go func() {
+		mem := new(runtime.MemStats)
+		for {
+			runtime.ReadMemStats(mem)
+			log.Printf("memory %d %d %d", mem.Alloc, mem.TotalAlloc, mem.Sys)
+			time.Sleep(10 * time.Second)
+		}
+	}()
+
 	fname := flag.Arg(0)
 
 	urlStream := make(chan Page, MAX_BUFFER)

          
@@ 68,7 79,7 @@ func main() {
 		pages.Add(1)
 		for len(urlStream) > thresh {
 			if len(urlStream) > warnThresh {
-				fmt.Fprintf(os.Stderr, "Buffer is %d\n", len(urlStream))
+				log.Printf("Buffer is %d\n", len(urlStream))
 			}
 			time.Sleep(10 * time.Second)
 		}

          
@@ 150,13 161,13 @@ func startProducers(urlStream chan Page)
 					page.Try()
 					proc1TimedOut, proc2TimedOut := Fetch(uri1, uri2, timeout, page)
 					if proc1TimedOut {
-						fmt.Fprintf(os.Stderr, "timeout %s %s\n", uri1, page.OriginURL())
+						log.Printf("kill/restart %s\n", uri1)
 						proc1.Kill()
 						proc1.Release()
 						_, proc1 = forkPhantom(k * 2)
 					}
 					if proc2TimedOut {
-						fmt.Fprintf(os.Stderr, "timeout %s %s\n", uri2, page.CopyURL())
+						log.Printf("kill/restart %s\n", uri2)
 						proc2.Kill()
 						proc2.Release()
 						_, proc2 = forkPhantom(k*2 + 1)

          
M fetchpage.go +13 -4
@@ 3,6 3,7 @@ package autocompare
 import (
 	"crypto/md5"
 	"fmt"
+	"log"
 	"io/ioutil"
 	"net/http"
 	"net/url"

          
@@ 39,6 40,7 @@ func Fetch(phan1, phan2 string, timeout 
 	if page.OriginImage() == "" {
 		ra = <-resultA
 		if ra == nil {
+			log.Printf("timeout %s %s\n", phan1, page.OriginURL())
 			phan1TimedOut = true
 			msg = "timeout "
 		} else if ra.success {

          
@@ 50,6 52,7 @@ func Fetch(phan1, phan2 string, timeout 
 	if page.CopyImage() == "" {
 		rb = <-resultB
 		if rb == nil {
+			log.Printf("timeout %s %s\n", phan2, page.CopyURL())
 			phan2TimedOut = true
 			msg += "timeout"
 		} else if rb.success {

          
@@ 78,16 81,22 @@ func fetchPage(phantomUrl string, timeou
 	outFileName := filepath.Join(os.TempDir(), fname)
 
 	resp, err := http.PostForm(phantomUrl, url.Values{"output": {outFileName}, "address": {src}, "timeout": {timeout}})
-	if err == nil {
+	if resp != nil && resp.Body != nil {
 		defer resp.Body.Close()
 	}
-	if err != nil || resp.StatusCode == 500 {
+	if err != nil {
+		log.Printf("ERROR! posting form %v\n", err)
+		results <- nil
+		return
+	}
+	if resp != nil && resp.StatusCode == 500 {
+		log.Printf("ERROR! bad server response %s\n", resp.Status)
 		results <- &Result{"", false, true}
 		return
 	}
-	if resp.StatusCode != 200 {
+	if resp != nil && resp.StatusCode != 200 {
 		bdy, _ := ioutil.ReadAll(resp.Body)
-		fmt.Fprintf(os.Stderr, "ERROR! bad server response %s %s\n", resp.Status, string(bdy))
+		log.Printf("ERROR! bad server response %s %s\n", resp.Status, string(bdy))
 		results <- &Result{"", false, true}
 	} else {
 		results <- &Result{outFileName, true, true}

          
M kop.go +1 -0
@@ 44,3 44,4 @@ func (s *KOPSites) NextPage() Page {
 	}
 	return &KOP{BasePage{attempts: 0, subdomain: line[3], domain1: line[4], origPath: line[7], path: line[8], adID: line[6], origImg: "", copyImg: "", state: state, message: message, id: line[2]}}
 }
+// vim:ts=4:sw=4:noet

          
M scripts/postprocess +7 -5
@@ 1,17 1,19 @@ 
 #!/bin/bash
 
+MYPATH=`dirname $0`
+
 PREV=$1
 if [[ $# -eq 0 ]]; then
 	echo "USAGE: $0 <dir>"
 	exit 1
 fi
 
-./scripts/filtermissing ${PREV}/page-report.csv ${PREV}/out.csv
-./scripts/mksites < ${PREV}/out.csv.new > ${PREV}/sites.csv
+${MYPATH}/filtermissing ${PREV}/page-report.csv ${PREV}/out.csv
+${MYPATH}/mksites < ${PREV}/out.csv.new > ${PREV}/sites.csv
 egrep -e 'OK.*true$' ${PREV}/sites.csv | awk -F, '{print $2","$4}' > ${PREV}/att.csv
-./scripts/stats ${PREV} > ${PREV}/stats.txt
-./scripts/memuse.plot ${PREV}
-cp run_readme.txt ${PREV}/README.txt
+${MYPATH}/stats ${PREV} > ${PREV}/stats.txt
+${MYPATH}/memuse.plot ${PREV}
+cp ${MYPATH}/../run_readme.txt ${PREV}/README.txt
 zip -r ${PREV}.zip ${PREV}
 
 #mkdir run$RUNNUM

          
M scripts/runstats +8 -6
@@ 1,20 1,22 @@ 
 #!/bin/bash
 
 if [[ $1 == "" ]]; then
-	echo "USAGE: $0 <rundir>"
+	echo "USAGE: $0 <input>"
 	exit 1
 fi
+INPT=$1
+BASE=`dirname $1`
 
-P=`wc -l < $1/out.csv`
-T=`wc -l < $1/page-report.csv`
+P=`wc -l < ${BASE}/out.csv`
+T=`wc -l < ${INPT}`
 pc=`dc -e "2k$P $T/100*p" | awk -F . '{print $1}'`
 printf "Processed %d / %d (%g%%)\n" $P $T $pc
 
-E=`egrep 'FAIL|false' $1/out.csv | grep -v MIGRATION | wc -l`
+E=`egrep 'FAIL|false' ${BASE}/out.csv | grep -v MIGRATION | wc -l`
 printf "Failures: %d (%g%%)\n" $E `dc -e "2k$E $P/100*p"`
 
-SS=`head -n1 $1/mem.txt | awk '{print $1}'`
-ES=`tail -n1 $1/mem.txt | awk '{print $1}'`
+SS=`head -n1 ${BASE}/mem.txt | awk '{print $1}'`
+ES=`tail -n1 ${BASE}/mem.txt | awk '{print $1}'`
 SECS=$(($ES - $SS))
 pph=`dc -e "2k$P $SECS 3600//p"`
 printf "Run time: %02d:%02d:%02d (%g p/h)\n" $(($SECS/3600)) $(($SECS%3600/60)) $(($SECS%60))  $pph