Strict version: retry any failed fetch and kill renders that outlive the timeout.
3 files changed, 52 insertions(+), 32 deletions(-)

M main.go
M page.go
M render.js
M main.go +47 -26
@@ -10,6 +10,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"sync"
+	"time"
 )
 
 const (

@@ -23,7 +24,7 @@ var retries *int
 
 func main() {
 	fname := flag.String("f", "", "CSV containing pages")
-	timeout = flag.Int64("timeout", 60, "Timeout on page load, in seconds")
+	timeout = flag.Int64("timeout", 30, "Timeout on page load, in seconds")
 	nthreads := flag.Int("threads", 50, "Number of concurrent connections")
 	retries = flag.Int("retries", 5, "Number of times to retry timed-out pages")
 	help := flag.Bool("h", false, "Print usage information")

@@ -81,8 +82,8 @@ func logger(output <-chan string, waiter
 func handler(threadsGroup *sync.WaitGroup, pages chan Page, output chan<- string) {
 	for page := range pages {
 		page.Attempts++
-		out, ok := fetch(page)
-		if !ok {
+		out, retry := fetch(page)
+		if retry {
 			if page.Attempts < *retries {
 				page.Attempts++
 				threadsGroup.Add(1)
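
Note: with the return value renamed from ok to retry, the branch finally reads the
way it executes. A self-contained sketch of the requeue pattern handler relies on;
Item, maxAttempts, and the single worker are illustrative, not this repo's types:

package main

import (
	"fmt"
	"sync"
)

// Item stands in for Page; maxAttempts for the -retries flag.
type Item struct{ ID, Attempts int }

const maxAttempts = 3

func main() {
	queue := make(chan Item, 8)
	var wg sync.WaitGroup

	wg.Add(1)
	queue <- Item{ID: 1}

	go func() {
		for it := range queue {
			it.Attempts++
			if it.Attempts < maxAttempts { // pretend every fetch asks for a retry
				wg.Add(1)
				// Requeue from a fresh goroutine so a full channel can
				// never deadlock the worker performing the send.
				go func(i Item) { queue <- i }(it)
			} else {
				fmt.Printf("item %d dropped after %d attempts\n", it.ID, it.Attempts)
			}
			wg.Done()
		}
	}()

	wg.Wait() // returns once every attempt, including requeues, is accounted for
}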

@@ -97,10 +98,16 @@ func handler(threadsGroup *sync.WaitGrou
 	}
 }
 
+type Response struct {
+	payload []byte
+	err     bool
+}
 
-func fetch(page Page) (msg string, ok bool) {
+
+func fetch(page Page) (msg string, retry bool) {
 	waiter := new(sync.WaitGroup)
-	results := make(chan []byte, 2)
+	results := make(chan Response, 2)
+	defer close(results)
 	waiter.Add(2)
 	go fetchPage(waiter, page.OriginURL(), results)
 	go fetchPage(waiter, page.CopyURL(), results)
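
Note: a compressed sketch of the fan-in fetch now performs, with fake byte slices
standing in for the md5 sums of the two rendered screenshots (the switch mirrors
the comparison in the next hunk):

package main

import (
	"bytes"
	"fmt"
	"sync"
)

type Response struct {
	payload []byte
	err     bool
}

func main() {
	results := make(chan Response, 2) // buffered so neither sender can block
	var wg sync.WaitGroup
	wg.Add(2)
	for _, fake := range [][]byte{[]byte("abc"), []byte("abc")} {
		go func(b []byte) {
			defer wg.Done()
			results <- Response{payload: b} // a real fetch would set err on failure
		}(fake)
	}
	wg.Wait()
	a, b := <-results, <-results
	switch {
	case a.err || b.err:
		fmt.Println("retry")
	case bytes.Equal(a.payload, b.payload):
		fmt.Println("OK")
	default:
		fmt.Println("FAIL COMPARE")
	}
}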

@@ -109,42 +116,56 @@ func fetch(page Page) (msg string, ok bo
 	ores := <-results
 	cres := <-results
 	var result string
-	ok = false
-	if len(ores) == 0 {
-		result = "UNABLE TO READ"
-	} else if bytes.Compare(ores, cres) == 0 {
+	retry = false
+	if ores.err {
+		result = string(ores.payload)
+		retry = true
+	} else if cres.err {
+		result = string(cres.payload)
+		retry = true
+	} else if bytes.Equal(ores.payload, cres.payload) {
 		result = "OK"
-		ok = true
-	} else if len(ores) == 0 {
-		result = "TIMEOUT 0"
-	} else if cres == nil {
-		result = "TIMEOUT 1"
 	} else {
-		result = "FAIL"
+		result = "FAIL COMPARE"
 	}
 	msg = fmt.Sprintf("%s,%s,%s,%d,%s", page.OriginURL(), page.CopyURL(), page.AdID, page.Attempts, result)
 	return
 }
 
 
-func fetchPage(waiter *sync.WaitGroup, src string, results chan<- []byte) {
-	waiter.Done()
+func fetchPage(waiter *sync.WaitGroup, src string, results chan<- Response) {
+	defer waiter.Done()
 	h := md5.New()
 	h.Write([]byte(src))
 	fname := fmt.Sprintf("autocompare%x.png", h.Sum(nil))
 	outFileName := filepath.Join(os.TempDir(), fname)
-	defer os.Remove(outFileName)
 
+	doneChan := make(chan bool, 1)    // buffered: the send must not block if the timeout wins
+	timeoutChan := make(chan bool, 1) // buffered: the send must not block if the render finishes first
+	go func() {
+		time.Sleep(time.Duration(*timeout) * time.Second)
+		timeoutChan <- true
+	}()
 	cmd := exec.Command("./phantomjs", "render.js", src, outFileName)
-	cmd.Run()
-	//outBytes, err := cmd.Output()
-	//fmt.Printf("%v: \"%s\", %v\n", cmd.Args, string(outBytes), nil)
+	go func() {
+		cmd.Run()
+		//outBytes, err := cmd.Output()
+		//fmt.Printf("%v: \"%s\", %v\n", cmd.Args, string(outBytes), nil)
+		doneChan <- true
+	}()
 
 	hash := md5.New()
-	outFile, err := os.Open(outFileName)
-	if err == nil {
-		io.Copy(hash, outFile)
-		outFile.Close()
+	select {
+	case <-doneChan:
+		outFile, err := os.Open(outFileName)
+		if err == nil {
+			io.Copy(hash, outFile)
+			outFile.Close()
+			os.Remove(outFileName)
+		}
+		results <- Response{hash.Sum(nil), false}
+	case <-timeoutChan:
+		if cmd.Process != nil { cmd.Process.Kill() } // guard: Run may not have spawned the process yet
+		results <- Response{[]byte("TIMEOUT"), true}
 	}
-	results <- hash.Sum(nil)
 }
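
Note: with both channels buffered, neither helper goroutine can leak when it loses
the race. A standalone sketch of the same run-with-timeout pattern, using Start/Wait
instead of Run so the process handle is guaranteed to exist before the select begins
(the command and duration are illustrative):

package main

import (
	"fmt"
	"os/exec"
	"time"
)

func runWithTimeout(timeout time.Duration, name string, args ...string) error {
	cmd := exec.Command(name, args...)
	if err := cmd.Start(); err != nil { // Start, not Run: cmd.Process is set before the race
		return err
	}
	done := make(chan error, 1) // buffered: the send never blocks if the timer wins
	go func() { done <- cmd.Wait() }()
	select {
	case err := <-done:
		return err
	case <-time.After(timeout):
		cmd.Process.Kill() // safe: Start already populated cmd.Process
		<-done             // reap the child; Wait returns once it dies
		return fmt.Errorf("%s timed out after %s", name, timeout)
	}
}

func main() {
	if err := runWithTimeout(2*time.Second, "sleep", "5"); err != nil {
		fmt.Println(err) // sleep timed out after 2s
	}
}

Splitting Run into Start plus Wait also removes the nil check around Kill: the
handle exists as soon as Start returns.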

M page.go +4 -5
@@ -1,9 +1,11 @@
 package main
 
 import "fmt"
+import "strings"
 
 type Page struct {
 	Attempts    int
+	Retry       bool
 	Subdomain   string
 	Domain1     string
 	OrigPath    string

@@ -11,12 +13,9 @@ type Page struct {
 	AdID        string
 }
 
-func (p Page) Domain() string {
-	return fmt.Sprintf("http://%s", p.Domain1)
-}
-
 func (p Page) OriginURL() string {
-	return fmt.Sprintf("http://%s.paginasamarillas.es%s", p.Domain1, p.OrigPath)
+	newDomain := strings.Replace(p.Domain1, "www", p.Subdomain, 1)
+	return fmt.Sprintf("http://%s%s", newDomain, p.OrigPath)
 }
 
 func (p Page) CopyURL() string {
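
Note: a quick illustration of the new OriginURL with assumed field values (the
real CSV column shapes may differ):

package main

import (
	"fmt"
	"strings"
)

func main() {
	domain1 := "www.paginasamarillas.es" // assumed shape of Page.Domain1
	subdomain := "madrid"                // assumed shape of Page.Subdomain
	origPath := "/restaurantes"

	// Swap the "www" label for the page's real subdomain, then append the path.
	newDomain := strings.Replace(domain1, "www", subdomain, 1)
	fmt.Printf("http://%s%s\n", newDomain, origPath)
	// prints: http://madrid.paginasamarillas.es/restaurantes
}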

M render.js +1 -1
@@ -17,7 +17,7 @@ if (phantom.args.length < 2 || phantom.a
 				window.setTimeout(function () {
 					page.render(output);
 					phantom.exit(status);
-				}, 3000);
+				}, 10000);
 			}
 		});
 }