Skip to content

Commit 079f79a

Browse files
authored
Improve page loading error handling (#58)
1 parent e761c22 commit 079f79a

5 files changed

Lines changed: 53 additions & 21 deletions

File tree

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ Available flags:
6262
- Specifies Redis server's address
6363
- Type: string
6464
- Default value: none
65+
- `-scraperfailurepause`
66+
- Number of seconds to pause after a failed task (`0` for immediate retry; a negative value pauses until the end of the current minute)
67+
- Type: integer
68+
- Default value: `-1`
6569

6670
You can use them like this:
6771
```bash

handlers/getStatus.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
)
1313

1414
var initTime = time.Now()
15-
var version = "1.17.0"
15+
var version = "1.18.0"
1616

1717
func getStatus(w http.ResponseWriter, r *http.Request) {
1818
json.NewEncoder(w).Encode(map[string]interface{}{

main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ func main() {
2626
flagProxyReloadWebhook := flag.String("proxyreloadwebhook", "", "Webhook address to request proxy reload")
2727
flagRateLimit := flag.Uint64("ratelimit", 512, "Maximum number of requests per minute per IP")
2828
flagRedis := flag.String("redis", "", "Redis connection string")
29+
flagScraperFailurePause := flag.Int("scraperfailurepause", -1, "Amount of time in seconds to wait after a failed task to idle")
2930
flagTaskRetries := flag.Uint("taskretries", 3, "Number of retries for a scraping task")
3031
flagVerbose := flag.Bool("verbose", false, "Print out additional logs into stdout")
3132
flag.Parse()
@@ -58,6 +59,7 @@ func main() {
5859
viper.Set("proxyreloadwebhook", *flagProxyReloadWebhook)
5960
viper.Set("ratelimit", int64(*flagRateLimit))
6061
viper.Set("redis", *flagRedis)
62+
viper.Set("scraperfailurepause", time.Duration(*flagScraperFailurePause)*time.Second)
6163
viper.Set("taskretries", int(*flagTaskRetries))
6264
viper.Set("verbose", *flagVerbose)
6365

scraper/handleTaskError.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package scraper
2+
3+
import (
4+
"bdo-rest-api/logger"
5+
"bdo-rest-api/utils"
6+
"fmt"
7+
"strconv"
8+
"time"
9+
10+
"github.com/gocolly/colly/v2"
11+
"github.com/spf13/viper"
12+
)
13+
14+
func handleTaskError(r *colly.Request, imperva bool, err error) {
15+
taskRetries, _ := strconv.Atoi(r.Ctx.Get("taskRetries"))
16+
17+
if imperva {
18+
logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", r.URL, taskRetries))
19+
} else {
20+
logger.Error(fmt.Sprintf("Error occurred while loading %v: %v, retries: %v", r.URL, err, taskRetries))
21+
}
22+
23+
if proxyReloadWebhook := viper.GetString("proxyreloadwebhook"); proxyReloadWebhook != "" {
24+
utils.SendDummyRequest(proxyReloadWebhook)
25+
}
26+
27+
if scraperFailurePause := viper.GetDuration("scraperfailurepause"); scraperFailurePause >= 0 {
28+
taskQueue.Pause(scraperFailurePause)
29+
} else {
30+
taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second)
31+
}
32+
33+
taskQueue.ConfirmTaskCompletion(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"))
34+
35+
if taskRetries < viper.GetInt("taskretries") {
36+
taskRegion := r.Ctx.Get("taskRegion")
37+
taskType := r.Ctx.Get("taskType")
38+
taskQueue.AddTask(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"), utils.BuildRequest(r.URL.String(), map[string]string{
39+
"taskRegion": taskRegion,
40+
"taskRetries": strconv.Itoa(taskRetries + 1),
41+
"taskType": taskType,
42+
}))
43+
}
44+
}

scraper/scraper.go

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@ func InitScraper() {
5858
})
5959

6060
scraper.OnError(func(r *colly.Response, err error) {
61-
logger.Error(fmt.Sprintf("Error occurred while loading %v: %v", r.Request.URL, err))
62-
taskQueue.ConfirmTaskCompletion(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"))
61+
handleTaskError(r.Request, false, err)
6362
})
6463

6564
scraper.OnResponse(func(r *colly.Response) {
@@ -80,24 +79,7 @@ func InitScraper() {
8079
})
8180

8281
if imperva {
83-
taskRetries, _ := strconv.Atoi(body.Request.Ctx.Get("taskRetries"))
84-
logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", body.Request.URL.String(), taskRetries))
85-
if proxyReloadWebhook := viper.GetString("proxyreloadwebhook"); proxyReloadWebhook != "" {
86-
utils.SendDummyRequest(proxyReloadWebhook)
87-
taskQueue.Pause(time.Second * 5)
88-
} else {
89-
taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second)
90-
}
91-
taskQueue.ConfirmTaskCompletion(taskClient, taskHash)
92-
93-
if taskRetries < viper.GetInt("taskretries") {
94-
taskQueue.AddTask(taskClient, taskHash, utils.BuildRequest(body.Request.URL.String(), map[string]string{
95-
"taskRegion": taskRegion,
96-
"taskRetries": strconv.Itoa(taskRetries + 1),
97-
"taskType": taskType,
98-
}))
99-
}
100-
82+
handleTaskError(body.Request, true, nil)
10183
return
10284
}
10385

0 commit comments

Comments (0)