55 "fmt"
66 "net"
77 "net/url"
8+ "strings"
89 "sync"
910 "time"
1011
@@ -13,64 +14,100 @@ import (
1314 "github.com/threefoldtech/zosbase/pkg/environment"
1415)
1516
// defaultRequestTimeout bounds a single reachability probe: checkService
// derives a context with this timeout before dialing the service address.
// This change raises the value from 5s to 10s.
16- const defaultRequestTimeout = 5 * time .Second
17+ const defaultRequestTimeout = 10 * time .Second
1718
19+ // networkCheck verifies that at least one instance of each service is reachable.
20+ // It returns the collected errors as a report for the perf healthcheck.
21+ // Side effect: it sets or deletes the not-reachable flag.
1822func networkCheck (ctx context.Context ) []error {
19- env := environment . MustGet ()
20- servicesUrl := [] string { env . FlistURL }
21-
22- servicesUrl = append ( append ( servicesUrl , env . SubstrateURL ... ), env . RelayURL ... )
23- servicesUrl = append ( append ( servicesUrl , env . ActivationURL ... ), env . GraphQL ... )
23+ var (
24+ wg sync. WaitGroup
25+ errMu sync. Mutex
26+ errors [] error
27+ )
2428
25- var errors []error
// Services are keyed by name; each value lists the interchangeable instances
// of that service. Single-URL services are wrapped in a one-element slice.
29+ env := environment .MustGet ()
30+ services := map [string ][]string {
31+ "substrate" : env .SubstrateURL ,
32+ "activation" : env .ActivationURL ,
33+ "relay" : environment .GetRelaysURLs (),
34+ "graphql" : env .GraphQL ,
35+ "hub" : {env .FlistURL },
36+ "kyc" : {env .KycURL },
37+ }
2638
27- var wg sync.WaitGroup
28- var mut sync.Mutex
29- for _ , serviceUrl := range servicesUrl {
// One goroutine per service (previously one per URL); at most one error is
// recorded per service, and errMu guards the shared errors slice.
39+ for service , instances := range services {
3040 wg .Add (1 )
31- go func (serviceUrl string ) {
41+ go func (service string , instances [] string ) {
3242 defer wg .Done ()
3343
34- err := checkService (ctx , serviceUrl )
35- if err != nil {
36- mut .Lock ()
37- defer mut .Unlock ()
38-
44+ if err := verifyAtLeastOneIsReachable (ctx , service , instances ); err != nil {
45+ errMu .Lock ()
3946 errors = append (errors , err )
47+ errMu .Unlock ()
4048 }
41- }(serviceUrl )
49+
50+ }(service , instances )
4251 }
52+
4353 wg .Wait ()
4454
// The flag is updated once, after every check has finished: deleted when no
// service failed, set otherwise. Failures to update the flag are only logged.
4555 if len (errors ) == 0 {
56+ log .Debug ().Msg ("all network checks passed" )
4657 if err := app .DeleteFlag (app .NotReachable ); err != nil {
47- log .Error ().Err (err ).Msg ("failed to delete readonly flag" )
58+ log .Error ().Err (err ).Msg ("failed to delete not-reachable flag" )
59+ }
60+ } else {
61+ log .Warn ().Int ("failed_checks" , len (errors )).Msg ("some network checks failed" )
62+ if err := app .SetFlag (app .NotReachable ); err != nil {
63+ log .Error ().Err (err ).Msg ("failed to set not-reachable flag" )
4864 }
4965 }
5066
5167 return errors
5268}
5369
// verifyAtLeastOneIsReachable probes the instances of one service in order
// with checkService and returns nil as soon as one of them succeeds.
// It returns an error when instances is empty, or when every probe fails; in
// the latter case the individual failure messages are joined with "; ".
// NOTE(review): failures are flattened with err.Error(), so the returned error
// does not wrap the underlying causes.
70+ func verifyAtLeastOneIsReachable (ctx context.Context , service string , instances []string ) error {
71+ if len (instances ) == 0 {
72+ return fmt .Errorf ("no instances provided for service %s" , service )
73+ }
74+
75+ var unreachableErrors []string
76+ for _ , instance := range instances {
77+ if err := checkService (ctx , instance ); err == nil {
78+ return nil
79+ } else {
80+ unreachableErrors = append (unreachableErrors , err .Error ())
81+ }
82+ }
83+
84+ return fmt .Errorf ("all %s instances are unreachable: %s" , service , strings .Join (unreachableErrors , "; " ))
85+ }
86+
// checkService checks a single service URL: it converts the URL into a dial
// address with parseUrl, then calls isReachable under a context limited to
// defaultRequestTimeout. It returns a wrapped error when the URL cannot be
// parsed or the address is not reachable, and nil otherwise.
// After this change it no longer sets the not-reachable flag itself; the
// removed lines below did.
5487func checkService (ctx context.Context , serviceUrl string ) error {
55- ctx , cancel := context .WithTimeout (ctx , defaultRequestTimeout )
88+ timeoutCtx , cancel := context .WithTimeout (ctx , defaultRequestTimeout )
5689 defer cancel ()
5790
58- address := parseUrl (serviceUrl )
59- err := isReachable (ctx , address )
91+ address , err := parseUrl (serviceUrl )
6092 if err != nil {
61- if err := app .SetFlag (app .NotReachable ); err != nil {
62- log .Error ().Err (err ).Msg ("failed to set not reachable flag" )
63- }
93+ return fmt .Errorf ("invalid URL %s: %w" , serviceUrl , err )
94+ }
95+
96+ if err := isReachable (timeoutCtx , address ); err != nil {
6497 return fmt .Errorf ("%s is not reachable: %w" , serviceUrl , err )
6598 }
6699
67100 return nil
68101}
69102
70- func parseUrl (serviceUrl string ) string {
103+ func parseUrl (serviceUrl string ) ( string , error ) {
71104 u , err := url .Parse (serviceUrl )
72105 if err != nil {
73- return ""
106+ return "" , fmt .Errorf ("failed to parse URL: %w" , err )
107+ }
108+
109+ if u .Host == "" {
110+ return "" , fmt .Errorf ("missing hostname in URL" )
74111 }
75112
76113 port := ":80"
@@ -82,11 +119,11 @@ func parseUrl(serviceUrl string) string {
82119 u .Host += port
83120 }
84121
85- return u .Host
122+ return u .Host , nil
86123}
87124
88125func isReachable (ctx context.Context , address string ) error {
89- d := net.Dialer { Timeout : defaultRequestTimeout }
126+ var d net.Dialer
90127 conn , err := d .DialContext (ctx , "tcp" , address )
91128 if err != nil {
92129 return fmt .Errorf ("failed to connect: %w" , err )
0 commit comments