Skip to content

Commit

Permalink
end2end: refresh to get paths discovered after initial request
Browse files Browse the repository at this point in the history
  • Loading branch information
matzf committed Jun 26, 2023
1 parent 2fd2949 commit ad25c26
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 34 deletions.
25 changes: 12 additions & 13 deletions daemon/internal/servers/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,16 @@ func (s *DaemonServer) paths(ctx context.Context,
defer cancelF()
}
srcIA, dstIA := addr.IA(req.SourceIsdAs), addr.IA(req.DestinationIsdAs)
go func() {
defer log.HandlePanic()
s.backgroundPaths(ctx, srcIA, dstIA, req.Refresh)
}()
paths, err := s.fetchPaths(ctx, &s.foregroundPathDedupe, srcIA, dstIA, req.Refresh)
if err != nil {
log.FromCtx(ctx).Debug("Fetching paths", "err", err,
"src", srcIA, "dst", dstIA, "refresh", req.Refresh)
// Retry with longer timeout in background
go func() {
defer log.HandlePanic()
s.backgroundPaths(opentracing.SpanFromContext(ctx),
srcIA, dstIA, req.Refresh)
}()
return nil, err
}
reply := &sdpb.PathsResponse{}
Expand Down Expand Up @@ -201,20 +203,17 @@ func linkTypeToPB(lt snet.LinkType) sdpb.LinkType {
}
}

func (s *DaemonServer) backgroundPaths(origCtx context.Context, src, dst addr.IA, refresh bool) {
backgroundTimeout := 5 * time.Second
deadline, ok := origCtx.Deadline()
if !ok || time.Until(deadline) > backgroundTimeout {
// the original context is large enough no need to spin a background fetch.
return
}
func (s *DaemonServer) backgroundPaths(span opentracing.Span, src, dst addr.IA, refresh bool) {

// backgroundTimeout is very long; this allows a large number of retries
backgroundTimeout := 15 * time.Second
ctx, cancelF := context.WithTimeout(context.Background(), backgroundTimeout)
defer cancelF()
var spanOpts []opentracing.StartSpanOption
if span := opentracing.SpanFromContext(origCtx); span != nil {
if span != nil {
spanOpts = append(spanOpts, opentracing.FollowsFrom(span.Context()))
}
span, ctx := opentracing.StartSpanFromContext(ctx, "fetch.paths.background", spanOpts...)
span, ctx = opentracing.StartSpanFromContext(ctx, "fetch.paths.background", spanOpts...)
defer span.Finish()
if _, err := s.fetchPaths(ctx, &s.backgroundPathDedupe, src, dst, refresh); err != nil {
log.FromCtx(ctx).Debug("Error fetching paths (background)", "err", err,
Expand Down
32 changes: 23 additions & 9 deletions tools/end2end/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"errors"
"flag"
"fmt"
"math"
"net"
"os"
"time"
Expand Down Expand Up @@ -247,7 +246,8 @@ type client struct {
port uint16
sdConn daemon.Connector

errorPaths map[snet.PathFingerprint]int // number of encountered errors/timeouts per path
errorPaths map[snet.PathFingerprint]struct{}
triedAllPaths bool
}

func (c *client) run() int {
Expand All @@ -274,7 +274,7 @@ func (c *client) run() int {
fmt.Sprintf("%v,[%v]:%d", integration.Local.IA, integration.Local.Host.IP, c.port))
c.sdConn = integration.SDConn()
defer c.sdConn.Close()
c.errorPaths = make(map[snet.PathFingerprint]int)
c.errorPaths = make(map[snet.PathFingerprint]struct{})
return integration.AttemptRepeatedly("End2End", c.attemptRequest)
}

Expand Down Expand Up @@ -313,7 +313,7 @@ func (c *client) attemptRequest(n int) bool {
tracing.Error(span, err)
logger.Error("Error receiving pong", "err", err)
if path != nil {
c.errorPaths[snet.Fingerprint(path)]++
c.errorPaths[snet.Fingerprint(path)] = struct{}{}
}
return false
}
Expand Down Expand Up @@ -375,20 +375,34 @@ func (c *client) getRemote(ctx context.Context, n int) (snet.Path, error) {
return err
}

refresh := false
if c.triedAllPaths {
// All paths have been tried, and as we're trying again it appears there was no success.
// We'll refresh and retry all available paths.
// The refresh could help in case that the beaconing has discovered new paths since the
// daemon/CS have first cached the paths to this destination.
refresh = true
c.errorPaths = make(map[snet.PathFingerprint]struct{})
c.triedAllPaths = false
}
paths, err := c.sdConn.Paths(ctx, remote.IA, integration.Local.IA,
daemon.PathReqFlags{Refresh: false})
daemon.PathReqFlags{Refresh: refresh})
if err != nil {
return nil, withTag(serrors.WrapStr("requesting paths", err))
}
// Select path that errored fewest times before
// Select first path that didn't error before.
var path snet.Path
lowestErrCount := math.MaxInt
lastAvailablePath := true
for _, p := range paths {
if e := c.errorPaths[snet.Fingerprint(p)]; e < lowestErrCount {
if _, ok := c.errorPaths[snet.Fingerprint(p)]; !ok {
if path != nil {
lastAvailablePath = false
break
}
path = p
lowestErrCount = e
}
}
c.triedAllPaths = lastAvailablePath
if path == nil {
return nil, withTag(serrors.New("no path found",
"candidates", len(paths),
Expand Down
19 changes: 17 additions & 2 deletions tools/integration/revocation_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ done

# Bring down routers.
echo "Revocation test"
echo "Stopping routers and waiting for ${SLEEP}s."
echo "Stopping routers."
./scion.sh mstop $REV_BRS
if [ $? -ne 0 ]; then
echo "Failed stopping routers."
Expand All @@ -36,5 +36,20 @@ fi
sleep 4
# Do another round of e2e test with retries
echo "Testing connectivity between all the hosts (with retries)."
bin/end2end_integration -log.console info -attempts 15 -subset 1-ff00:0:131#2-ff00:0:222 $DOCKER_ARGS
bin/end2end_integration -log.console info -attempts 15 -subset 1-ff00:0:131#2-ff00:0:222
exit $?

# Downed interfaces:
# 1-ff00:0:110#3
# 2-ff00:0:222#302
# 1-ff00:0:111#105
# 1-ff00:0:111#103
# 1-ff00:0:111#100
# 1-ff00:0:131#480
# 1-ff00:0:220#502
# 1-ff00:0:210#452
# 1-ff00:0:212#201
#
# Remaining good paths:
# Hops: [1-ff00:0:131 479>111 1-ff00:0:130 105>1 1-ff00:0:120 2>501 2-ff00:0:220 503>450 2-ff00:0:210 451>7 2-ff00:0:211 4>301 2-ff00:0:222]
# Hops: [1-ff00:0:131 479>111 1-ff00:0:130 104>2 1-ff00:0:110 1>6 1-ff00:0:120 2>501 2-ff00:0:220 503>450 2-ff00:0:210 451>7 2-ff00:0:211 4>301 2-ff00:0:222]
68 changes: 58 additions & 10 deletions tools/topodot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from collections import defaultdict

from plumbum import cli, local
from typing import Dict, List, NamedTuple
from typing import Dict, List, NamedTuple, Set

from topology.topo import LinkEP, LinkType, TopoID
from topology.scion_addr import ISD_AS
from topology.topo import LinkType, TopoID

graph_fmt = """digraph topo {{
\tnode [margin=0.2]
Expand Down Expand Up @@ -52,26 +53,61 @@
"""


class LinkEP:
def __init__(self, raw):
parts = raw.split('#')
if len(parts) != 2:
raise ValueError("Invalid link endpoint, expected exactly one '#'")
isd_as_brid = parts[0]
second_dash = isd_as_brid.find("-", isd_as_brid.find("-")+1)
if second_dash > 0:
self.isd_as = ISD_AS(isd_as_brid[:second_dash])
# self.brid = isd_as_brid[second_dash+1:]
else:
self.isd_as = ISD_AS(isd_as_brid)
self.ifid = int(parts[1])

def __eq__(self, other):
if isinstance(other, self.__class__):
return self.__dict__ == other.__dict__
else:
return False

def __hash__(self):
return hash(tuple(self.__dict__))

def __str__(self):
return "%s#%s" % (self.isd_as, self.ifid)

def __repr__(self):
return "<LinkEP: %s>" % self


class TopoDot(cli.Application):
show = cli.Flag(
["-s", "--show"],
help="Run dot and show the graph, " +
"instead of only outputting the dot file.",
)
mark_interfaces = cli.SwitchAttr(
["--mark-interfaces"],
cli.Set(LinkEP, csv=True),
)

def main(self, topofile):
if self.show:
print(self.mark_interfaces)
prefix = pathlib.PurePath(topofile).stem + '-'
with tempfile.NamedTemporaryFile(prefix=prefix,
suffix='.png') as tmp:
dot = local['dot']
xdg_open = local['xdg-open']
p = dot.popen(('-Tpng', '-o', tmp.name), encoding='utf-8')
p.stdin.write(topodot(topofile))
p = dot.popen(('-Tpng', '-Gdpi=300', '-o', tmp.name), encoding='utf-8')
p.stdin.write(topodot(topofile, self.mark_interfaces))
p.communicate()
xdg_open(tmp.name)
else:
sys.stdout.write(topodot(topofile))
sys.stdout.write(topodot(topofile, self.mark_interfaces))


class Link(NamedTuple):
Expand All @@ -80,20 +116,22 @@ class Link(NamedTuple):
type: LinkType


def topodot(topofile) -> str:
def topodot(topofile, mark_interfaces) -> str:
with open(topofile, 'r') as f:
topo_config = yaml.safe_load(f)

links = topo_links(topo_config)
clusters = topo_clusters(topo_config)
marked_interfaces = set(mark_interfaces)

def format_nodes(indent, ases):
fmt = '\t' * indent + '"%s"'
return '\n'.join(fmt % isd_as for isd_as in ases)

def format_links(indent, links):
fmt = '\t' * indent + '"%s" -> "%s"%s'
return '\n'.join(fmt % (link.a, link.b, fmt_attrs(link_attrs(link)))
return '\n'.join(fmt % (link.a.isd_as, link.b.isd_as,
fmt_attrs(link_attrs(link, marked_interfaces)))
for link in links)

formatted_clusters = []
Expand All @@ -116,20 +154,30 @@ def fmt_attrs(attrs: Dict[str, str]) -> str:
return ''


def link_attrs(link: Link) -> Dict[str, str]:
def link_attrs(link: Link, marked_interfaces: Set[LinkEP]) -> Dict[str, str]:
taillabel = interface_label(link.a.ifid, marked=link.a in marked_interfaces)
headlabel = interface_label(link.b.ifid, marked=link.b in marked_interfaces)
attrs = {
'taillabel': link.a.ifid,
'headlabel': link.b.ifid,
'taillabel': taillabel,
'headlabel': headlabel,
}
if link.type == LinkType.CORE:
attrs['dir'] = 'none'
if link.type == LinkType.PEER:
attrs['constraint'] = 'false'
attrs['dir'] = 'none'
attrs['style'] = 'dotted'
if link.a in marked_interfaces or link.b in marked_interfaces:
attrs['color'] = 'red'
return attrs


def interface_label(ifid: int, marked: bool) -> str:
if marked:
return "⚡" + str(ifid)
return str(ifid)


def topo_links(topo_config) -> List[Link]:
return [
Link(a=LinkEP(link['a']),
Expand Down

0 comments on commit ad25c26

Please sign in to comment.